Skip to content

Commit c8196e4

Browse files
committed
Merge branch 'main' into extsensitive
2 parents 51dae16 + cfa1753 commit c8196e4

105 files changed

Lines changed: 10524 additions & 1415 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 338 additions & 33 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ members = [
88
"shared/yeast-macros",
99
"ruby/extractor",
1010
"unified/extractor",
11+
"unified/extractor/tree-sitter-swift",
1112
"rust/extractor",
1213
"rust/extractor/macros",
1314
"rust/ast-generator",

MODULE.bazel

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ use_repo(
102102
tree_sitter_extractors_deps,
103103
"vendor_ts__anyhow-1.0.100",
104104
"vendor_ts__argfile-0.2.1",
105+
"vendor_ts__cc-1.2.61",
105106
"vendor_ts__chalk-ir-0.104.0",
106107
"vendor_ts__chrono-0.4.42",
107108
"vendor_ts__clap-4.5.48",
@@ -149,11 +150,12 @@ use_repo(
149150
"vendor_ts__tracing-subscriber-0.3.20",
150151
"vendor_ts__tree-sitter-0.26.8",
151152
"vendor_ts__tree-sitter-embedded-template-0.25.0",
153+
"vendor_ts__tree-sitter-generate-0.26.8",
152154
"vendor_ts__tree-sitter-json-0.24.8",
155+
"vendor_ts__tree-sitter-language-0.1.5",
153156
"vendor_ts__tree-sitter-python-0.23.6",
154157
"vendor_ts__tree-sitter-ql-0.23.1",
155158
"vendor_ts__tree-sitter-ruby-0.23.1",
156-
"vendor_ts__tree-sitter-swift-0.7.2",
157159
"vendor_ts__triomphe-0.1.14",
158160
"vendor_ts__ungrammar-1.16.1",
159161
"vendor_ts__zstd-0.13.3",

csharp/ql/src/Security Features/CWE-502/UnsafeDeserialization.qhelp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,17 @@
77
<p>Deserializing an object from untrusted input may result in security problems, such
88
as denial of service or remote code execution.</p>
99

10+
<p>
11+
Note that a deserialization method is only dangerous if it can instantiate
12+
arbitrary classes. Serialization frameworks that use a schema to instantiate
13+
only expected, predefined types are generally not tracked by this query. Such
14+
frameworks are generally safe with respect to arbitrary-class-instantiation and
15+
gadget-chain attacks when the schema is trusted and does not permit
16+
user-controlled type resolution. However, care must be taken to ensure the schema
17+
strictly limits the allowed types. Permitting common standard library classes
18+
can still leave the application vulnerable to gadget-chain attacks.
19+
</p>
20+
1021
</overview>
1122
<recommendation>
1223

csharp/ql/src/Security Features/CWE-502/UnsafeDeserializationUntrustedInput.qhelp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,17 @@
77
<p>Deserializing an object from untrusted input may result in security problems, such
88
as denial of service or remote code execution.</p>
99

10+
<p>
11+
Note that a deserialization method is only dangerous if it can instantiate
12+
arbitrary classes. Serialization frameworks that use a schema to instantiate
13+
only expected, predefined types are generally not tracked by this query. Such
14+
frameworks are generally safe with respect to arbitrary-class-instantiation and
15+
gadget-chain attacks when the schema is trusted and does not permit
16+
user-controlled type resolution. However, care must be taken to ensure the schema
17+
strictly limits the allowed types. Permitting common standard library classes
18+
can still leave the application vulnerable to gadget-chain attacks.
19+
</p>
20+
1021
</overview>
1122
<recommendation>
1223

go/extractor/BUILD.bazel

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
load("@rules_go//go:def.bzl", "go_library")
1+
load("@rules_go//go:def.bzl", "go_library", "go_test")
22
load("@rules_java//java:defs.bzl", "java_library")
33
load("@rules_pkg//pkg:mappings.bzl", "pkg_files")
44

@@ -60,3 +60,10 @@ pkg_files(
6060
},
6161
visibility = ["//go:__pkg__"],
6262
)
63+
64+
go_test(
65+
name = "extractor_test",
66+
srcs = ["extractor_test.go"],
67+
embed = [":extractor"],
68+
deps = ["@org_golang_x_tools//go/packages"],
69+
)

go/extractor/extractor.go

Lines changed: 65 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,63 @@ func init() {
5959
}
6060
}
6161

62+
// isExactTestPackage checks if a package ID represents an exact test match.
63+
// Returns true for IDs like "github.com/foo/bar [github.com/foo/bar.test]"
64+
// Returns false for IDs like "github.com/foo/bar [github.com/foo/bar/nested.test]"
65+
func isExactTestPackage(pkg *packages.Package) bool {
66+
// Test packages have IDs in the format: "pkgpath [pkgpath.test]"
67+
// or for nested test dependencies: "pkgpath [pkgpath/nested.test]"
68+
expectedTestID := pkg.PkgPath + " [" + pkg.PkgPath + ".test]"
69+
return pkg.ID == expectedTestID
70+
}
71+
72+
// isBetterPackage determines if pkg is a better choice than current for extraction.
73+
// Preferences:
74+
// 1. Exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
75+
// 2. More Syntax nodes (more files to extract)
76+
// 3. Longer ID string as tiebreaker
77+
func isBetterPackage(pkg, current *packages.Package) bool {
78+
pkgIsExact := isExactTestPackage(pkg)
79+
currentIsExact := isExactTestPackage(current)
80+
81+
// Prefer exact test packages
82+
if pkgIsExact != currentIsExact {
83+
return pkgIsExact
84+
}
85+
86+
// Prefer packages with more syntax nodes (more files)
87+
pkgSyntaxCount := len(pkg.Syntax)
88+
currentSyntaxCount := len(current.Syntax)
89+
if pkgSyntaxCount != currentSyntaxCount {
90+
return pkgSyntaxCount > currentSyntaxCount
91+
}
92+
93+
// Fall back to string length
94+
return len(pkg.ID) > len(current.ID)
95+
}
96+
97+
// selectBestPackages builds a map from package paths to their best package variants.
98+
// In the context of a `go test -c` compilation, we see the same package more than
99+
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
100+
// that contains and is used by test code.
101+
// We prefer the version with the most complete test coverage, which is typically:
102+
// 1. The exact test package (e.g., "pkg [pkg.test]") over nested test dependencies
103+
// 2. The package with the most Syntax nodes (most files to extract)
104+
// 3. The longest ID string as a tiebreaker
105+
func selectBestPackages(pkgs []*packages.Package) map[string]*packages.Package {
106+
bestPackageIds := make(map[string]*packages.Package)
107+
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
108+
if bestSoFar, present := bestPackageIds[pkg.PkgPath]; present {
109+
if isBetterPackage(pkg, bestSoFar) {
110+
bestPackageIds[pkg.PkgPath] = pkg
111+
}
112+
} else {
113+
bestPackageIds[pkg.PkgPath] = pkg
114+
}
115+
})
116+
return bestPackageIds
117+
}
118+
62119
// ExtractWithFlags extracts the packages specified by the given patterns and build flags
63120
func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool, sourceRoot string) error {
64121
startTime := time.Now()
@@ -153,22 +210,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
153210

154211
pkgsNotFound := make([]string, 0, len(pkgs))
155212

156-
// Build a map from package paths to their longest IDs--
157-
// in the context of a `go test -c` compilation, we will see the same package more than
158-
// once, with IDs like "abc.com/pkgname [abc.com/pkgname.test]" to distinguish the version
159-
// that contains and is used by test code.
160-
// For our purposes it is simplest to just ignore the non-test version, since the test
161-
// version seems to be a superset of it.
162-
longestPackageIds := make(map[string]string)
163-
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
164-
if longestIDSoFar, present := longestPackageIds[pkg.PkgPath]; present {
165-
if len(pkg.ID) > len(longestIDSoFar) {
166-
longestPackageIds[pkg.PkgPath] = pkg.ID
167-
}
168-
} else {
169-
longestPackageIds[pkg.PkgPath] = pkg.ID
170-
}
171-
})
213+
// Build a map from each package path to the best variant of that package
214+
bestPackageIds := selectBestPackages(pkgs)
172215

173216
// Do a post-order traversal and extract the package scope of each package
174217
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
@@ -257,15 +300,15 @@ func ExtractWithFlags(buildFlags []string, patterns []string, extractTests bool,
257300
// extract AST information for all packages
258301
packages.Visit(pkgs, nil, func(pkg *packages.Package) {
259302

260-
// If this is a variant of a package that also occurs with a longer ID, skip it;
303+
// If this is a variant of a package that also occurs with a better ID, skip it;
261304
// otherwise we would extract the same file more than once including extracting the
262305
// body of methods twice, causing database inconsistencies.
263306
//
264-
// We prefer the version with the longest ID because that is (so far as I know) always
265-
// the version that defines more entities -- the only case I'm aware of being a test
266-
// variant of a package, which includes test-only functions in addition to the complete
267-
// contents of the main variant.
268-
if pkg.ID != longestPackageIds[pkg.PkgPath] {
307+
// We prefer the version with the most complete test coverage, prioritizing:
308+
// 1. Exact test packages (e.g., "pkg [pkg.test]") over nested test dependencies
309+
// 2. Packages with more Syntax nodes (more files to extract)
310+
// 3. Longer ID strings as a tiebreaker
311+
if pkg.ID != bestPackageIds[pkg.PkgPath].ID {
269312
return
270313
}
271314

0 commit comments

Comments
 (0)