-
Notifications
You must be signed in to change notification settings - Fork 337
Implement SCA Reachability runtime detection: report vulnerable classes and callsites via telemetry #11352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Implement SCA Reachability runtime detection: report vulnerable classes and callsites via telemetry #11352
Changes from all commits
28f12e0
e607887
fb9d011
62b290d
93d58f2
a5ccd80
f8f9d02
82ea806
39eef44
dc8ffd3
849f376
3ea0e05
525a81c
7f5e116
e0c7fee
a79907d
77ba03f
7c69a89
19a5813
3b76b33
6008ac9
17146c0
750b3c3
fdb74e4
b90b654
fdcb421
579bbd0
9c89c1b
00185f1
b3582e8
a12d3f9
32fb0d1
54332fa
d469821
ab5850b
2fbf3ed
bbd32bb
d312b48
d6a419e
310aa66
e0a2067
b0421ab
364fc13
b529d09
90be403
5dd23b1
164a838
2e1e8ee
058a344
b425f33
6925358
24c53f0
0da4c37
354cd35
5a1400a
50df877
58df388
c327257
ba0aa7a
7b449cf
05e2537
42039ba
7eb20bf
1d29fb6
b00c531
53c36f5
ca35526
834c65c
54a7fa7
6904082
0255588
17b4b4a
41af332
11fd65f
e6ba094
a2dc4fc
782e028
504349a
2de3cb2
baf5389
c2a70f2
2d83337
7c7aff3
a8b04c3
a5e55c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,5 +24,6 @@ start_server "profiling" "-javaagent:${TRACER} -Ddd.profiling.enabled=true -Dser | |
| start_server "appsec" "-javaagent:${TRACER} -Ddd.appsec.enabled=true -Dserver.port=8083" "taskset -c 37-38 " & | ||
| start_server "iast" "-javaagent:${TRACER} -Ddd.iast.enabled=true -Dserver.port=8084" "taskset -c 39-40 " & | ||
| start_server "code_origins" "-javaagent:${TRACER} -Ddd.code.origin.for.spans.enabled=true -Dserver.port=8085" "taskset -c 41-42 " & | ||
| start_server "sca" "-javaagent:${TRACER} -Ddd.appsec.enabled=true -Ddd.appsec.sca.enabled=true -Dserver.port=8086" "taskset -c 43-44 " & | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you validate that cores 43 and 44 are available so that we can pin to them?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not explicitly - I assumed it from the pattern (the second socket covers cores 24–47, and the existing servers use up to 41–42). That said, looking at the previous benchmark run, the SCA variant was missing from the load results entirely; it turns out we also forgot to add it to |
||
|
|
||
| wait | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,120 @@ | ||
| package datadog.gradle.plugin.sca | ||
|
|
||
| import datadog.gradle.sca.GhsaEnrichmentParser | ||
| import groovy.json.JsonOutput | ||
| import groovy.json.JsonSlurper | ||
| import java.net.HttpURLConnection | ||
| import java.net.URL | ||
| import org.gradle.api.GradleException | ||
| import org.gradle.api.Plugin | ||
| import org.gradle.api.Project | ||
|
|
||
/**
 * Registers the `generateScaCvesJson` task that downloads GHSA enrichments from
 * `sca-reachability-database` and generates `sca_cves.json` bundled in the appsec JAR.
 *
 * This is a **temporary** build-time approach. The symbol database will be delivered
 * via Remote Config in a future iteration, at which point this plugin and the committed
 * `sca_cves.json` file will be removed.
 *
 * Usage: `apply plugin: 'dd-trace-java.sca-enrichments'`. The task runs only when
 * `-PrefreshSca` is passed or the output file is absent; CI uses the committed copy.
 *
 * When the `java` plugin is applied, `processResources` is wired to depend on the task and
 * re-serializes the copied `sca_cves.json` in compact form.
 */
@Suppress("unused")
class ScaEnrichmentsPlugin : Plugin<Project> {

  companion object {
    private const val SCA_ENRICHMENTS_API =
      "https://api.github.com/repos/DataDog/sca-reachability-database/contents/enrichments"
  }

  override fun apply(project: Project) {
    val outputFile = project.file("src/main/resources/sca_cves.json")

    val generateTask =
      project.tasks.register("generateScaCvesJson") {
        description =
          "Downloads GHSA enrichments from sca-reachability-database and updates " +
            "src/main/resources/sca_cves.json. Run with -PrefreshSca to force a refresh. " +
            "sca_cves.json is committed to the repo so CI does not need network access."
        group = "build"
        outputs.file(outputFile)
        // The task declares an output but no inputs, so Gradle would report it UP-TO-DATE
        // whenever the file is unchanged since the previous execution, silently ignoring a
        // repeated -PrefreshSca. Whether to run is decided exclusively by onlyIf below.
        outputs.upToDateWhen { false }
        onlyIf { project.hasProperty("refreshSca") || !outputFile.exists() }

        doLast {
          val token = System.getenv("GITHUB_TOKEN")

          logger.lifecycle("Fetching GHSA enrichment index from GitHub...")
          // Validate the payload shape instead of an unchecked cast, so an unexpected
          // response fails with a readable build error rather than a ClassCastException.
          val listing =
            githubFetch(SCA_ENRICHMENTS_API, token) as? List<*>
              ?: throw GradleException(
                "Expected a JSON array from $SCA_ENRICHMENTS_API but got a different payload.")
          val ghsaFiles =
            listing.filterIsInstance<Map<*, *>>().filter { fileInfo ->
              fileInfo["name"]?.toString()?.endsWith(".json") == true && fileInfo["type"] == "file"
            }
          logger.lifecycle("Found ${ghsaFiles.size} enrichment files")

          val entries = mutableListOf<Any>()
          ghsaFiles.forEach { fileInfo ->
            // "name" is known to be non-null here: the filter above required it.
            val fileName = fileInfo["name"].toString()
            val ghsaId = fileName.removeSuffix(".json")
            val downloadUrl =
              fileInfo["download_url"]?.toString()
                ?: throw GradleException("GitHub listing entry $fileName has no download_url.")
            val rawContent = githubFetchRaw(downloadUrl, token)
            entries.addAll(GhsaEnrichmentParser.parse(ghsaId, rawContent))
          }

          outputFile.writeText(JsonOutput.toJson(mapOf("version" to 1, "entries" to entries)))
          logger.lifecycle(
            "sca_cves.json: ${entries.size} entries from ${ghsaFiles.size} GHSA files")
          logger.lifecycle(
            "Remember to commit src/main/resources/sca_cves.json after updating the database.")
        }
      }

    // Defer wiring until after the java plugin adds processResources.
    project.pluginManager.withPlugin("java") {
      project.tasks.named("processResources") {
        dependsOn(generateTask)
        doLast {
          // Minify only sca_cves.json — not all JSON files in the module output.
          // Each output directory is scanned on its own: joining them with asPath would
          // yield a path-separator-delimited string that is not a valid directory as soon
          // as the task has more than one output root.
          outputs.files.files
            .filter { outputDir -> outputDir.isDirectory }
            .forEach { outputDir ->
              project
                .fileTree(mapOf("dir" to outputDir, "includes" to listOf("**/sca_cves.json")))
                .forEach { f -> f.writeText(JsonOutput.toJson(JsonSlurper().parse(f))) }
            }
        }
      }
    }
  }

  /**
   * Opens a GET connection to [url] with the GitHub API headers set and, when [token] is
   * non-empty, a bearer `Authorization` header.
   *
   * @return the connection, already known to have answered HTTP 200; the caller must
   *   disconnect it
   * @throws GradleException if the response status is not 200 (the connection is
   *   disconnected before throwing)
   */
  private fun githubConnect(url: String, token: String?): HttpURLConnection {
    val connection = URL(url).openConnection() as HttpURLConnection
    connection.setRequestProperty("Accept", "application/vnd.github+json")
    connection.setRequestProperty("X-GitHub-Api-Version", "2022-11-28")
    if (!token.isNullOrEmpty()) {
      connection.setRequestProperty("Authorization", "Bearer $token")
    }
    connection.connectTimeout = 10_000
    connection.readTimeout = 30_000
    val code = connection.responseCode
    if (code != 200) {
      // Release the connection on the failure path too; callers only disconnect on success.
      connection.disconnect()
      // Only hint at rate limiting for the statuses GitHub uses for it, so that e.g. a 404
      // is not misreported as a quota problem.
      val hint =
        if (code == 403 || code == 429) {
          "\nUnauthenticated rate limit is 60 req/hr. Set GITHUB_TOKEN to raise it."
        } else {
          ""
        }
      throw GradleException("GitHub API returned HTTP $code for $url.$hint")
    }
    return connection
  }

  /** Fetches [url] and returns the response body parsed by [JsonSlurper]. */
  private fun githubFetch(url: String, token: String?): Any {
    val conn = githubConnect(url, token)
    return try {
      conn.inputStream.use { body -> JsonSlurper().parse(body) }
    } finally {
      conn.disconnect()
    }
  }

  /** Fetches [url] and returns the response body as text. */
  private fun githubFetchRaw(url: String, token: String?): String {
    val conn = githubConnect(url, token)
    return try {
      conn.inputStream.bufferedReader().use { reader -> reader.readText() }
    } finally {
      conn.disconnect()
    }
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| package datadog.gradle.sca | ||
|
|
||
| import com.fasterxml.jackson.databind.JsonNode | ||
| import com.fasterxml.jackson.databind.ObjectMapper | ||
|
|
||
/**
 * Converts GHSA enrichment files from the sca-reachability-database into the entry maps
 * written to sca_cves.json.
 *
 * What the conversion does:
 * - Keeps only entries whose `language` is `jvm`
 * - Emits one record per Maven package listed on an entry, so a GHSA entry covering several
 *   artifacts becomes several records, each carrying that package's own version ranges
 * - Writes class names in JVM internal format (slashes instead of dots)
 * - Always emits `method = null`; the field is a placeholder for method-level symbols
 *   (see APPSEC-62260)
 */
object GhsaEnrichmentParser {

  private val mapper = ObjectMapper()

  /**
   * Parses one GHSA enrichment file.
   *
   * @param ghsaId the GHSA identifier (e.g. "GHSA-645p-88qh-w398"), emitted as `vuln_id`
   * @param jsonContent the raw JSON text of the enrichment file
   * @return the sca_cves.json entry maps, one per affected Maven artifact
   * @throws IllegalArgumentException if the top-level JSON value is not an array
   */
  fun parse(ghsaId: String, jsonContent: String): List<Map<String, Any?>> {
    val document = mapper.readTree(jsonContent)
    require(document.isArray) { "GHSA enrichment file $ghsaId must be a JSON array, got ${document.nodeType}" }

    return document
      .filter { advisory -> advisory.path("language").asText() == "jvm" }
      .flatMap { advisory ->
        val classSymbols = extractSymbols(advisory)
        // An advisory without any usable class symbol contributes no records at all.
        if (classSymbols.isEmpty()) emptyList() else mavenRecords(ghsaId, advisory, classSymbols)
      }
  }

  /** Builds one record per Maven package of [advisory] that has a non-empty name. */
  private fun mavenRecords(
    ghsaId: String,
    advisory: JsonNode,
    classSymbols: List<Map<String, Any?>>,
  ): List<Map<String, Any?>> =
    advisory
      .path("package")
      .filter { candidate -> candidate.path("ecosystem").asText() == "maven" }
      .mapNotNull { candidate ->
        val artifact = candidate.path("name").asText()
        if (artifact.isEmpty()) {
          null
        } else {
          mapOf<String, Any?>(
            "vuln_id" to ghsaId,
            "artifact" to artifact,
            "version_ranges" to candidate.path("version_range").map { range -> range.asText() },
            "symbols" to classSymbols,
          )
        }
      }

  /**
   * Collects the `class` symbols found under `ecosystem_specific.imports[*].symbols`.
   * Returns an empty list when `imports` is absent or is not an array.
   */
  private fun extractSymbols(entry: JsonNode): List<Map<String, Any?>> {
    val importGroups = entry.path("ecosystem_specific").path("imports")
    // A missing node is never an array, so this single check covers both cases.
    if (!importGroups.isArray) return emptyList()

    return importGroups
      .flatMap { group -> group.path("symbols") }
      .filter { candidate -> candidate.path("type").asText() == "class" }
      .mapNotNull { candidate ->
        val packageName = candidate.path("value").asText()
        val className = candidate.path("name").asText()
        if (packageName.isEmpty() || className.isEmpty()) {
          null
        } else {
          // JVM internal format (slashes), produced here at build time rather than at runtime.
          // TODO(APPSEC-62260): verify inner-class format when database adds method-level symbols.
          // If GHSA uses dot notation for inner classes (e.g. name="Outer.Inner"), the replace
          // below produces com/example/Outer/Inner instead of the correct com/example/Outer$Inner.
          // When the database team defines the format, update this to handle the $ separator.
          mapOf("class" to "$packageName.$className".replace('.', '/'), "method" to null)
        }
      }
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| package datadog.gradle.plugin.sca | ||
|
|
||
| import datadog.gradle.plugin.GradleFixture | ||
| import org.assertj.core.api.Assertions.assertThat | ||
| import org.gradle.testkit.runner.TaskOutcome | ||
| import org.junit.jupiter.api.BeforeEach | ||
| import org.junit.jupiter.api.Test | ||
|
|
||
/**
 * TestKit checks for the `dd-trace-java.sca-enrichments` plugin: when `generateScaCvesJson`
 * is skipped versus executed, and that `processResources` depends on it.
 */
class ScaEnrichmentsPluginTest : GradleFixture() {

  @BeforeEach
  fun setup() {
    writeSettings("""rootProject.name = "test-appsec"""")
    writeRootProject(
      """
      plugins {
        java
        id("dd-trace-java.sca-enrichments")
      }
      """
    )
  }

  /** Creates `src/main/resources/sca_cves.json` in the test project with the given content. */
  private fun seedScaCvesJson(content: String) {
    val committedCopy = file("src/main/resources/sca_cves.json")
    committedCopy.parentFile.mkdirs()
    committedCopy.writeText(content)
  }

  @Test
  fun `generateScaCvesJson is SKIPPED when file exists and refreshSca is not set`() {
    seedScaCvesJson("{\"version\":1,\"entries\":[]}")

    val build = run("generateScaCvesJson")

    val outcome = build.task(":generateScaCvesJson")?.outcome
    assertThat(outcome).isEqualTo(TaskOutcome.SKIPPED)
  }

  @Test
  fun `generateScaCvesJson attempts to run when refreshSca is set even if file exists`() {
    seedScaCvesJson("{}")

    // -PrefreshSca makes onlyIf true. The task is expected to fail at the GitHub fetch
    // (no network in tests); the point is only that it was not SKIPPED.
    val build = run("generateScaCvesJson", "-PrefreshSca", expectFailure = true)

    val outcome = build.task(":generateScaCvesJson")?.outcome
    assertThat(outcome)
      .isNotNull
      .isNotEqualTo(TaskOutcome.SKIPPED)
  }

  @Test
  fun `generateScaCvesJson attempts to run when output file does not exist`() {
    // No file is seeded, so onlyIf is true. The task is expected to fail at the GitHub
    // fetch; the point is only that it was not SKIPPED.
    val build = run("generateScaCvesJson", expectFailure = true)

    val outcome = build.task(":generateScaCvesJson")?.outcome
    assertThat(outcome)
      .isNotNull
      .isNotEqualTo(TaskOutcome.SKIPPED)
  }

  @Test
  fun `processResources depends on generateScaCvesJson`() {
    seedScaCvesJson("{\"version\":1,\"entries\":[]}")

    val build = run("processResources")

    // The generate task is part of the graph but SKIPPED (file exists, no -PrefreshSca).
    assertThat(build.task(":generateScaCvesJson")?.outcome).isEqualTo(TaskOutcome.SKIPPED)
    assertThat(build.task(":processResources")?.outcome).isEqualTo(TaskOutcome.SUCCESS)
  }
}
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi Alejandro! Our benchmarks are being migrated to the
`apm-sdks-benchmarks` implementation, e.g. https://github.com/DataDog/apm-sdks-benchmarks/blob/main/.gitlab/ci-java-load-parallel.yml and https://github.com/DataDog/apm-sdks-benchmarks/blob/main/.gitlab/ci-java-startup-parallel.yml, so ideally no more changes are made to the local `benchmarks/` folder 😅 WDYT of this PR that ports your changes here to the new implementation? https://github.com/DataDog/apm-sdks-benchmarks/pull/161 (It's tested here: #11504)
Apologies for the confusion -- my plan was to remove these
`dd-trace-java` benchmarks (#11502) earlier, but I had been waiting for more data from the `apm-sdks-benchmarks` one.