Skip to content
38 changes: 38 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from vulnerabilities.pipelines.v2_importers import aosp_importer as aosp_importer_v2
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
from vulnerabilities.pipelines.v2_importers import collect_fix_commits as collect_fix_commits_v2
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
from vulnerabilities.pipelines.v2_importers import (
elixir_security_importer as elixir_security_importer_v2,
Expand Down Expand Up @@ -135,5 +136,42 @@
ubuntu_usn.UbuntuUSNImporter,
fireeye.FireyeImporter,
oss_fuzz.OSSFuzzImporter,
collect_fix_commits_v2.CollectLinuxFixCommitsPipeline,
collect_fix_commits_v2.CollectBusyBoxFixCommitsPipeline,
collect_fix_commits_v2.CollectNginxFixCommitsPipeline,
collect_fix_commits_v2.CollectApacheTomcatFixCommitsPipeline,
collect_fix_commits_v2.CollectMysqlServerFixCommitsPipeline,
collect_fix_commits_v2.CollectPostgresqlFixCommitsPipeline,
collect_fix_commits_v2.CollectMongodbFixCommitsPipeline,
collect_fix_commits_v2.CollectRedisFixCommitsPipeline,
collect_fix_commits_v2.CollectSqliteFixCommitsPipeline,
collect_fix_commits_v2.CollectPhpFixCommitsPipeline,
collect_fix_commits_v2.CollectPythonCpythonFixCommitsPipeline,
collect_fix_commits_v2.CollectRubyFixCommitsPipeline,
collect_fix_commits_v2.CollectGoFixCommitsPipeline,
collect_fix_commits_v2.CollectNodeJsFixCommitsPipeline,
collect_fix_commits_v2.CollectRustFixCommitsPipeline,
collect_fix_commits_v2.CollectOpenjdkFixCommitsPipeline,
collect_fix_commits_v2.CollectSwiftFixCommitsPipeline,
collect_fix_commits_v2.CollectDjangoFixCommitsPipeline,
collect_fix_commits_v2.CollectRailsFixCommitsPipeline,
collect_fix_commits_v2.CollectLaravelFixCommitsPipeline,
collect_fix_commits_v2.CollectSpringFrameworkFixCommitsPipeline,
collect_fix_commits_v2.CollectReactFixCommitsPipeline,
collect_fix_commits_v2.CollectAngularFixCommitsPipeline,
collect_fix_commits_v2.CollectWordpressFixCommitsPipeline,
collect_fix_commits_v2.CollectDockerMobyFixCommitsPipeline,
collect_fix_commits_v2.CollectKubernetesFixCommitsPipeline,
collect_fix_commits_v2.CollectQemuFixCommitsPipeline,
collect_fix_commits_v2.CollectXenProjectFixCommitsPipeline,
collect_fix_commits_v2.CollectVirtualboxFixCommitsPipeline,
collect_fix_commits_v2.CollectContainerdFixCommitsPipeline,
collect_fix_commits_v2.CollectAnsibleFixCommitsPipeline,
collect_fix_commits_v2.CollectTerraformFixCommitsPipeline,
collect_fix_commits_v2.CollectWiresharkFixCommitsPipeline,
collect_fix_commits_v2.CollectTcpdumpFixCommitsPipeline,
collect_fix_commits_v2.CollectGitFixCommitsPipeline,
collect_fix_commits_v2.CollectJenkinsFixCommitsPipeline,
collect_fix_commits_v2.CollectGitlabFixCommitsPipeline,
]
)
186 changes: 186 additions & 0 deletions vulnerabilities/pipelines/v2_importers/collect_fix_commits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
from vulnerabilities.pipes.vcs_collector_utils import CollectVCSFixCommitPipeline


class CollectLinuxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the torvalds/linux repository on GitHub."""

    repo_url = "https://github.com/torvalds/linux"
    pipeline_id = "collect_linux_fix_commits"


class CollectBusyBoxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the mirror/busybox repository on GitHub."""

    repo_url = "https://github.com/mirror/busybox"
    pipeline_id = "collect_busybox_fix_commits"


class CollectNginxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the nginx/nginx repository on GitHub."""

    repo_url = "https://github.com/nginx/nginx"
    pipeline_id = "collect_nginx_fix_commits"


class CollectApacheTomcatFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the apache/tomcat repository on GitHub."""

    repo_url = "https://github.com/apache/tomcat"
    pipeline_id = "collect_apache_tomcat_fix_commits"


class CollectMysqlServerFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the mysql/mysql-server repository on GitHub."""

    repo_url = "https://github.com/mysql/mysql-server"
    pipeline_id = "collect_mysql_server_fix_commits"


class CollectPostgresqlFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the postgres/postgres repository on GitHub."""

    repo_url = "https://github.com/postgres/postgres"
    pipeline_id = "collect_postgresql_fix_commits"


class CollectMongodbFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the mongodb/mongo repository on GitHub."""

    repo_url = "https://github.com/mongodb/mongo"
    pipeline_id = "collect_mongodb_fix_commits"


class CollectRedisFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the redis/redis repository on GitHub."""

    repo_url = "https://github.com/redis/redis"
    pipeline_id = "collect_redis_fix_commits"


class CollectSqliteFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the sqlite/sqlite repository on GitHub."""

    repo_url = "https://github.com/sqlite/sqlite"
    pipeline_id = "collect_sqlite_fix_commits"


class CollectPhpFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the php/php-src repository on GitHub."""

    repo_url = "https://github.com/php/php-src"
    pipeline_id = "collect_php_fix_commits"


class CollectPythonCpythonFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the python/cpython repository on GitHub."""

    repo_url = "https://github.com/python/cpython"
    pipeline_id = "collect_python_cpython_fix_commits"


class CollectRubyFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the ruby/ruby repository on GitHub."""

    repo_url = "https://github.com/ruby/ruby"
    pipeline_id = "collect_ruby_fix_commits"


class CollectGoFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the golang/go repository on GitHub."""

    repo_url = "https://github.com/golang/go"
    pipeline_id = "collect_go_fix_commits"


class CollectNodeJsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the nodejs/node repository on GitHub."""

    repo_url = "https://github.com/nodejs/node"
    pipeline_id = "collect_node_js_fix_commits"


class CollectRustFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the rust-lang/rust repository on GitHub."""

    repo_url = "https://github.com/rust-lang/rust"
    pipeline_id = "collect_rust_fix_commits"


class CollectOpenjdkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the openjdk/jdk repository on GitHub."""

    repo_url = "https://github.com/openjdk/jdk"
    pipeline_id = "collect_openjdk_fix_commits"


class CollectSwiftFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the swiftlang/swift repository on GitHub."""

    repo_url = "https://github.com/swiftlang/swift"
    pipeline_id = "collect_swift_fix_commits"


class CollectDjangoFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the django/django repository on GitHub."""

    repo_url = "https://github.com/django/django"
    pipeline_id = "collect_django_fix_commits"


class CollectRailsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the rails/rails repository on GitHub."""

    repo_url = "https://github.com/rails/rails"
    pipeline_id = "collect_rails_fix_commits"


class CollectLaravelFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the laravel/framework repository on GitHub."""

    repo_url = "https://github.com/laravel/framework"
    pipeline_id = "collect_laravel_fix_commits"


class CollectSpringFrameworkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at spring-projects/spring-framework on GitHub."""

    repo_url = "https://github.com/spring-projects/spring-framework"
    pipeline_id = "collect_spring_framework_fix_commits"


class CollectReactFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the facebook/react repository on GitHub."""

    repo_url = "https://github.com/facebook/react"
    pipeline_id = "collect_react_fix_commits"


class CollectAngularFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the angular/angular repository on GitHub."""

    repo_url = "https://github.com/angular/angular"
    pipeline_id = "collect_angular_fix_commits"


class CollectWordpressFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the WordPress/WordPress repository on GitHub."""

    repo_url = "https://github.com/WordPress/WordPress"
    pipeline_id = "collect_wordpress_fix_commits"


class CollectDockerMobyFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the moby/moby repository on GitHub."""

    repo_url = "https://github.com/moby/moby"
    pipeline_id = "collect_docker_moby_fix_commits"


class CollectKubernetesFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the kubernetes/kubernetes repository on GitHub."""

    repo_url = "https://github.com/kubernetes/kubernetes"
    pipeline_id = "collect_kubernetes_fix_commits"


class CollectQemuFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the qemu-project/qemu repository on GitLab."""

    repo_url = "https://gitlab.com/qemu-project/qemu"
    pipeline_id = "collect_qemu_fix_commits"


class CollectXenProjectFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the xen-project/xen repository on GitHub."""

    repo_url = "https://github.com/xen-project/xen"
    pipeline_id = "collect_xen_project_fix_commits"


class CollectVirtualboxFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the mirror/vbox repository on GitHub."""

    repo_url = "https://github.com/mirror/vbox"
    pipeline_id = "collect_virtualbox_fix_commits"


class CollectContainerdFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the containerd/containerd repository on GitHub."""

    repo_url = "https://github.com/containerd/containerd"
    pipeline_id = "collect_containerd_fix_commits"


class CollectAnsibleFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the ansible/ansible repository on GitHub."""

    repo_url = "https://github.com/ansible/ansible"
    pipeline_id = "collect_ansible_fix_commits"


class CollectTerraformFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the hashicorp/terraform repository on GitHub."""

    repo_url = "https://github.com/hashicorp/terraform"
    pipeline_id = "collect_terraform_fix_commits"


class CollectWiresharkFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the wireshark/wireshark repository on GitLab."""

    repo_url = "https://gitlab.com/wireshark/wireshark"
    pipeline_id = "collect_wireshark_fix_commits"


class CollectTcpdumpFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the the-tcpdump-group/tcpdump repository on GitHub."""

    repo_url = "https://github.com/the-tcpdump-group/tcpdump"
    pipeline_id = "collect_tcpdump_fix_commits"


class CollectGitFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the git/git repository on GitHub."""

    repo_url = "https://github.com/git/git"
    pipeline_id = "collect_git_fix_commits"


class CollectJenkinsFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the jenkinsci/jenkins repository on GitHub."""

    repo_url = "https://github.com/jenkinsci/jenkins"
    pipeline_id = "collect_jenkins_fix_commits"


class CollectGitlabFixCommitsPipeline(CollectVCSFixCommitPipeline):
    """Fix-commit collector pointed at the gitlab-org/gitlab-foss repository on GitLab."""

    repo_url = "https://gitlab.com/gitlab-org/gitlab-foss"
    pipeline_id = "collect_gitlab_fix_commits"
133 changes: 133 additions & 0 deletions vulnerabilities/pipes/vcs_collector_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import re
import shutil
import tempfile
from collections import defaultdict

from git import Repo
from packageurl.contrib.url2purl import url2purl

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackageV2
from vulnerabilities.importer import PackageCommitPatchData
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2


class CollectVCSFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline to collect fix commits from any git repository.

    Subclasses provide ``repo_url``. The repository is cloned, every commit message is
    searched for vulnerability identifiers matching ``patterns``, and one AdvisoryData is
    produced per identifier, listing the commits that mention it as fix commits.
    """

    # URL of the git repository to clone and scan; set by each concrete subclass.
    repo_url: str
    # Regular expressions for vulnerability IDs; always applied case-insensitively.
    patterns: list[str] = [
        r"\bCVE-\d{4}-\d{4,19}\b",
        r"GHSA-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}-[2-9cfghjmpqrvwx]{4}",
    ]

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """
        Clone ``repo_url`` as a bare, blob-less repository into a fresh temporary directory
        and keep the resulting handle in ``self.repo``.
        """
        clone_dir = tempfile.mkdtemp()
        try:
            self.repo = Repo.clone_from(
                url=self.repo_url,
                to_path=clone_dir,
                bare=True,
                no_checkout=True,
                multi_options=["--filter=blob:none"],
            )
        except Exception:
            # ``self.repo`` was never assigned, so clean_downloads() cannot locate this
            # directory later: remove it here, then let the original error propagate.
            shutil.rmtree(clone_dir, ignore_errors=True)
            raise

    def advisories_count(self) -> int:
        """
        Return 0.

        NOTE(review): presumably a placeholder because the number of advisories is only
        known after the whole history has been scanned -- confirm against the base class.
        """
        return 0

    @staticmethod
    def _normalize_vulnerability_id(vuln_id):
        """
        Return the canonical spelling of a matched identifier.

        Matching is case-insensitive, so ``cve-2021-1234`` and ``CVE-2021-1234`` name the
        same vulnerability. CVE IDs are upper-cased; GHSA IDs get an upper-case ``GHSA-``
        prefix and a lower-case remainder. Anything else (e.g. a value produced by a
        custom pattern in a subclass) is returned unchanged.
        """
        if not isinstance(vuln_id, str):
            # re.findall() yields tuples for patterns with several groups; pass through.
            return vuln_id
        uppercased = vuln_id.upper()
        if uppercased.startswith("CVE-"):
            return uppercased
        if uppercased.startswith("GHSA-"):
            return "GHSA-" + vuln_id[len("GHSA-") :].lower()
        return vuln_id

    def extract_vulnerability_id(self, commit) -> list[str]:
        """
        Extract vulnerability ids from a commit message.

        Return the matched vulnerability IDs in order of first appearance (per pattern),
        normalized to their canonical case and without duplicates, so that a commit
        mentioning the same ID several times is reported once for that ID.
        """
        # A dict is used as an insertion-ordered set.
        unique_ids = {}
        for pattern in self.patterns:
            for match in re.findall(pattern, commit.message, flags=re.IGNORECASE):
                unique_ids[self._normalize_vulnerability_id(match)] = None
        return list(unique_ids)

    def collect_fix_commits(self):
        """
        Iterate through repository commits and group them by vulnerability identifiers.
        Return a mapping of vuln_id -> [(commit_id, commit_message), ...].
        """
        self.log("Processing git repository fix commits (grouped by vulnerability IDs).")

        grouped_commits = defaultdict(list)
        for commit in self.repo.iter_commits("--all"):
            matched_ids = self.extract_vulnerability_id(commit)
            if not matched_ids:
                continue

            commit_entry = (commit.hexsha, commit.message.strip())
            for vuln_id in matched_ids:
                grouped_commits[vuln_id].append(commit_entry)

        self.log(f"Found {len(grouped_commits)} vulnerabilities with related commits.")
        self.log("Finished processing all commits.")
        return grouped_commits

    def collect_advisories(self):
        """
        Generate AdvisoryData objects for each vulnerability ID grouped with its related commits.

        The summary is the concatenation of ``<commit hash>:<commit message>`` lines, and the
        affected package lists each distinct commit once, in the order the commits were found.
        """
        self.log("Generating AdvisoryData objects from grouped commits.")
        grouped_commits = self.collect_fix_commits()
        purl = url2purl(self.repo_url)
        for vuln_id, commits_data in grouped_commits.items():
            if not commits_data or not vuln_id:
                continue

            summary = "".join(
                f"{commit_hash}:{commit_message}\n" for commit_hash, commit_message in commits_data
            )
            # dict.fromkeys() de-duplicates while keeping a deterministic order,
            # unlike a set whose iteration order is arbitrary.
            unique_hashes = dict.fromkeys(commit_hash for commit_hash, _ in commits_data)

            affected_packages = [
                AffectedPackageV2(
                    package=purl,
                    fixed_by_commit_patches=[
                        PackageCommitPatchData(vcs_url=self.repo_url, commit_hash=commit_hash)
                        for commit_hash in unique_hashes
                    ],
                )
            ]

            yield AdvisoryData(
                advisory_id=vuln_id,
                summary=summary,
                affected_packages=affected_packages,
                url=self.repo_url,
            )

    def clean_downloads(self):
        """
        Cleanup any temporary repository data.

        Safe to call when no clone exists and safe to call more than once.
        """
        self.log("Cleaning up local repository resources.")
        repo = getattr(self, "repo", None)
        if repo is None:
            return

        clone_dir = repo.working_dir
        if not clone_dir:
            return

        # Release resources held by the Repo handle before deleting its files.
        repo.close()
        try:
            shutil.rmtree(path=clone_dir)
        except FileNotFoundError:
            # Already removed by an earlier call (e.g. the cleanup step ran before
            # on_failure() was triggered); nothing left to do.
            pass

    def on_failure(self):
        """Ensure cleanup is always performed on failure."""
        self.clean_downloads()
Loading