Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,5 @@ wcwidth==0.2.5
websocket-client==0.59.0
yarl==1.7.2
zipp==3.19.1
PyGithub==2.6.1
python-gitlab~=7.1.0
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ install_requires =

# networking
GitPython>=3.1.17
PyGithub>=2.6.1
# imported unconditionally by vulnerabilities/pipelines/__init__.py (import gitlab)
python-gitlab>=7.1.0
requests>=2.25.1
fetchcode>=0.6.0

Expand Down
4 changes: 4 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from vulnerabilities.pipelines.v2_importers import aosp_importer as aosp_importer_v2
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
from vulnerabilities.pipelines.v2_importers import collect_issue_pr as collect_issue_pr_v2
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
from vulnerabilities.pipelines.v2_importers import (
elixir_security_importer as elixir_security_importer_v2,
Expand Down Expand Up @@ -100,6 +101,7 @@
epss_importer_v2.EPSSImporterPipeline,
nginx_importer_v2.NginxImporterPipeline,
mattermost_importer_v2.MattermostImporterPipeline,
nvd_importer.NVDImporterPipeline,
github_importer.GitHubAPIImporterPipeline,
gitlab_importer.GitLabImporterPipeline,
Expand Down Expand Up @@ -135,5 +137,7 @@
ubuntu_usn.UbuntuUSNImporter,
fireeye.FireyeImporter,
oss_fuzz.OSSFuzzImporter,
collect_issue_pr_v2.CollectKubernetesPRSIssues,
collect_issue_pr_v2.CollectWiresharkPRSIssues,
]
)
104 changes: 104 additions & 0 deletions vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,33 @@
#

import logging
import re
import traceback
from abc import abstractmethod
from collections import defaultdict
from datetime import datetime
from datetime import timezone
from timeit import default_timer as timer
from traceback import format_exc as traceback_format_exc
from typing import Iterable
from typing import List
from urllib.parse import urlparse

import gitlab
from aboutcode.pipeline import LoopProgress
from aboutcode.pipeline import PipelineDefinition
from aboutcode.pipeline import humanize_time
from github import Github

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import ReferenceV2
from vulnerabilities.improver import MAX_CONFIDENCE
from vulnerabilities.models import Advisory
from vulnerabilities.models import PipelineRun
from vulnerabilities.pipes.advisory import import_advisory
from vulnerabilities.pipes.advisory import insert_advisory
from vulnerabilities.pipes.advisory import insert_advisory_v2
from vulnerablecode.settings import env

module_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -321,3 +329,99 @@ def collect_and_store_advisories(self):
continue

self.log(f"Successfully collected {collected_advisory_count:,d} advisories")


class VCSCollector(VulnerableCodeBaseImporterPipeline):
    """
    Pipeline to collect GitHub/GitLab issues and PRs related to vulnerabilities.

    Subclasses provide ``repo_url`` and implement ``fetch_entries`` and
    ``collect_items``. ``collect_items`` is expected to bind
    ``collected_items`` to a mapping of vulnerability id to a list of
    ``(reference_type, url)`` pairs, which ``collect_advisories`` then turns
    into ``AdvisoryData`` objects.
    """

    # NOTE(review): declared but not read in this class; the target repository
    # is taken from ``repo_url`` in ``configure_target``.
    vcs_url: str
    CVE_PATTERN = re.compile(r"(CVE-\d{4}-\d+)", re.IGNORECASE)
    SUPPORTED_IDENTIFIERS = ["CVE-"]

    # Class-level default so that ``collect_advisories`` yields nothing when
    # ``collect_items`` has not populated an instance mapping yet. It must be
    # rebound per instance (as the subclasses do), never mutated in place.
    collected_items: dict = {}

    def advisories_count(self) -> int:
        """
        Return the number of vulnerability ids collected so far, i.e. the
        number of advisories ``collect_advisories`` will yield.
        """
        return len(self.collected_items)

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.configure_target,
            cls.fetch_entries,
            cls.collect_items,
            cls.collect_and_store_advisories,
        )

    def configure_target(self):
        """
        Derive ``repo_name`` ("<owner>/<repo>") from ``repo_url``.

        Raise ValueError when the URL path has fewer than two segments.
        """
        parsed_url = urlparse(self.repo_url)
        parts = parsed_url.path.strip("/").split("/")
        if len(parts) < 2:
            raise ValueError(f"Invalid URL: {self.repo_url}")

        # NOTE(review): only the first two path segments are kept, so a
        # project nested deeper than "<group>/<project>" would be truncated.
        self.repo_name = f"{parts[0]}/{parts[1]}"

    @abstractmethod
    def fetch_entries(self):
        """Fetch the raw issues and PRs from the hosting service."""
        raise NotImplementedError

    @abstractmethod
    def collect_items(self):
        """Populate ``collected_items`` from the fetched issues and PRs."""
        raise NotImplementedError

    def collect_advisories(self):
        """
        Generate AdvisoryData objects for each vulnerability ID grouped with its related GitHub/Gitlab issues and PRs.
        """
        self.log("Generating AdvisoryData objects from GitHub/Gitlab issues and PRs.")
        for vuln_id, refs in self.collected_items.items():
            # The same (type, url) pair is recorded once per match, e.g. when
            # an id appears in both the title and the body of one item. Keep
            # the first occurrence only, preserving the original order.
            unique_refs = dict.fromkeys((ref_type, url) for ref_type, url in refs)
            references = [
                ReferenceV2(reference_type=ref_type, url=url) for ref_type, url in unique_refs
            ]
            yield AdvisoryData(
                advisory_id=vuln_id,
                aliases=[],
                references_v2=references,
                url=self.repo_url,
            )


class GitHubCollector(VCSCollector):
    """
    Collect GitHub issues and pull requests that mention a supported
    vulnerability identifier in the repository named by ``repo_name``.
    """

    def fetch_entries(self):
        """Fetch GitHub Data Entries"""
        github_token = env.str("GITHUB_TOKEN")
        g = Github(login_or_token=github_token)
        base_query = f"repo:{self.repo_name} ({' OR '.join(self.SUPPORTED_IDENTIFIERS)})"
        self.issues = g.search_issues(f"{base_query} is:issue")
        self.prs = g.search_issues(f"{base_query} is:pr")

    def collect_items(self):
        """
        Group the fetched issues and PRs by the CVE ids found in their title
        or body.

        ``collected_items`` maps each upper-cased CVE id to a list of
        ``(item_type, html_url)`` tuples where ``item_type`` is "Issue" or
        "PR". One tuple is appended per match, so an item that mentions the
        same id several times is listed several times.
        """
        self.collected_items = defaultdict(list)

        for i_type, items in [("Issue", self.issues), ("PR", self.prs)]:
            for item in items:
                text = f"{item.title or ''} {item.body or ''}"
                for match in self.CVE_PATTERN.findall(text):
                    # CVE_PATTERN is case-insensitive; normalise so that
                    # "cve-2023-1" and "CVE-2023-1" share one entry.
                    self.collected_items[match.upper()].append((i_type, item.html_url))


class GitLabCollector(VCSCollector):
    """
    Collect gitlab.com issues and merge requests that mention a supported
    vulnerability identifier in the project named by ``repo_name``.
    """

    def fetch_entries(self):
        """Fetch GitLab Data Entries"""
        gitlab_token = env.str("GITLAB_TOKEN")
        gl = gitlab.Gitlab("https://gitlab.com/", private_token=gitlab_token)
        project = gl.projects.get(self.repo_name)
        base_query = " ".join(self.SUPPORTED_IDENTIFIERS)
        # Search results are paginated; get_all=True asks python-gitlab to
        # follow every page instead of returning only the first one.
        self.issues = project.search(scope="issues", search=base_query, get_all=True)
        self.prs = project.search(scope="merge_requests", search=base_query, get_all=True)

    def collect_items(self):
        """
        Group the fetched issues and merge requests by the CVE ids found in
        their title or description.

        ``collected_items`` maps each upper-cased CVE id to a list of
        ``(item_type, web_url)`` tuples where ``item_type`` is "Issue" or
        "PR". One tuple is appended per match.
        """
        self.collected_items = defaultdict(list)
        for i_type, items in [("Issue", self.issues), ("PR", self.prs)]:
            for item in items:
                title = item.get("title") or ""
                description = item.get("description") or ""
                url = item.get("web_url")
                for match in self.CVE_PATTERN.findall(title + " " + description):
                    # CVE_PATTERN is case-insensitive; normalise so that
                    # "cve-2023-1" and "CVE-2023-1" share one entry.
                    self.collected_items[match.upper()].append((i_type, url))
20 changes: 20 additions & 0 deletions vulnerabilities/pipelines/v2_importers/collect_issue_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
from vulnerabilities.pipelines import GitHubCollector
from vulnerabilities.pipelines import GitLabCollector


class CollectKubernetesPRSIssues(GitHubCollector):
    """GitHub collector targeting the kubernetes/kubernetes repository."""

    repo_url = "https://github.com/kubernetes/kubernetes"
    pipeline_id = "collect-kubernetes-prs-issues"


class CollectWiresharkPRSIssues(GitLabCollector):
    """GitLab collector targeting the wireshark/wireshark project."""

    repo_url = "https://gitlab.com/wireshark/wireshark"
    pipeline_id = "collect-wireshark-prs-issues"
126 changes: 126 additions & 0 deletions vulnerabilities/tests/pipelines/v2_importers/test_collect_issue_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest

from vulnerabilities.pipelines import GitHubCollector
from vulnerabilities.pipelines import GitLabCollector
from vulnerabilities.tests import util_tests

TEST_DATA = Path(__file__).parent.parent.parent / "test_data" / "collect_issue_pr"


@pytest.mark.django_db
def test_collect_github_issues_and_prs():
    """GitHubCollector.collect_items groups item URLs under each CVE id it finds."""

    def entry(title, body, html_url):
        # Minimal stand-in exposing the three attributes collect_items reads.
        return SimpleNamespace(title=title, body=body, html_url=html_url)

    collector = GitHubCollector()
    collector.issues = [
        entry(
            "Fix the CVE-2023-1234 found",
            "This resolves a security issue",
            "https://github.com/issue1",
        ),
        entry("vulnerability 1", "Fix CVE-2023-124", "https://github.com/issue2"),
        # No CVE id anywhere: must not appear in the result.
        entry("vulnerability 2", "vulnerability 2", "https://github.com/issue3"),
    ]
    collector.prs = [
        entry(
            "Patch addressing CVE-2023-1234",
            "Also fixes CVE-2023-1234",
            "https://github.com/pr1",
        ),
    ]

    collector.collect_items()

    # pr1 is listed twice because the id occurs in both its title and body.
    # NOTE(review): the PR entries are expected with the "Issue" label, which
    # mirrors what GitHubCollector.collect_items currently records for every
    # item; confirm whether "PR" is the intended label (the GitLab test
    # expects "PR").
    issue1 = ("Issue", "https://github.com/issue1")
    issue2 = ("Issue", "https://github.com/issue2")
    pr1 = ("Issue", "https://github.com/pr1")
    assert collector.collected_items == {
        "CVE-2023-1234": [issue1, pr1, pr1],
        "CVE-2023-124": [issue2],
    }


@pytest.mark.django_db
def test_collect_gitlab_issues_and_prs():
    """GitLabCollector.collect_items labels issues and merge requests per CVE id."""

    def entry(title, description, web_url):
        # Search results are consumed as plain dicts by collect_items.
        return {"title": title, "description": description, "web_url": web_url}

    collector = GitLabCollector()
    collector.issues = [
        entry("vulnerability CVE-2024-1234", "vulnerability 1", "https://github.com/issue1"),
    ]
    collector.prs = [
        entry("Patch addressing", "Also fixes CVE-2023-1234", "https://github.com/pr1"),
    ]

    collector.collect_items()

    assert collector.collected_items == {
        "CVE-2024-1234": [("Issue", "https://github.com/issue1")],
        "CVE-2023-1234": [("PR", "https://github.com/pr1")],
    }


@pytest.mark.parametrize(
    "input_file, expected_file, repo_url, pipeline_class",
    [
        (
            "github_issues_and_pr.json",
            "expected_github.json",
            "https://github.com/test/repo",
            GitHubCollector,
        ),
        (
            "gitlab_issues_and_pr.json",
            "expected_gitlab.json",
            "https://gitlab.com/test/repo",
            GitLabCollector,
        ),
    ],
)
@pytest.mark.django_db
def test_collect_advisories_from_json(input_file, expected_file, repo_url, pipeline_class):
    """Compare the advisories produced by each collector with its expected JSON file."""
    input_path = TEST_DATA / input_file
    expected_path = TEST_DATA / expected_file
    issues_and_prs = json.loads(input_path.read_text(encoding="utf-8"))

    collector = pipeline_class()
    collector.log = MagicMock()
    collector.repo_url = repo_url
    collector.pipeline_id = "collect-prs-issues"

    # NOTE(review): collect_advisories reads ``collected_items`` and does not
    # call ``collect_items``, so the data loaded above does not appear to reach
    # it through this mock; confirm whether ``collected_items`` should be set
    # instead.
    collector.collect_items = MagicMock(return_value=issues_and_prs)

    advisories = [advisory.to_dict() for advisory in collector.collect_advisories()]
    util_tests.check_results_against_json(advisories, expected_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"CVE-2023-1234": [
[
"Issue",
"https://example.com/issue1"
],
[
"PR",
"https://example.com/pr1"
]
],
"GHSA-zzz-111": [
[
"PR",
"https://example.com/pr1"
]
],
"PYSEC-2024-5678": [
[
"PR",
"https://example.com/pr1"
]
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}