Skip to content

Commit bc59a76

Browse files
authored
Merge pull request #1008 from gerrod3/attestation
Add PackageProvenanceContent model
2 parents d69a7ca + dffd84e commit bc59a76

File tree

7 files changed

+258
-2
lines changed

7 files changed

+258
-2
lines changed

CHANGES/+attestations.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the ability to upload PEP 740 Provenance files to repositories.
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Generated by Django 4.2.26 on 2025-11-10 09:11
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
import pulpcore.app.util
6+
7+
8+
class Migration(migrations.Migration):
    """Create the ``PackageProvenance`` model as a child of ``core.content``."""

    dependencies = [
        ("python", "0017_pythonpackagecontent_size"),
    ]

    operations = [
        migrations.CreateModel(
            name="PackageProvenance",
            fields=[
                # Parent link to the base content row; doubles as the primary key.
                (
                    "content_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="core.content",
                    ),
                ),
                # The provenance document itself, stored as JSON.
                ("provenance", models.JSONField()),
                # 64 characters: the length of a hex-encoded sha256 digest.
                ("sha256", models.CharField(max_length=64)),
                # Owning domain; PROTECT blocks deleting a domain that is still referenced.
                (
                    "_pulp_domain",
                    models.ForeignKey(
                        default=pulpcore.app.util.get_domain_pk,
                        on_delete=django.db.models.deletion.PROTECT,
                        to="core.domain",
                    ),
                ),
                # The package this provenance is attached to; removed together with it.
                (
                    "package",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="provenances",
                        to="python.pythonpackagecontent",
                    ),
                ),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
                # A digest may appear only once per domain.
                "unique_together": {("sha256", "_pulp_domain")},
            },
            bases=("core.content",),
        ),
    ]

pulp_python/app/models.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
import hashlib
2+
import json
13
from logging import getLogger
24

35
from aiohttp.web import json_response
46
from django.contrib.postgres.fields import ArrayField
57
from django.core.exceptions import ObjectDoesNotExist
68
from django.db import models
79
from django.conf import settings
10+
from django_lifecycle import (
11+
BEFORE_SAVE,
12+
hook,
13+
)
814
from pulpcore.plugin.models import (
915
AutoAddObjPermsMixin,
1016
Content,
@@ -15,6 +21,7 @@
1521
)
1622
from pulpcore.plugin.responses import ArtifactResponse
1723

24+
from pypi_attestations import Provenance
1825
from pathlib import PurePath
1926
from .utils import (
2027
artifact_to_python_content_data,
@@ -235,6 +242,43 @@ class Meta:
235242
]
236243

237244

245+
class PackageProvenance(Content):
    """
    PEP 740 provenance objects.

    A provenance is linked to one ``PythonPackageContent`` and is unique per domain
    by the sha256 digest of its JSON payload.
    """

    TYPE = "provenance"
    repo_key_fields = ("package_id",)

    package = models.ForeignKey(
        PythonPackageContent, on_delete=models.CASCADE, related_name="provenances"
    )
    provenance = models.JSONField(null=False)
    sha256 = models.CharField(max_length=64, null=False)

    _pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)

    @staticmethod
    def calculate_sha256(provenance):
        """Return the hex sha256 digest of ``provenance`` dumped as key-sorted JSON."""
        # Sorting keys keeps the digest independent of dict key ordering.
        canonical = json.dumps(provenance, sort_keys=True)
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

    @hook(BEFORE_SAVE)
    def set_sha256_hook(self):
        """Recompute ``sha256`` from the current provenance payload before saving."""
        digest = self.calculate_sha256(self.provenance)
        self.sha256 = digest

    @property
    def as_model(self):
        """The stored JSON validated into a ``pypi_attestations.Provenance`` object."""
        return Provenance.model_validate(self.provenance)

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
        unique_together = ("sha256", "_pulp_domain")
280+
281+
238282
class PythonPublication(Publication, AutoAddObjPermsMixin):
239283
"""
240284
A Publication for PythonContent.
@@ -295,7 +339,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
295339
"""
296340

297341
TYPE = "python"
298-
CONTENT_TYPES = [PythonPackageContent]
342+
CONTENT_TYPES = [PythonPackageContent, PackageProvenance]
299343
REMOTE_TYPES = [PythonRemote]
300344
PULL_THROUGH_SUPPORTED = True
301345

pulp_python/app/serializers.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from django.db.utils import IntegrityError
66
from packaging.requirements import Requirement
77
from rest_framework import serializers
8+
from pydantic import ValidationError
9+
from pypi_attestations import Distribution, Provenance, VerificationError
810

911
from pulpcore.plugin import models as core_models
1012
from pulpcore.plugin import serializers as core_serializers
@@ -464,6 +466,65 @@ class Meta:
464466
model = python_models.PythonPackageContent
465467

466468

469+
class PackageProvenanceSerializer(core_serializers.NoArtifactContentUploadSerializer):
    """
    A Serializer for PackageProvenance.
    """

    package = core_serializers.DetailRelatedField(
        help_text=_("The package that the provenance is for."),
        view_name_pattern=r"content(-.*/.*)-detail",
        queryset=python_models.PythonPackageContent.objects.all(),
    )
    provenance = serializers.JSONField(read_only=True, default=dict)
    sha256 = serializers.CharField(read_only=True)
    verify = serializers.BooleanField(
        default=True,
        write_only=True,
        help_text=_("Verify each attestation in the provenance."),
    )

    def deferred_validate(self, data):
        """
        Validate that the provenance is valid and pointing to the correct package.

        The uploaded file is parsed as a PEP 740 provenance and stored under
        ``data["provenance"]``. The write-only ``verify`` flag is removed from ``data``;
        when it is true, every attestation of every bundle is verified against the
        filename and sha256 of the referenced package.

        Args:
            data (dict): The validated data, containing "file", "package" and "verify".

        Returns:
            dict: ``data`` with "provenance" added and "verify" removed.

        Raises:
            rest_framework.serializers.ValidationError: If the file is not a valid
                provenance, or if verification was requested and failed.
        """
        data = super().deferred_validate(data)
        try:
            provenance = Provenance.model_validate_json(data["file"].read())
            data["provenance"] = provenance.model_dump(mode="json")
        except ValidationError as e:
            # Translate the template first, then interpolate: formatting inside _() would
            # look up the already-interpolated string, which never matches a catalog entry.
            raise serializers.ValidationError(
                _("The uploaded provenance is not valid: {}").format(e)
            ) from e
        if data.pop("verify"):
            dist = Distribution(name=data["package"].filename, digest=data["package"].sha256)
            try:
                for attestation_bundle in provenance.attestation_bundles:
                    publisher = attestation_bundle.publisher
                    # NOTE: `_as_policy` is underscore-private in pypi-attestations, so it
                    # may change between releases of that library.
                    policy = publisher._as_policy()
                    for attestation in attestation_bundle.attestations:
                        attestation.verify(policy, dist)
            except VerificationError as e:
                raise serializers.ValidationError(
                    _("Provenance verification failed: {}").format(e)
                ) from e
        return data

    def retrieve(self, validated_data):
        """
        Look up an already stored provenance with the same content in the current domain.

        Returns:
            The matching PackageProvenance, or None when no match exists.
        """
        sha256 = python_models.PackageProvenance.calculate_sha256(validated_data["provenance"])
        content = python_models.PackageProvenance.objects.filter(
            sha256=sha256, _pulp_domain=get_domain()
        ).first()
        return content

    class Meta:
        fields = core_serializers.NoArtifactContentUploadSerializer.Meta.fields + (
            "package",
            "provenance",
            "sha256",
            "verify",
        )
        model = python_models.PackageProvenance
526+
527+
467528
class MultipleChoiceArrayField(serializers.MultipleChoiceField):
468529
"""
469530
A wrapper to make sure this DRF serializer works properly with ArrayFields.

pulp_python/app/viewsets.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,39 @@ def upload(self, request):
428428
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
429429

430430

431+
class PackageProvenanceViewSet(core_viewsets.NoArtifactContentUploadViewSet):
    """
    PackageProvenance represents a PEP 740 provenance object for a Python package.

    Use ?minimal=true to get a human readable representation of the provenance.
    """

    endpoint_name = "provenance"
    queryset = python_models.PackageProvenance.objects.all()
    serializer_class = python_serializers.PackageProvenanceSerializer

    DEFAULT_ACCESS_POLICY = {
        "statements": [
            # Any authenticated user may list and read provenances.
            {
                "action": ["list", "retrieve"],
                "principal": "authenticated",
                "effect": "allow",
            },
            # Creating is allowed for authenticated users only when every condition holds.
            {
                "action": ["create"],
                "principal": "authenticated",
                "effect": "allow",
                "condition": [
                    "has_required_repo_perms_on_upload:python.modify_pythonrepository",
                    "has_required_repo_perms_on_upload:python.view_pythonrepository",
                    "has_upload_param_model_or_domain_or_obj_perms:core.change_upload",
                ],
            },
        ],
        "queryset_scoping": {"function": "scope_queryset"},
    }
462+
463+
431464
class PythonRemoteViewSet(core_viewsets.RemoteViewSet, core_viewsets.RolesMixin):
432465
"""
433466
<!-- User-facing documentation, rendered as html-->
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import pytest
2+
import requests
3+
4+
from pypi_simple import PyPISimple
5+
6+
from pulpcore.tests.functional.utils import PulpTaskError
7+
8+
9+
@pytest.mark.parallel
def test_crd_provenance(python_bindings, python_content_factory, monitor_task):
    """
    Test creating and reading a provenance.
    """
    filename = "twine-6.2.0-py3-none-any.whl"
    with PyPISimple() as client:
        page = client.get_project_page("twine")
    # Fail loudly if the file is gone from the index instead of hitting an unbound
    # `content` or silently using whichever package happened to be listed last.
    package = next((pkg for pkg in page.packages if pkg.filename == filename), None)
    assert package is not None, f"{filename} was not found on the twine project page"
    content = python_content_factory(filename, url=package.url)

    provenance = python_bindings.ContentProvenanceApi.create(
        package=content.pulp_href,
        file_url=package.provenance_url,
    )
    task = monitor_task(provenance.task)
    provenance = python_bindings.ContentProvenanceApi.read(task.created_resources[0])
    assert provenance.package == content.pulp_href

    # The stored provenance must match what the index serves.
    r = requests.get(package.provenance_url)
    assert r.status_code == 200
    assert r.json() == provenance.provenance
31+
32+
33+
@pytest.mark.parallel
def test_verify_provenance(python_bindings, python_content_factory, monitor_task):
    """
    Test verifying a provenance.
    """
    filename = "twine-6.2.0.tar.gz"
    with PyPISimple() as client:
        page = client.get_project_page("twine")
    # Fail loudly if the file is gone from the index rather than silently using
    # whichever package happened to be listed last.
    package = next((pkg for pkg in page.packages if pkg.filename == filename), None)
    assert package is not None, f"{filename} was not found on the twine project page"

    # Attach the twine provenance to an unrelated package so verification must fail.
    wrong_content = python_content_factory()  # shelf-reader-0.1.tar.gz
    provenance = python_bindings.ContentProvenanceApi.create(
        package=wrong_content.pulp_href,
        file_url=package.provenance_url,
    )
    with pytest.raises(PulpTaskError) as e:
        monitor_task(provenance.task)
    assert e.value.task.state == "failed"
    assert "twine-6.2.0.tar.gz != shelf-reader-0.1.tar.gz" in e.value.task.error["description"]

    # Test creating a provenance without verifying
    provenance = python_bindings.ContentProvenanceApi.create(
        package=wrong_content.pulp_href,
        file_url=package.provenance_url,
        verify=False,
    )
    task = monitor_task(provenance.task)
    provenance = python_bindings.ContentProvenanceApi.read(task.created_resources[0])
    assert provenance.package == wrong_content.pulp_href

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ dependencies = [
2929
"pulpcore>=3.85.3,<3.100",
3030
"pkginfo>=1.12.0,<1.13.0",
3131
"bandersnatch>=6.6.0,<6.7",
32-
"pypi-simple>=1.5.0,<2.0",
32+
"pypi-simple>=1.8.0,<2.0",
33+
"pypi-attestations==0.0.28", # API is not stable
3334
]
3435

3536
[project.urls]

0 commit comments

Comments
 (0)