Skip to content

Commit 4dd000a

Browse files
committed
Expose metadata file
1 parent 37626da commit 4dd000a

File tree

5 files changed

+220
-11
lines changed

5 files changed

+220
-11
lines changed

pulp_python/app/serializers.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
)
2323
from pulp_python.app.utils import (
2424
DIST_EXTENSIONS,
25+
artifact_to_metadata_artifact,
2526
artifact_to_python_content_data,
2627
get_project_metadata_from_file,
2728
parse_project_metadata,
@@ -93,11 +94,35 @@ class Meta:
9394
model = python_models.PythonDistribution
9495

9596

97+
class PythonSingleContentArtifactField(core_serializers.SingleContentArtifactField):
    """
    Artifact field that tolerates content carrying an additional metadata artifact.

    Meant to be used only in PythonPackageContentSerializer, where a package may own
    a second artifact stored under a relative path ending in ".metadata".
    """

    def get_attribute(self, instance):
        """
        Return the artifact that represents the package file itself.

        Returns None when the content has no artifacts and the only artifact when
        there is exactly one. With several artifacts, the artifact of the first
        content artifact whose relative path does not end in ".metadata" is returned,
        falling back to the first artifact when every path ends in ".metadata".
        """
        # Count once and reuse the value; calling .count() again may issue another query.
        artifact_count = instance._artifacts.count()
        if artifact_count == 0:
            return None
        if artifact_count > 1:
            # .first() already yields None for an empty queryset, so a preceding
            # .exists() check would only add a query.
            main_content_artifact = instance.contentartifact_set.exclude(
                relative_path__endswith=".metadata"
            ).first()
            if main_content_artifact is not None:
                return main_content_artifact.artifact
        return instance._artifacts.all()[0]
115+
116+
96117
class PythonPackageContentSerializer(core_serializers.SingleArtifactContentUploadSerializer):
97118
"""
98119
A Serializer for PythonPackageContent.
99120
"""
100121

122+
artifact = PythonSingleContentArtifactField(
123+
help_text=_("Artifact file representing the physical content"),
124+
)
125+
101126
# Core metadata
102127
# Version 1.0
103128
author = serializers.CharField(
@@ -386,8 +411,21 @@ def deferred_validate(self, data):
386411
if attestations := data.pop("attestations", None):
387412
data["provenance"] = self.handle_attestations(filename, data["sha256"], attestations)
388413

414+
# Create metadata artifact for wheel files
415+
if filename.endswith(".whl"):
416+
if metadata_artifact := artifact_to_metadata_artifact(filename, artifact):
417+
data["metadata_artifact"] = metadata_artifact
418+
data["metadata_sha256"] = metadata_artifact.sha256
419+
389420
return data
390421

422+
def get_artifacts(self, validated_data):
    """
    Extend the parent's artifact mapping with the wheel metadata artifact.

    A "metadata_artifact" entry, when present, is removed from validated_data and
    registered in the mapping under "<filename>.metadata".
    """
    artifact_map = super().get_artifacts(validated_data)
    metadata_artifact = validated_data.pop("metadata_artifact", None)
    if metadata_artifact:
        metadata_path = f"{validated_data['filename']}.metadata"
        artifact_map[metadata_path] = metadata_artifact
    return artifact_map
428+
391429
def retrieve(self, validated_data):
392430
content = python_models.PythonPackageContent.objects.filter(
393431
sha256=validated_data["sha256"], _pulp_domain=get_domain()

pulp_python/app/tasks/sync.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,11 +229,15 @@ async def create_content(self, pkg):
229229
create a Content Unit to put into the pipeline
230230
"""
231231
declared_contents = {}
232+
page = await aget_remote_simple_page(pkg.name, self.remote)
233+
upstream_pkgs = {pkg.filename: pkg for pkg in page.packages}
234+
232235
for version, dists in pkg.releases.items():
233236
for package in dists:
234237
entry = parse_metadata(pkg.info, version, package)
235238
url = entry.pop("url")
236239
size = package["size"] or None
240+
d_artifacts = []
237241

238242
artifact = Artifact(sha256=entry["sha256"], size=size)
239243
package = PythonPackageContent(**entry)
@@ -245,11 +249,28 @@ async def create_content(self, pkg):
245249
remote=self.remote,
246250
deferred_download=self.deferred_download,
247251
)
248-
dc = DeclarativeContent(content=package, d_artifacts=[da])
252+
d_artifacts.append(da)
253+
254+
if upstream_pkg := upstream_pkgs.get(entry["filename"]):
255+
if upstream_pkg.has_metadata:
256+
url = upstream_pkg.metadata_url
257+
md_sha256 = upstream_pkg.metadata_digests.get("sha256")
258+
artifact = Artifact(sha256=md_sha256)
259+
260+
metadata_artifact = DeclarativeArtifact(
261+
artifact=artifact,
262+
url=url,
263+
relative_path=f"{entry['filename']}.metadata",
264+
remote=self.remote,
265+
deferred_download=self.deferred_download,
266+
)
267+
d_artifacts.append(metadata_artifact)
268+
269+
dc = DeclarativeContent(content=package, d_artifacts=d_artifacts)
249270
declared_contents[entry["filename"]] = dc
250271
await self.python_stage.put(dc)
251272

252-
if pkg.releases and (page := await aget_remote_simple_page(pkg.name, self.remote)):
273+
if pkg.releases and page:
253274
if self.remote.provenance:
254275
await self.sync_provenance(page, declared_contents)
255276

pulp_python/app/tasks/upload.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
Provenance,
1616
verify_provenance,
1717
)
18-
from pulp_python.app.utils import artifact_to_python_content_data
18+
from pulp_python.app.utils import artifact_to_metadata_artifact, artifact_to_python_content_data
1919

2020

2121
def upload(artifact_sha256, filename, attestations=None, repository_pk=None):
@@ -97,6 +97,11 @@ def create_content(artifact_sha256, filename, domain):
9797
def create():
    """
    Create the package content together with its content artifacts.

    The uploaded file is attached under its filename; when a metadata artifact can
    be derived from it, that artifact is attached under "<filename>.metadata".
    """
    package = PythonPackageContent.objects.create(**data)
    ContentArtifact.objects.create(artifact=artifact, content=package, relative_path=filename)

    metadata_artifact = artifact_to_metadata_artifact(filename, artifact)
    if metadata_artifact:
        metadata_path = f"{filename}.metadata"
        ContentArtifact.objects.create(
            artifact=metadata_artifact, content=package, relative_path=metadata_path
        )
    return package
101106

102107
new_content = create()

pulp_python/app/utils.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import hashlib
2+
import logging
23
import pkginfo
34
import re
45
import shutil
@@ -14,10 +15,13 @@
1415
from packaging.requirements import Requirement
1516
from packaging.version import parse, InvalidVersion
1617
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage
17-
from pulpcore.plugin.models import Remote
18+
from pulpcore.plugin.models import Artifact, Remote
1819
from pulpcore.plugin.exceptions import TimeoutException
1920

2021

22+
log = logging.getLogger(__name__)
23+
24+
2125
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
2226
"""TODO This serial constant is temporary until Python repositories implements serials"""
2327
PYPI_SERIAL_CONSTANT = 1000000000
@@ -206,25 +210,34 @@ def get_project_metadata_from_file(filename):
206210
return metadata
207211

208212

209-
def compute_metadata_sha256(filename: str) -> str | None:
213+
def extract_wheel_metadata(filename: str) -> bytes | None:
210214
"""
211-
Compute SHA256 hash of the metadata file from a Python package.
215+
Extract the metadata file content from a wheel file.
212216
213-
Returns SHA256 hash or None if metadata cannot be extracted.
217+
Returns the raw metadata content as bytes or None if metadata cannot be extracted.
214218
"""
215219
if not filename.endswith(".whl"):
216220
return None
217221
try:
218222
with zipfile.ZipFile(filename, "r") as f:
219223
for file_path in f.namelist():
220224
if file_path.endswith(".dist-info/METADATA"):
221-
metadata_content = f.read(file_path)
222-
return hashlib.sha256(metadata_content).hexdigest()
223-
except (zipfile.BadZipFile, KeyError, OSError):
224-
pass
225+
return f.read(file_path)
226+
except (zipfile.BadZipFile, KeyError, OSError) as e:
227+
log.warning(f"Failed to extract metadata file from {filename}: {e}")
225228
return None
226229

227230

231+
def compute_metadata_sha256(filename: str) -> str | None:
    """
    Hash the metadata file of a Python package with SHA256.

    Returns the hex digest, or None if no metadata content could be extracted.
    """
    content = extract_wheel_metadata(filename)
    if not content:
        return None
    return hashlib.sha256(content).hexdigest()
239+
240+
228241
def artifact_to_python_content_data(filename, artifact, domain=None):
229242
"""
230243
Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
@@ -233,6 +246,7 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
233246
# because pkginfo validates that the filename has a valid extension before
234247
# reading it
235248
with tempfile.NamedTemporaryFile("wb", dir=".", suffix=filename) as temp_file:
249+
artifact.file.seek(0)
236250
shutil.copyfileobj(artifact.file, temp_file)
237251
temp_file.flush()
238252
metadata = get_project_metadata_from_file(temp_file.name)
@@ -245,6 +259,28 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
245259
return data
246260

247261

262+
def artifact_to_metadata_artifact(filename: str, artifact: Artifact) -> Artifact | None:
    """
    Creates artifact for metadata from the provided wheel artifact.

    The wheel is copied to a temporary file, its METADATA content is extracted and
    stored as a separate artifact.

    Args:
        filename: Name of the package file; anything not ending in ".whl" is ignored.
        artifact: Artifact holding the wheel file.

    Returns:
        The saved metadata artifact (the already stored one when identical metadata
        was saved before), or None when there is no metadata to extract.
    """
    # Imported locally so this function does not rely on the module's import section.
    from django.db import IntegrityError, transaction

    if not filename.endswith(".whl"):
        return None

    with tempfile.NamedTemporaryFile("wb", dir=".", suffix=filename) as temp_file:
        artifact.file.seek(0)
        shutil.copyfileobj(artifact.file, temp_file)
        temp_file.flush()
        metadata_content = extract_wheel_metadata(temp_file.name)
    # The wheel copy is no longer needed once the metadata bytes are in memory.
    if not metadata_content:
        return None

    with tempfile.NamedTemporaryFile(suffix=".metadata") as metadata_temp:
        metadata_temp.write(metadata_content)
        metadata_temp.flush()
        metadata_artifact = Artifact.init_and_validate(metadata_temp.name)
        try:
            # The savepoint keeps an enclosing transaction usable if the insert fails.
            with transaction.atomic():
                metadata_artifact.save()
        except IntegrityError:
            # Identical metadata is already stored (e.g. the same wheel was handled
            # before); reuse that artifact instead of failing.
            metadata_artifact = Artifact.objects.get(
                sha256=metadata_artifact.sha256, pulp_domain=metadata_artifact.pulp_domain
            )
            metadata_artifact.touch()
    return metadata_artifact
282+
283+
248284
def fetch_json_release_metadata(name: str, version: str, remotes: set[Remote]) -> dict:
249285
"""
250286
Fetches metadata for a specific release from PyPI's JSON API. A release can contain
@@ -408,6 +444,7 @@ def find_artifact():
408444
_art = models.RemoteArtifact.objects.filter(content_artifact=content_artifact).first()
409445
return _art
410446

447+
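# TODO: fix .first() - content may now also own a ".metadata" content artifact, so the first one is not necessarily the package file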
411448
content_artifact = content.contentartifact_set.first()
412449
artifact = find_artifact()
413450
origin = settings.CONTENT_ORIGIN or settings.PYPI_API_HOSTNAME or ""

pulp_python/tests/functional/api/test_pypi_apis.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@
1010
PYTHON_MD_PYPI_SUMMARY,
1111
PYTHON_EGG_FILENAME,
1212
PYTHON_EGG_SHA256,
13+
PYTHON_WHEEL_FILENAME,
1314
PYTHON_WHEEL_SHA256,
15+
PYTHON_WHEEL_URL,
16+
PYTHON_XS_PROJECT_SPECIFIER,
1417
SHELF_PYTHON_JSON,
1518
)
19+
from pulpcore.pytest_plugin import pulp_content_url
1620

1721

1822
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
@@ -137,6 +141,110 @@ def test_package_upload_simple(
137141
assert summary.added["python.python"]["count"] == 1
138142

139143

144+
# todo: tests + moving
145+
# PythonPackageSingleArtifactContentUploadViewSet - create
146+
def test_wheel_package_upload_with_metadata_1(
    delete_orphans_pre,
    pulp_content_url,
    python_content_factory,
    python_distribution_factory,
    python_repo,
):
    """
    Check that a wheel created via python_content_factory exposes its metadata file.
    """
    python_content_factory(
        repository=python_repo, relative_path=PYTHON_WHEEL_FILENAME, url=PYTHON_WHEEL_URL
    )
    distribution = python_distribution_factory(repository=python_repo)

    # The metadata file is requested next to the wheel, under "<wheel filename>.metadata".
    metadata_path = f"{distribution.base_path}/{PYTHON_WHEEL_FILENAME}.metadata"
    response = requests.get(urljoin(pulp_content_url, metadata_path))
    assert response.status_code == 200
    assert len(response.content) > 0
    assert "Name: shelf-reader" in response.text
166+
167+
168+
# PythonPackageSingleArtifactContentUploadViewSet - upload
169+
def test_wheel_package_upload_with_metadata_2(
    delete_orphans_pre,
    download_python_file,
    monitor_task,
    pulp_content_url,
    python_bindings,
    python_distribution_factory,
    python_repo,
):
    """
    Check that a wheel uploaded through the content API exposes its metadata file.
    """
    wheel_file = download_python_file(PYTHON_WHEEL_FILENAME, PYTHON_WHEEL_URL)
    uploaded = python_bindings.ContentPackagesApi.upload(file=wheel_file)

    modify_response = python_bindings.RepositoriesPythonApi.modify(
        python_repo.pulp_href, {"add_content_units": [uploaded.pulp_href]}
    )
    monitor_task(modify_response.task)
    distribution = python_distribution_factory(repository=python_repo)

    # The metadata file is requested next to the wheel, under "<wheel filename>.metadata".
    metadata_path = f"{distribution.base_path}/{PYTHON_WHEEL_FILENAME}.metadata"
    response = requests.get(urljoin(pulp_content_url, metadata_path))
    assert response.status_code == 200
    assert len(response.content) > 0
    assert "Name: shelf-reader" in response.text
193+
194+
195+
# PythonRepositoryViewSet - sync
196+
def test_wheel_package_upload_with_metadata_3(
    delete_orphans_pre,
    pulp_content_url,
    python_distribution_factory,
    python_remote_factory,
    python_repo_with_sync,
):
    """
    Check that a wheel brought in by a repository sync exposes its metadata file.
    """
    synced_repo = python_repo_with_sync(
        python_remote_factory(includes=PYTHON_XS_PROJECT_SPECIFIER)
    )
    distribution = python_distribution_factory(repository=synced_repo)

    # The metadata file is requested next to the wheel, under "<wheel filename>.metadata".
    metadata_path = f"{distribution.base_path}/{PYTHON_WHEEL_FILENAME}.metadata"
    response = requests.get(urljoin(pulp_content_url, metadata_path))
    assert response.status_code == 200
    assert len(response.content) > 0
    assert "Name: shelf-reader" in response.text
214+
215+
216+
# SimpleView - create
217+
def test_wheel_package_upload_with_metadata_4(
    delete_orphans_pre,
    monitor_task,
    pulp_content_url,
    python_content_summary,
    python_empty_repo_distro,
    python_package_dist_directory,
):
    """
    Check that a wheel posted to the distribution's "simple/" endpoint exposes its
    metadata file.
    """
    repo, distro = python_empty_repo_distro()
    url = urljoin(distro.base_url, "simple/")
    _, _, wheel_file = python_package_dist_directory
    # Close the wheel as soon as the request is done instead of leaking the handle.
    with open(wheel_file, "rb") as wheel:
        response = requests.post(
            url,
            data={"sha256_digest": PYTHON_WHEEL_SHA256},
            files={"content": wheel},
            auth=("admin", "password"),
        )
    assert response.status_code == 202
    monitor_task(response.json()["task"])
    summary = python_content_summary(repository=repo)
    assert summary.added["python.python"]["count"] == 1

    # Test that metadata is accessible
    relative_path = f"{distro.base_path}/{PYTHON_WHEEL_FILENAME}.metadata"
    metadata_url = urljoin(pulp_content_url, relative_path)
    metadata_response = requests.get(metadata_url)
    assert metadata_response.status_code == 200
    assert len(metadata_response.content) > 0
    assert "Name: shelf-reader" in metadata_response.text
246+
247+
140248
@pytest.mark.parallel
141249
def test_twine_upload(
142250
pulpcore_bindings,

0 commit comments

Comments
 (0)