Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions dandi/cli/cmd_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,17 @@
@click.option(
"--sync", is_flag=True, help="Delete local assets that do not exist on the server"
)
@click.option(
"--zarr",
"zarr_filters",
multiple=True,
metavar="FILTER",
help=(
"Filter entries within Zarr assets. Format: TYPE:PATTERN where TYPE "
"is 'glob', 'path', or 'regex'. Predefined: 'metadata'. "
"Can be specified multiple times (OR logic)."
),
)
@instance_option(
default=None,
help=(
Expand Down Expand Up @@ -151,6 +162,7 @@ def download(
format: DownloadFormat,
download_types: set[str],
sync: bool,
zarr_filters: tuple[str, ...],
dandi_instance: str,
path_type: PathType,
preserve_tree: bool,
Expand Down Expand Up @@ -191,6 +203,7 @@ def download(
get_assets="assets" in download_types or preserve_tree,
preserve_tree=preserve_tree,
sync=sync,
zarr_filters=zarr_filters,
path_type=path_type,
# develop_debug=develop_debug
)
19 changes: 18 additions & 1 deletion dandi/cli/cmd_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from .base import devel_option, lgr, map_to_click_exceptions
from .formatter import JSONFormatter, JSONLinesFormatter, PYOUTFormatter, YAMLFormatter
from ..consts import ZARR_EXTENSIONS, metadata_all_fields
from ..dandiarchive import DandisetURL, _dandi_url_parser, parse_dandi_url
from ..dandiapi import BaseRemoteZarrAsset
from ..dandiarchive import (
AssetZarrEntryURL,
DandisetURL,
_dandi_url_parser,
parse_dandi_url,
)
from ..dandiset import Dandiset
from ..misctypes import Digest
from ..support.pyout import PYOUT_SHORT_NAMES, PYOUT_SHORT_NAMES_rev
Expand Down Expand Up @@ -143,6 +149,17 @@ def assets_gen():
if metadata in ("all", "assets"):
rec["metadata"] = a.get_raw_metadata()
yield rec
# If URL points into a zarr, also list entries
if isinstance(parsed_url, AssetZarrEntryURL) and isinstance(
a, BaseRemoteZarrAsset
):
for entry in a.iterfiles(
prefix=parsed_url.zarr_subpath
):
yield {
"path": f"{a.path}/{entry}",
"size": entry.size,
}
else:
# For now we support only individual files
yield path
Expand Down
14 changes: 13 additions & 1 deletion dandi/cli/cmd_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
instance_option,
map_to_click_exceptions,
)
from ..upload import UploadExisting, UploadValidation
from ..upload import UploadExisting, UploadValidation, ZarrMode


@click.command()
Expand Down Expand Up @@ -48,6 +48,16 @@
default="require",
show_default=True,
)
@click.option(
"--zarr-mode",
type=click.Choice(list(ZarrMode)),
default="full",
help=(
"Zarr sync mode: 'full' (default) syncs completely; "
"'patch' uploads/updates without deleting remote files."
),
show_default=True,
)
@click.argument("paths", nargs=-1) # , type=click.Path(exists=True, dir_okay=False))
# &
# Development options: Set DANDI_DEVEL for them to become available
Expand Down Expand Up @@ -75,6 +85,7 @@ def upload(
dandi_instance: str,
existing: UploadExisting,
validation: UploadValidation,
zarr_mode: ZarrMode,
# Development options should come as kwargs
allow_any_path: bool = False,
upload_dandiset_metadata: bool = False,
Expand Down Expand Up @@ -115,4 +126,5 @@ def upload(
jobs=jobs,
jobs_per_file=jobs_per_file,
sync=sync,
zarr_mode=zarr_mode,
)
7 changes: 7 additions & 0 deletions dandi/cli/tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def test_download_defaults(mocker):
get_assets=True,
preserve_tree=False,
sync=False,
zarr_filters=(),
path_type=PathType.EXACT,
)

Expand All @@ -44,6 +45,7 @@ def test_download_all_types(mocker):
get_assets=True,
preserve_tree=False,
sync=False,
zarr_filters=(),
path_type=PathType.EXACT,
)

Expand All @@ -63,6 +65,7 @@ def test_download_metadata_only(mocker):
get_assets=False,
preserve_tree=False,
sync=False,
zarr_filters=(),
path_type=PathType.EXACT,
)

Expand All @@ -82,6 +85,7 @@ def test_download_assets_only(mocker):
get_assets=True,
preserve_tree=False,
sync=False,
zarr_filters=(),
path_type=PathType.EXACT,
)

Expand Down Expand Up @@ -116,6 +120,7 @@ def test_download_gui_instance_in_dandiset(mocker):
get_assets=True,
preserve_tree=False,
sync=False,
zarr_filters=(),
path_type=PathType.EXACT,
)

Expand All @@ -142,6 +147,7 @@ def test_download_api_instance_in_dandiset(mocker):
get_assets=True,
preserve_tree=False,
sync=False,
zarr_filters=(),
path_type=PathType.EXACT,
)

Expand All @@ -168,6 +174,7 @@ def test_download_url_instance_match(mocker):
get_assets=True,
preserve_tree=False,
sync=False,
zarr_filters=(),
path_type=PathType.EXACT,
)

Expand Down
91 changes: 85 additions & 6 deletions dandi/dandiarchive.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
PUBLISHED_VERSION_REGEX,
RETRY_STATUSES,
VERSION_REGEX,
ZARR_EXTENSIONS,
DandiInstance,
known_instances,
)
Expand Down Expand Up @@ -445,6 +446,72 @@ def get_assets(
)


def split_zarr_location(location: str) -> tuple[str, str] | None:
    """Separate a path into the Zarr asset and the entry path inside it.

    The location is broken into its non-empty ``/``-separated components.
    The first component whose name ends with one of ``ZARR_EXTENSIONS``
    (``.zarr``, ``.ngff``) marks the Zarr boundary: everything up to and
    including it is the asset path, everything after it is the subpath
    within the Zarr.

    Parameters
    ----------
    location : str
        A POSIX-style path, e.g. ``"sub-1/file.ome.zarr/0/0/0"``.

    Returns
    -------
    tuple[str, str] | None
        ``(asset_path, zarr_subpath)`` when a Zarr boundary is followed by
        at least one more component; ``None`` when no component carries a
        Zarr extension or nothing follows the boundary.

    Examples
    --------
    >>> split_zarr_location("sub-1/file.ome.zarr/0/0/0")
    ('sub-1/file.ome.zarr', '0/0/0')
    >>> split_zarr_location("sub-1/file.ome.zarr")  # no subpath
    >>> split_zarr_location("sub-1/file.nwb")  # no zarr extension
    """
    # str.endswith accepts a tuple of suffixes, so one call covers them all.
    zarr_suffixes = tuple(ZARR_EXTENSIONS)
    # Empty components (leading, trailing or doubled slashes) are discarded.
    segments = [segment for segment in location.split("/") if segment]
    for boundary, segment in enumerate(segments, start=1):
        if not segment.endswith(zarr_suffixes):
            continue
        # Only the first Zarr-looking component is ever considered.
        inner = "/".join(segments[boundary:])
        if not inner:
            return None
        return "/".join(segments[:boundary]), inner
    return None


@dataclass
class AssetZarrEntryURL(SingleAssetURL):
    """URL that addresses entries located inside a Zarr asset.

    For example, ``dandi://dandi/000108/sub-1/file.ome.zarr/0/0/0`` is
    represented with ``asset_path="sub-1/file.ome.zarr"`` and
    ``zarr_subpath="0/0/0"``.
    """

    asset_path: str  # e.g., "sub-1/file.ome.zarr"
    zarr_subpath: str  # e.g., "0/0/0"

    def get_assets(
        self, client: DandiAPIClient, order: str | None = None, strict: bool = False
    ) -> Iterator[BaseRemoteAsset]:
        """Yield the asset stored at ``asset_path`` in the Dandiset.

        The Dandiset is resolved first. If that lookup raises
        `NotFoundError`, the error is re-raised when ``strict`` is true and
        otherwise the generator ends without yielding anything. The asset
        lookup itself runs under ``_maybe_strict(strict)``.

        ``order`` is accepted but not used by this method.
        """
        try:
            dandiset = self.get_dandiset(client, lazy=not strict)
            assert dandiset is not None
            dandiset.version_id  # Force version evaluation
        except NotFoundError:
            if not strict:
                return
            raise
        with _maybe_strict(strict):
            yield dandiset.get_asset_by_path(self.asset_path)


@dataclass
class AssetFolderURL(MultiAssetURL):
"""
Expand Down Expand Up @@ -845,12 +912,24 @@ def parse(
path=location,
)
else:
parsed_url = AssetItemURL(
instance=instance,
dandiset_id=dandiset_id,
version_id=version_id,
path=location,
)
# Check if location crosses a zarr boundary
zarr_split = split_zarr_location(location)
if zarr_split is not None:
asset_path, zarr_subpath = zarr_split
parsed_url = AssetZarrEntryURL(
instance=instance,
dandiset_id=dandiset_id,
version_id=version_id,
asset_path=asset_path,
zarr_subpath=zarr_subpath,
)
else:
parsed_url = AssetItemURL(
instance=instance,
dandiset_id=dandiset_id,
version_id=version_id,
path=location,
)
elif asset_id:
if dandiset_id is None:
parsed_url = BaseAssetIDURL(instance=instance, asset_id=asset_id)
Expand Down
Loading
Loading