diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0108c47..1fc7edb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,6 +14,14 @@ jobs: os: [ ubuntu-latest, windows-latest, macos-latest ] fail-fast: false runs-on: ${{ matrix.os }} + services: + s3mock: + # This service will only start if the runner is Linux + image: ${{ matrix.os == 'ubuntu-latest' && 'adobe/s3mock:3.11.0' || '' }} + ports: + - 9090:9090 + env: + initialBuckets: zarr-test-bucket defaults: run: shell: bash @@ -49,8 +57,13 @@ jobs: - name: Test env: MAVEN_OPTS: "-Xmx6g" - run: mvn --no-transfer-progress test -DargLine="-Xmx6g" - + run: | + if [ "${{ matrix.os }}" == "ubuntu-latest" ]; then + mvn --no-transfer-progress test -DargLine="-Xmx6g" + else + # Skip S3 tests on Windows/macOS where the service isn't running (labeled with @Tag("s3")) + mvn --no-transfer-progress test -DargLine="-Xmx6g" -DexcludedGroups="s3" + fi - name: Assemble JAR run: mvn package -DskipTests diff --git a/.gitignore b/.gitignore index fcafa91..4c52107 100644 --- a/.gitignore +++ b/.gitignore @@ -45,4 +45,5 @@ build/ /main.py /pyproject.toml /uv.lock -**/__pycache__ \ No newline at end of file +**/__pycache__ +/dependency-reduced-pom.xml diff --git a/pom.xml b/pom.xml index 3df9b78..5d1e750 100644 --- a/pom.xml +++ b/pom.xml @@ -121,6 +121,11 @@ 4.13.1 test + + org.apache.commons + commons-compress + 1.28.0 + @@ -139,6 +144,10 @@ 3.2.5 false + + + 1.44 + diff --git a/src/main/java/dev/zarr/zarrjava/core/Array.java b/src/main/java/dev/zarr/zarrjava/core/Array.java index a8efaeb..a08f62b 100644 --- a/src/main/java/dev/zarr/zarrjava/core/Array.java +++ b/src/main/java/dev/zarr/zarrjava/core/Array.java @@ -3,6 +3,7 @@ import dev.zarr.zarrjava.ZarrException; import dev.zarr.zarrjava.core.codec.CodecPipeline; import dev.zarr.zarrjava.store.FilesystemStore; +import dev.zarr.zarrjava.store.Store; import dev.zarr.zarrjava.store.StoreHandle; import dev.zarr.zarrjava.utils.IndexingUtils; import dev.zarr.zarrjava.utils.MultiArrayUtils; @@ -16,6 +17,9 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; public abstract class Array extends AbstractNode { @@ -289,6 +293,15 @@ public ucar.ma2.Array read(final long[] offset, final int[] shape, final boolean if (parallel) { chunkStream = chunkStream.parallel(); } + + boolean isListableStore = storeHandle.store instanceof Store.ListableStore; + Set> existingKeys; + if (isListableStore) { + existingKeys = storeHandle.list().map(Arrays::asList).collect(Collectors.toSet()); + } else { + existingKeys = null; + } + chunkStream.forEach( chunkCoords -> { try { @@ -306,9 +319,13 @@ public ucar.ma2.Array read(final long[] offset, final int[] shape, final boolean final String[] chunkKeys = metadata.chunkKeyEncoding().encodeChunkKey(chunkCoords); final StoreHandle chunkHandle = storeHandle.resolve(chunkKeys); - if (!chunkHandle.exists()) { - return; - } + + // chunkHandle.exists() can be expensive on some store types, so we optimize for ListableStore + if (isListableStore) { + if (existingKeys.stream().noneMatch(Arrays.asList(chunkKeys)::equals)) + return; + } else if (!chunkHandle.exists()) return; + if (codecPipeline.supportsPartialDecode()) { final ucar.ma2.Array chunkArray = codecPipeline.decodePartial(chunkHandle, Utils.toLongArray(chunkProjection.chunkOffset), chunkProjection.shape); diff --git a/src/main/java/dev/zarr/zarrjava/core/Group.java b/src/main/java/dev/zarr/zarrjava/core/Group.java index c89617a..7b425a3 100644 --- a/src/main/java/dev/zarr/zarrjava/core/Group.java +++ b/src/main/java/dev/zarr/zarrjava/core/Group.java @@ -9,7 +9,6 @@ import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.Objects; import java.util.stream.Stream; public abstract class Group extends AbstractNode { @@ -64,20 +63,15 @@ public static Group open(String path) throws IOException, ZarrException { } @Nullable - public abstract Node get(String key) throws ZarrException, IOException; + public abstract Node get(String[] key) throws ZarrException, IOException; - public Stream list() { - return storeHandle.list() - .map(key -> { - try { - return get(key); - } catch (Exception e) { - throw new RuntimeException(e); - } - }) - .filter(Objects::nonNull); + @Nullable + public Node get(String key) throws ZarrException, IOException { + return get(new String[]{key}); } + public abstract Stream list(); + public Node[] listAsArray() { try (Stream nodeStream = list()) { return nodeStream.toArray(Node[]::new); diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java new file mode 100644 index 0000000..934da19 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -0,0 +1,314 @@ +package dev.zarr.zarrjava.store; + +import org.apache.commons.compress.archivers.zip.Zip64Mode; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Comparator; +import java.util.stream.Stream; +import java.util.zip.CRC32; +import java.util.zip.ZipEntry; + + +/** + * A Store implementation that buffers reads and writes and flushes them to an underlying Store as a zip file. + */ +public class BufferedZipStore extends ZipStore { + + private final Store.ListableStore bufferStore; + private final boolean flushOnWrite; + private final Comparator zipEntryComparator = (a, b) -> { + boolean aIsZarr = a.length > 0 && a[a.length - 1].equals("zarr.json"); + boolean bIsZarr = b.length > 0 && b[b.length - 1].equals("zarr.json"); + // first all zarr.json files + if (aIsZarr && !bIsZarr) { + return -1; + } else if (!aIsZarr && bIsZarr) { + return 1; + } else if (aIsZarr && bIsZarr) { + // sort zarr.json in BFS order within same depth by lexicographical order + if (a.length != b.length) { + return Integer.compare(a.length, b.length); + } else { + return String.join("/", a).compareTo(String.join("/", b)); + } + } else { + // then all other files in lexicographical order + return String.join("/", a).compareTo(String.join("/", b)); + } + }; + private String archiveComment; + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment, boolean flushOnWrite) { + super(underlyingStore); + this.bufferStore = bufferStore; + this.archiveComment = archiveComment; + this.flushOnWrite = flushOnWrite; + try { + loadBuffer(); + } catch (IOException e) { + throw new RuntimeException("Failed to load buffer from underlying store", e); + } + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment) { + this(underlyingStore, bufferStore, archiveComment, false); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore) { + this(underlyingStore, bufferStore, null); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, String archiveComment) { + this(underlyingStore, new MemoryStore(), archiveComment); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore) { + this(underlyingStore, (String) null); + } + + public BufferedZipStore(@Nonnull Path underlyingStore, String archiveComment) { + this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString()), archiveComment); + } + + public BufferedZipStore(@Nonnull Path underlyingStore) { + this(underlyingStore, null); + } + + public BufferedZipStore(@Nonnull String underlyingStorePath, String archiveComment) { + this(Paths.get(underlyingStorePath), archiveComment); + } + + public BufferedZipStore(@Nonnull String underlyingStorePath) { + this(underlyingStorePath, null); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, boolean flushOnWrite) { + this(underlyingStore, bufferStore, null, flushOnWrite); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, String archiveComment, boolean flushOnWrite) { + this(underlyingStore, new MemoryStore(), archiveComment, flushOnWrite); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, boolean flushOnWrite) { + this(underlyingStore, (String) null, flushOnWrite); + } + + public BufferedZipStore(@Nonnull Path underlyingStore, String archiveComment, boolean flushOnWrite) { + this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString()), archiveComment, flushOnWrite); + } + + public BufferedZipStore(@Nonnull Path underlyingStore, boolean flushOnWrite) { + this(underlyingStore, null, flushOnWrite); + } + + public BufferedZipStore(@Nonnull String underlyingStorePath, String archiveComment, boolean flushOnWrite) { + this(Paths.get(underlyingStorePath), archiveComment, flushOnWrite); + } + + public BufferedZipStore(@Nonnull String underlyingStorePath, boolean flushOnWrite) { + this(underlyingStorePath, null, flushOnWrite); + } + + private void writeBuffer() throws IOException { + // create zip file bytes from buffer store and write to underlying store + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(baos)) { + zos.setUseZip64(Zip64Mode.AsNeeded); + if (archiveComment != null) { + zos.setComment(archiveComment); + } + bufferStore.list().sorted(zipEntryComparator).forEach(keys -> { + try { + if (keys == null || keys.length == 0) { + // skip root entry + return; + } + String entryName = String.join("/", keys); + ByteBuffer bb = bufferStore.get(keys); + if (bb == null) { + // directory entry: ensure trailing slash + if (!entryName.endsWith("/")) { + entryName = entryName + "/"; + } + ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); + dirEntry.setMethod(ZipEntry.STORED); + dirEntry.setSize(0); + dirEntry.setCrc(0); + zos.putArchiveEntry(dirEntry); + zos.closeArchiveEntry(); + } else { + // read bytes from ByteBuffer without modifying original + ByteBuffer dup = bb.duplicate(); + int len = dup.remaining(); + byte[] bytes = new byte[len]; + dup.get(bytes); + + // compute CRC and set size for STORED (no compression) + CRC32 crc = new CRC32(); + crc.update(bytes, 0, bytes.length); + ZipArchiveEntry fileEntry = new ZipArchiveEntry(entryName); + fileEntry.setMethod(ZipEntry.STORED); + fileEntry.setSize(bytes.length); + fileEntry.setCrc(crc.getValue()); + + zos.putArchiveEntry(fileEntry); + zos.write(bytes); + zos.closeArchiveEntry(); + } + } catch (IOException e) { + // wrap checked exception so it can be rethrown from stream for handling below + throw new RuntimeException(e); + } + }); + zos.finish(); + } catch (RuntimeException e) { + // unwrap and rethrow IOExceptions thrown inside the lambda + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw e; + } + + byte[] zipBytes = baos.toByteArray(); + // write zip bytes back to underlying store + underlyingStore.set(ByteBuffer.wrap(zipBytes)); + } + + public void deleteArchiveComment() throws IOException { + this.setArchiveComment(null); + } + + /** + * Loads the buffer from the underlying store zip file. + */ + private void loadBuffer() throws IOException { + String loadedArchiveComment = super.getArchiveComment(); + if (loadedArchiveComment != null && this.archiveComment == null) { + // don't overwrite existing archiveComment + this.archiveComment = loadedArchiveComment; + } + + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { + return; + } + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { + if (entry.isDirectory()) { + continue; + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] tmp = new byte[8192]; + int read; + while ((read = zis.read(tmp)) != -1) { + baos.write(tmp, 0, read); + } + byte[] bytes = baos.toByteArray(); + bufferStore.set(new String[]{entry.getName()}, ByteBuffer.wrap(bytes)); + } + } + } + + /** + * Flushes the buffer and archiveComment to the underlying store as a zip file. + */ + public void flush() throws IOException { + writeBuffer(); + } + + @Override + public String getArchiveComment() { + return archiveComment; + } + + public void setArchiveComment(@Nullable String archiveComment) throws IOException { + this.archiveComment = archiveComment; + if (flushOnWrite) { + writeBuffer(); + } + } + + @Override + public Stream list(String[] keys) { + return bufferStore.list(keys); + } + + @Override + public boolean exists(String[] keys) { + return bufferStore.exists(keys); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys) { + return bufferStore.get(keys); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start) { + return bufferStore.get(keys, start); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start, long end) { + return bufferStore.get(keys, start, end); + } + + @Override + public void set(String[] keys, ByteBuffer bytes) { + bufferStore.set(keys, bytes); + if (flushOnWrite) { + try { + writeBuffer(); + } catch (IOException e) { + throw new RuntimeException("Failed to flush buffer to underlying store after set operation", e); + } + } + } + + @Override + public void delete(String[] keys) { + bufferStore.delete(keys); + if (flushOnWrite) { + try { + writeBuffer(); + } catch (IOException e) { + throw new RuntimeException("Failed to flush buffer to underlying store after delete operation", e); + } + } + } + + @Nonnull + @Override + public StoreHandle resolve(String... keys) { + return new StoreHandle(this, keys); + } + + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + return bufferStore.getInputStream(keys, start, end); + } + + public long getSize(String[] keys) { + return bufferStore.getSize(keys); + } + + @Override + public String toString() { + return "BufferedZipStore(" + underlyingStore.toString() + ")"; + } +} diff --git a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java index 3d2e447..90da970 100644 --- a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java @@ -1,10 +1,12 @@ package dev.zarr.zarrjava.store; import dev.zarr.zarrjava.utils.Utils; +import org.apache.commons.io.input.BoundedInputStream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; import java.nio.file.*; @@ -118,9 +120,19 @@ public void delete(String[] keys) { } } - public Stream list(String[] keys) { + public Stream list(String[] keys) { + Path keyPath = resolveKeys(keys); try { - return Files.list(resolveKeys(keys)).map(p -> p.toFile().getName()); + return Files.walk(keyPath) + .filter(path -> !path.equals(keyPath)) + .map(path -> { + Path relativePath = keyPath.relativize(path); + String[] parts = new String[relativePath.getNameCount()]; + for (int i = 0; i < relativePath.getNameCount(); i++) { + parts[i] = relativePath.getName(i).toString(); + } + return parts; + }); } catch (IOException e) { throw new RuntimeException(e); } @@ -137,4 +149,38 @@ public String toString() { return this.path.toUri().toString().replaceAll("\\/$", ""); } + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + Path keyPath = resolveKeys(keys); + try { + if (!Files.exists(keyPath)) { + return null; + } + InputStream inputStream = Files.newInputStream(keyPath); + if (start > 0) { + long skipped = inputStream.skip(start); + if (skipped < start) { + throw new IOException("Unable to skip to the desired start position."); + } + } + if (end != -1) { + long bytesToRead = end - start; + return new BoundedInputStream(inputStream, bytesToRead); + } else { + return inputStream; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public long getSize(String[] keys) { + try { + return Files.size(resolveKeys(keys)); + } catch (NoSuchFileException e) { + return -1; + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/HttpStore.java b/src/main/java/dev/zarr/zarrjava/store/HttpStore.java index cb7ddc2..31d5a0a 100644 --- a/src/main/java/dev/zarr/zarrjava/store/HttpStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/HttpStore.java @@ -4,7 +4,9 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.io.FilterInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; public class HttpStore implements Store { @@ -100,4 +102,59 @@ public StoreHandle resolve(String... keys) { public String toString() { return uri; } + + @Override + @Nullable + public InputStream getInputStream(String[] keys, long start, long end) { + if (start < 0) { + throw new IllegalArgumentException("Argument 'start' needs to be non-negative."); + } + Request request = new Request.Builder().url(resolveKeys(keys)).header( + "Range", String.format("Bytes=%d-%d", start, end - 1)).build(); + Call call = httpClient.newCall(request); + try { + Response response = call.execute(); + ResponseBody body = response.body(); + if (body == null) return null; + InputStream stream = body.byteStream(); + + // Ensure closing the stream also closes the response + return new FilterInputStream(stream) { + @Override + public void close() throws IOException { + super.close(); + body.close(); + } + }; + } catch (IOException e) { + return null; + } + } + + @Override + public long getSize(String[] keys) { + // Explicitly request "identity" encoding to prevent OkHttp from adding "gzip" + // and subsequently stripping the Content-Length header. + Request request = new Request.Builder() + .head() + .url(resolveKeys(keys)) + .header("Accept-Encoding", "identity") + .build(); + + Call call = httpClient.newCall(request); + try { + Response response = call.execute(); + if (!response.isSuccessful()) { + return -1; + } + + String contentLength = response.header("Content-Length"); + if (contentLength != null) { + return Long.parseLong(contentLength); + } + return -1; + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java index 42ac6ff..ea855c0 100644 --- a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java @@ -2,6 +2,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.*; import java.util.concurrent.ConcurrentHashMap; @@ -45,7 +46,7 @@ public ByteBuffer get(String[] keys, long start, long end) { if (bytes == null) return null; if (end < 0) end = bytes.length; if (end > Integer.MAX_VALUE) throw new IllegalArgumentException("End index too large"); - return ByteBuffer.wrap(bytes, (int) start, (int) end); + return ByteBuffer.wrap(bytes, (int) start, (int) (end - start)); } @@ -59,19 +60,18 @@ public void delete(String[] keys) { map.remove(resolveKeys(keys)); } - public Stream list(String[] keys) { + public Stream list(String[] keys) { List prefix = resolveKeys(keys); - Set allKeys = new HashSet<>(); + Set> allKeys = new HashSet<>(); for (List k : map.keySet()) { if (k.size() <= prefix.size() || !k.subList(0, prefix.size()).equals(prefix)) continue; - for (int i = 0; i < k.size(); i++) { - List subKey = k.subList(0, i + 1); - allKeys.add(String.join("/", subKey)); + for (int i = prefix.size(); i < k.size(); i++) { + allKeys.add(k.subList(prefix.size(), i + 1)); } } - return allKeys.stream(); + return allKeys.stream().map(k -> k.toArray(new String[0])); } @Nonnull @@ -84,5 +84,22 @@ public StoreHandle resolve(String... keys) { public String toString() { return String.format("", hashCode()); } -} + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + byte[] bytes = map.get(resolveKeys(keys)); + if (bytes == null) return null; + if (end < 0) end = bytes.length; + if (end > Integer.MAX_VALUE) throw new IllegalArgumentException("End index too large"); + return new java.io.ByteArrayInputStream(bytes, (int) start, (int) (end - start)); + } + + @Override + public long getSize(String[] keys) { + byte[] bytes = map.get(resolveKeys(keys)); + if (bytes == null) { + return -1; + } + return bytes.length; + } +} diff --git a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java new file mode 100644 index 0000000..1e50da5 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java @@ -0,0 +1,229 @@ +package dev.zarr.zarrjava.store; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.io.input.BoundedInputStream; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; + + +/** + * A Store implementation that provides read-only access to a zip archive stored in an underlying Store. + * Compared to BufferedZipStore, this implementation reads directly from the zip archive without parsing + * its contents into a buffer store first making it more efficient for read-only access to large zip archives. + */ +public class ReadOnlyZipStore extends ZipStore { + + public ReadOnlyZipStore(@Nonnull StoreHandle underlyingStore) { + super(underlyingStore); + } + + public ReadOnlyZipStore(@Nonnull Path underlyingStore) { + this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString())); + } + + public ReadOnlyZipStore(@Nonnull String underlyingStorePath) { + this(Paths.get(underlyingStorePath)); + } + + String resolveKeys(String[] keys) { + return String.join("/", keys); + } + + String[] resolveEntryKeys(String entryKey) { + return entryKey.split("/"); + } + + @Override + public boolean exists(String[] keys) { + return get(keys, 0, 0) != null; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys) { + return get(keys, 0); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start) { + return get(keys, start, -1); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start, long end) { + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { + return null; + } + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { + String entryName = entry.getName(); + + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + if (entry.isDirectory() || !entryName.equals(resolveKeys(keys))) { + continue; + } + + long skipResult = zis.skip(start); + if (skipResult != start) { + throw new IOException("Failed to skip to start position " + start + " in zip entry " + entryName); + } + + long bytesToRead; + if (end != -1) bytesToRead = end - start; + else bytesToRead = Long.MAX_VALUE; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] bufferArray = new byte[8192]; + int len; + while (bytesToRead > 0 && (len = zis.read(bufferArray, 0, (int) Math.min(bufferArray.length, bytesToRead))) != -1) { + baos.write(bufferArray, 0, len); + bytesToRead -= len; + } + byte[] bytes = baos.toByteArray(); + return ByteBuffer.wrap(bytes); + } + } catch (IOException e) { + return null; + } + return null; + } + + @Override + public void set(String[] keys, ByteBuffer bytes) { + throw new UnsupportedOperationException("ReadOnlyZipStore does not support set operation."); + } + + @Override + public void delete(String[] keys) { + throw new UnsupportedOperationException("ReadOnlyZipStore does not support delete operation."); + } + + @Nonnull + @Override + public StoreHandle resolve(String... keys) { + return new StoreHandle(this, keys); + } + + @Override + public String toString() { + return "ReadOnlyZipStore(" + underlyingStore.toString() + ")"; + } + + @Override + public Stream list(String[] keys) { + Stream.Builder builder = Stream.builder(); + + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { + return builder.build(); + } + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { + ZipArchiveEntry entry; + String prefix = resolveKeys(keys); + while ((entry = zis.getNextEntry()) != null) { + String entryName = entry.getName(); + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + if (entryName.endsWith("/")) { + entryName = entryName.substring(0, entryName.length() - 1); + } + if (!entryName.startsWith(prefix) || entryName.equals(prefix)) { + continue; + } + entryName = entryName.substring(prefix.length()); + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + String[] entryKeys = resolveEntryKeys(entryName); + builder.add(entryKeys); + } + } catch (IOException ignored) { + } + return builder.build(); + } + + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + InputStream baseStream = underlyingStore.getInputStream(); + + try { + ZipArchiveInputStream zis = new ZipArchiveInputStream(baseStream); + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { + String entryName = entry.getName(); + + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + if (entry.isDirectory() || !entryName.equals(resolveKeys(keys))) { + continue; + } + + long skipResult = zis.skip(start); + if (skipResult != start) { + throw new IOException("Failed to skip to start position " + start + " in zip entry " + entryName); + } + + long bytesToRead; + if (end != -1) bytesToRead = end - start; + else bytesToRead = Long.MAX_VALUE; + + return new BoundedInputStream(zis, bytesToRead); + } + return null; + } catch (IOException ignored) { + } + return null; + } + + @Override + public long getSize(String[] keys) { + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { + throw new RuntimeException(new IOException("Underlying store input stream is null")); + } + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { + String entryName = entry.getName(); + + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + if (entry.isDirectory() || !entryName.equals(resolveKeys(keys))) { + continue; + } + long size = entry.getSize(); + if (size < 0) { + // read the entire entry to determine size + size = 0; + byte[] bufferArray = new byte[8192]; + int len; + while ((len = zis.read(bufferArray)) != -1) { + size += len; + } + } + return size; + } + return -1; // file not found + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/main/java/dev/zarr/zarrjava/store/S3Store.java b/src/main/java/dev/zarr/zarrjava/store/S3Store.java index ff1ad7d..3dcdbfd 100644 --- a/src/main/java/dev/zarr/zarrjava/store/S3Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/S3Store.java @@ -12,6 +12,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import java.util.stream.Stream; public class S3Store implements Store, Store.ListableStore { @@ -71,7 +73,7 @@ public ByteBuffer get(String[] keys, long start) { GetObjectRequest req = GetObjectRequest.builder() .bucket(bucketName) .key(resolveKeys(keys)) - .range(String.valueOf(start)) + .range(String.format("bytes=%d-", start)) .build(); return get(req); } @@ -82,7 +84,7 @@ public ByteBuffer get(String[] keys, long start, long end) { GetObjectRequest req = GetObjectRequest.builder() .bucket(bucketName) .key(resolveKeys(keys)) - .range(start + "-" + end) + .range(String.format("bytes=%d-%d", start, end - 1)) // S3 range is inclusive .build(); return get(req); } @@ -104,15 +106,26 @@ public void delete(String[] keys) { } @Override - public Stream list(String[] keys) { + public Stream list(String[] keys) { final String fullKey = resolveKeys(keys); ListObjectsRequest req = ListObjectsRequest.builder() .bucket(bucketName).prefix(fullKey) .build(); ListObjectsResponse res = s3client.listObjects(req); - return res.contents() - .stream() - .map(p -> p.key().substring(fullKey.length() + 1)); + return res.contents().stream() + .map(S3Object::key) + .flatMap(key -> { + List pathSegments = new ArrayList<>(); + int index = fullKey.length(); + while ((index = key.indexOf('/', index + 1)) != -1) { + pathSegments.add(key.substring(fullKey.length() + 1, index)); + } + pathSegments.add(key.substring(fullKey.length() + 1)); + return pathSegments.stream(); + }) + .distinct() + .map(s -> s.split("/")) + .filter(arr -> arr.length > 0); } @Nonnull @@ -121,6 +134,30 @@ public StoreHandle resolve(String... keys) { return new StoreHandle(this, keys); } + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + GetObjectRequest req = GetObjectRequest.builder() + .bucket(bucketName) + .key(resolveKeys(keys)) + .range(String.format("bytes=%d-%d", start, end - 1)) // S3 range is inclusive + .build(); + ResponseInputStream responseInputStream = s3client.getObject(req); + return responseInputStream; + } + + @Override + public long getSize(String[] keys) { + HeadObjectRequest req = HeadObjectRequest.builder() + .bucket(bucketName) + .key(resolveKeys(keys)) + .build(); + try { + return s3client.headObject(req).contentLength(); + } catch (NoSuchKeyException e) { + return -1; + } + } + @Override public String toString() { return "s3://" + bucketName + "/" + prefix; diff --git a/src/main/java/dev/zarr/zarrjava/store/Store.java b/src/main/java/dev/zarr/zarrjava/store/Store.java index 41996a6..3747aed 100644 --- a/src/main/java/dev/zarr/zarrjava/store/Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/Store.java @@ -2,6 +2,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.stream.Stream; @@ -25,8 +26,35 @@ public interface Store { @Nonnull StoreHandle resolve(String... keys); + InputStream getInputStream(String[] keys, long start, long end); + + default InputStream getInputStream(String[] keys) { + return getInputStream(keys, 0, -1); + } + + /** + * Gets the size in bytes of the data stored at the given keys. + * + * @param keys The keys identifying the data. + * @return The size in bytes of the data stored at the given keys. -1 if the keys do not exist. + */ + long getSize(String[] keys); + interface ListableStore extends Store { - Stream list(String[] keys); + /** + * Lists all keys in the store that match the given prefix keys. Keys are represented as arrays of strings, + * where each string is a segment of the key path. + * Keys that are exactly equal to the prefix are not included in the results. + * Keys that do not contain data (i.e. "directories") are included in the results. + * + * @param keys The prefix keys to match. + * @return A stream of key arrays that match the given prefix. Prefixed keys are not included in the results. + */ + Stream list(String[] keys); + + default Stream list() { + return list(new String[]{}); + } } } diff --git a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java index d435646..e7bd8eb 100644 --- a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java +++ b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java @@ -4,6 +4,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.file.NoSuchFileException; import java.nio.file.Path; @@ -12,9 +13,9 @@ public class StoreHandle { @Nonnull - final Store store; + public final Store store; @Nonnull - final String[] keys; + public final String[] keys; public StoreHandle(@Nonnull Store store, @Nonnull String... keys) { this.store = store; @@ -45,6 +46,14 @@ public ByteBuffer read(long start, long end) { return store.get(keys, start, end); } + public InputStream getInputStream(int start, int end) { + return store.getInputStream(keys, start, end); + } + + public InputStream getInputStream() { + return store.getInputStream(keys); + } + public void set(ByteBuffer bytes) { store.set(keys, bytes); } @@ -57,13 +66,17 @@ public boolean exists() { return store.exists(keys); } - public Stream list() { + public Stream list() { if (!(store instanceof Store.ListableStore)) { throw new UnsupportedOperationException("The underlying store does not support listing."); } return ((Store.ListableStore) store).list(keys); } + public long getSize() { + return store.getSize(keys); + } + @Override public String toString() { return store + "/" + String.join("/", keys); diff --git a/src/main/java/dev/zarr/zarrjava/store/ZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ZipStore.java new file mode 100644 index 0000000..603d8e3 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/store/ZipStore.java @@ -0,0 +1,83 @@ +package dev.zarr.zarrjava.store; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; + +public abstract class ZipStore implements Store, Store.ListableStore { + public final StoreHandle underlyingStore; + + public ZipStore(@Nonnull StoreHandle underlyingStore) { + this.underlyingStore = underlyingStore; + } + + // adopted from https://stackoverflow.com/a/9918966 + @Nullable + public static String getZipCommentFromBuffer(byte[] bufArray) throws IOException { + // End of Central Directory (EOCD) record magic number + byte[] EOCD = {0x50, 0x4b, 0x05, 0x06}; + int buffLen = bufArray.length; + // Check the buffer from the end + search: + for (int i = buffLen - EOCD.length - 22; i >= 0; i--) { + for (int k = 0; k < EOCD.length; k++) { + if (bufArray[i + k] != EOCD[k]) { + continue search; + } + } + // End of Central Directory found! + int commentLen = bufArray[i + 20] + bufArray[i + 21] * 256; + int realLen = buffLen - i - 22; + if (commentLen != realLen) { + throw new IOException("ZIP comment size mismatch: " + + "directory says len is " + commentLen + + ", but file ends after " + realLen + " bytes!"); + } + return new String(bufArray, i + 22, commentLen); + } + return null; + } + + public String getArchiveComment() throws IOException { + // Attempt to read from the end of the file to find the EOCD record. + // We try a small chunk first (1KB) which covers most short comments (or no comment), + // then the maximum possible EOCD size (approx 65KB). + long fileSize = underlyingStore.getSize(); + if (fileSize < 22) { + return null; + } + int[] readSizes = {1024, 65535 + 22}; + + for (int size : readSizes) { + ByteBuffer buffer; + + if (fileSize < size) { + buffer = underlyingStore.read(); + } else { + buffer = underlyingStore.read(fileSize - size); + } + + if (buffer == null) { + return null; + } + + byte[] bufArray; + if (buffer.hasArray()) { + bufArray = buffer.array(); + } else { + bufArray = new byte[buffer.remaining()]; + buffer.duplicate().get(bufArray); + } + + String comment = getZipCommentFromBuffer(bufArray); + if (comment != null) { + return comment; + } + if (fileSize < size) { + break; + } + } + return null; + } +} \ No newline at end of file diff --git a/src/main/java/dev/zarr/zarrjava/v2/Group.java b/src/main/java/dev/zarr/zarrjava/v2/Group.java index 29bd477..d3229e4 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/Group.java +++ b/src/main/java/dev/zarr/zarrjava/v2/Group.java @@ -16,7 +16,10 @@ import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Objects; import java.util.function.Function; +import java.util.stream.Stream; import static dev.zarr.zarrjava.v2.Node.makeObjectMapper; import static dev.zarr.zarrjava.v2.Node.makeObjectWriter; @@ -170,7 +173,7 @@ public static Group create(String path, Attributes attributes) throws IOExceptio * @throws IOException if there is an error accessing the storage */ @Nullable - public Node get(String key) throws ZarrException, IOException { + public Node get(String[] key) throws ZarrException, IOException { StoreHandle keyHandle = storeHandle.resolve(key); try { return Node.open(keyHandle); @@ -179,6 +182,26 @@ public Node get(String key) throws ZarrException, IOException { } } + @Override + public Stream list() { + return storeHandle.list().map(key -> { + if (key.length <= 1) return null; // exclude root from list + String fileName = key[key.length - 1]; + StoreHandle parent = storeHandle.resolve(Arrays.copyOf(key, key.length - 1)); + try { + if (fileName.equals(ZARRAY)) { + return Array.open(parent); + } + if (fileName.equals(ZGROUP)) { + return (dev.zarr.zarrjava.core.Node) Group.open(parent); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + return null; + }).filter(Objects::nonNull); + } + /** * Creates a new subgroup with default metadata at the specified key. * diff --git a/src/main/java/dev/zarr/zarrjava/v3/Group.java b/src/main/java/dev/zarr/zarrjava/v3/Group.java index 305dcd1..5df6d5b 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/Group.java +++ b/src/main/java/dev/zarr/zarrjava/v3/Group.java @@ -15,7 +15,9 @@ import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Arrays; import java.util.function.Function; +import java.util.stream.Stream; import static dev.zarr.zarrjava.v3.Node.makeObjectMapper; import static dev.zarr.zarrjava.v3.Node.makeObjectWriter; @@ -112,10 +114,7 @@ public static Group create(@Nonnull StoreHandle storeHandle, @Nonnull GroupMetad * @throws IOException if the metadata cannot be serialized * @throws ZarrException if the attributes are invalid */ - public static Group create( - @Nonnull StoreHandle storeHandle, - @Nonnull Attributes attributes - ) throws IOException, ZarrException { + public static Group create(@Nonnull StoreHandle storeHandle, @Nonnull Attributes attributes) throws IOException, ZarrException { return create(storeHandle, new GroupMetadata(attributes)); } @@ -184,7 +183,7 @@ public static Group create(String path) throws IOException, ZarrException { * @throws IOException if there is an error accessing the storage */ @Nullable - public Node get(String key) throws ZarrException, IOException { + public Node get(String[] key) throws ZarrException, IOException { StoreHandle keyHandle = storeHandle.resolve(key); try { return Node.open(keyHandle); @@ -193,6 +192,21 @@ public Node get(String key) throws ZarrException, IOException { } } + @Override + public Stream list() { + Stream metadataKeys = storeHandle.list() + .filter(key -> key[key.length - 1].equals(ZARR_JSON)) + .filter(key -> key.length > 1); // exclude root from list + return metadataKeys.map(key -> { + try { + return get(Arrays.copyOf(key, key.length - 1)); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + + /** * Creates a new subgroup with the provided metadata at the specified key. * @@ -200,8 +214,7 @@ public Node get(String key) throws ZarrException, IOException { * @param groupMetadata the metadata of the Zarr group * @throws IOException if the metadata cannot be serialized */ - public Group createGroup(String key, GroupMetadata groupMetadata) - throws IOException, ZarrException { + public Group createGroup(String key, GroupMetadata groupMetadata) throws IOException, ZarrException { return Group.create(storeHandle.resolve(key), groupMetadata); } @@ -212,8 +225,7 @@ public Group createGroup(String key, GroupMetadata groupMetadata) * @param attributes attributes of the Zarr group * @throws IOException if the metadata cannot be serialized */ - public Group createGroup(String key, Attributes attributes) - throws IOException, ZarrException { + public Group createGroup(String key, Attributes attributes) throws IOException, ZarrException { return Group.create(storeHandle.resolve(key), new GroupMetadata(attributes)); } @@ -237,8 +249,7 @@ public Group createGroup(String key) throws IOException { * @throws IOException if the metadata cannot be serialized * @throws ZarrException if the array cannot be created */ - public Array createArray(String key, ArrayMetadata arrayMetadata) - throws IOException, ZarrException { + public Array createArray(String key, ArrayMetadata arrayMetadata) throws IOException, ZarrException { return Array.create(storeHandle.resolve(key), arrayMetadata); } @@ -249,9 +260,7 @@ public Array createArray(String key, ArrayMetadata arrayMetadata) * @param arrayMetadataBuilderMapper a function building the metadata of the Zarr array * @throws IOException if the metadata cannot be serialized */ - public Array createArray(String key, - Function arrayMetadataBuilderMapper) - throws IOException, ZarrException { + public Array createArray(String key, Function arrayMetadataBuilderMapper) throws IOException, ZarrException { return Array.create(storeHandle.resolve(key), arrayMetadataBuilderMapper, false); } @@ -262,8 +271,7 @@ private Group writeMetadata() throws IOException { private Group writeMetadata(GroupMetadata newGroupMetadata) throws IOException { ObjectWriter objectWriter = makeObjectWriter(); ByteBuffer metadataBytes = ByteBuffer.wrap(objectWriter.writeValueAsBytes(newGroupMetadata)); - storeHandle.resolve(ZARR_JSON) - .set(metadataBytes); + storeHandle.resolve(ZARR_JSON).set(metadataBytes); this.metadata = newGroupMetadata; return this; } @@ -276,8 +284,7 @@ private Group writeMetadata(GroupMetadata newGroupMetadata) throws IOException { * @throws ZarrException if the new attributes are invalid * @throws IOException if the metadata cannot be serialized */ - public Group updateAttributes(Function attributeMapper) - throws ZarrException, IOException { + public Group updateAttributes(Function attributeMapper) throws ZarrException, IOException { return setAttributes(attributeMapper.apply(metadata.attributes)); } diff --git a/src/test/java/dev/zarr/zarrjava/Utils.java b/src/test/java/dev/zarr/zarrjava/Utils.java new file mode 100644 index 0000000..e07bbb4 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/Utils.java @@ -0,0 +1,81 @@ +package dev.zarr.zarrjava; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; + +public class Utils { + + public static void zipFile(Path sourceDir, Path targetDir) throws IOException { + FileOutputStream fos = new FileOutputStream(targetDir.toFile()); + ZipOutputStream zipOut = new ZipOutputStream(fos); + + File fileToZip = new File(sourceDir.toUri()); + + zipFile(fileToZip, "", zipOut); + zipOut.close(); + fos.close(); + } + + static void zipFile(File fileToZip, String fileName, ZipOutputStream zipOut) throws IOException { + if (fileToZip.isDirectory()) { + if (fileName.endsWith("/")) { + zipOut.putNextEntry(new ZipEntry(fileName)); + zipOut.closeEntry(); + } else { + zipOut.putNextEntry(new ZipEntry(fileName + "/")); + zipOut.closeEntry(); + } + File[] children = fileToZip.listFiles(); + for (File childFile : children) { + zipFile(childFile, fileName + "/" + childFile.getName(), zipOut); + } + return; + } + FileInputStream fis = new FileInputStream(fileToZip); + ZipEntry zipEntry = new ZipEntry(fileName); + zipOut.putNextEntry(zipEntry); + byte[] bytes = new byte[1024]; + int length; + while ((length = fis.read(bytes)) >= 0) { + zipOut.write(bytes, 0, length); + } + fis.close(); + } + + /** + * Unzip sourceZip into targetDir. + * Protects against Zip Slip by ensuring extracted paths remain inside targetDir. + */ + public static void unzipFile(Path sourceZip, Path targetDir) throws IOException { + Files.createDirectories(targetDir); + try (FileInputStream fis = new FileInputStream(sourceZip.toFile()); + ZipInputStream zis = new ZipInputStream(fis)) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + Path outPath = targetDir.resolve(entry.getName()).normalize(); + Path targetDirNorm = targetDir.normalize(); + if (!outPath.startsWith(targetDirNorm)) { + throw new IOException("Zip entry is outside of the target dir: " + entry.getName()); + } + if (entry.isDirectory() || entry.getName().endsWith("/")) { + Files.createDirectories(outPath); + } else { + Files.createDirectories(outPath.getParent()); + try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outPath.toFile()))) { + byte[] buffer = new byte[1024]; + int len; + while ((len = zis.read(buffer)) > 0) { + bos.write(buffer, 0, len); + } + } + } + zis.closeEntry(); + } + } + } + +} diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java deleted file mode 100644 index cbb21f8..0000000 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ /dev/null @@ -1,146 +0,0 @@ -package dev.zarr.zarrjava; - -import com.fasterxml.jackson.databind.ObjectMapper; -import dev.zarr.zarrjava.core.Attributes; -import dev.zarr.zarrjava.store.FilesystemStore; -import dev.zarr.zarrjava.store.HttpStore; -import dev.zarr.zarrjava.store.MemoryStore; -import dev.zarr.zarrjava.store.S3Store; -import dev.zarr.zarrjava.v3.*; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; -import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; -import software.amazon.awssdk.regions.Region; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.S3Configuration; - -import java.io.IOException; -import java.net.URI; -import java.nio.file.Files; -import java.util.Arrays; -import java.util.stream.Stream; - -import static dev.zarr.zarrjava.v3.Node.makeObjectMapper; - -public class ZarrStoreTest extends ZarrTest { - @Test - public void testFileSystemStores() throws IOException, ZarrException { - FilesystemStore fsStore = new FilesystemStore(TESTDATA); - ObjectMapper objectMapper = makeObjectMapper(); - - GroupMetadata groupMetadata = objectMapper.readValue( - Files.readAllBytes(TESTDATA.resolve("l4_sample").resolve("zarr.json")), - GroupMetadata.class - ); - - String groupMetadataString = objectMapper.writeValueAsString(groupMetadata); - Assertions.assertTrue(groupMetadataString.contains("\"zarr_format\":3")); - Assertions.assertTrue(groupMetadataString.contains("\"node_type\":\"group\"")); - - ArrayMetadata arrayMetadata = objectMapper.readValue(Files.readAllBytes(TESTDATA.resolve( - "l4_sample").resolve("color").resolve("1").resolve("zarr.json")), - ArrayMetadata.class); - - String arrayMetadataString = objectMapper.writeValueAsString(arrayMetadata); - Assertions.assertTrue(arrayMetadataString.contains("\"zarr_format\":3")); - Assertions.assertTrue(arrayMetadataString.contains("\"node_type\":\"array\"")); - Assertions.assertTrue(arrayMetadataString.contains("\"shape\":[1,4096,4096,2048]")); - - Assertions.assertInstanceOf(Array.class, Array.open(fsStore.resolve("l4_sample", "color", "1"))); - - Node[] subNodes = Group.open(fsStore.resolve("l4_sample")).list().toArray(Node[]::new); - Assertions.assertEquals(2, subNodes.length); - Assertions.assertInstanceOf(Group.class, subNodes[0]); - - Array[] colorSubNodes = ((Group) Group.open(fsStore.resolve("l4_sample")).get("color")).list().toArray(Array[]::new); - - Assertions.assertEquals(5, colorSubNodes.length); - Assertions.assertInstanceOf(Array.class, colorSubNodes[0]); - - Array array = (Array) ((Group) Group.open(fsStore.resolve("l4_sample")).get("color")).get("1"); - Assertions.assertArrayEquals(new long[]{1, 4096, 4096, 2048}, array.metadata().shape); - } - - @Test - public void testS3Store() throws IOException, ZarrException { - S3Store s3Store = new S3Store(S3Client.builder() - .endpointOverride(URI.create("https://uk1s3.embassy.ebi.ac.uk")) - .region(Region.US_EAST_1) // required, but ignored - .serviceConfiguration( - S3Configuration.builder() - .pathStyleAccessEnabled(true) // required - .build() - ) - .credentialsProvider(AnonymousCredentialsProvider.create()) - .build(), "idr", "zarr/v0.5/idr0033A"); - - Array arrayV3 = Array.open(s3Store.resolve("BR00109990_C2.zarr", "0", "0")); - Assertions.assertArrayEquals(new long[]{5, 1552, 2080}, arrayV3.metadata().shape); - Assertions.assertEquals(574, arrayV3.read(new long[]{0, 0, 0}, new int[]{1, 1, 1}).getInt(0)); - - dev.zarr.zarrjava.core.Array arrayCore = dev.zarr.zarrjava.core.Array.open(s3Store.resolve("BR00109990_C2.zarr", "0", "0")); - Assertions.assertArrayEquals(new long[]{5, 1552, 2080}, arrayCore.metadata().shape); - Assertions.assertEquals(574, arrayCore.read(new long[]{0, 0, 0}, new int[]{1, 1, 1}).getInt(0)); - } - - @Test - public void testHttpStore() throws IOException, ZarrException { - HttpStore httpStore = new dev.zarr.zarrjava.store.HttpStore("https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0033A"); - Array array = Array.open(httpStore.resolve("BR00109990_C2.zarr", "0", "0")); - - Assertions.assertArrayEquals(new long[]{5, 1552, 2080}, array.metadata().shape); - } - - @ParameterizedTest - @CsvSource({"false", "true",}) - public void testMemoryStoreV3(boolean useParallel) throws ZarrException, IOException { - int[] testData = new int[1024 * 1024]; - Arrays.setAll(testData, p -> p); - - Group group = Group.create(new MemoryStore().resolve()); - Array array = group.createArray("array", b -> b - .withShape(1024, 1024) - .withDataType(DataType.UINT32) - .withChunkShape(5, 5) - ); - array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData), useParallel); - group.createGroup("subgroup"); - group.setAttributes(new Attributes(b -> b.set("some", "value"))); - Stream nodes = group.list(); - Assertions.assertEquals(2, nodes.count()); - - ucar.ma2.Array result = array.read(useParallel); - Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); - Attributes attrs = group.metadata().attributes; - Assertions.assertNotNull(attrs); - Assertions.assertEquals("value", attrs.getString("some")); - } - - @ParameterizedTest - @CsvSource({"false", "true",}) - public void testMemoryStoreV2(boolean useParallel) throws ZarrException, IOException { - int[] testData = new int[1024 * 1024]; - Arrays.setAll(testData, p -> p); - - dev.zarr.zarrjava.v2.Group group = dev.zarr.zarrjava.v2.Group.create(new MemoryStore().resolve()); - dev.zarr.zarrjava.v2.Array array = group.createArray("array", b -> b - .withShape(1024, 1024) - .withDataType(dev.zarr.zarrjava.v2.DataType.UINT32) - .withChunks(5, 5) - ); - array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData), useParallel); - group.createGroup("subgroup"); - Stream nodes = group.list(); - group.setAttributes(new Attributes().set("description", "test group")); - Assertions.assertEquals(2, nodes.count()); - - ucar.ma2.Array result = array.read(useParallel); - Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); - Attributes attrs = group.metadata().attributes; - Assertions.assertNotNull(attrs); - Assertions.assertEquals("test group", attrs.getString("description")); - - } -} diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java index 084d831..534a351 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java @@ -248,8 +248,9 @@ public void testGroup() throws IOException, ZarrException { .withChunks(5, 5) ); array.write(new long[]{2, 2}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{8, 8})); - - Assertions.assertArrayEquals(new int[]{5, 5}, ((Array) ((Group) group.listAsArray()[0]).listAsArray()[0]).metadata().chunks); + Array[] arrays = group.list().filter(n -> n instanceof Array).toArray(Array[]::new); + Assertions.assertEquals(1, arrays.length); + Assertions.assertArrayEquals(new int[]{5, 5}, arrays[0].metadata().chunks); } @Test @@ -403,8 +404,6 @@ public void testMemoryStore() throws ZarrException, IOException { ); group.createGroup("subgroup"); Assertions.assertEquals(2, group.list().count()); - for (String s : storeHandle.list().toArray(String[]::new)) - System.out.println(s); } } \ No newline at end of file diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java index 9a669be..44ea9d8 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java @@ -553,7 +553,9 @@ public void testGroup() throws IOException, ZarrException { ); array.write(new long[]{2, 2}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{8, 8})); - Assertions.assertArrayEquals(new int[]{5, 5}, ((Array) ((Group) group.listAsArray()[0]).listAsArray()[0]).metadata().chunkShape()); + Array[] arrays = group.list().filter(n -> n instanceof Array).toArray(Array[]::new); + Assertions.assertEquals(1, arrays.length); + Assertions.assertArrayEquals(new int[]{5, 5}, arrays[0].metadata().chunkShape()); } @Test diff --git a/src/test/java/dev/zarr/zarrjava/store/BufferedZipStoreTest.java b/src/test/java/dev/zarr/zarrjava/store/BufferedZipStoreTest.java new file mode 100644 index 0000000..4b55797 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/BufferedZipStoreTest.java @@ -0,0 +1,168 @@ +package dev.zarr.zarrjava.store; + +import dev.zarr.zarrjava.Utils; +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.Array; +import dev.zarr.zarrjava.core.Group; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.zip.ZipEntry; + +import static dev.zarr.zarrjava.Utils.unzipFile; + +public class BufferedZipStoreTest extends WritableStoreTest { + + Path testGroupDir = TESTOUTPUT.resolve("testZipStore.zip"); + + @BeforeAll + void writeTestGroup() throws ZarrException, IOException { + Path sourceDir = TESTOUTPUT.resolve("testZipStore"); + FilesystemStore fsStore = new FilesystemStore(sourceDir); + writeTestGroupV3(fsStore.resolve(), true); + Utils.zipFile(sourceDir, testGroupDir); + } + + @Override + StoreHandle storeHandleWithData() { + return new BufferedZipStore(testGroupDir).resolve("zarr.json"); + } + + @Test + public void testOpenZipStore() throws ZarrException, IOException { + BufferedZipStore zipStore = new BufferedZipStore(testGroupDir); + assertIsTestGroupV3(Group.open(zipStore.resolve()), true); + } + + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testWriteZipStore(boolean flushOnWrite) throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testWriteZipStore" + (flushOnWrite ? "Flush" : "NoFlush") + ".zip"); + BufferedZipStore zipStore = new BufferedZipStore(path, flushOnWrite); + writeTestGroupV3(zipStore.resolve(), true); + if (!flushOnWrite) zipStore.flush(); + + BufferedZipStore zipStoreRead = new BufferedZipStore(path); + assertIsTestGroupV3(Group.open(zipStoreRead.resolve()), true); + + Path unzippedPath = TESTOUTPUT.resolve("testWriteZipStoreUnzipped" + (flushOnWrite ? "Flush" : "NoFlush")); + + unzipFile(path, unzippedPath); + FilesystemStore fsStore = new FilesystemStore(unzippedPath); + assertIsTestGroupV3(Group.open(fsStore.resolve()), true); + } + + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testZipStoreWithComment(boolean flushOnWrite) throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreWithComment" + (flushOnWrite ? "Flush" : "NoFlush") + ".zip"); + String comment = "{\"ome\": { \"version\": \"XX.YY\" }}"; + BufferedZipStore zipStore = new BufferedZipStore(path, comment, flushOnWrite); + writeTestGroupV3(zipStore.resolve(), true); + if (!flushOnWrite) zipStore.flush(); + + try (java.util.zip.ZipFile zipFile = new java.util.zip.ZipFile(path.toFile())) { + String retrievedComment = zipFile.getComment(); + Assertions.assertEquals(comment, retrievedComment, "ZIP archive comment does not match expected value."); + } + + Assertions.assertEquals(comment, new BufferedZipStore(path).getArchiveComment(), "ZIP archive comment from store does not match expected value."); + } + + /** + * Test that ZipStore meets requirements for underlying store of Zipped OME-Zarr + * + * @see RFC-9: Zipped OME-Zarr + */ + @Test + public void testZipStoreRequirements() throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreRequirements.zip"); + BufferedZipStore zipStore = new BufferedZipStore(path); + + dev.zarr.zarrjava.v3.Group group = dev.zarr.zarrjava.v3.Group.create(zipStore.resolve()); + Array array = group.createArray("a1", b -> b + .withShape(1024, 1024) + .withDataType(dev.zarr.zarrjava.v3.DataType.UINT32) + .withChunkShape(512, 512) + ); + array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testDataInt()), true); + + dev.zarr.zarrjava.v3.Group g1 = group.createGroup("g1"); + g1.createGroup("g1_1").createGroup("g1_1_1"); + g1.createGroup("g1_2"); + group.createGroup("g2").createGroup("g2_1"); + group.createGroup("g3"); + + zipStore.flush(); + + try (ZipFile zip = new ZipFile(path.toFile())) { + ArrayList entries = Collections.list(zip.getEntries()); + + // no compression + for (ZipArchiveEntry e : entries) { + Assertions.assertEquals(ZipEntry.STORED, e.getMethod(), "Entry " + e.getName() + " is compressed"); + } + + // correct order of zarr.json files + String[] expectedFirstEntries = new String[]{ + "zarr.json", + "a1/zarr.json", + "g1/zarr.json", + "g2/zarr.json", + "g3/zarr.json", + "g1/g1_1/zarr.json", + "g1/g1_2/zarr.json", + "g2/g2_1/zarr.json", + "g1/g1_1/g1_1_1/zarr.json" + }; + String[] actualFirstEntries = entries.stream() + .map(ZipArchiveEntry::getName) + .limit(expectedFirstEntries.length) + .toArray(String[]::new); + + Assertions.assertArrayEquals(expectedFirstEntries, actualFirstEntries, "zarr.json files are not in the expected breadth-first order"); + } + } + + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testZipStoreV2(boolean flushOnWrite) throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreV2" + (flushOnWrite ? "Flush" : "NoFlush") + ".zip"); + BufferedZipStore zipStore = new BufferedZipStore(path, flushOnWrite); + writeTestGroupV2(zipStore.resolve(), true); + if (!flushOnWrite) zipStore.flush(); + + BufferedZipStore zipStoreRead = new BufferedZipStore(path); + assertIsTestGroupV2(Group.open(zipStoreRead.resolve()), true); + + Path unzippedPath = TESTOUTPUT.resolve("testZipStoreV2Unzipped"); + + unzipFile(path, unzippedPath); + FilesystemStore fsStore = new FilesystemStore(unzippedPath); + assertIsTestGroupV2(Group.open(fsStore.resolve()), true); + } + + + @Override + Store writableStore() { + Path path = TESTOUTPUT.resolve("writableStore.ZIP"); + if (Files.exists(path)) { + try{ + Files.delete(path); + }catch (IOException e) { + throw new RuntimeException("Failed to delete existing test ZIP store at: " + path.toAbsolutePath(), e); + } + } + return new BufferedZipStore(TESTOUTPUT.resolve("writableStore.ZIP"), true); + } +} diff --git a/src/test/java/dev/zarr/zarrjava/store/FileSystemStoreTest.java b/src/test/java/dev/zarr/zarrjava/store/FileSystemStoreTest.java new file mode 100644 index 0000000..ae6a088 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/FileSystemStoreTest.java @@ -0,0 +1,62 @@ +package dev.zarr.zarrjava.store; + +import com.fasterxml.jackson.databind.ObjectMapper; +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.*; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; + +import static dev.zarr.zarrjava.v3.Node.makeObjectMapper; + +public class FileSystemStoreTest extends WritableStoreTest{ + + @Override + StoreHandle storeHandleWithData() { + return new FilesystemStore(TESTDATA).resolve("l4_sample", "zarr.json"); + } + + @Test + public void testFileSystemStores() throws IOException, ZarrException { + FilesystemStore fsStore = new FilesystemStore(TESTDATA); + ObjectMapper objectMapper = makeObjectMapper(); + + GroupMetadata groupMetadata = objectMapper.readValue( + Files.readAllBytes(TESTDATA.resolve("l4_sample").resolve("zarr.json")), + dev.zarr.zarrjava.v3.GroupMetadata.class + ); + + String groupMetadataString = objectMapper.writeValueAsString(groupMetadata); + Assertions.assertTrue(groupMetadataString.contains("\"zarr_format\":3")); + Assertions.assertTrue(groupMetadataString.contains("\"node_type\":\"group\"")); + + ArrayMetadata arrayMetadata = objectMapper.readValue(Files.readAllBytes(TESTDATA.resolve( + "l4_sample").resolve("color").resolve("1").resolve("zarr.json")), + dev.zarr.zarrjava.v3.ArrayMetadata.class); + + String arrayMetadataString = objectMapper.writeValueAsString(arrayMetadata); + Assertions.assertTrue(arrayMetadataString.contains("\"zarr_format\":3")); + Assertions.assertTrue(arrayMetadataString.contains("\"node_type\":\"array\"")); + Assertions.assertTrue(arrayMetadataString.contains("\"shape\":[1,4096,4096,2048]")); + + Assertions.assertInstanceOf(Array.class, Array.open(fsStore.resolve("l4_sample", "color", "1"))); + + Node[] subNodes = Group.open(fsStore.resolve("l4_sample")).list().toArray(Node[]::new); + Assertions.assertEquals(12, subNodes.length); + + Array[] colorSubNodes = ((Group) Group.open(fsStore.resolve("l4_sample")).get("color")).list().toArray(Array[]::new); + + Assertions.assertEquals(5, colorSubNodes.length); + Assertions.assertInstanceOf(Array.class, colorSubNodes[0]); + + Array array = (Array) ((Group) Group.open(fsStore.resolve("l4_sample")).get("color")).get("1"); + Assertions.assertArrayEquals(new long[]{1, 4096, 4096, 2048}, array.metadata().shape); + } + + @Override + Store writableStore() { + return new FilesystemStore(TESTOUTPUT.resolve("writableFSStore")); + } +} diff --git a/src/test/java/dev/zarr/zarrjava/store/HttpStoreTest.java b/src/test/java/dev/zarr/zarrjava/store/HttpStoreTest.java new file mode 100644 index 0000000..751ab6a --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/HttpStoreTest.java @@ -0,0 +1,37 @@ +package dev.zarr.zarrjava.store; + +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.Array; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +public class HttpStoreTest extends StoreTest { + + @Override + StoreHandle storeHandleWithData() { + HttpStore httpStore = new dev.zarr.zarrjava.store.HttpStore("https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0033A"); + return httpStore.resolve("BR00109990_C2.zarr", "0", "0"); + + } + + @Test + public void testHttpStore() throws IOException, ZarrException { + Array array = Array.open(storeHandleWithData()); + Assertions.assertArrayEquals(new long[]{5, 1552, 2080}, array.metadata().shape); + } + + @Override + @Test + public void testStoreGetSize() { + // size is not defined in BR00109990_C2.zarr + long size = storeHandleWithData().getSize(); + Assertions.assertEquals(-1, size); + } + + @Override + void testList() throws ZarrException, IOException { + // listing is not supported in HttpStore + } +} diff --git a/src/test/java/dev/zarr/zarrjava/store/MemoryStoreTest.java b/src/test/java/dev/zarr/zarrjava/store/MemoryStoreTest.java new file mode 100644 index 0000000..c57905a --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/MemoryStoreTest.java @@ -0,0 +1,30 @@ +package dev.zarr.zarrjava.store; + +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.Array; +import dev.zarr.zarrjava.core.Attributes; +import dev.zarr.zarrjava.core.Node; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.stream.Stream; + +public class MemoryStoreTest extends WritableStoreTest { + + + + @Override + Store writableStore() { + return new MemoryStore(); + } + + @Override + StoreHandle storeHandleWithData() { + StoreHandle memoryStoreHandle = new MemoryStore().resolve(); + memoryStoreHandle.set(ByteBuffer.wrap(testData())); + return memoryStoreHandle; + } +} diff --git a/src/test/java/dev/zarr/zarrjava/store/OnlineS3StoreTest.java b/src/test/java/dev/zarr/zarrjava/store/OnlineS3StoreTest.java new file mode 100644 index 0000000..885cbf6 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/OnlineS3StoreTest.java @@ -0,0 +1,73 @@ +package dev.zarr.zarrjava.store; + +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.Array; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; + +import java.io.IOException; +import java.net.URI; +import java.nio.ByteBuffer; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class OnlineS3StoreTest extends StoreTest { + StoreHandle storeHandle; + + @BeforeAll + void createStore() { + S3Store s3Store = new S3Store(S3Client.builder() + .endpointOverride(URI.create("https://uk1s3.embassy.ebi.ac.uk")) + .region(Region.US_EAST_1) // required, but ignored + .serviceConfiguration( + S3Configuration.builder() + .pathStyleAccessEnabled(true) // required + .build() + ) + .credentialsProvider(AnonymousCredentialsProvider.create()) + .build(), "idr", "zarr/v0.5/idr0033A"); + storeHandle = s3Store.resolve("BR00109990_C2.zarr", "0", "0"); + } + + @Test + public void testOpen() throws IOException, ZarrException { + Array arrayV3 = Array.open(storeHandle); + Assertions.assertArrayEquals(new long[]{5, 1552, 2080}, arrayV3.metadata().shape); + Assertions.assertEquals(574, arrayV3.read(new long[]{0, 0, 0}, new int[]{1, 1, 1}).getInt(0)); + + dev.zarr.zarrjava.core.Array arrayCore = dev.zarr.zarrjava.core.Array.open(storeHandle); + Assertions.assertArrayEquals(new long[]{5, 1552, 2080}, arrayCore.metadata().shape); + Assertions.assertEquals(574, arrayCore.read(new long[]{0, 0, 0}, new int[]{1, 1, 1}).getInt(0)); + } + + + @Test + public void testGet() { + StoreHandle s3StoreHandle = storeHandle.resolve("zarr.json"); + S3Store s3Store = (S3Store) s3StoreHandle.store; + ByteBuffer buffer = s3Store.get(s3StoreHandle.keys); + ByteBuffer bufferWithStart = s3Store.get(s3StoreHandle.keys, 10); + Assertions.assertEquals(10, buffer.remaining() - bufferWithStart.remaining()); + + ByteBuffer bufferWithStartAndEnd = s3Store.get(s3StoreHandle.keys, 0, 10); + Assertions.assertEquals(10, bufferWithStartAndEnd.remaining()); + } + + @Override + StoreHandle storeHandleWithData() { + return storeHandle.resolve("zarr.json"); + } + + @Override + @Test + void testList() { + Assertions.assertTrue(storeHandle.list().count() > 1); + } +} + + diff --git a/src/test/java/dev/zarr/zarrjava/store/ReadOnlyZipStoreTest.java b/src/test/java/dev/zarr/zarrjava/store/ReadOnlyZipStoreTest.java new file mode 100644 index 0000000..7b73af4 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/ReadOnlyZipStoreTest.java @@ -0,0 +1,93 @@ +package dev.zarr.zarrjava.store; + +import dev.zarr.zarrjava.Utils; +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.Group; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.stream.Collectors; + +public class ReadOnlyZipStoreTest extends StoreTest { + + Path storePath = TESTOUTPUT.resolve("readOnlyZipStoreTest.zip"); + StoreHandle storeHandleWithData; + + @BeforeAll + void writeStoreHandleWithData() throws ZarrException, IOException { + Path source = TESTDATA.resolve("v2_sample").resolve("subgroup"); + Utils.zipFile(source, storePath); + storeHandleWithData = new ReadOnlyZipStore(storePath).resolve("array", "0.0.0"); + } + + @Override + StoreHandle storeHandleWithData() { + return storeHandleWithData; + } + + @Override + @Test + void testList() { + ReadOnlyZipStore zipStore = new ReadOnlyZipStore(storePath); + BufferedZipStore bufferedZipStore = new BufferedZipStore(storePath); + + java.util.Set expectedKeys = bufferedZipStore.resolve().list() + .map(node -> String.join("/", node)) + .collect(Collectors.toSet()); + java.util.Set actualKeys = zipStore.resolve().list() + .map(node -> String.join("/", node)) + .collect(Collectors.toSet()); + Assertions.assertEquals(expectedKeys, actualKeys); + } + + @Test + public void testOpen() throws ZarrException, IOException { + Path sourceDir = TESTOUTPUT.resolve("testZipStore"); + Path targetDir = TESTOUTPUT.resolve("testZipStore.zip"); + FilesystemStore fsStore = new FilesystemStore(sourceDir); + writeTestGroupV3(fsStore.resolve(), true); + + Utils.zipFile(sourceDir, targetDir); + + ReadOnlyZipStore readOnlyZipStore = new ReadOnlyZipStore(targetDir); + assertIsTestGroupV3(Group.open(readOnlyZipStore.resolve()), true); + } + + + @Test + public void testReadFromBufferedZipStore() throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testReadOnlyZipStore.zip"); + String archiveComment = "This is a test ZIP archive comment."; + BufferedZipStore zipStore = new BufferedZipStore(path, archiveComment); + writeTestGroupV3(zipStore.resolve(), true); + zipStore.flush(); + + ReadOnlyZipStore readOnlyZipStore = new ReadOnlyZipStore(path); + Assertions.assertEquals(archiveComment, readOnlyZipStore.getArchiveComment(), "ZIP archive comment from ReadOnlyZipStore does not match expected value."); + + java.util.Set expectedSubgroupKeys = new java.util.HashSet<>(Arrays.asList( + "array/c/1/1", + "array/c/0/0", + "array/c/0/1", + "zarr.json", + "array", + "array/c/1/0", + "array/c/1", + "array/c/0", + "array/zarr.json", + "array/c" + )); + + java.util.Set actualKeys = readOnlyZipStore.resolve("subgroup").list() + .map(node -> String.join("/", node)) + .collect(Collectors.toSet()); + + Assertions.assertEquals(expectedSubgroupKeys, actualKeys); + + assertIsTestGroupV3(Group.open(readOnlyZipStore.resolve()), true); + } +} diff --git a/src/test/java/dev/zarr/zarrjava/store/S3StoreTest.java b/src/test/java/dev/zarr/zarrjava/store/S3StoreTest.java new file mode 100644 index 0000000..520d3a0 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/S3StoreTest.java @@ -0,0 +1,97 @@ +package dev.zarr.zarrjava.store; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.nio.ByteBuffer; + +/** + * Tests for S3Store + *

+ * Requires a local S3 mock server running at http://localhost:9090 + * with a bucket named "zarr-test-bucket" + *

+ * Execute the following command to start a local S3 mock server: + *

+ * docker run -p 9090:9090 -p 9191:9191 -e "initialBuckets=zarr-test-bucket" adobe/s3mock:3.11.0
+ * 
+ */ +@Tag("s3") +public class S3StoreTest extends WritableStoreTest { + + String s3Endpoint = "http://localhost:9090"; + String bucketName = "zarr-test-bucket"; + S3Client s3Client; + String testDataKey = "testData"; + + @BeforeAll + void setUpS3Client() { + s3Client = S3Client.builder() + .endpointOverride(URI.create(s3Endpoint)) + .region(Region.US_EAST_1) // required, but ignored + .serviceConfiguration( + S3Configuration.builder() + .pathStyleAccessEnabled(true) // required + .build() + ) + .credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create("accessKey", "secretKey") + )) + .build(); + // Clean up the bucket + try { + s3Client.listObjectsV2Paginator(builder -> builder.bucket(bucketName).build()) + .contents() + .forEach(s3Object -> { + s3Client.deleteObject(builder -> builder.bucket(bucketName).key(s3Object.key()).build()); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + void testReadWriteS3Store() { + S3Store s3Store = new S3Store(s3Client, bucketName, ""); + + StoreHandle storeHandle = s3Store.resolve("testfile"); + byte[] testData = new byte[100]; + for (int i = 0; i < testData.length; i++) { + testData[i] = (byte) i; + } + storeHandle.set(ByteBuffer.wrap(testData)); + ByteBuffer retrievedData = storeHandle.read(); + byte[] retrievedBytes = new byte[retrievedData.remaining()]; + retrievedData.get(retrievedBytes); + Assertions.assertArrayEquals(testData, retrievedBytes); + } + + + @Override + Store writableStore() { + return new S3Store(s3Client, bucketName, ""); + } + + @Override + StoreHandle storeHandleWithData() { + try (InputStream byteStream = new ByteArrayInputStream(testData())) { + s3Client.putObject(PutObjectRequest.builder().bucket(bucketName).key("/" + testDataKey).build(), RequestBody.fromContentProvider(() -> byteStream, "application/octet-stream")); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new S3Store(s3Client, bucketName, "").resolve(testDataKey); + } +} diff --git a/src/test/java/dev/zarr/zarrjava/store/StoreTest.java b/src/test/java/dev/zarr/zarrjava/store/StoreTest.java new file mode 100644 index 0000000..ece5318 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/StoreTest.java @@ -0,0 +1,129 @@ +package dev.zarr.zarrjava.store; + +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.ZarrTest; +import dev.zarr.zarrjava.core.Array; +import dev.zarr.zarrjava.core.Attributes; +import dev.zarr.zarrjava.core.Group; +import dev.zarr.zarrjava.core.Node; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import ucar.ma2.DataType; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public abstract class StoreTest extends ZarrTest { + + abstract StoreHandle storeHandleWithData(); + + @Test + public void testInputStream() throws IOException { + StoreHandle storeHandle = storeHandleWithData(); + InputStream is = storeHandle.getInputStream(10, 20); + byte[] buffer = new byte[10]; + int bytesRead = is.read(buffer); + Assertions.assertEquals(10, bytesRead); + byte[] expectedBuffer = new byte[10]; + storeHandle.read(10, 20).get(expectedBuffer); + Assertions.assertArrayEquals(expectedBuffer, buffer); + } + + + @Test + public void testStoreGetSize() { + StoreHandle storeHandle = storeHandleWithData(); + long size = storeHandle.getSize(); + long actual_size = storeHandle.read().remaining(); + Assertions.assertEquals(actual_size, size); + } + + + byte[] testData() { + byte[] testData = new byte[1024 * 1024]; + for (int i = 0; i < testData.length; i++) { + testData[i] = (byte) (i % 256); + } + return testData; + } + int[] testDataInt() { + int[] testData = new int[1024 * 1024]; + for (int i = 0; i < testData.length; i++) { + testData[i] = i; + } + return testData; + } + + + Group writeTestGroupV3(StoreHandle storeHandle, boolean useParallel) throws ZarrException, IOException { + + dev.zarr.zarrjava.v3.Group group = dev.zarr.zarrjava.v3.Group.create(storeHandle); + dev.zarr.zarrjava.v3.Array array = group.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(dev.zarr.zarrjava.v3.DataType.UINT8) + .withChunkShape(512, 512) + ); + array.write(ucar.ma2.Array.factory(DataType.BYTE, new int[]{1024, 1024}, testData()), useParallel); + dev.zarr.zarrjava.v3.Group subgroup = group.createGroup("subgroup"); + dev.zarr.zarrjava.v3.Array subgrouparray = subgroup.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(dev.zarr.zarrjava.v3.DataType.UINT8) + .withChunkShape(512, 512) + ); + subgrouparray.write(ucar.ma2.Array.factory(DataType.BYTE, new int[]{1024, 1024}, testData()), useParallel); + + group.setAttributes(new Attributes(b -> b.set("some", "value"))); + return group; + } + + void assertIsTestGroupV3(Group group, boolean useParallel) throws ZarrException, IOException { + Stream nodes = group.list(); + List nodeList = nodes.collect(Collectors.toList()); + Assertions.assertEquals(3, nodeList.size()); + Array array = (Array) group.get("array"); + Assertions.assertNotNull(array); + ucar.ma2.Array result = array.read(useParallel); + Assertions.assertArrayEquals(testData(), (byte[]) result.get1DJavaArray(DataType.BYTE)); + Group subgroup = (Group) group.get("subgroup"); + Array subgrouparray = (Array) subgroup.get("array"); + result = subgrouparray.read(useParallel); + Assertions.assertArrayEquals(testData(), (byte[]) result.get1DJavaArray(ucar.ma2.DataType.BYTE)); + Attributes attrs = group.metadata().attributes(); + Assertions.assertNotNull(attrs); + Assertions.assertEquals("value", attrs.getString("some")); + } + + + dev.zarr.zarrjava.v2.Group writeTestGroupV2(StoreHandle storeHandle, boolean useParallel) throws ZarrException, IOException { + dev.zarr.zarrjava.v2.Group group = dev.zarr.zarrjava.v2.Group.create(storeHandle); + dev.zarr.zarrjava.v2.Array array = group.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(dev.zarr.zarrjava.v2.DataType.UINT8) + .withChunks(512, 512) + ); + array.write(ucar.ma2.Array.factory(DataType.BYTE, new int[]{1024, 1024}, testData()), useParallel); + group.createGroup("subgroup"); + group.setAttributes(new Attributes().set("some", "value")); + return group; + } + + void assertIsTestGroupV2(Group group, boolean useParallel) throws ZarrException, IOException { + Stream nodes = group.list(); + Assertions.assertEquals(2, nodes.count()); + Array array = (Array) group.get("array"); + Assertions.assertNotNull(array); + ucar.ma2.Array result = array.read(useParallel); + Assertions.assertArrayEquals(testData(), (byte[]) result.get1DJavaArray(DataType.BYTE)); + Attributes attrs = group.metadata().attributes(); + Assertions.assertNotNull(attrs); + Assertions.assertEquals("value", attrs.getString("some")); + } + + @Test + abstract void testList() throws ZarrException, IOException; +} diff --git a/src/test/java/dev/zarr/zarrjava/store/WritableStoreTest.java b/src/test/java/dev/zarr/zarrjava/store/WritableStoreTest.java new file mode 100644 index 0000000..e1c9b45 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/store/WritableStoreTest.java @@ -0,0 +1,112 @@ +package dev.zarr.zarrjava.store; + +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.Array; +import dev.zarr.zarrjava.core.Attributes; +import dev.zarr.zarrjava.core.Group; +import dev.zarr.zarrjava.core.Node; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public abstract class WritableStoreTest extends StoreTest { + abstract Store writableStore(); + + @Test + public void testList() throws IOException, ZarrException { + StoreHandle storeHandle = writableStore().resolve("testList"); + boolean useParallel = true; + writeTestGroupV3(storeHandle, useParallel); + java.util.Set expectedSubgroupKeys = new java.util.HashSet<>(Arrays.asList( + "array/c/1/1", + "array/c/0/0", + "array/c/0/1", + "zarr.json", + "array", + "array/c/1/0", + "array/c/1", + "array/c/0", + "array/zarr.json", + "array/c" + )); + + java.util.Set actualKeys = storeHandle.resolve("subgroup").list() + .map(node -> String.join("/", node)) + .collect(Collectors.toSet()); + Assertions.assertEquals(expectedSubgroupKeys, actualKeys); + + List allKeys = storeHandle.list() + .map(node -> String.join("/", node)) + .collect(Collectors.toList()); + Assertions.assertEquals(21, allKeys.size(), "Total number of keys in store should be 21 but was: " + allKeys); + } + + @Test + public void testWriteRead() throws IOException, ZarrException { + StoreHandle storeHandle = writableStore().resolve("testWriteRead"); + boolean useParallel = true; + Group group = writeTestGroupV3(storeHandle, useParallel); + assertIsTestGroupV3(group, useParallel); + } + + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testWriteReadV3(boolean useParallel) throws ZarrException, IOException { + int[] testData = testDataInt(); + Store store = writableStore(); + StoreHandle storeHandle = store.resolve("testWriteReadV3").resolve(store.getClass().getSimpleName()).resolve("" + useParallel); + + dev.zarr.zarrjava.v3.Group group = dev.zarr.zarrjava.v3.Group.create(storeHandle); + Array array = group.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(dev.zarr.zarrjava.v3.DataType.UINT32) + .withChunkShape(64, 64) + ); + array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData), useParallel); + group.createGroup("subgroup"); + group.setAttributes(new Attributes(b -> b.set("some", "value"))); + Stream nodes = group.list(); + Assertions.assertEquals(2, nodes.count()); + + ucar.ma2.Array result = array.read(useParallel); + Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); + Attributes attrs = group.metadata().attributes; + Assertions.assertNotNull(attrs); + Assertions.assertEquals("value", attrs.getString("some")); + } + + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testWriteReadV2(boolean useParallel) throws ZarrException, IOException { + int[] testData = testDataInt(); + Store store = writableStore(); + StoreHandle storeHandle = store.resolve("testMemoryStoreV2").resolve(store.getClass().getSimpleName()).resolve("" + useParallel); + dev.zarr.zarrjava.v2.Group group = dev.zarr.zarrjava.v2.Group.create(storeHandle); + dev.zarr.zarrjava.v2.Array array = group.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(dev.zarr.zarrjava.v2.DataType.UINT32) + .withChunks(512, 512) + ); + array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData), useParallel); + group.createGroup("subgroup"); + Stream nodes = group.list(); + group.setAttributes(new Attributes().set("description", "test group")); + Assertions.assertEquals(2, nodes.count()); + + ucar.ma2.Array result = array.read(useParallel); + Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); + Attributes attrs = group.metadata().attributes; + Assertions.assertNotNull(attrs); + Assertions.assertEquals("test group", attrs.getString("description")); + } + +}