diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java index e2e79d8eaed..4f0e342804c 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java @@ -118,6 +118,12 @@ void validateMagic(byte[] magic) throws InvalidAvroMagicException { /** Initialize the stream by reading from its head. */ void initialize(InputStream in, byte[] magic) throws IOException { + initialize(in, magic, SchemaCache.NO_CACHE); + } + + + /** Initialize the stream by reading from its head. */ + protected void initialize(InputStream in, byte[] magic, SchemaCache schemaCache) throws IOException { this.header = new Header(); this.vin = DecoderFactory.get().binaryDecoder(in, vin); magic = (magic == null) ? readMagic() : magic; @@ -140,8 +146,8 @@ void initialize(InputStream in, byte[] magic) throws IOException { // finalize the header header.metaKeyList = Collections.unmodifiableList(header.metaKeyList); - header.schema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) - .parse(getMetaString(DataFileConstants.SCHEMA)); + header.schema = schemaCache.getOrParseSchema(getMetaString(DataFileConstants.SCHEMA)); + this.codec = resolveCodec(); reader.setSchema(header.schema); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/SchemaCache.java b/lang/java/avro/src/main/java/org/apache/avro/file/SchemaCache.java new file mode 100644 index 00000000000..338e9a34cf7 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/file/SchemaCache.java @@ -0,0 +1,33 @@ +package org.apache.avro.file; + +import org.apache.avro.NameValidator; +import org.apache.avro.Schema; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +public abstract class SchemaCache { + public abstract Schema getOrParseSchema(String metaString); + + protected Schema parse(String metaString) { + return new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) + .parse(metaString); + } + + public static final SchemaCache NO_CACHE = new SchemaCache() { + @Override + public Schema getOrParseSchema(String metaString) { + return parse(metaString); + } + }; + public static SchemaCache createConcurrentCache() { + return new SchemaCache() { + private final ConcurrentMap cache = new ConcurrentHashMap<>(); + + @Override + public Schema getOrParseSchema(String metaString) { + return cache.computeIfAbsent(metaString, this::parse); + } + }; + } + +}