From 4afc6b5009c2f33da19938ebf0b1bde1c2d9b69f Mon Sep 17 00:00:00 2001 From: tballison Date: Thu, 11 Jun 2026 06:43:22 +0200 Subject: [PATCH 1/2] TIKA-4756 -- add HAS_SIGNATURE_FIELDS --- .../main/java/org/apache/tika/metadata/PDF.java | 6 ++++++ .../org/apache/tika/parser/pdf/PDFParser.java | 14 ++++++++++---- .../apache/tika/parser/pdf/PDFParserTest.java | 16 +++++++++++++--- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java index f8521893653..51451e71dfa 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java @@ -151,6 +151,12 @@ public interface PDF { */ Property HAS_ACROFORM_FIELDS = Property.internalBoolean(PDF_PREFIX + "hasAcroFormFields"); + /** + * Has at least one AcroForm signature field (/FT /Sig), whether or not it has been signed. + * For documents that have been actually signed, see {@link TikaCoreProperties#HAS_SIGNATURE}. 
+ */ + Property HAS_SIGNATURE_FIELDS = Property.internalBoolean(PDF_PREFIX + "hasSignatureFields"); + Property HAS_MARKED_CONTENT = Property.internalBoolean(PDF_PREFIX + "hasMarkedContent"); /** diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java index f4e734f5320..25aa853e546 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java @@ -53,6 +53,7 @@ import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormDefaultsProcessor; import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature; import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -398,13 +399,19 @@ private void checkAccessPermissions(PDFParserConfig.AccessCheckMode mode, Metada } private void extractSignatures(PDDocument pdfDocument, Metadata metadata) { + List<PDSignatureField> sigFields = pdfDocument.getSignatureFields(); + if (sigFields.isEmpty()) { + return; + } + metadata.set(PDF.HAS_SIGNATURE_FIELDS, true); + boolean hasSignature = false; - for (PDSignature signature : pdfDocument.getSignatureDictionaries()) { + for (PDSignatureField sigField : sigFields) { + PDSignature signature = sigField.getSignature(); if (signature == null) { continue; } PDMetadataExtractor.addNotNull(signature.getName(), metadata, TikaCoreProperties.SIGNATURE_NAME); - Calendar date = signature.getSignDate(); if (date != null) { metadata.add(TikaCoreProperties.SIGNATURE_DATE, date); @@ -414,11 
+421,10 @@ private void extractSignatures(PDDocument pdfDocument, Metadata metadata) { PDMetadataExtractor.addNotNull(signature.getLocation(), metadata, TikaCoreProperties.SIGNATURE_LOCATION); PDMetadataExtractor.addNotNull(signature.getReason(), metadata, TikaCoreProperties.SIGNATURE_REASON); hasSignature = true; - } if (hasSignature) { - metadata.set(TikaCoreProperties.HAS_SIGNATURE, hasSignature); + metadata.set(TikaCoreProperties.HAS_SIGNATURE, true); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index 947a45dbdd9..987d2c70836 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -621,17 +621,27 @@ public void testTurningOffBookmarks() throws Exception { //TIKA-1226 @Test public void testSignatureInAcroForm() throws Exception { - //The current test doc does not contain any content in the signature area. - //This just tests that a RuntimeException is not thrown. - //TODO: find a better test file for this issue. XMLResult result = getXML("testPDF_acroform3.pdf"); Metadata m = result.metadata; assertEquals("true", m.get(PDF.HAS_XMP)); assertEquals("true", m.get(PDF.HAS_ACROFORM_FIELDS)); assertEquals("false", m.get(PDF.HAS_XFA)); + assertEquals("true", m.get(PDF.HAS_SIGNATURE_FIELDS)); + assertNull(m.get(TikaCoreProperties.HAS_SIGNATURE)); assertContains("
  • aTextField: TIKA-1226
  • ", result.xml); } + //TIKA-4756 + @Test + public void testUnsignedSignatureField() throws Exception { + // PDF has an AcroForm with /SigFlags 1 and a /Sig type field, but no actual signature value. + // Should detect the signature field but not report hasSignature. + Metadata m = getXML("testPDF_sigflags.pdf").metadata; + assertEquals("true", m.get(PDF.HAS_ACROFORM_FIELDS)); + assertEquals("true", m.get(PDF.HAS_SIGNATURE_FIELDS)); + assertNull(m.get(TikaCoreProperties.HAS_SIGNATURE)); + } + @Test public void testSingleCloseDoc() throws Exception { //TIKA-1341 From f55b97f30cc38e88b033673ad1014a4aea399e99 Mon Sep 17 00:00:00 2001 From: tballison Date: Thu, 11 Jun 2026 06:45:21 +0200 Subject: [PATCH 2/2] TIKA-4756 -- git add --- .../resources/test-documents/testPDF_sigflags.pdf | Bin 0 -> 659 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_sigflags.pdf diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_sigflags.pdf b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_sigflags.pdf new file mode 100644 index 0000000000000000000000000000000000000000..feedb6cc1fa1a13279ee2b9214b1085452a050ce GIT binary patch literal 659 zcmah{L2lbH5WMRZb1~o^T9OiFyMST9jUzNDf*OvB9)cWbX{9Pqq=2G}B+to5`mEl9 zENR$rdMO1#;4Ei$W~tG7m3>s7l4$hj@AFHfAklAcqxl?SS=c`7?H&APwZvF_rS>;e zjJ)Dw^#_Qop|%`wjhKNAZXm9jI%`cmKw+_fSeu##B%vzu$9sY}GtRVq&E<%?(l_(u zL(>eJW%D4!bl!Pkv7y3o8zm*C{A&NzGBn(6UC{m6lr=es9Ah@o8Mm{f*kk?!<8@1B zAPkJ-7g_RfAT7JDcPCc7(}R_>e%Cq9A-**skJhgfES~Zm z|AQq7dpcmwnx-WiZhh19bo@WTS8&xl5mJbY6|U#;tkSwDN<&(2D><2}ipru&Co`qw aOv%OnPu%!`gEQ8Jcsf?mXmtKPi(Uc3tE~9| literal 0 HcmV?d00001