Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tika-core/src/main/java/org/apache/tika/metadata/PDF.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ public interface PDF {
*/
Property HAS_ACROFORM_FIELDS = Property.internalBoolean(PDF_PREFIX + "hasAcroFormFields");

/**
 * Whether the document has at least one AcroForm signature field (/FT /Sig),
 * regardless of whether that field has been signed.
 * For documents that have actually been signed, see {@link TikaCoreProperties#HAS_SIGNATURE}.
 */
Property HAS_SIGNATURE_FIELDS = Property.internalBoolean(PDF_PREFIX + "hasSignatureFields");

/**
 * Internal boolean property stored under the key {@code PDF_PREFIX + "hasMarkedContent"}.
 * NOTE(review): presumably flags documents that contain marked content; confirm against
 * the parser code that sets it.
 */
Property HAS_MARKED_CONTENT = Property.internalBoolean(PDF_PREFIX + "hasMarkedContent");

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.apache.pdfbox.pdmodel.fixup.processor.AcroFormDefaultsProcessor;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

Expand Down Expand Up @@ -398,13 +399,19 @@ private void checkAccessPermissions(PDFParserConfig.AccessCheckMode mode, Metada
}

private void extractSignatures(PDDocument pdfDocument, Metadata metadata) {
List<PDSignatureField> sigFields = pdfDocument.getSignatureFields();
if (sigFields.isEmpty()) {
return;
}
metadata.set(PDF.HAS_SIGNATURE_FIELDS, true);

boolean hasSignature = false;
for (PDSignature signature : pdfDocument.getSignatureDictionaries()) {
for (PDSignatureField sigField : sigFields) {
PDSignature signature = sigField.getSignature();
if (signature == null) {
continue;
}
PDMetadataExtractor.addNotNull(signature.getName(), metadata, TikaCoreProperties.SIGNATURE_NAME);

Calendar date = signature.getSignDate();
if (date != null) {
metadata.add(TikaCoreProperties.SIGNATURE_DATE, date);
Expand All @@ -414,11 +421,10 @@ private void extractSignatures(PDDocument pdfDocument, Metadata metadata) {
PDMetadataExtractor.addNotNull(signature.getLocation(), metadata, TikaCoreProperties.SIGNATURE_LOCATION);
PDMetadataExtractor.addNotNull(signature.getReason(), metadata, TikaCoreProperties.SIGNATURE_REASON);
hasSignature = true;

}

if (hasSignature) {
metadata.set(TikaCoreProperties.HAS_SIGNATURE, hasSignature);
metadata.set(TikaCoreProperties.HAS_SIGNATURE, true);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -621,17 +621,27 @@ public void testTurningOffBookmarks() throws Exception {
//TIKA-1226
@Test
public void testSignatureInAcroForm() throws Exception {
    // Parses an AcroForm PDF and checks the signature-related metadata:
    // a signature field must be reported, but no actual signature.
    // The text field content must still be extracted into the XHTML output.
    //TODO: find a better test file for this issue (one with content in the signature area).
    XMLResult parsed = getXML("testPDF_acroform3.pdf");
    Metadata metadata = parsed.metadata;

    // General document features.
    assertEquals("true", metadata.get(PDF.HAS_XMP));
    assertEquals("true", metadata.get(PDF.HAS_ACROFORM_FIELDS));
    assertEquals("false", metadata.get(PDF.HAS_XFA));

    // Signature field present, but HAS_SIGNATURE is never set.
    assertEquals("true", metadata.get(PDF.HAS_SIGNATURE_FIELDS));
    assertNull(metadata.get(TikaCoreProperties.HAS_SIGNATURE));

    assertContains("<li>aTextField: TIKA-1226</li>", parsed.xml);
}

//TIKA-4756
@Test
public void testUnsignedSignatureField() throws Exception {
    // The test PDF's AcroForm carries /SigFlags 1 and a field of type /Sig,
    // but that field holds no signature value.
    // Expectation: the signature field is detected, while hasSignature stays unset.
    Metadata metadata = getXML("testPDF_sigflags.pdf").metadata;

    assertEquals("true", metadata.get(PDF.HAS_ACROFORM_FIELDS));
    assertEquals("true", metadata.get(PDF.HAS_SIGNATURE_FIELDS));
    assertNull(metadata.get(TikaCoreProperties.HAS_SIGNATURE));
}

@Test
public void testSingleCloseDoc() throws Exception {
//TIKA-1341
Expand Down
Binary file not shown.
Loading