diff --git a/constants/annotation_common.h b/constants/annotation_common.h index 29cc8f1a6..aa4edca10 100644 --- a/constants/annotation_common.h +++ b/constants/annotation_common.h @@ -20,6 +20,12 @@ inline constexpr char kM[] = "M"; inline constexpr char kF[] = "F"; inline constexpr char kAP[] = "AP"; inline constexpr char kAS[] = "AS"; +inline constexpr char kA[] = "A"; +inline constexpr char kAA[] = "AA"; +inline constexpr char kPopup[] = "Popup"; +inline constexpr char kBS[] = "BS"; +inline constexpr char kBE[] = "BE"; +inline constexpr char kMK[] = "MK"; inline constexpr char kBorder[] = "Border"; inline constexpr char kC[] = "C"; inline constexpr char kStructParent[] = "StructParent"; diff --git a/core/fpdfapi/parser/BUILD.gn b/core/fpdfapi/parser/BUILD.gn index 94ab93ee7..575c5e909 100644 --- a/core/fpdfapi/parser/BUILD.gn +++ b/core/fpdfapi/parser/BUILD.gn @@ -57,6 +57,12 @@ source_set("parser") { "cpdf_read_validator.h", "cpdf_reference.cpp", "cpdf_reference.h", + "cpdf_revision_classifier.cpp", + "cpdf_revision_classifier.h", + "cpdf_revision_diff.cpp", + "cpdf_revision_diff.h", + "cpdf_revision_provider.cpp", + "cpdf_revision_provider.h", "cpdf_security_handler.cpp", "cpdf_security_handler.h", "cpdf_simple_parser.cpp", diff --git a/core/fpdfapi/parser/cpdf_crypto_handler.cpp b/core/fpdfapi/parser/cpdf_crypto_handler.cpp index 2b91d5f5e..d9537669b 100644 --- a/core/fpdfapi/parser/cpdf_crypto_handler.cpp +++ b/core/fpdfapi/parser/cpdf_crypto_handler.cpp @@ -53,7 +53,12 @@ bool CPDF_CryptoHandler::IsSignatureDictionary( if (!type_obj) { type_obj = dictionary->GetDirectObjectFor(pdfium::form_fields::kFT); } - return type_obj && type_obj->GetString() == pdfium::form_fields::kSig; + if (!type_obj) { + return false; + } + + const ByteString type = type_obj->GetString(); + return type == pdfium::form_fields::kSig || type == "DocTimeStamp"; } DataVector CPDF_CryptoHandler::EncryptContent( diff --git a/core/fpdfapi/parser/cpdf_parser.cpp 
b/core/fpdfapi/parser/cpdf_parser.cpp index 30dcb1108..9dc567e8d 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -22,6 +22,7 @@ #include "core/fpdfapi/parser/cpdf_object_stream.h" #include "core/fpdfapi/parser/cpdf_read_validator.h" #include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfapi/parser/cpdf_revision_provider.h" #include "core/fpdfapi/parser/cpdf_security_handler.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" @@ -468,6 +469,9 @@ bool CPDF_Parser::LoadAllCrossRefTablesAndStreams(FX_FILESIZE xref_offset) { xref_stream_ = true; } + revision_xref_list_ = xref_list; + revision_xref_stream_list_ = xref_stream_list; + return true; } @@ -676,6 +680,26 @@ bool CPDF_Parser::LoadCrossRefTable(FX_FILESIZE pos, bool skip) { return true; } +bool CPDF_Parser::ExtractCrossRefTableEntriesAt(FX_FILESIZE pos, + ObjectMap* out_objects) { + if (!out_objects) + return false; + + const FX_FILESIZE saved_pos = syntax_->GetPos(); + syntax_->SetPos(pos); + std::vector objects; + bool ok = ParseCrossRefTable(&objects); + syntax_->SetPos(saved_pos); + + if (!ok) + return false; + + for (const auto& obj : objects) + (*out_objects)[obj.obj_num] = obj.info; + + return true; +} + void CPDF_Parser::MergeCrossRefObjectsData( const std::vector& objects) { for (const auto& obj : objects) { @@ -1392,6 +1416,21 @@ std::vector CPDF_Parser::GetTrailerEnds() { return trailer_ends; } +const CPDF_RevisionProvider* CPDF_Parser::GetRevisionProvider() { + if (!has_parsed_ || revision_xref_list_.empty()) + return nullptr; + + if (!revision_provider_) { + revision_provider_ = std::make_unique(); + if (!revision_provider_->Build(this, revision_xref_list_, + revision_xref_stream_list_)) { + revision_provider_.reset(); + return nullptr; + } + } + return revision_provider_.get(); +} + bool CPDF_Parser::WriteToArchive(IFX_ArchiveStream* archive, FX_FILESIZE src_size) { static constexpr FX_FILESIZE 
kBufferSize = 4096; diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index 613a1d750..d9c775903 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -29,6 +29,7 @@ class CPDF_LinearizedHeader; class CPDF_Object; class CPDF_ObjectStream; class CPDF_ReadValidator; +class CPDF_RevisionProvider; class CPDF_SecurityHandler; class CPDF_SyntaxParser; class IFX_ArchiveStream; @@ -119,6 +120,16 @@ class CPDF_Parser { std::vector GetTrailerEnds(); bool WriteToArchive(IFX_ArchiveStream* archive, FX_FILESIZE src_size); + using ObjectMap = std::map; + + // Re-parse a classic xref table at |pos| and return entries as an ObjectMap. + // Does not mutate cross_ref_table_ or any other parser state. + bool ExtractCrossRefTableEntriesAt(FX_FILESIZE pos, ObjectMap* out_objects); + + // Returns the revision provider, building it lazily on first access. + // Returns nullptr if the parser has not successfully loaded a document. + const CPDF_RevisionProvider* GetRevisionProvider(); + const CPDF_CrossRefTable* GetCrossRefTableForTesting() const { return cross_ref_table_.get(); } @@ -211,6 +222,12 @@ class CPDF_Parser { std::set parsing_obj_nums_; RetainPtr security_handler_; + + // Populated by LoadAllCrossRefTablesAndStreams. + // Index 0 = newest (main) revision. After Build() reversal, index 0 = oldest. + std::vector revision_xref_list_; + std::vector revision_xref_stream_list_; + std::unique_ptr revision_provider_; }; #endif // CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ diff --git a/core/fpdfapi/parser/cpdf_revision_classifier.cpp b/core/fpdfapi/parser/cpdf_revision_classifier.cpp new file mode 100644 index 000000000..e70572af1 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_revision_classifier.cpp @@ -0,0 +1,519 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "core/fpdfapi/parser/cpdf_revision_classifier.h" + +#include +#include +#include + +#include "constants/annotation_common.h" +#include "constants/form_fields.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_parser.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfapi/parser/cpdf_stream.h" + +namespace { + +bool ShouldSkipSupportKey(const ByteString& key) { + return key == "P" || key == "Parent" || key == "Prev" || key == "First"; +} + +// BFS that follows CPDF_Reference into their targets using +// doc->GetIndirectObject(). This matches the ObjectTreeTraverser pattern +// but is scoped to a specific support root. +void CollectReferencesRecursive(CPDF_Document* doc, + const CPDF_Object* root, + std::set* visited) { + std::queue> queue; + queue.push(RetainPtr(root)); + + while (!queue.empty()) { + RetainPtr obj = std::move(queue.front()); + queue.pop(); + + if (!obj) + continue; + + if (obj->IsReference()) { + uint32_t ref_num = obj->AsReference()->GetRefObjNum(); + if (!ref_num || !visited->insert(ref_num).second) + continue; + RetainPtr resolved = + doc->GetIndirectObject(ref_num); + if (resolved) + queue.push(std::move(resolved)); + continue; + } + + if (const CPDF_Dictionary* dict = obj->AsDictionary()) { + CPDF_DictionaryLocker locker(dict); + for (const auto& [key, value] : locker) { + if (ShouldSkipSupportKey(key)) + continue; + queue.push(value); + } + } else if (const CPDF_Array* arr = obj->AsArray()) { + CPDF_ArrayLocker locker(arr); + for (const auto& elem : locker) + queue.push(elem); + } else if (const CPDF_Stream* stream = obj->AsStream()) { + RetainPtr stream_dict = stream->GetDict(); + if (stream_dict) { + CPDF_DictionaryLocker locker(stream_dict.Get()); + for (const auto& [key, value] : locker) { + if (ShouldSkipSupportKey(key)) + continue; + queue.push(value); + } + } + } + } +} + +// 
Enumerate all indirect objects from the /AP dictionary.
+// Covers all modes (N/R/D) and all states (Yes/Off/etc.).
+//
+// GetDirectObjectFor() resolves references, so directly-embedded (non-indirect)
+// sub-dicts or streams will have GetObjNum() == 0 and contribute no object
+// number to the result set. Only their indirect descendants (found via
+// CollectReferencesRecursive) will be collected. This is intentional: the
+// revision diff only tracks indirect objects, so inline objects are invisible
+// to the diff and need not be classified.
+std::set CollectAPObjectNumbers(CPDF_Document* doc,
+                                const CPDF_Dictionary* owner_dict) {
+  std::set result;
+  RetainPtr ap_dict =
+      owner_dict->GetDictFor(pdfium::annotation::kAP);
+  if (!ap_dict)
+    return result;
+
+  for (const char* mode_key : {"N", "R", "D"}) {
+    RetainPtr mode_obj =
+        ap_dict->GetDirectObjectFor(mode_key);
+    if (!mode_obj)
+      continue;
+
+    if (mode_obj->IsStream()) {
+      if (uint32_t num = mode_obj->GetObjNum(); num)
+        result.insert(num);
+      CollectReferencesRecursive(doc, mode_obj.Get(), &result);
+    } else if (const CPDF_Dictionary* state_dict = mode_obj->AsDictionary()) {
+      CPDF_DictionaryLocker locker(state_dict);
+      for (const auto& [key, value] : locker) {
+        if (uint32_t num = value->GetObjNum(); num)
+          result.insert(num);
+        CollectReferencesRecursive(doc, value.Get(), &result);
+      }
+    }
+  }
+  return result;
+}
+
+// Collects the object numbers reachable from |owner_dict|'s /V entry.
+// Returns an empty set when there is no /V.
+//
+// The root is handed to CollectReferencesRecursive() untouched: when /V is a
+// reference, that helper records the referenced number itself and then
+// descends into the target. Inserting the number here first would mark it as
+// already visited and make the helper skip the whole subtree.
+std::set CollectValueObjectNumbers(CPDF_Document* doc,
+                                   const CPDF_Dictionary* owner_dict) {
+  std::set result;
+  const CPDF_Object* value = owner_dict->GetObjectFor("V");
+  if (!value)
+    return result;
+
+  CollectReferencesRecursive(doc, value, &result);
+  return result;
+}
+
+// Adds to |result| the object numbers reachable from |owner_dict|'s |key|
+// entry. Does nothing when the entry is absent.
+//
+// |result| doubles as the visited set of CollectReferencesRecursive(), so a
+// root that an earlier call already recorded is not walked again. The root's
+// own number must not be inserted before the call, otherwise an indirect root
+// would be treated as visited and none of its descendants would be collected.
+void CollectSupportRoot(CPDF_Document* doc,
+                        const CPDF_Dictionary* owner_dict,
+                        const char* key,
+                        std::set* result) {
+  const CPDF_Object* root = owner_dict->GetObjectFor(key);
+  if (!root)
+    return;
+
+  CollectReferencesRecursive(doc, root, result);
+}
+
+// Collects the appearance objects of |owner_dict| plus everything reachable
+// from its /Popup, /A, /AA, /BS, /BE, /MK and /OC entries.
+std::set CollectAnnotationSupportObjectNumbers(
+    CPDF_Document* doc,
+    const CPDF_Dictionary* owner_dict) {
+  std::set result = CollectAPObjectNumbers(doc, owner_dict);
+
+  for (const char* key : {pdfium::annotation::kPopup, pdfium::annotation::kA,
+                          pdfium::annotation::kAA, pdfium::annotation::kBS,
+                          pdfium::annotation::kBE, pdfium::annotation::kMK,
+                          pdfium::annotation::kOC}) {
+    CollectSupportRoot(doc, owner_dict, key, &result);
+  }
+
+  return result;
+}
+
+// Returns the object number that |root|'s /AcroForm entry refers to, or 0 when
+// |root| is null, has no /AcroForm, or stores it as a direct object.
+uint32_t GetAcroFormObjNum(const CPDF_Dictionary* root) {
+  if (!root)
+    return 0;
+  const CPDF_Object* acroform = root->GetObjectFor("AcroForm");
+  if (!acroform)
+    return 0;
+  if (acroform->IsReference())
+    return acroform->AsReference()->GetRefObjNum();
+  return 0;
+}
+
+// Result of classifying one changed object on its own. The defaults describe
+// an unclassified object: no target, no page, kOther, no owner kind.
+struct DirectClassification {
+  uint32_t target_obj_num = 0;
+  uint32_t page_obj_num = 0;
+  SemanticChangeType semantic_type = SemanticChangeType::kOther;
+  SupportOwnerKind owner_kind = SupportOwnerKind::kNone;
+};
+
+// Returns the object number of the page named by |dict|'s /P entry, or by the
+// /P entry of an ancestor reached through /Parent (at most 4 hops).
+// Returns 0 when |dict| is null or no /P entry is found.
+uint32_t GetPageObjectNum(const CPDF_Dictionary* dict) {
+  if (!dict)
+    return 0;
+
+  const CPDF_Object* page = dict->GetObjectFor("P");
+  if (page) {
+    if (page->IsReference())
+      return page->AsReference()->GetRefObjNum();
+
+    if (const CPDF_Dictionary* page_dict = page->AsDictionary())
+      return page_dict->GetObjNum();
+  }
+
+  // Follow the /Parent owner chain until a /P page reference is found.
+  // This is not page-tree inheritance; it simply walks the owner chain.
+  // Depth cap prevents infinite loops from malformed circular references.
+  const CPDF_Dictionary* cur = dict;
+  for (int depth = 0; depth < 4; ++depth) {
+    const CPDF_Object* parent_ref = cur->GetObjectFor("Parent");
+    if (!parent_ref)
+      break;
+
+    const CPDF_Object* parent_obj = parent_ref->GetDirect();
+    const CPDF_Dictionary* parent_dict =
+        parent_obj ? 
parent_obj->AsDictionary() : nullptr;
+    if (!parent_dict)
+      break;
+
+    const CPDF_Object* parent_page = parent_dict->GetObjectFor("P");
+    if (parent_page) {
+      if (parent_page->IsReference())
+        return parent_page->AsReference()->GetRefObjNum();
+
+      if (const CPDF_Dictionary* parent_page_dict = parent_page->AsDictionary())
+        return parent_page_dict->GetObjNum();
+    }
+
+    cur = parent_dict;
+  }
+
+  return 0;
+}
+
+// Classifies a single changed object from the dictionary (or stream
+// dictionary) it resolves to in |doc|. Returns a default-constructed
+// DirectClassification when the object cannot be loaded, has no dictionary,
+// or matches none of the rules. |root| is currently unused.
+//
+// LIMITATION: Classification uses the latest parse context
+// (doc->GetMutableIndirectObject), not a historical snapshot. Objects that were
+// freed or replaced in later revisions are resolved to their current-revision
+// state, which may differ from their state at the time the change was made.
+// This is acceptable for the first implementation because freed objects are
+// uncommon in typical form-fill and annotation workflows, and the conservative
+// promotion rules in Pass 2 mitigate most misclassification risk.
+DirectClassification ClassifyObject(CPDF_Document* doc,
+                                    uint32_t obj_num,
+                                    const CPDF_Dictionary* root,
+                                    uint32_t acroform_obj_num,
+                                    const std::set& dss_obj_nums) {
+  // Membership in the DSS closure wins over anything the object itself says.
+  if (dss_obj_nums.count(obj_num))
+    return {obj_num, 0, SemanticChangeType::kDSS, SupportOwnerKind::kNone};
+
+  RetainPtr obj = doc->GetMutableIndirectObject(obj_num);
+  if (!obj)
+    return {};
+
+  CPDF_Dictionary* dict = obj->AsMutableDictionary();
+  if (!dict) {
+    if (CPDF_Stream* stream = obj->AsMutableStream())
+      dict = stream->GetMutableDict();
+    if (!dict)
+      return {};
+  }
+
+  ByteString type = dict->GetNameFor("Type");
+
+  if (type == "DocTimeStamp")
+    return {obj_num, 0, SemanticChangeType::kDocumentTimestamp,
+            SupportOwnerKind::kNone};
+
+  if (type == "Sig")
+    return {obj_num, 0, SemanticChangeType::kSignature,
+            SupportOwnerKind::kNone};
+
+  ByteString subtype = dict->GetNameFor("Subtype");
+  if (subtype == "Widget")
+    return {obj_num, GetPageObjectNum(dict), SemanticChangeType::kFormStateChange,
+            SupportOwnerKind::kForm};
+
+  if (acroform_obj_num != 0 && obj_num == acroform_obj_num)
+    return {obj_num, 0, SemanticChangeType::kFormStateChange,
+            SupportOwnerKind::kForm};
+
+  if (!subtype.IsEmpty() && dict->KeyExist("Rect")) {
+    // A popup is reported against the annotation it belongs to, when that
+    // parent can be identified by object number.
+    uint32_t target_obj_num = obj_num;
+    if (subtype == "Popup") {
+      const CPDF_Object* parent_ref = dict->GetObjectFor("Parent");
+      if (parent_ref) {
+        if (parent_ref->IsReference()) {
+          target_obj_num = parent_ref->AsReference()->GetRefObjNum();
+        } else if (const CPDF_Dictionary* parent_dict =
+                       parent_ref->AsDictionary()) {
+          if (parent_dict->GetObjNum())
+            target_obj_num = parent_dict->GetObjNum();
+        }
+      }
+    }
+
+    return {target_obj_num, GetPageObjectNum(dict),
+            SemanticChangeType::kAnnotation, SupportOwnerKind::kAnnotation};
+  }
+
+  // Explicit structural /Type values are tested before the generic form-field
+  // heuristic below: that heuristic accepts any dictionary with a /Parent key,
+  // and page-tree nodes carry /Parent too, so testing it first would keep
+  // pages from ever being reported as kPage.
+  if (type == "Page" || type == "Pages")
+    return {obj_num, type == "Page" ? obj_num : 0, SemanticChangeType::kPage,
+            SupportOwnerKind::kNone};
+
+  if (type == "Catalog")
+    return {obj_num, 0, SemanticChangeType::kCatalog,
+            SupportOwnerKind::kNone};
+
+  // Heuristic for form fields that are not widgets themselves.
+  // NOTE(review): /Parent alone also matches other dictionary kinds that have
+  // no explicit rule above - confirm this breadth is intended.
+  if (dict->KeyExist("FT") || dict->KeyExist("V") ||
+      dict->KeyExist("Parent")) {
+    return {obj_num, GetPageObjectNum(dict),
+            SemanticChangeType::kFormStateChange, SupportOwnerKind::kForm};
+  }
+
+  return {};
+}
+
+}  // namespace
+
+// Collects the object numbers reachable from the catalog's /DSS entry,
+// including the DSS object's own number when it is stored indirectly.
+std::set CollectDSSObjectNumbers(CPDF_Document* doc) {
+  std::set result;
+  const CPDF_Dictionary* root = doc->GetRoot();
+  if (!root)
+    return result;
+
+  const CPDF_Object* dss_obj = root->GetObjectFor("DSS");
+  if (!dss_obj)
+    return result;
+
+  if (dss_obj->IsReference()) {
+    // The number is recorded here, so the walk must start from the resolved
+    // object: starting from the reference would find it already visited.
+    uint32_t dss_num = dss_obj->AsReference()->GetRefObjNum();
+    result.insert(dss_num);
+    RetainPtr resolved = doc->GetIndirectObject(dss_num);
+    if (resolved)
+      CollectReferencesRecursive(doc, resolved.Get(), &result);
+  } else if (dss_obj->IsDictionary()) {
+    CollectReferencesRecursive(doc, dss_obj, &result);
+  }
+  return result;
+}
+
+std::set CollectSupportObjectNumbers(
+    CPDF_Document* doc,
+    const CPDF_Dictionary* owner_dict,
+    const SupportCollectionPolicy& policy) {
+  std::set result;
+
+  if (policy.include_ap)
+ result.merge(CollectAPObjectNumbers(doc, owner_dict)); + + if (policy.include_value) + result.merge(CollectValueObjectNumbers(doc, owner_dict)); + + if (policy.include_annotation_support) + result.merge(CollectAnnotationSupportObjectNumbers(doc, owner_dict)); + + return result; +} + +SupportPromotionDecision DecideSupportPromotion( + uint32_t /*obj_num*/, + RevisionDiffCategory /*diff_category*/, + const std::set& form_owner_hits, + const std::set& annotation_owner_hits, + bool has_multiple_references_in_document) { + const bool from_form = !form_owner_hits.empty(); + const bool from_annot = !annotation_owner_hits.empty(); + + if (!from_form && !from_annot) + return SupportPromotionDecision::kNotLocal; + + if (from_form && from_annot) + return SupportPromotionDecision::kAmbiguous; + + if (form_owner_hits.size() > 1 || annotation_owner_hits.size() > 1) + return SupportPromotionDecision::kAmbiguous; + + if (has_multiple_references_in_document) + return SupportPromotionDecision::kShared; + + return from_form ? SupportPromotionDecision::kPromoteToForm + : SupportPromotionDecision::kPromoteToAnnotation; +} + +// LIMITATION: All object access uses the current (latest) document parse +// state. See ClassifyObject comment for details on freed/replaced object +// classification accuracy. 
+std::vector ClassifyChanges( + CPDF_Document* doc, + const std::vector& raw_diff, + const std::set& multi_ref_set) { + const CPDF_Dictionary* root = doc->GetRoot(); + const uint32_t acroform_obj_num = GetAcroFormObjNum(root); + + std::set dss_obj_nums = CollectDSSObjectNumbers(doc); + + const std::set changed_obj_nums = [&raw_diff]() { + std::set changed; + for (const auto& entry : raw_diff) + changed.insert(entry.obj_num); + return changed; + }(); + + // --- Pass 1: direct-owner classification --- + std::vector result; + result.reserve(raw_diff.size()); + + std::map direct_owners; + + for (const auto& entry : raw_diff) { + ResolvedSemanticChange change; + change.changed_obj_num = entry.obj_num; + change.target_obj_num = entry.obj_num; + change.page_obj_num = 0; + change.diff_category = entry.category; + DirectClassification direct = + ClassifyObject(doc, entry.obj_num, root, acroform_obj_num, + dss_obj_nums); + change.target_obj_num = direct.target_obj_num ? direct.target_obj_num + : entry.obj_num; + change.page_obj_num = direct.page_obj_num; + change.semantic_type = direct.semantic_type; + result.push_back(change); + + if (direct.owner_kind != SupportOwnerKind::kNone) + direct_owners[entry.obj_num] = direct.owner_kind; + } + + // --- Pass 2: support-object promotion --- + std::map> form_support_hits; + std::map> annotation_support_hits; + + for (const auto& [owner_obj_num, owner_kind] : direct_owners) { + RetainPtr obj = doc->GetMutableIndirectObject(owner_obj_num); + CPDF_Dictionary* owner_dict = obj ? obj->AsMutableDictionary() : nullptr; + if (!owner_dict) { + if (CPDF_Stream* stream = obj ? 
obj->AsMutableStream() : nullptr) + owner_dict = stream->GetMutableDict(); + } + if (!owner_dict) + continue; + + SupportCollectionPolicy policy; + if (owner_kind == SupportOwnerKind::kForm) { + policy.include_ap = true; + policy.include_value = true; + } else if (owner_kind == SupportOwnerKind::kAnnotation) { + policy.include_annotation_support = true; + } + + std::set owned = + CollectSupportObjectNumbers(doc, owner_dict, policy); + + for (uint32_t support_obj_num : owned) { + if (!changed_obj_nums.count(support_obj_num)) + continue; + if (owner_kind == SupportOwnerKind::kForm) { + form_support_hits[support_obj_num].insert(owner_obj_num); + } else if (owner_kind == SupportOwnerKind::kAnnotation) { + annotation_support_hits[support_obj_num].insert(owner_obj_num); + } + } + } + + for (auto& change : result) { + if ((change.semantic_type == SemanticChangeType::kSignature || + change.semantic_type == SemanticChangeType::kDocumentTimestamp) && + form_support_hits[change.changed_obj_num].size() == 1 && + annotation_support_hits[change.changed_obj_num].empty() && + !multi_ref_set.count(change.changed_obj_num)) { + uint32_t owner_obj_num = *form_support_hits[change.changed_obj_num].begin(); + change.target_obj_num = owner_obj_num; + RetainPtr owner_obj = + doc->GetMutableIndirectObject(owner_obj_num); + CPDF_Dictionary* owner_dict = + owner_obj ? owner_obj->AsMutableDictionary() : nullptr; + if (!owner_dict) { + if (CPDF_Stream* owner_stream = + owner_obj ? 
owner_obj->AsMutableStream() : nullptr) { + owner_dict = owner_stream->GetMutableDict(); + } + } + change.page_obj_num = GetPageObjectNum(owner_dict); + continue; + } + + if (change.semantic_type != SemanticChangeType::kOther) + continue; + + SupportPromotionDecision decision = DecideSupportPromotion( + change.changed_obj_num, change.diff_category, + form_support_hits[change.changed_obj_num], + annotation_support_hits[change.changed_obj_num], + multi_ref_set.count(change.changed_obj_num)); + + switch (decision) { + case SupportPromotionDecision::kPromoteToForm: + change.semantic_type = SemanticChangeType::kFormStateChange; + if (!form_support_hits[change.changed_obj_num].empty()) { + uint32_t owner_obj_num = *form_support_hits[change.changed_obj_num].begin(); + change.target_obj_num = owner_obj_num; + RetainPtr owner_obj = doc->GetMutableIndirectObject(owner_obj_num); + CPDF_Dictionary* owner_dict = + owner_obj ? owner_obj->AsMutableDictionary() : nullptr; + if (!owner_dict) { + if (CPDF_Stream* owner_stream = owner_obj ? owner_obj->AsMutableStream() : nullptr) + owner_dict = owner_stream->GetMutableDict(); + } + change.page_obj_num = GetPageObjectNum(owner_dict); + } + break; + case SupportPromotionDecision::kPromoteToAnnotation: + change.semantic_type = SemanticChangeType::kAnnotation; + if (!annotation_support_hits[change.changed_obj_num].empty()) { + uint32_t owner_obj_num = + *annotation_support_hits[change.changed_obj_num].begin(); + change.target_obj_num = owner_obj_num; + RetainPtr owner_obj = doc->GetMutableIndirectObject(owner_obj_num); + CPDF_Dictionary* owner_dict = + owner_obj ? owner_obj->AsMutableDictionary() : nullptr; + if (!owner_dict) { + if (CPDF_Stream* owner_stream = owner_obj ? 
owner_obj->AsMutableStream() : nullptr) + owner_dict = owner_stream->GetMutableDict(); + } + change.page_obj_num = GetPageObjectNum(owner_dict); + } + break; + case SupportPromotionDecision::kAmbiguous: + case SupportPromotionDecision::kShared: + case SupportPromotionDecision::kNotLocal: + break; + } + } + + return result; +} diff --git a/core/fpdfapi/parser/cpdf_revision_classifier.h b/core/fpdfapi/parser/cpdf_revision_classifier.h new file mode 100644 index 000000000..999f3b94b --- /dev/null +++ b/core/fpdfapi/parser/cpdf_revision_classifier.h @@ -0,0 +1,83 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_REVISION_CLASSIFIER_H_ +#define CORE_FPDFAPI_PARSER_CPDF_REVISION_CLASSIFIER_H_ + +#include + +#include +#include + +#include "core/fpdfapi/parser/cpdf_revision_diff.h" + +class CPDF_Document; +class CPDF_Dictionary; + +enum class SemanticChangeType : uint8_t { + kFormStateChange = 0, + kAnnotation = 1, + kSignature = 2, + kDocumentTimestamp = 3, + kDSS = 4, + kPage = 5, + kCatalog = 6, + kOther = 7, +}; + +struct ResolvedSemanticChange { + uint32_t changed_obj_num; + uint32_t target_obj_num; + uint32_t page_obj_num; + RevisionDiffCategory diff_category; + SemanticChangeType semantic_type; +}; + +enum class SupportOwnerKind : uint8_t { + kNone = 0, + kForm, + kAnnotation, +}; + +struct SupportCollectionPolicy { + bool include_ap = false; + bool include_value = false; + bool include_annotation_support = false; +}; + +enum class SupportPromotionDecision : uint8_t { + kPromoteToForm, + kPromoteToAnnotation, + kAmbiguous, + kShared, + kNotLocal, +}; + +// Collect all indirect object numbers reachable from the /DSS entry in the +// document catalog. Returns an empty set if no DSS exists. 
+std::set CollectDSSObjectNumbers(CPDF_Document* doc); + +// Collect all indirect object numbers reachable from the support roots +// of an annotation/widget owner dictionary, per the given policy. +std::set CollectSupportObjectNumbers( + CPDF_Document* doc, + const CPDF_Dictionary* owner_dict, + const SupportCollectionPolicy& policy); + +// Decide whether a changed support object should be promoted. +SupportPromotionDecision DecideSupportPromotion( + uint32_t obj_num, + RevisionDiffCategory diff_category, + const std::set& form_owner_hits, + const std::set& annotation_owner_hits, + bool has_multiple_references_in_document); + +// Two-pass semantic classification of raw diff entries. +// multi_ref_set should be precomputed via GetObjectsWithMultipleReferences(). +std::vector ClassifyChanges( + CPDF_Document* doc, + const std::vector& raw_diff, + const std::set& multi_ref_set); + +#endif // CORE_FPDFAPI_PARSER_CPDF_REVISION_CLASSIFIER_H_ diff --git a/core/fpdfapi/parser/cpdf_revision_diff.cpp b/core/fpdfapi/parser/cpdf_revision_diff.cpp new file mode 100644 index 000000000..a915fecf5 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_revision_diff.cpp @@ -0,0 +1,57 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "core/fpdfapi/parser/cpdf_revision_diff.h" + +std::vector CPDF_RevisionDiff::ComputeDiff( + const ObjectMap& older, + const ObjectMap& newer) { + std::vector result; + + for (const auto& [obj_num, new_info] : newer) { + auto it = older.find(obj_num); + + if (it == older.end()) { + if (new_info.type != CPDF_CrossRefTable::ObjectType::kFree) { + result.push_back({obj_num, RevisionDiffCategory::kAdded}); + } + continue; + } + + const auto& old_info = it->second; + + if (new_info.type == CPDF_CrossRefTable::ObjectType::kFree && + old_info.type != CPDF_CrossRefTable::ObjectType::kFree) { + result.push_back({obj_num, RevisionDiffCategory::kFreed}); + continue; + } + + if (new_info.type != old_info.type) { + result.push_back({obj_num, RevisionDiffCategory::kModified}); + continue; + } + + if (new_info.type == CPDF_CrossRefTable::ObjectType::kNormal) { + if (new_info.pos != old_info.pos || + new_info.gennum != old_info.gennum) { + result.push_back({obj_num, RevisionDiffCategory::kModified}); + } + } else if (new_info.type == CPDF_CrossRefTable::ObjectType::kCompressed) { + if (new_info.archive.obj_num != old_info.archive.obj_num || + new_info.archive.obj_index != old_info.archive.obj_index) { + result.push_back({obj_num, RevisionDiffCategory::kModified}); + } + } + } + + // Objects present in older but not in newer were freed. + for (const auto& [obj_num, old_info] : older) { + if (old_info.type != CPDF_CrossRefTable::ObjectType::kFree && + newer.find(obj_num) == newer.end()) { + result.push_back({obj_num, RevisionDiffCategory::kFreed}); + } + } + + return result; +} diff --git a/core/fpdfapi/parser/cpdf_revision_diff.h b/core/fpdfapi/parser/cpdf_revision_diff.h new file mode 100644 index 000000000..64c91ebb6 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_revision_diff.h @@ -0,0 +1,35 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CORE_FPDFAPI_PARSER_CPDF_REVISION_DIFF_H_ +#define CORE_FPDFAPI_PARSER_CPDF_REVISION_DIFF_H_ + +#include + +#include +#include + +#include "core/fpdfapi/parser/cpdf_cross_ref_table.h" + +enum class RevisionDiffCategory : uint8_t { + kAdded = 0, + kModified = 1, + kFreed = 2, +}; + +struct RevisionDiffEntry { + uint32_t obj_num; + RevisionDiffCategory category; +}; + +class CPDF_RevisionDiff { + public: + using ObjectInfo = CPDF_CrossRefTable::ObjectInfo; + using ObjectMap = std::map; + + static std::vector ComputeDiff(const ObjectMap& older, + const ObjectMap& newer); +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_REVISION_DIFF_H_ diff --git a/core/fpdfapi/parser/cpdf_revision_provider.cpp b/core/fpdfapi/parser/cpdf_revision_provider.cpp new file mode 100644 index 000000000..3f8d98f87 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_revision_provider.cpp @@ -0,0 +1,280 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "core/fpdfapi/parser/cpdf_revision_provider.h" + +#include +#include +#include + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_parser.h" +#include "core/fpdfapi/parser/cpdf_stream.h" +#include "core/fpdfapi/parser/cpdf_stream_acc.h" +#include "core/fxcrt/fx_safe_types.h" +#include "core/fxcrt/numerics/safe_conversions.h" +#include "core/fxcrt/span.h" + +using ObjectType = CPDF_CrossRefTable::ObjectType; +using ObjectInfo = CPDF_CrossRefTable::ObjectInfo; + +namespace { + +struct XRefStreamIndexEntry { + uint32_t start_obj_num; + uint32_t obj_count; +}; + +constexpr size_t kMinFieldCount = 3; + +uint32_t GetVarInt(pdfium::span input) { + uint32_t result = 0; + for (uint8_t c : input) { + result = result * 256 + c; + } + return result; +} + +std::optional GetObjectTypeFromStreamType(uint32_t type) { + switch (type) { + case 0: + return ObjectType::kFree; + case 1: + return ObjectType::kNormal; + case 2: + return ObjectType::kCompressed; + default: + return std::nullopt; + } +} + +std::vector GetStreamIndices(const CPDF_Array* array, + uint32_t size) { + std::vector indices; + if (array) { + for (size_t i = 0; i < array->size() / 2; i++) { + RetainPtr start_num = array->GetNumberAt(i * 2); + if (!start_num) + continue; + RetainPtr count_obj = array->GetNumberAt(i * 2 + 1); + if (!count_obj) + continue; + int nStartNum = start_num->GetInteger(); + int nCount = count_obj->GetInteger(); + if (nStartNum < 0 || nCount <= 0) + continue; + indices.push_back( + {static_cast(nStartNum), static_cast(nCount)}); + } + } + if (indices.empty()) + indices.push_back({0, size}); + return indices; +} + +std::vector GetFieldWidths(const CPDF_Array* array) { + std::vector results; + if (!array) + return results; + CPDF_ArrayLocker locker(array); + for (const auto& obj : locker) + results.push_back(obj->GetInteger()); + return results; +} + +// 
Decode one xref stream entry and insert into the output map.
+//
+// |entry_span| holds the raw bytes of one entry and |field_widths| the byte
+// width of each field; only the first three fields are read. A zero-width
+// type field means the entry is treated as kNormal. Entries with an unknown
+// type value or with out-of-range fields are dropped without touching |out|.
+void DecodeOneEntry(pdfium::span entry_span,
+                    pdfium::span field_widths,
+                    uint32_t obj_num,
+                    CPDF_RevisionProvider::ObjectMap* out) {
+  ObjectType type;
+  if (field_widths[0]) {
+    uint32_t raw_type = GetVarInt(entry_span.first(field_widths[0]));
+    std::optional maybe_type = GetObjectTypeFromStreamType(raw_type);
+    if (!maybe_type.has_value())
+      return;
+    type = maybe_type.value();
+  } else {
+    type = ObjectType::kNormal;
+  }
+
+  uint32_t second =
+      GetVarInt(entry_span.subspan(field_widths[0], field_widths[1]));
+  uint32_t third = GetVarInt(
+      entry_span.subspan(field_widths[0] + field_widths[1], field_widths[2]));
+
+  ObjectInfo info;
+  info.type = type;
+
+  if (type == ObjectType::kFree) {
+    if (pdfium::IsValueInRangeForNumericType(third)) {
+      info.gennum = static_cast(third);
+      (*out)[obj_num] = info;
+    }
+    return;
+  }
+
+  if (type == ObjectType::kNormal) {
+    if (pdfium::IsValueInRangeForNumericType(second) &&
+        pdfium::IsValueInRangeForNumericType(third)) {
+      info.pos = static_cast(second);
+      info.gennum = static_cast(third);
+      (*out)[obj_num] = info;
+    }
+    return;
+  }
+
+  // kCompressed: the second field is the object number of the containing
+  // object stream and the third is the index inside it. The container number
+  // is range-checked as well as |obj_num|, so the map never names a container
+  // outside the parser's object-number range.
+  // NOTE(review): presumably mirrors the validation CPDF_Parser applies when
+  // it loads compressed entries itself - confirm against the parser.
+  if (obj_num <= CPDF_Parser::kMaxObjectNumber &&
+      second <= CPDF_Parser::kMaxObjectNumber) {
+    info.archive.obj_num = second;
+    info.archive.obj_index = third;
+    (*out)[obj_num] = info;
+  }
+}
+
+// Decode all xref stream entries from a stream object into an ObjectMap.
+CPDF_RevisionProvider::ObjectMap DecodeXRefStreamEntries( + const CPDF_Stream* stream) { + CPDF_RevisionProvider::ObjectMap result; + if (!stream) + return result; + + RetainPtr dict = stream->GetDict(); + if (!dict) + return result; + + const int32_t size = dict->GetIntegerFor("Size"); + if (size < 0) + return result; + + std::vector indices = + GetStreamIndices(dict->GetArrayFor("Index").Get(), + static_cast(size)); + + std::vector field_widths = + GetFieldWidths(dict->GetArrayFor("W").Get()); + if (field_widths.size() < kMinFieldCount) + return result; + + FX_SAFE_UINT32 dwAccWidth; + for (uint32_t width : field_widths) + dwAccWidth += width; + if (!dwAccWidth.IsValid()) + return result; + + uint32_t total_width = dwAccWidth.ValueOrDie(); + + auto pAcc = pdfium::MakeRetain( + pdfium::WrapRetain(const_cast(stream))); + pAcc->LoadAllDataFiltered(); + + pdfium::span data_span = pAcc->GetSpan(); + uint32_t segindex = 0; + for (const auto& index : indices) { + FX_SAFE_UINT32 seg_end = segindex; + seg_end += index.obj_count; + seg_end *= total_width; + if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size()) + continue; + + pdfium::span seg_span = data_span.subspan( + segindex * total_width, index.obj_count * total_width); + + for (uint32_t i = 0; i < index.obj_count; ++i) { + const uint32_t obj_num = index.start_obj_num + i; + if (obj_num > CPDF_Parser::kMaxObjectNumber) + break; + DecodeOneEntry(seg_span.subspan(i * total_width, total_width), + field_widths, obj_num, &result); + } + segindex += index.obj_count; + } + return result; +} + +} // namespace + +CPDF_RevisionProvider::CPDF_RevisionProvider() = default; +CPDF_RevisionProvider::~CPDF_RevisionProvider() = default; + +bool CPDF_RevisionProvider::Build( + CPDF_Parser* parser, + const std::vector& xref_list, + const std::vector& xref_stream_list) { + if (built_) + return true; + + if (!parser || xref_list.size() != xref_stream_list.size()) + return false; + + std::vector trailer_ends = 
parser->GetTrailerEnds(); + + const size_t count = xref_list.size(); + layers_.resize(count); + + for (size_t i = 0; i < count; ++i) { + RevisionLayer& layer = layers_[i]; + layer.xref_offset = std::max(xref_list[i], xref_stream_list[i]); + + // Map to trailer_ends by index. trailer_ends are ordered by file position + // (earliest %%EOF first), matching oldest-first ordering of xref_list + // after FindAllCrossReferenceTablesAndStream prepends to the vectors. + if (i < trailer_ends.size()) { + layer.revision_end = static_cast(trailer_ends[i]); + } else if (!trailer_ends.empty()) { + layer.revision_end = + static_cast(trailer_ends.back()); + } else { + layer.revision_end = parser->GetDocumentSize(); + } + + // Extract xref stream entries for this layer. + if (xref_stream_list[i] > 0) { + RetainPtr obj = + parser->ParseIndirectObjectAtForTesting(xref_stream_list[i]); + if (const CPDF_Stream* stream = obj ? obj->AsStream() : nullptr) { + layer.layer_objects = DecodeXRefStreamEntries(stream); + } + } + + // Extract classic xref table entries for this layer. Table entries + // take precedence over stream entries per ISO 32000-1 7.5.8.4, + // so they are applied after stream entries to overwrite conflicts. 
+ if (xref_list[i] > 0) { + CPDF_Parser::ObjectMap table_objects; + if (parser->ExtractCrossRefTableEntriesAt(xref_list[i], + &table_objects)) { + for (const auto& [obj_num, info] : table_objects) { + layer.layer_objects[obj_num] = info; + } + } + } + } + + built_ = true; + return true; +} + +size_t CPDF_RevisionProvider::GetRevisionCount() const { + return layers_.size(); +} + +const CPDF_RevisionProvider::RevisionLayer& +CPDF_RevisionProvider::GetLayer(size_t index) const { + return layers_[index]; +} + +CPDF_RevisionProvider::ObjectMap +CPDF_RevisionProvider::GetVisibleObjectsAtRevision( + size_t revision_index) const { + ObjectMap merged; + for (size_t i = 0; i <= revision_index && i < layers_.size(); ++i) { + for (const auto& [obj_num, info] : layers_[i].layer_objects) { + merged[obj_num] = info; + } + } + return merged; +} diff --git a/core/fpdfapi/parser/cpdf_revision_provider.h b/core/fpdfapi/parser/cpdf_revision_provider.h new file mode 100644 index 000000000..94576b13b --- /dev/null +++ b/core/fpdfapi/parser/cpdf_revision_provider.h @@ -0,0 +1,59 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_REVISION_PROVIDER_H_ +#define CORE_FPDFAPI_PARSER_CPDF_REVISION_PROVIDER_H_ + +#include + +#include +#include + +#include "core/fpdfapi/parser/cpdf_cross_ref_table.h" +#include "core/fxcrt/fx_types.h" + +class CPDF_Parser; + +class CPDF_RevisionProvider { + public: + using ObjectInfo = CPDF_CrossRefTable::ObjectInfo; + using ObjectMap = std::map; + + struct RevisionLayer { + FX_FILESIZE xref_offset = 0; + // Byte offset after %%EOF for this revision's incremental save. + // WARNING: Derived from GetTrailerEnds() which returns unsigned int, + // truncating FX_FILESIZE for files > 4 GB. This is an upstream PDFium + // limitation. 
+ FX_FILESIZE revision_end = 0; + ObjectMap layer_objects; + }; + + CPDF_RevisionProvider(); + ~CPDF_RevisionProvider(); + + // Build layers by re-parsing xref sections from stored offsets. + // Uses parser's syntax_ to re-read xref tables/streams at known positions. + bool Build(CPDF_Parser* parser, + const std::vector& xref_list, + const std::vector& xref_stream_list); + + bool is_built() const { return built_; } + size_t GetRevisionCount() const; + const RevisionLayer& GetLayer(size_t index) const; + + // Build merged object map representing visible xref state at revision N. + // Merges layers 0..revision_index inclusive. + // + // Returns xref-level visibility: which object numbers exist and where + // they live (offset or compressed archive ref). Does NOT parse or return + // actual object values. + ObjectMap GetVisibleObjectsAtRevision(size_t revision_index) const; + + private: + std::vector layers_; + bool built_ = false; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_REVISION_PROVIDER_H_ diff --git a/core/fpdfdoc/cpdf_interactiveform.cpp b/core/fpdfdoc/cpdf_interactiveform.cpp index 24bf93ad4..013415613 100644 --- a/core/fpdfdoc/cpdf_interactiveform.cpp +++ b/core/fpdfdoc/cpdf_interactiveform.cpp @@ -7,6 +7,7 @@ #include "core/fpdfdoc/cpdf_interactiveform.h" #include +#include #include #include #include @@ -394,6 +395,45 @@ class CFieldNameExtractor { size_t cur_ = 0; }; +uint32_t GetObjNumFromEntry(const CPDF_Object* entry) { + if (!entry) + return 0; + if (entry->IsReference()) + return entry->AsReference()->GetRefObjNum(); + const CPDF_Object* direct = entry->GetDirect(); + return direct ? 
direct->GetObjNum() : 0; +} + +bool RemoveChildFromKids(CPDF_Dictionary* pParent, uint32_t child_objnum) { + if (!pParent || child_objnum == 0) + return false; + + RetainPtr pKids = pParent->GetMutableArrayFor("Kids"); + if (!pKids) + return false; + + for (size_t i = 0; i < pKids->size(); ++i) { + if (GetObjNumFromEntry(pKids->GetObjectAt(i).Get()) == child_objnum) { + pKids->RemoveAt(i); + return true; + } + } + return false; +} + +bool RemoveFromFieldsArray(CPDF_Array* pFields, uint32_t objnum) { + if (!pFields || objnum == 0) + return false; + + for (size_t i = 0; i < pFields->size(); ++i) { + if (GetObjNumFromEntry(pFields->GetObjectAt(i).Get()) == objnum) { + pFields->RemoveAt(i); + return true; + } + } + return false; +} + } // namespace class CFieldTree { @@ -887,7 +927,14 @@ void CPDF_InteractiveForm::LoadField(RetainPtr field_dict, } void CPDF_InteractiveForm::FixPageFields(CPDF_Page* page) { - RetainPtr annots = page->GetMutableAnnotsArray(); + FixPageFieldsFromDict(page->GetMutableDict().Get()); +} + +void CPDF_InteractiveForm::FixPageFieldsFromDict(CPDF_Dictionary* page_dict) { + if (!page_dict) { + return; + } + RetainPtr annots = page_dict->GetMutableArrayFor("Annots"); if (!annots) { return; } @@ -900,6 +947,56 @@ void CPDF_InteractiveForm::FixPageFields(CPDF_Page* page) { } } +void CPDF_InteractiveForm::RemoveWidgetFromFieldTree( + CPDF_Dictionary* widget_dict) { + if (!widget_dict || widget_dict->GetNameFor("Subtype") != "Widget") + return; + if (!form_dict_) + return; + + RetainPtr pFields = form_dict_->GetMutableArrayFor("Fields"); + if (!pFields) + return; + + uint32_t child_objnum = widget_dict->GetObjNum(); + + RetainPtr pParent = + widget_dict->GetMutableDictFor("Parent"); + + if (!pParent) { + RemoveFromFieldsArray(pFields.Get(), child_objnum); + return; + } + + RemoveChildFromKids(pParent.Get(), child_objnum); + + std::set visited; + RetainPtr pNode = pParent; + while (pNode) { + uint32_t node_objnum = pNode->GetObjNum(); + if (node_objnum 
&& !visited.insert(node_objnum).second) + break; + + RetainPtr pKids = pNode->GetMutableArrayFor("Kids"); + if (pKids && !pKids->IsEmpty()) + break; + + RetainPtr pGrandparent = + pNode->GetMutableDictFor("Parent"); + + if (pGrandparent) { + RemoveChildFromKids(pGrandparent.Get(), node_objnum); + } else { + RemoveFromFieldsArray(pFields.Get(), node_objnum); + } + + if (node_objnum) + document_->DeleteIndirectObject(node_objnum); + + pNode = pGrandparent; + } +} + void CPDF_InteractiveForm::AddTerminalField( RetainPtr field_dict) { if (!field_dict->KeyExist(pdfium::form_fields::kFT)) { diff --git a/core/fpdfdoc/cpdf_interactiveform.h b/core/fpdfdoc/cpdf_interactiveform.h index 33c09170e..870f06bf5 100644 --- a/core/fpdfdoc/cpdf_interactiveform.h +++ b/core/fpdfdoc/cpdf_interactiveform.h @@ -93,6 +93,8 @@ class CPDF_InteractiveForm { void SetNotifierIface(NotifierIface* notify); void FixPageFields(CPDF_Page* page); + void FixPageFieldsFromDict(CPDF_Dictionary* page_dict); + void RemoveWidgetFromFieldTree(CPDF_Dictionary* widget_dict); // Wrap callbacks thru NotifierIface. 
bool NotifyBeforeValueChange(CPDF_FormField* field, const WideString& value); diff --git a/fpdfsdk/BUILD.gn b/fpdfsdk/BUILD.gn index b903c2b31..e69f773fe 100644 --- a/fpdfsdk/BUILD.gn +++ b/fpdfsdk/BUILD.gn @@ -53,6 +53,7 @@ source_set("fpdfsdk") { "fpdf_javascript.cpp", "fpdf_ppo.cpp", "fpdf_progressive.cpp", + "fpdf_revision.cpp", "fpdf_save.cpp", "fpdf_searchex.cpp", "fpdf_signature.cpp", diff --git a/fpdfsdk/cpdfsdk_helpers.cpp b/fpdfsdk/cpdfsdk_helpers.cpp index 766f6033a..edce278ec 100644 --- a/fpdfsdk/cpdfsdk_helpers.cpp +++ b/fpdfsdk/cpdfsdk_helpers.cpp @@ -15,6 +15,8 @@ #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_parser.h" +#include "core/fpdfapi/parser/cpdf_revision_provider.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" #include "core/fpdfapi/render/cpdf_renderoptions.h" #include "core/fpdfdoc/cpdf_annot.h" @@ -579,3 +581,14 @@ std::vector ParsePageRangeString(const ByteString& bsPageRange, } return results; } + +const CPDF_RevisionProvider* GetRevisionProviderFromDocument( + FPDF_DOCUMENT document) { + auto* doc = CPDFDocumentFromFPDFDocument(document); + if (!doc) + return nullptr; + CPDF_Parser* parser = doc->GetParser(); + if (!parser) + return nullptr; + return parser->GetRevisionProvider(); +} diff --git a/fpdfsdk/cpdfsdk_helpers.h b/fpdfsdk/cpdfsdk_helpers.h index 4e1dc0874..3b315231e 100644 --- a/fpdfsdk/cpdfsdk_helpers.h +++ b/fpdfsdk/cpdfsdk_helpers.h @@ -354,4 +354,11 @@ void SetColorFromScheme(const FPDF_COLORSCHEME* pColorScheme, std::vector ParsePageRangeString(const ByteString& bsPageRange, uint32_t nCount); +class CPDF_RevisionProvider; + +// Resolve an FPDF_DOCUMENT to its lazy-built revision provider. +// Returns nullptr if the document or its parser is invalid. 
+const CPDF_RevisionProvider* GetRevisionProviderFromDocument( + FPDF_DOCUMENT document); + #endif // FPDFSDK_CPDFSDK_HELPERS_H_ diff --git a/fpdfsdk/fpdf_annot.cpp b/fpdfsdk/fpdf_annot.cpp index 5603622e1..0a9ba5b72 100644 --- a/fpdfsdk/fpdf_annot.cpp +++ b/fpdfsdk/fpdf_annot.cpp @@ -1004,6 +1004,99 @@ static bool WrapAPContentIntoFormXObject( child_stream->GetObjNum()); return true; } + +std::vector GetRedactRectsFromAnnotDict( + const CPDF_Dictionary* annot_dict) { + std::vector rects; + if (!annot_dict) + return rects; + + RetainPtr quad_points_array = + annot_dict->GetArrayFor("QuadPoints"); + if (quad_points_array && quad_points_array->size() >= 8) { + size_t quad_count = CPDF_Annot::QuadPointCount(quad_points_array.Get()); + for (size_t i = 0; i < quad_count; ++i) { + CFX_FloatRect rect = CPDF_Annot::RectFromQuadPoints(annot_dict, i); + rect.Normalize(); + if (!rect.IsEmpty()) + rects.push_back(rect); + } + if (!rects.empty()) + return rects; + } + + CFX_FloatRect rect = annot_dict->GetRectFor(pdfium::annotation::kRect); + rect.Normalize(); + if (!rect.IsEmpty()) + rects.push_back(rect); + + return rects; +} + +void FlattenFormXObjectToPage(CPDF_Page* page, + RetainPtr form_stream, + const CFX_FloatRect& target_rect) { + if (!page || !form_stream) + return; + + CPDF_Document* doc = page->GetDocument(); + if (!doc) + return; + + RetainPtr form_dict = form_stream->GetDict(); + if (!form_dict) + return; + + CFX_FloatRect form_bbox = form_dict->GetRectFor("BBox"); + form_bbox.Normalize(); + if (form_bbox.IsEmpty()) + form_bbox = target_rect; + + float scale_x = 1.0f; + float scale_y = 1.0f; + if (form_bbox.Width() > 0) + scale_x = target_rect.Width() / form_bbox.Width(); + if (form_bbox.Height() > 0) + scale_y = target_rect.Height() / form_bbox.Height(); + + CFX_Matrix form_matrix; + form_matrix.a = scale_x; + form_matrix.d = scale_y; + form_matrix.e = target_rect.left - form_bbox.left * scale_x; + form_matrix.f = target_rect.bottom - form_bbox.bottom * scale_y; 
+ + auto form = std::make_unique( + doc, + page->GetMutableResources(), + pdfium::WrapRetain(const_cast(form_stream.Get()))); + form->ParseContent(); + + auto form_obj = std::make_unique( + CPDF_PageObject::kNoContentStream, + std::move(form), + form_matrix); + + form_obj->CalcBoundingBox(); + form_obj->SetDirty(true); + + page->AppendPageObject(std::move(form_obj)); +} + +int GetAnnotIndexOnPage(CPDF_Page* page, const CPDF_Dictionary* annot_dict) { + if (!page || !annot_dict) + return -1; + + RetainPtr annots = page->GetMutableAnnotsArray(); + if (!annots) + return -1; + + for (size_t i = 0; i < annots->size(); ++i) { + if (annots->GetDictAt(i) == annot_dict) + return static_cast(i); + } + return -1; +} + } // namespace FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV @@ -3446,7 +3539,9 @@ EPDFPage_GetAnnotByName(FPDF_PAGE page, FPDF_WIDESTRING nm) { } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV -EPDFPage_RemoveAnnotByName(FPDF_PAGE page, FPDF_WIDESTRING nm) { +EPDFPage_RemoveAnnotByName(FPDF_PAGE page, + FPDF_WIDESTRING nm, + FPDF_FORMHANDLE form_handle) { if (!page || !nm || !*nm) return false; @@ -3461,29 +3556,29 @@ EPDFPage_RemoveAnnotByName(FPDF_PAGE page, FPDF_WIDESTRING nm) { WideString target = UNSAFE_BUFFERS(WideStringFromFPDFWideString(nm)); for (size_t i = 0; i < annots->size(); ++i) { - // Keep the raw entry so we can see if it was a reference. RetainPtr entry = annots->GetMutableObjectAt(i); - // Resolve to a dictionary to compare /NM. RetainPtr dict = ToDictionary(entry ? entry->GetMutableDirect() : nullptr); if (!dict || dict->GetUnicodeTextFor("NM") != target) continue; - // Determine indirect object number, if any. uint32_t objnum = 0; if (entry && entry->IsReference()) { objnum = entry->AsReference()->GetRefObjNum(); } else if (dict) { - // Handles the case where the dict was promoted indirect but the Annots - // array still holds it directly. objnum = dict->GetObjNum(); } - // Remove from /Annots. 
+ if (form_handle) { + CPDFSDK_InteractiveForm* pForm = + FormHandleToInteractiveForm(form_handle); + if (pForm) + pForm->GetInteractiveForm()->RemoveWidgetFromFieldTree(dict.Get()); + } + annots->RemoveAt(i); - // If it was indirect, delete the object to avoid leaving an orphan. if (objnum) pPage->GetDocument()->DeleteIndirectObject(objnum); @@ -3576,12 +3671,16 @@ EPDFPage_GetAnnotRaw(FPDF_DOCUMENT doc, int page_index, int index) { } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV -EPDFPage_RemoveAnnotRaw(FPDF_DOCUMENT doc, int page_index, int index) { +EPDFPage_RemoveAnnotRaw(FPDF_DOCUMENT doc, + int page_index, + int index, + FPDF_FORMHANDLE form_handle) { CPDF_Document* pdf = CPDFDocumentFromFPDFDocument(doc); if (!pdf || page_index < 0 || page_index >= pdf->GetPageCount() || index < 0) return false; - RetainPtr page_dict = pdf->GetMutablePageDictionary(page_index); + RetainPtr page_dict = + pdf->GetMutablePageDictionary(page_index); if (!page_dict) return false; @@ -3589,10 +3688,8 @@ EPDFPage_RemoveAnnotRaw(FPDF_DOCUMENT doc, int page_index, int index) { if (!annots || static_cast(index) >= annots->size()) return false; - // Keep original entry so we can determine if it was indirect. RetainPtr entry = annots->GetMutableObjectAt(index); - // Resolve to dictionary for fallback objnum detection. RetainPtr dict = ToDictionary(entry ? entry->GetMutableDirect() : nullptr); @@ -3603,10 +3700,15 @@ EPDFPage_RemoveAnnotRaw(FPDF_DOCUMENT doc, int page_index, int index) { objnum = dict->GetObjNum(); } - // Remove from /Annots. + if (form_handle) { + CPDFSDK_InteractiveForm* pForm = + FormHandleToInteractiveForm(form_handle); + if (pForm) + pForm->GetInteractiveForm()->RemoveWidgetFromFieldTree(dict.Get()); + } + annots->RemoveAt(index); - // If it was indirect, delete the annot object to avoid leaving orphans. 
if (objnum) pdf->DeleteIndirectObject(objnum); @@ -3947,116 +4049,6 @@ EPDFAnnot_GetOverlayTextRepeat(FPDF_ANNOTATION annot) { return dict->GetBooleanFor("Repeat", false); } -namespace { - -// Helper to extract redaction rectangles from a REDACT annotation. -// Returns QuadPoints if present, otherwise falls back to Rect. -std::vector GetRedactRectsFromAnnotDict( - const CPDF_Dictionary* annot_dict) { - std::vector rects; - if (!annot_dict) - return rects; - - // Try QuadPoints first (for text-based redactions) - RetainPtr quad_points_array = - annot_dict->GetArrayFor("QuadPoints"); - if (quad_points_array && quad_points_array->size() >= 8) { - size_t quad_count = CPDF_Annot::QuadPointCount(quad_points_array.Get()); - for (size_t i = 0; i < quad_count; ++i) { - CFX_FloatRect rect = CPDF_Annot::RectFromQuadPoints(annot_dict, i); - rect.Normalize(); - if (!rect.IsEmpty()) - rects.push_back(rect); - } - if (!rects.empty()) - return rects; - } - - // Fall back to Rect (for area-based redactions) - CFX_FloatRect rect = annot_dict->GetRectFor(pdfium::annotation::kRect); - rect.Normalize(); - if (!rect.IsEmpty()) - rects.push_back(rect); - - return rects; -} - -// Internal helper to flatten any Form XObject stream to page content. -// Used by EPDFAnnot_Flatten (for AP/N) and EPDFAnnot_ApplyRedaction (for RO). 
-void FlattenFormXObjectToPage(CPDF_Page* page, - RetainPtr form_stream, - const CFX_FloatRect& target_rect) { - if (!page || !form_stream) - return; - - CPDF_Document* doc = page->GetDocument(); - if (!doc) - return; - - // Get the form dictionary from the stream - RetainPtr form_dict = form_stream->GetDict(); - if (!form_dict) - return; - - // Get the BBox from the form stream - CFX_FloatRect form_bbox = form_dict->GetRectFor("BBox"); - form_bbox.Normalize(); - if (form_bbox.IsEmpty()) - form_bbox = target_rect; - - // Calculate the transformation matrix to position the form at the target rect - // The form's content is defined in BBox coordinates, we need to map it to target_rect - float scale_x = 1.0f; - float scale_y = 1.0f; - if (form_bbox.Width() > 0) - scale_x = target_rect.Width() / form_bbox.Width(); - if (form_bbox.Height() > 0) - scale_y = target_rect.Height() / form_bbox.Height(); - - CFX_Matrix form_matrix; - form_matrix.a = scale_x; - form_matrix.d = scale_y; - form_matrix.e = target_rect.left - form_bbox.left * scale_x; - form_matrix.f = target_rect.bottom - form_bbox.bottom * scale_y; - - // Create a CPDF_Form from the stream - auto form = std::make_unique( - doc, - page->GetMutableResources(), - pdfium::WrapRetain(const_cast(form_stream.Get()))); - form->ParseContent(); - - // Create a FormObject that wraps the form - auto form_obj = std::make_unique( - CPDF_PageObject::kNoContentStream, - std::move(form), - form_matrix); - - form_obj->CalcBoundingBox(); - form_obj->SetDirty(true); - - page->AppendPageObject(std::move(form_obj)); -} - -// Find the index of an annotation in the page's annotation array. -// Returns -1 if not found. 
-int GetAnnotIndexOnPage(CPDF_Page* page, const CPDF_Dictionary* annot_dict) { - if (!page || !annot_dict) - return -1; - - RetainPtr annots = page->GetMutableAnnotsArray(); - if (!annots) - return -1; - - for (size_t i = 0; i < annots->size(); ++i) { - if (annots->GetDictAt(i) == annot_dict) - return static_cast(i); - } - return -1; -} - -} // namespace - FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV EPDFAnnot_ApplyRedaction(FPDF_PAGE page, FPDF_ANNOTATION annot) { CPDF_Page* pPage = CPDFPageFromFPDFPage(page); @@ -4859,6 +4851,7 @@ EPDFPage_CreateFormField(FPDF_PAGE page, case FPDF_FORMFIELD_COMBOBOX: case FPDF_FORMFIELD_LISTBOX: case FPDF_FORMFIELD_PUSHBUTTON: + case FPDF_FORMFIELD_SIGNATURE: break; default: return nullptr; @@ -4891,6 +4884,9 @@ EPDFPage_CreateFormField(FPDF_PAGE page, case FPDF_FORMFIELD_LISTBOX: ft_value = "Ch"; break; + case FPDF_FORMFIELD_SIGNATURE: + ft_value = "Sig"; + break; } // Create the parent field dictionary (indirect) diff --git a/fpdfsdk/fpdf_formfill.cpp b/fpdfsdk/fpdf_formfill.cpp index d6d765576..14228cff8 100644 --- a/fpdfsdk/fpdf_formfill.cpp +++ b/fpdfsdk/fpdf_formfill.cpp @@ -6,7 +6,9 @@ #include "public/fpdf_formfill.h" +#include #include +#include #include #include @@ -936,3 +938,40 @@ FORM_IsIndexSelected(FPDF_FORMHANDLE hHandle, FPDF_PAGE page, int index) { CPDFSDK_PageView* pPageView = FormHandleToPageView(hHandle, page); return pPageView && pPageView->IsIndexSelected(index); } + +FPDF_EXPORT void FPDF_CALLCONV +EPDF_FixPageFieldsRaw(FPDF_FORMHANDLE hHandle, + FPDF_DOCUMENT document, + int page_index) { + CPDFSDK_InteractiveForm* pForm = FormHandleToInteractiveForm(hHandle); + if (!pForm) { + return; + } + + CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); + if (!pDoc || page_index < 0 || page_index >= pDoc->GetPageCount()) { + return; + } + + RetainPtr page_dict = + pDoc->GetMutablePageDictionary(page_index); + if (!page_dict) { + return; + } + + pForm->GetInteractiveForm()->FixPageFieldsFromDict(page_dict.Get()); 
+} + +FPDF_EXPORT FPDF_FORMFILLINFO* FPDF_CALLCONV EPDF_OpenFormFillInfo() { + FPDF_FORMFILLINFO* info = new (std::nothrow) FPDF_FORMFILLINFO(); + if (!info) + return nullptr; + memset(info, 0, sizeof(FPDF_FORMFILLINFO)); + info->version = 1; + return info; +} + +FPDF_EXPORT void FPDF_CALLCONV +EPDF_CloseFormFillInfo(FPDF_FORMFILLINFO* info) { + delete info; +} diff --git a/fpdfsdk/fpdf_revision.cpp b/fpdfsdk/fpdf_revision.cpp new file mode 100644 index 000000000..565953dfc --- /dev/null +++ b/fpdfsdk/fpdf_revision.cpp @@ -0,0 +1,212 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "public/fpdf_revision.h" + +#include +#include +#include + +#include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_parser.h" +#include "core/fpdfapi/parser/cpdf_revision_classifier.h" +#include "core/fpdfapi/parser/cpdf_revision_diff.h" +#include "core/fpdfapi/parser/cpdf_revision_provider.h" +#include "core/fpdfapi/parser/object_tree_traversal_util.h" +#include "fpdfsdk/cpdfsdk_helpers.h" + +namespace { + +struct RevisionDiffResult { + std::vector entries; + std::vector semantic_changes; + std::set multi_ref_set; + bool semantic_computed = false; +}; + +void EnsureSemanticComputed(FPDF_DOCUMENT document, + RevisionDiffResult* result) { + if (result->semantic_computed) + return; + + auto* doc = CPDFDocumentFromFPDFDocument(document); + if (!doc) + return; + + result->multi_ref_set = GetObjectsWithMultipleReferences(doc); + result->semantic_changes = + ClassifyChanges(doc, result->entries, result->multi_ref_set); + result->semantic_computed = true; +} + +} // namespace + +FPDF_EXPORT int FPDF_CALLCONV +EPDFRevision_GetCount(FPDF_DOCUMENT document) { + const CPDF_RevisionProvider* provider = + GetRevisionProviderFromDocument(document); + if (!provider) + return -1; + return static_cast(provider->GetRevisionCount()); +} + +FPDF_EXPORT EPDF_REVISION 
FPDF_CALLCONV +EPDFRevision_Get(FPDF_DOCUMENT document, int index) { + const CPDF_RevisionProvider* provider = + GetRevisionProviderFromDocument(document); + if (!provider || index < 0 || + static_cast(index) >= provider->GetRevisionCount()) { + return nullptr; + } + return reinterpret_cast(&provider->GetLayer(index)); +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFRevision_GetFileEnd(EPDF_REVISION revision, + unsigned long long* out_file_end) { + if (!revision || !out_file_end) + return false; + const auto* layer = + reinterpret_cast(revision); + *out_file_end = static_cast(layer->revision_end); + return true; +} + +FPDF_EXPORT EPDF_REVISION_DIFF FPDF_CALLCONV +EPDFRevision_Compare(FPDF_DOCUMENT document, + int older_revision, + int newer_revision) { + const CPDF_RevisionProvider* provider = + GetRevisionProviderFromDocument(document); + if (!provider) + return nullptr; + + const size_t count = provider->GetRevisionCount(); + if (older_revision < 0 || static_cast(older_revision) >= count || + newer_revision < 0 || static_cast(newer_revision) >= count || + older_revision >= newer_revision) { + return nullptr; + } + + auto older_map = + provider->GetVisibleObjectsAtRevision(older_revision); + auto newer_map = + provider->GetVisibleObjectsAtRevision(newer_revision); + + auto* result = new RevisionDiffResult(); + result->entries = CPDF_RevisionDiff::ComputeDiff(older_map, newer_map); + return reinterpret_cast(result); +} + +FPDF_EXPORT void FPDF_CALLCONV +EPDFRevisionDiff_Close(EPDF_REVISION_DIFF diff) { + delete reinterpret_cast(diff); +} + +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFRevisionDiff_GetEntryCount(EPDF_REVISION_DIFF diff) { + if (!diff) + return 0; + const auto* result = reinterpret_cast(diff); + return static_cast(result->entries.size()); +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFRevisionDiff_GetEntry(EPDF_REVISION_DIFF diff, + unsigned long index, + unsigned int* out_obj_num, + int* out_category) { + if (!diff || !out_obj_num || !out_category) + 
return false; + + const auto* result = reinterpret_cast(diff); + if (index >= result->entries.size()) + return false; + + const RevisionDiffEntry& entry = result->entries[index]; + *out_obj_num = entry.obj_num; + *out_category = static_cast(entry.category); + return true; +} + +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFRevisionDiff_GetSemanticCategoryCounts(FPDF_DOCUMENT document, + EPDF_REVISION_DIFF diff, + int* category_buffer, + unsigned long* count_buffer, + unsigned long buffer_length) { + if (!diff || !document) + return 0; + + auto* result = + const_cast( + reinterpret_cast(diff)); + + EnsureSemanticComputed(document, result); + + std::map counts; + for (const auto& change : result->semantic_changes) { + counts[static_cast(change.semantic_type)]++; + } + + unsigned long filled = 0; + for (const auto& [cat, cnt] : counts) { + if (filled < buffer_length) { + if (category_buffer) + category_buffer[filled] = cat; + if (count_buffer) + count_buffer[filled] = cnt; + filled++; + } + } + return filled; +} + +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFRevisionDiff_GetResolvedEntryCount(FPDF_DOCUMENT document, + EPDF_REVISION_DIFF diff) { + if (!diff || !document) + return 0; + + auto* result = + const_cast( + reinterpret_cast(diff)); + + EnsureSemanticComputed(document, result); + + return static_cast(result->semantic_changes.size()); +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFRevisionDiff_GetResolvedEntry(FPDF_DOCUMENT document, + EPDF_REVISION_DIFF diff, + unsigned long index, + unsigned int* out_changed_obj_num, + unsigned int* out_target_obj_num, + unsigned int* out_page_obj_num, + int* out_diff_category, + int* out_semantic_type) { + if (!diff || !document || !out_changed_obj_num || !out_target_obj_num || + !out_page_obj_num || !out_diff_category || + !out_semantic_type) { + return false; + } + + auto* result = + const_cast( + reinterpret_cast(diff)); + + EnsureSemanticComputed(document, result); + + if (index >= result->semantic_changes.size()) + 
return false; + + const ResolvedSemanticChange& change = result->semantic_changes[index]; + *out_changed_obj_num = change.changed_obj_num; + *out_target_obj_num = change.target_obj_num; + *out_page_obj_num = change.page_obj_num; + *out_diff_category = static_cast(change.diff_category); + *out_semantic_type = static_cast(change.semantic_type); + return true; +} diff --git a/fpdfsdk/fpdf_save.cpp b/fpdfsdk/fpdf_save.cpp index a6eb68ca5..da38a20d5 100644 --- a/fpdfsdk/fpdf_save.cpp +++ b/fpdfsdk/fpdf_save.cpp @@ -6,8 +6,11 @@ #include "public/fpdf_save.h" +#include +#include #include #include +#include #include #include @@ -229,6 +232,44 @@ bool DoDocSave(FPDF_DOCUMENT document, return create_result; } +struct MemoryFileWriter : public FPDF_FILEWRITE { + std::string data; + + MemoryFileWriter() { + version = 1; + WriteBlock = [](FPDF_FILEWRITE* self, const void* buf, + unsigned long size) -> int { + static_cast(self)->data.append( + static_cast(buf), size); + return static_cast(size); + }; + } +}; + +void* SaveToBuffer(FPDF_DOCUMENT document, + FPDF_DWORD flags, + unsigned long* out_size, + std::optional version) { + if (!out_size) + return nullptr; + *out_size = 0; + + MemoryFileWriter writer; + bool ok = version.has_value() + ? 
DoDocSave(document, &writer, flags, version) + : DoDocSave(document, &writer, flags, {}); + if (!ok || writer.data.empty()) + return nullptr; + + void* buffer = malloc(writer.data.size()); + if (!buffer) + return nullptr; + + memcpy(buffer, writer.data.data(), writer.data.size()); + *out_size = static_cast(writer.data.size()); + return buffer; +} + } // namespace FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_SaveAsCopy(FPDF_DOCUMENT document, @@ -244,3 +285,18 @@ FPDF_SaveWithVersion(FPDF_DOCUMENT document, int fileVersion) { return DoDocSave(document, file_write, flags, fileVersion); } + +FPDF_EXPORT void* FPDF_CALLCONV +EPDF_SaveDocumentToBuffer(FPDF_DOCUMENT document, + FPDF_DWORD flags, + unsigned long* out_size) { + return SaveToBuffer(document, flags, out_size, {}); +} + +FPDF_EXPORT void* FPDF_CALLCONV +EPDF_SaveDocumentToBufferWithVersion(FPDF_DOCUMENT document, + FPDF_DWORD flags, + unsigned long* out_size, + int file_version) { + return SaveToBuffer(document, flags, out_size, file_version); +} diff --git a/fpdfsdk/fpdf_signature.cpp b/fpdfsdk/fpdf_signature.cpp index 66b5c38a9..2d7bf02e0 100644 --- a/fpdfsdk/fpdf_signature.cpp +++ b/fpdfsdk/fpdf_signature.cpp @@ -4,13 +4,23 @@ #include "public/fpdf_signature.h" -#include #include #include "constants/form_fields.h" +#include "core/fpdfapi/page/cpdf_annotcontext.h" +#include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_name.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_parser.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfapi/parser/cpdf_revision_classifier.h" +#include "core/fpdfapi/parser/cpdf_revision_diff.h" +#include "core/fpdfapi/parser/cpdf_revision_provider.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fpdfapi/parser/object_tree_traversal_util.h" #include 
"core/fxcrt/compiler_specific.h" #include "core/fxcrt/numerics/safe_conversions.h" #include "core/fxcrt/span.h" @@ -20,7 +30,26 @@ namespace { -std::vector> CollectSignatures( +// Fixed-width sentinel for /ByteRange placeholder slots 1-3. +// INT_MAX serializes as exactly 10 decimal digits ("2147483647"), ensuring the +// signing orchestrator can patch real values in-place without changing file +// length. Each patched value must be left-aligned, right-padded with spaces to +// exactly 10 characters. +constexpr int kByteRangePlaceholder = 2147483647; + +RetainPtr CreateByteRangePlaceholderArray( + CPDF_Dictionary* pSigDict) { + auto pByteRange = pSigDict->SetNewFor("ByteRange"); + pByteRange->AppendNew(0); + pByteRange->AppendNew(kByteRangePlaceholder); + pByteRange->AppendNew(kByteRangePlaceholder); + pByteRange->AppendNew(kByteRangePlaceholder); + return pByteRange; +} + +} // namespace + +static std::vector> CollectSignatures( CPDF_Document* doc) { std::vector> signatures; const CPDF_Dictionary* root = doc->GetRoot(); @@ -49,6 +78,41 @@ std::vector> CollectSignatures( return signatures; } +namespace { + +RetainPtr GetSigFieldDict(FPDF_ANNOTATION annot) { + CPDF_AnnotContext* pCtx = CPDFAnnotContextFromFPDFAnnotation(annot); + if (!pCtx) + return nullptr; + RetainPtr pAnnotDict = pCtx->GetMutableAnnotDict(); + if (!pAnnotDict) + return nullptr; + + // Merged field/widget: FT is directly on the annotation dict. + if (pAnnotDict->GetNameFor(pdfium::form_fields::kFT) == + pdfium::form_fields::kSig) { + return pAnnotDict; + } + + // Separate field + widget: walk to Parent. 
+ RetainPtr pParent = + pAnnotDict->GetMutableDictFor("Parent"); + if (!pParent) + return nullptr; + if (pParent->GetNameFor(pdfium::form_fields::kFT) != + pdfium::form_fields::kSig) { + return nullptr; + } + return pParent; +} + +RetainPtr GetSigValueDict(FPDF_ANNOTATION annot) { + RetainPtr pField = GetSigFieldDict(annot); + if (!pField) + return nullptr; + return pField->GetMutableDictFor(pdfium::form_fields::kV); +} + } // namespace FPDF_EXPORT int FPDF_CALLCONV FPDF_GetSignatureCount(FPDF_DOCUMENT document) { @@ -255,3 +319,446 @@ FPDFSignatureObj_GetDocMDPPermission(FPDF_SIGNATURE signature) { return permission; } + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_PrepareSignatureDict(FPDF_ANNOTATION annot, + EPDF_SIG_SUBFILTER sub_filter, + unsigned long contents_size) { + RetainPtr pField = GetSigFieldDict(annot); + if (!pField) + return false; + + if (pField->GetDictFor(pdfium::form_fields::kV)) + return false; + + if (contents_size == 0) + return false; + + CPDF_AnnotContext* pCtx = CPDFAnnotContextFromFPDFAnnotation(annot); + if (!pCtx || !pCtx->GetPage()) + return false; + + CPDF_Page* pPage = pCtx->GetPage()->AsPDFPage(); + if (!pPage) + return false; + + CPDF_Document* pDoc = pPage->GetDocument(); + if (!pDoc) + return false; + + ByteString type_value; + ByteString sub_filter_value; + switch (sub_filter) { + case EPDF_SIG_SUBFILTER_ADBE_PKCS7_DETACHED: + type_value = "Sig"; + sub_filter_value = "adbe.pkcs7.detached"; + break; + case EPDF_SIG_SUBFILTER_ETSI_CADES_DETACHED: + type_value = "Sig"; + sub_filter_value = "ETSI.CAdES.detached"; + break; + case EPDF_SIG_SUBFILTER_ETSI_RFC3161: + type_value = "DocTimeStamp"; + sub_filter_value = "ETSI.RFC3161"; + break; + default: + return false; + } + + RetainPtr pSigDict = + pDoc->NewIndirect(); + + pSigDict->SetNewFor("Type", type_value); + pSigDict->SetNewFor("Filter", "Adobe.PPKLite"); + pSigDict->SetNewFor("SubFilter", sub_filter_value); + + CreateByteRangePlaceholderArray(pSigDict.Get()); + + // /Contents 
placeholder: zero-filled byte buffer of |contents_size| bytes. + // PDFium serializes this as a hex string <00...00> of 2*contents_size chars. + std::vector contents_placeholder(contents_size, 0); + pSigDict->SetNewFor( + "Contents", + pdfium::span(contents_placeholder), + CPDF_String::DataType::kIsHex); + + pField->SetNewFor(pdfium::form_fields::kV, pDoc, + pSigDict->GetObjNum()); + return true; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetReason(FPDF_ANNOTATION annot, FPDF_WIDESTRING reason) { + RetainPtr pSigDict = GetSigValueDict(annot); + if (!pSigDict) + return false; + + if (!reason) { + pSigDict->RemoveFor("Reason"); + return true; + } + + // SAFETY: caller guarantees NUL-terminated FPDF_WIDESTRING. + WideString ws = UNSAFE_BUFFERS(WideStringFromFPDFWideString(reason)); + pSigDict->SetNewFor("Reason", ws.AsStringView()); + return true; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetLocation(FPDF_ANNOTATION annot, FPDF_WIDESTRING location) { + RetainPtr pSigDict = GetSigValueDict(annot); + if (!pSigDict) + return false; + + if (!location) { + pSigDict->RemoveFor("Location"); + return true; + } + + // SAFETY: caller guarantees NUL-terminated FPDF_WIDESTRING. + WideString ws = UNSAFE_BUFFERS(WideStringFromFPDFWideString(location)); + pSigDict->SetNewFor("Location", ws.AsStringView()); + return true; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetContactInfo(FPDF_ANNOTATION annot, FPDF_WIDESTRING contact_info) { + RetainPtr pSigDict = GetSigValueDict(annot); + if (!pSigDict) + return false; + + if (!contact_info) { + pSigDict->RemoveFor("ContactInfo"); + return true; + } + + // SAFETY: caller guarantees NUL-terminated FPDF_WIDESTRING. 
+ WideString ws = UNSAFE_BUFFERS(WideStringFromFPDFWideString(contact_info)); + pSigDict->SetNewFor("ContactInfo", ws.AsStringView()); + return true; +} + +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetDocMDP(FPDF_DOCUMENT document, + FPDF_ANNOTATION annot, + int permission) { + if (permission < 1 || permission > 3) + return false; + + CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); + if (!pDoc) + return false; + + RetainPtr pSigDict = GetSigValueDict(annot); + if (!pSigDict) + return false; + + // Certification signatures must be /Type /Sig, not /Type /DocTimeStamp. + if (pSigDict->GetNameFor("Type") == "DocTimeStamp") + return false; + + RetainPtr pRoot = pDoc->GetMutableRoot(); + if (!pRoot) + return false; + + // Enforce single certification per document at catalog level. + RetainPtr pPerms = pRoot->GetDictFor("Perms"); + if (pPerms && pPerms->GetDictFor("DocMDP")) + return false; + + // 1. Add /Reference entry on the signature's /V dict. + RetainPtr pTransformParams = + pDoc->NewIndirect(); + pTransformParams->SetNewFor("Type", "TransformParams"); + pTransformParams->SetNewFor("V", "1.2"); + pTransformParams->SetNewFor("P", permission); + + RetainPtr pSigRef = + pDoc->NewIndirect(); + pSigRef->SetNewFor("Type", "SigRef"); + pSigRef->SetNewFor("TransformMethod", "DocMDP"); + pSigRef->SetNewFor("TransformParams", pDoc, + pTransformParams->GetObjNum()); + + RetainPtr pRefArray = + pSigDict->SetNewFor("Reference"); + pRefArray->AppendNew(pDoc, pSigRef->GetObjNum()); + + // 2. Wire catalog /Perms/DocMDP to point at the signature value dict. 
+ RetainPtr pPermsDict = + pRoot->GetOrCreateDictFor("Perms"); + pPermsDict->SetNewFor("DocMDP", pDoc, + pSigDict->GetObjNum()); + return true; +} + +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFSig_GetLocation(FPDF_SIGNATURE signature, + void* buffer, + unsigned long length) { + const CPDF_Dictionary* signature_dict = + CPDFDictionaryFromFPDFSignature(signature); + if (!signature_dict) + return 0; + + RetainPtr value_dict = + signature_dict->GetDictFor(pdfium::form_fields::kV); + if (!value_dict) + return 0; + + RetainPtr obj = value_dict->GetObjectFor("Location"); + if (!obj || !obj->IsString()) + return 0; + + // SAFETY: required from caller. + return Utf16EncodeMaybeCopyAndReturnLength( + obj->GetUnicodeText(), + UNSAFE_BUFFERS(SpanFromFPDFApiArgs(buffer, length))); +} + +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFSig_GetContactInfo(FPDF_SIGNATURE signature, + void* buffer, + unsigned long length) { + const CPDF_Dictionary* signature_dict = + CPDFDictionaryFromFPDFSignature(signature); + if (!signature_dict) + return 0; + + RetainPtr value_dict = + signature_dict->GetDictFor(pdfium::form_fields::kV); + if (!value_dict) + return 0; + + RetainPtr obj = value_dict->GetObjectFor("ContactInfo"); + if (!obj || !obj->IsString()) + return 0; + + // SAFETY: required from caller. 
+  return Utf16EncodeMaybeCopyAndReturnLength(
+      obj->GetUnicodeText(),
+      UNSAFE_BUFFERS(SpanFromFPDFApiArgs(buffer, length)));
+}
+
+FPDF_EXPORT FPDF_SIGNATURE FPDF_CALLCONV
+EPDFSig_GetAnnotSignatureHandle(FPDF_ANNOTATION annot) {
+  RetainPtr pField = GetSigFieldDict(annot);
+  if (!pField)
+    return nullptr;
+  return FPDFSignatureFromCPDFDictionary(pField.Get());
+}
+
+// ---- Signature-Revision Bridge Implementations ----
+
+namespace {
+
+// Maps a signature field to the index of the revision it signs.
+//
+// Reads /V/ByteRange from |sig_field_dict| and computes the end of the signed
+// range as ByteRange[2] + ByteRange[3]. A revision whose |revision_end| equals
+// that offset wins (first such revision). Otherwise the last revision whose
+// |revision_end| lies before that offset is returned.
+//
+// Returns -1 when the field has no /V dictionary, when /ByteRange is missing
+// or has fewer than four entries, or when no revision qualifies.
+int MapSignatureToRevision(const CPDF_RevisionProvider* provider,
+                           const CPDF_Dictionary* sig_field_dict) {
+  RetainPtr<const CPDF_Dictionary> value_dict =
+      sig_field_dict->GetDictFor(pdfium::form_fields::kV);
+  if (!value_dict)
+    return -1;
+
+  RetainPtr<const CPDF_Array> byte_range =
+      value_dict->GetArrayFor("ByteRange");
+  if (!byte_range || byte_range->size() < 4)
+    return -1;
+
+  // GetIntegerAt() yields int, so widen each operand before adding. Summing
+  // as int overflows (undefined behaviour) for offsets past 2 GiB, and also
+  // for a prepared-but-unpatched signature whose slots still hold the
+  // 2147483647 placeholder.
+  const int64_t signed_end =
+      static_cast<int64_t>(byte_range->GetIntegerAt(2)) +
+      static_cast<int64_t>(byte_range->GetIntegerAt(3));
+
+  // Single pass: an exact match returns immediately; otherwise remember the
+  // latest revision that ends before the signed range does.
+  const size_t revision_count = provider->GetRevisionCount();
+  int best = -1;
+  for (size_t i = 0; i < revision_count; ++i) {
+    const auto& layer = provider->GetLayer(i);
+    if (layer.revision_end == signed_end)
+      return static_cast<int>(i);
+    if (layer.revision_end < signed_end)
+      best = static_cast<int>(i);
+  }
+  return best;
+}
+
+}  // namespace
+
+FPDF_EXPORT FPDF_SIGNATURE FPDF_CALLCONV
+EPDFRevision_GetSignature(FPDF_DOCUMENT document, EPDF_REVISION revision) {
+  if (!revision || !document)
+    return nullptr;
+
+  auto* doc = CPDFDocumentFromFPDFDocument(document);
+  if (!doc)
+    return nullptr;
+
+  const CPDF_RevisionProvider* provider =
+      GetRevisionProviderFromDocument(document);
+  if (!provider || provider->GetRevisionCount() == 0)
+    return nullptr;
+
+  const auto* target_layer =
+      reinterpret_cast(revision);
+
+  int target_index = -1;
+  for (size_t i = 0; i < provider->GetRevisionCount(); ++i) {
+    if (&provider->GetLayer(i) == target_layer) {
+      target_index = static_cast(i);
+      break;
+    }
+  }
+  if (target_index < 0)
+    return nullptr;
+
+  std::vector> signatures =
+      CollectSignatures(doc);
for (const auto& sig : signatures) { + if (MapSignatureToRevision(provider, sig.Get()) == target_index) + return FPDFSignatureFromCPDFDictionary(sig.Get()); + } + return nullptr; +} + +FPDF_EXPORT int FPDF_CALLCONV +EPDFSig_GetSignatureRevision(FPDF_DOCUMENT document, + FPDF_SIGNATURE signature) { + auto* doc = CPDFDocumentFromFPDFDocument(document); + if (!doc) + return -1; + + const CPDF_RevisionProvider* provider = + GetRevisionProviderFromDocument(document); + if (!provider) + return -1; + + const CPDF_Dictionary* sig_dict = + CPDFDictionaryFromFPDFSignature(signature); + if (!sig_dict) + return -1; + + return MapSignatureToRevision(provider, sig_dict); +} + +FPDF_EXPORT int FPDF_CALLCONV +EPDFSig_CheckDocMDPCompliance(FPDF_DOCUMENT document, + int check_revision) { + auto* doc = CPDFDocumentFromFPDFDocument(document); + if (!doc) + return EPDF_DOCMDP_UNSUPPORTED; + + CPDF_Parser* parser = doc->GetParser(); + if (!parser) + return EPDF_DOCMDP_UNSUPPORTED; + + const CPDF_RevisionProvider* provider = parser->GetRevisionProvider(); + if (!provider || provider->GetRevisionCount() == 0) + return EPDF_DOCMDP_UNSUPPORTED; + + // Find the certification signature via catalog /Perms/DocMDP. + const CPDF_Dictionary* root = doc->GetRoot(); + if (!root) + return EPDF_DOCMDP_NOT_APPLICABLE; + + RetainPtr perms = root->GetDictFor("Perms"); + if (!perms) + return EPDF_DOCMDP_NOT_APPLICABLE; + + RetainPtr docmdp_sig = perms->GetDictFor("DocMDP"); + if (!docmdp_sig) + return EPDF_DOCMDP_NOT_APPLICABLE; + + // Get permission level from the /Reference TransformParams. + int permission = 2; // Default per ISO 32000. 
+ RetainPtr reference = docmdp_sig->GetArrayFor("Reference"); + if (reference) { + for (size_t i = 0; i < reference->size(); ++i) { + RetainPtr ref_dict = reference->GetDictAt(i); + if (!ref_dict) + continue; + if (ref_dict->GetNameFor("TransformMethod") != "DocMDP") + continue; + RetainPtr params = + ref_dict->GetDictFor("TransformParams"); + if (params) { + int p = params->GetIntegerFor("P"); + if (p >= 1 && p <= 3) + permission = p; + } + break; + } + } + + // Determine the certification signature's revision. + int cert_revision = -1; + std::vector> signatures = + CollectSignatures(doc); + for (const auto& sig : signatures) { + RetainPtr v = sig->GetDictFor(pdfium::form_fields::kV); + if (v.Get() == docmdp_sig.Get()) { + cert_revision = MapSignatureToRevision(provider, sig.Get()); + break; + } + } + if (cert_revision < 0) + return EPDF_DOCMDP_INDETERMINATE; + + // Determine the check revision. + int target_revision = check_revision; + if (target_revision < 0) + target_revision = static_cast(provider->GetRevisionCount()) - 1; + + if (target_revision <= cert_revision) + return EPDF_DOCMDP_COMPLIANT; + + // Compute diff between certified revision and check revision. + auto cert_map = + provider->GetVisibleObjectsAtRevision(cert_revision); + auto check_map = + provider->GetVisibleObjectsAtRevision(target_revision); + + std::vector raw_diff = + CPDF_RevisionDiff::ComputeDiff(cert_map, check_map); + + if (raw_diff.empty()) + return EPDF_DOCMDP_COMPLIANT; + + // TODO: GetObjectsWithMultipleReferences is a full-document BFS. If this + // function is called repeatedly for the same document, consider caching the + // result on a per-document handle or a dedicated context object. + std::set multi_ref_set = + GetObjectsWithMultipleReferences(doc); + std::vector changes = + ClassifyChanges(doc, raw_diff, multi_ref_set); + + // Check compliance. 
+ if (permission < 1 || permission > 3) + return EPDF_DOCMDP_UNSUPPORTED; + + for (const auto& change : changes) { + if (change.semantic_type == SemanticChangeType::kDSS || + change.semantic_type == SemanticChangeType::kDocumentTimestamp) { + continue; + } + + switch (permission) { + case 1: + return EPDF_DOCMDP_VIOLATED; + + case 2: + if (change.semantic_type != SemanticChangeType::kFormStateChange && + change.semantic_type != SemanticChangeType::kSignature) { + return EPDF_DOCMDP_VIOLATED; + } + break; + + case 3: + if (change.semantic_type != SemanticChangeType::kFormStateChange && + change.semantic_type != SemanticChangeType::kSignature && + change.semantic_type != SemanticChangeType::kAnnotation) { + return EPDF_DOCMDP_VIOLATED; + } + break; + } + } + + return EPDF_DOCMDP_COMPLIANT; +} diff --git a/fpdfsdk/fpdf_view.cpp b/fpdfsdk/fpdf_view.cpp index 6a4278cf8..df36ad16b 100644 --- a/fpdfsdk/fpdf_view.cpp +++ b/fpdfsdk/fpdf_view.cpp @@ -245,6 +245,21 @@ FPDF_DOCUMENT LoadDocumentImpl(RetainPtr pFileAccess, return FPDFDocumentFromCPDFDocument(document.release()); } +uint32_t BuildPermissionsForRevision(uint32_t allowed_flags) { + if (allowed_flags & EPDF_PERM_PRINT_HIGH) { + allowed_flags |= EPDF_PERM_PRINT; + } + + uint32_t p = allowed_flags; + + // PDF Reference 1.7, Table 3.20: bits 1-2 must be 0 + p &= 0xFFFFFFFC; + // Bits 7-8 must be 1 (for R>=3), bits 13-32 must be 1 + p |= 0xFFFFF0C0; + + return p; +} + } // namespace FPDF_EXPORT void FPDF_CALLCONV FPDF_InitLibrary() { @@ -474,33 +489,6 @@ FPDF_GetSecurityHandlerRevision(FPDF_DOCUMENT document) { return dict ? 
dict->GetIntegerFor("R") : -1; } -namespace { - -// Build P value with correct reserved bits for R>=3 (including R=4 and R=6) -// Input: allowed_flags - OR'd combination of permission bits user wants to ALLOW -// Output: proper P value with reserved bits set correctly -uint32_t BuildPermissionsForRevision(uint32_t allowed_flags) { - // Enforce: PrintHighQuality implies Print (bit 12 requires bit 3) - // Some readers interpret oddly if PRINT_HIGH is set without PRINT - if (allowed_flags & EPDF_PERM_PRINT_HIGH) { - allowed_flags |= EPDF_PERM_PRINT; - } - - // Start with allowed flags - uint32_t p = allowed_flags; - - // Apply reserved bit requirements (PDF Reference 1.7, Table 3.20) - // Bits 1-2 must be 0 - p &= 0xFFFFFFFC; - // Bits 7-8 must be 1 (for R>=3) - // Bits 13-32 must be 1 - p |= 0xFFFFF0C0; - - return p; -} - -} // namespace - FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV EPDF_SetEncryption(FPDF_DOCUMENT document, FPDF_BYTESTRING user_password, diff --git a/public/fpdf_annot.h b/public/fpdf_annot.h index db493ca3d..8e69f3e04 100644 --- a/public/fpdf_annot.h +++ b/public/fpdf_annot.h @@ -1588,12 +1588,18 @@ EPDFPage_GetAnnotByName(FPDF_PAGE page, FPDF_WIDESTRING nm); // Remove the annotation by name. // -// page - handle to a page. -// nm - the name of the annotation. +// page - handle to a page. +// nm - the name of the annotation. +// form_handle - handle to the form fill module (from +// FPDFDOC_InitFormFillEnvironment). If non-null, the +// widget's field entry is also removed from the AcroForm +// field tree. Pass NULL to skip field-tree cleanup. // // Returns true on success. -FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV -EPDFPage_RemoveAnnotByName(FPDF_PAGE page, FPDF_WIDESTRING nm); +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFPage_RemoveAnnotByName(FPDF_PAGE page, + FPDF_WIDESTRING nm, + FPDF_FORMHANDLE form_handle); // Set the linked annotation. 
@@ -1644,13 +1650,20 @@ EPDFPage_GetAnnotRaw(FPDF_DOCUMENT doc, int page_index, int index); // Experimental EmbedPDF Extension API. // Remove the annotation by index. // -// doc - handle to a document. -// page_index - the index of the page. -// index - the index of the annotation. +// doc - handle to a document. +// page_index - the index of the page. +// index - the index of the annotation. +// form_handle - handle to the form fill module (from +// FPDFDOC_InitFormFillEnvironment). If non-null, the +// widget's field entry is also removed from the AcroForm +// field tree. Pass NULL to skip field-tree cleanup. // // Returns true on success. -FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV -EPDFPage_RemoveAnnotRaw(FPDF_DOCUMENT doc, int page_index, int index); +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFPage_RemoveAnnotRaw(FPDF_DOCUMENT doc, + int page_index, + int index, + FPDF_FORMHANDLE form_handle); // Experimental EmbedPDF Extension API. // Set the /Name entry of an annotation (icon name for text/file/sound, diff --git a/public/fpdf_formfill.h b/public/fpdf_formfill.h index 1f0b1298f..9e138ee23 100644 --- a/public/fpdf_formfill.h +++ b/public/fpdf_formfill.h @@ -1823,6 +1823,47 @@ FORM_IsIndexSelected(FPDF_FORMHANDLE hHandle, FPDF_PAGE page, int index); // into PDFium, performs no action and always returns FALSE. FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_LoadXFA(FPDF_DOCUMENT document); +// Experimental EmbedPDF Extension API. +// Function: EPDF_FixPageFieldsRaw +// Register orphan widget annotations from a page's Annots array +// into the interactive form's field tree, without loading the full +// page via FPDF_LoadPage. This must be called before using +// FPDFAnnot_GetFormFieldType on annotations obtained through +// EPDFPage_GetAnnotRaw. +// Parameters: +// hHandle - Handle to the form fill module, as returned by +// FPDFDOC_InitFormFillEnvironment(). +// document - Handle to the document. +// page_index - 0-based index of the page. +// Return value: +// None. 
+FPDF_EXPORT void FPDF_CALLCONV +EPDF_FixPageFieldsRaw(FPDF_FORMHANDLE hHandle, + FPDF_DOCUMENT document, + int page_index); + +// Experimental EmbedPDF Extension API. +// Function: EPDF_OpenFormFillInfo +// Allocates a zeroed FPDF_FORMFILLINFO struct with version set to 1. +// Parameters: +// None. +// Return value: +// Pointer to a new FPDF_FORMFILLINFO, or NULL on failure. +// Caller must free with EPDF_CloseFormFillInfo(). +// +FPDF_EXPORT FPDF_FORMFILLINFO* FPDF_CALLCONV EPDF_OpenFormFillInfo(); + +// Experimental EmbedPDF Extension API. +// Function: EPDF_CloseFormFillInfo +// Frees a FPDF_FORMFILLINFO allocated by EPDF_OpenFormFillInfo(). +// Parameters: +// info - Pointer returned by EPDF_OpenFormFillInfo(). +// Return value: +// None. +// +FPDF_EXPORT void FPDF_CALLCONV +EPDF_CloseFormFillInfo(FPDF_FORMFILLINFO* info); + #ifdef __cplusplus } #endif diff --git a/public/fpdf_revision.h b/public/fpdf_revision.h new file mode 100644 index 000000000..e055851d6 --- /dev/null +++ b/public/fpdf_revision.h @@ -0,0 +1,152 @@ +// Copyright 2024 The PDFium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef PUBLIC_FPDF_REVISION_H_ +#define PUBLIC_FPDF_REVISION_H_ + +// NOLINTNEXTLINE(build/include) +#include "fpdfview.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Opaque handle to a document revision. Valid until FPDF_CloseDocument(). +typedef const struct epdf_revision_t__* EPDF_REVISION; + +// Opaque handle to a revision diff result. Caller must manage lifetime. +typedef const struct epdf_revision_diff_t__* EPDF_REVISION_DIFF; + +// Experimental EmbedPDF Extension API. +// Get the number of incremental revisions in the document. +// Index 0 = oldest (original document). +// Returns -1 on error. +FPDF_EXPORT int FPDF_CALLCONV +EPDFRevision_GetCount(FPDF_DOCUMENT document); + +// Experimental EmbedPDF Extension API. +// Get a revision handle by index. Index 0 = oldest. 
+// Returns NULL on error. +FPDF_EXPORT EPDF_REVISION FPDF_CALLCONV +EPDFRevision_Get(FPDF_DOCUMENT document, int index); + +// Experimental EmbedPDF Extension API. +// Get the effective file end offset for a revision (64-bit out-param). +// The offset is the byte position immediately after the %%EOF marker +// for this revision's incremental save. +// WARNING: actual values may be truncated to 32 bits due to upstream +// GetTrailerEnds() limitation for files > 4 GB. +// Returns TRUE on success, FALSE on error. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFRevision_GetFileEnd(EPDF_REVISION revision, + unsigned long long* out_file_end); + +// Experimental EmbedPDF Extension API. +// Compare two revisions and produce a diff of changed objects. +// Returns NULL on error. Caller takes ownership; free with +// EPDFRevisionDiff_Close(). +FPDF_EXPORT EPDF_REVISION_DIFF FPDF_CALLCONV +EPDFRevision_Compare(FPDF_DOCUMENT document, + int older_revision, + int newer_revision); + +// Experimental EmbedPDF Extension API. +// Close a diff handle returned by EPDFRevision_Compare. +FPDF_EXPORT void FPDF_CALLCONV +EPDFRevisionDiff_Close(EPDF_REVISION_DIFF diff); + +// Experimental EmbedPDF Extension API. +// Get the number of changed objects in a diff. +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFRevisionDiff_GetEntryCount(EPDF_REVISION_DIFF diff); + +// Experimental EmbedPDF Extension API. +// Get a specific diff entry. +// out_obj_num - receives the object number. +// out_category - receives 0=added, 1=modified, 2=freed. +// Returns TRUE on success. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFRevisionDiff_GetEntry(EPDF_REVISION_DIFF diff, + unsigned long index, + unsigned int* out_obj_num, + int* out_category); + +// Semantic change category values returned by +// EPDFRevisionDiff_GetSemanticCategoryCounts and +// EPDFRevisionDiff_GetSemanticEntry. 
+#define EPDF_SEMANTIC_FORM_STATE_CHANGE 0 +#define EPDF_SEMANTIC_ANNOTATION 1 +#define EPDF_SEMANTIC_SIGNATURE 2 +#define EPDF_SEMANTIC_DOCUMENT_TIMESTAMP 3 +#define EPDF_SEMANTIC_DSS 4 +#define EPDF_SEMANTIC_PAGE 5 +#define EPDF_SEMANTIC_CATALOG 6 +#define EPDF_SEMANTIC_OTHER 7 + +// Experimental EmbedPDF Extension API. +// Get semantic category counts for a diff. Lazily computes semantic +// classification on the first call for a given diff handle and caches the +// result. Requires the document handle for object-graph access during +// classification. +// +// document - document handle (needed for semantic classification). +// diff - diff handle from EPDFRevision_Compare(). +// category_buffer - receives SemanticChangeType values. +// count_buffer - receives the count for each category. +// buffer_length - number of slots in category_buffer and count_buffer. +// +// Returns the number of distinct non-zero categories. +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFRevisionDiff_GetSemanticCategoryCounts( + FPDF_DOCUMENT document, + EPDF_REVISION_DIFF diff, + int* category_buffer, + unsigned long* count_buffer, + unsigned long buffer_length); + +// Experimental EmbedPDF Extension API. +// Get the number of resolved semantic entries in a diff. +// Lazily computes semantic classification on the first call and caches +// the result (same cache as GetSemanticCategoryCounts). +// +// document - document handle (needed for semantic classification). +// diff - diff handle from EPDFRevision_Compare(). +// +// Returns the number of resolved entries, or 0 on error. +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFRevisionDiff_GetResolvedEntryCount(FPDF_DOCUMENT document, + EPDF_REVISION_DIFF diff); + +// Experimental EmbedPDF Extension API. +// Get a specific resolved semantic entry. +// Lazily computes semantic classification on the first call and caches +// the result (same cache as GetSemanticCategoryCounts). 
+// +// document - document handle (needed for semantic classification). +// diff - diff handle from EPDFRevision_Compare(). +// index - zero-based index into the resolved entries. +// out_changed_obj_num - receives the actual changed indirect object number. +// out_target_obj_num - receives the resolved logical target object number, +// or 0 if unavailable. +// out_page_obj_num - receives the owning page dictionary object number, +// or 0 if unavailable. +// out_diff_category - receives 0=added, 1=modified, 2=freed. +// out_semantic_type - receives one of the EPDF_SEMANTIC_* values. +// +// Returns TRUE on success, FALSE on error or out-of-range index. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFRevisionDiff_GetResolvedEntry(FPDF_DOCUMENT document, + EPDF_REVISION_DIFF diff, + unsigned long index, + unsigned int* out_changed_obj_num, + unsigned int* out_target_obj_num, + unsigned int* out_page_obj_num, + int* out_diff_category, + int* out_semantic_type); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // PUBLIC_FPDF_REVISION_H_ diff --git a/public/fpdf_save.h b/public/fpdf_save.h index 093b889ad..b2dc068c6 100644 --- a/public/fpdf_save.h +++ b/public/fpdf_save.h @@ -86,6 +86,43 @@ FPDF_SaveWithVersion(FPDF_DOCUMENT document, FPDF_DWORD flags, int file_version); +// Experimental EmbedPDF Extension API. +// Function: EPDF_SaveDocumentToBuffer +// Saves the document to a heap-allocated buffer. +// Parameters: +// document - Handle to document, as returned by +// FPDF_LoadDocument() or FPDF_CreateNewDocument(). +// flags - Same flags as FPDF_SaveAsCopy (e.g. FPDF_INCREMENTAL). +// out_size - Pointer that receives the byte count of the saved data. +// Return value: +// Pointer to a malloc'd buffer containing the saved PDF bytes, +// or nullptr on failure. Caller must free() the returned pointer. 
+// +FPDF_EXPORT void* FPDF_CALLCONV +EPDF_SaveDocumentToBuffer(FPDF_DOCUMENT document, + FPDF_DWORD flags, + unsigned long* out_size); + +// Experimental EmbedPDF Extension API. +// Function: EPDF_SaveDocumentToBufferWithVersion +// Same as EPDF_SaveDocumentToBuffer(), except the file version of the +// saved document can be specified by the caller. +// Parameters: +// document - Handle to document. +// flags - Same flags as FPDF_SaveAsCopy. +// out_size - Pointer that receives the byte count. +// file_version - The PDF file version. File version: 14 for 1.4, +// 15 for 1.5, ... +// Return value: +// Pointer to a malloc'd buffer, or nullptr on failure. +// Caller must free() the returned pointer. +// +FPDF_EXPORT void* FPDF_CALLCONV +EPDF_SaveDocumentToBufferWithVersion(FPDF_DOCUMENT document, + FPDF_DWORD flags, + unsigned long* out_size, + int file_version); + #ifdef __cplusplus } #endif diff --git a/public/fpdf_signature.h b/public/fpdf_signature.h index 9a075e5f8..86f887741 100644 --- a/public/fpdf_signature.h +++ b/public/fpdf_signature.h @@ -8,6 +8,9 @@ // NOLINTNEXTLINE(build/include) #include "fpdfview.h" +// NOLINTNEXTLINE(build/include) +#include "fpdf_revision.h" + #ifdef __cplusplus extern "C" { #endif // __cplusplus @@ -148,6 +151,227 @@ FPDFSignatureObj_GetTime(FPDF_SIGNATURE signature, FPDF_EXPORT unsigned int FPDF_CALLCONV FPDFSignatureObj_GetDocMDPPermission(FPDF_SIGNATURE signature); +// SubFilter values for digital signature dictionaries. +// +// These map to the /SubFilter name in a signature value dictionary (/V). +// The choice of SubFilter determines the signature format and validation +// rules that PDF processors must follow. +typedef enum EPDF_SIG_SUBFILTER { + // /adbe.pkcs7.detached -- Standard PKCS#7 detached signatures. + // Used for approval and certification signatures. + // Note: deprecated in PDF 2.0 (ISO 32000-2) in favor of + // ETSI.CAdES.detached, but widely supported for compatibility. 
+ EPDF_SIG_SUBFILTER_ADBE_PKCS7_DETACHED = 0, + + // /ETSI.CAdES.detached -- CAdES signatures per ETSI TS 102 778 / EN 319 142. + // Preferred for PAdES profiles (B-B, B-T, B-LT, B-LTA) and eIDAS compliance. + // Used for approval and certification signatures. + EPDF_SIG_SUBFILTER_ETSI_CADES_DETACHED = 1, + + // /ETSI.RFC3161 -- Document timestamp signatures per RFC 3161. + // NOT for signer identity signatures. This is exclusively for document + // timestamps, typically added as the final revision in a PAdES B-LTA flow. + // When used, EPDFSig_PrepareSignatureDict sets /Type /DocTimeStamp + // instead of /Type /Sig. + EPDF_SIG_SUBFILTER_ETSI_RFC3161 = 2, +} EPDF_SIG_SUBFILTER; + +// Experimental EmbedPDF Extension API. +// Prepare a signature value dictionary (/V) on a Sig field widget. +// +// Creates the /V dict as a new indirect object referenced from the Sig field +// dictionary (the widget itself or its /Parent). The /V dict contains: +// /Type -- /Sig (or /DocTimeStamp when sub_filter is ETSI_RFC3161) +// /Filter -- /Adobe.PPKLite +// /SubFilter -- per |sub_filter| enum +// /ByteRange -- [0 2147483647 2147483647 2147483647] (placeholder) +// /Contents -- <0000...0000> (hex placeholder, 2 * contents_size chars) +// +// Intended lifecycle: +// 1. Call this function to prepare the /V dict in the document model. +// 2. Optionally call EPDFSig_SetReason(), EPDFSig_SetLocation(), etc. +// 3. Save the document incrementally (FPDF_INCREMENTAL flag). +// 4. In the saved bytes, locate the /ByteRange and /Contents placeholders. +// 5. Compute actual ByteRange offsets and patch them in-place. +// 6. Hash the byte spans described by ByteRange. +// 7. Generate a CMS/PKCS#7 blob (or RFC 3161 timestamp token). +// 8. Hex-encode the blob and patch it into the /Contents placeholder. +// +// annot - handle to a Sig field widget annotation (from +// EPDFPage_CreateFormField with FPDF_FORMFIELD_SIGNATURE, +// or an existing unsigned Sig widget). +// sub_filter - one of EPDF_SIG_SUBFILTER_*. 
+// contents_size - placeholder size for /Contents in bytes. +// The hex string in the PDF will be 2x this length. +// Recommended: 8192 for plain PKCS#7 signatures, +// 16384 for PAdES B-T (signature + timestamp), +// 32768 for PAdES B-LTA with revocation data. +// +// Returns true on success, false if annot is not a Sig widget, the field +// already has a /V dict, or on any other error. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_PrepareSignatureDict(FPDF_ANNOTATION annot, + EPDF_SIG_SUBFILTER sub_filter, + unsigned long contents_size); + +// Experimental EmbedPDF Extension API. +// Set the /Reason string on an already-prepared signature value dict. +// Must be called after EPDFSig_PrepareSignatureDict and before save. +// +// annot - handle to a Sig field widget annotation. +// reason - the reason string (UTF-16LE). Pass NULL to remove. +// +// Returns true on success. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetReason(FPDF_ANNOTATION annot, FPDF_WIDESTRING reason); + +// Experimental EmbedPDF Extension API. +// Set the /Location string on an already-prepared signature value dict. +// Must be called after EPDFSig_PrepareSignatureDict and before save. +// +// annot - handle to a Sig field widget annotation. +// location - the location string (UTF-16LE). Pass NULL to remove. +// +// Returns true on success. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetLocation(FPDF_ANNOTATION annot, FPDF_WIDESTRING location); + +// Experimental EmbedPDF Extension API. +// Set the /ContactInfo string on an already-prepared signature value dict. +// Must be called after EPDFSig_PrepareSignatureDict and before save. +// +// annot - handle to a Sig field widget annotation. +// contact_info - the contact info string (UTF-16LE). Pass NULL to remove. +// +// Returns true on success. +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetContactInfo(FPDF_ANNOTATION annot, FPDF_WIDESTRING contact_info); + +// Experimental EmbedPDF Extension API. 
+// Make a signature a certification (DocMDP) signature. +// +// This performs two actions required by the PDF spec (ISO 32000-2, 12.8.2.2): +// +// 1. Adds a /Reference entry on the signature's /V dict: +// /Reference [ << /TransformMethod /DocMDP +// /Type /SigRef +// /TransformParams << /P |permission| +// /V /1.2 +// /Type /TransformParams >> >> ] +// +// 2. Sets the document catalog's /Perms/DocMDP entry to point at +// the signature value dictionary, so PDF processors can locate +// the certification signature from the catalog. +// +// Only one certification signature is allowed per document (per spec). +// This function checks the catalog /Perms/DocMDP entry and returns false +// if a certification signature already exists in the document, regardless +// of which field it is on. +// +// document - handle to the document (needed to access the catalog). +// annot - handle to a Sig field widget whose /V dict has been +// prepared via EPDFSig_PrepareSignatureDict. +// permission - DocMDP permission level, must be 1, 2, or 3: +// 1 = no changes allowed (except DSS/timestamps) +// 2 = form filling, signing, and page templates +// 3 = same as 2, plus annotation create/delete/modify +// +// Returns true on success, false if: +// - annot is not a prepared Sig widget +// - permission is not 1, 2, or 3 +// - a certification signature already exists in the document catalog +FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV +EPDFSig_SetDocMDP(FPDF_DOCUMENT document, + FPDF_ANNOTATION annot, + int permission); + +// Experimental EmbedPDF Extension API. +// Get /Location from a signature object's /V dict. +// +// signature - handle to the signature object. +// buffer - the address of a buffer that receives the location. +// length - the size, in bytes, of |buffer|. +// +// Returns the number of bytes in the location on success, 0 on error. +// The |buffer| is always in UTF-16LE encoding, terminated by a UTF16 NUL. 
+// If |length| is less than the returned length, or |buffer| is NULL, +// |buffer| will not be modified. +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFSig_GetLocation(FPDF_SIGNATURE signature, + void* buffer, + unsigned long length); + +// Experimental EmbedPDF Extension API. +// Get /ContactInfo from a signature object's /V dict. +// +// signature - handle to the signature object. +// buffer - the address of a buffer that receives the contact info. +// length - the size, in bytes, of |buffer|. +// +// Returns the number of bytes in the contact info on success, 0 on error. +// The |buffer| is always in UTF-16LE encoding, terminated by a UTF16 NUL. +// If |length| is less than the returned length, or |buffer| is NULL, +// |buffer| will not be modified. +FPDF_EXPORT unsigned long FPDF_CALLCONV +EPDFSig_GetContactInfo(FPDF_SIGNATURE signature, + void* buffer, + unsigned long length); + +// Experimental EmbedPDF Extension API. +// Get the FPDF_SIGNATURE handle for a Sig field widget annotation. +// +// Bridges the annotation world to the FPDFSignatureObj_Get* reader family. +// The returned handle can be passed to FPDFSignatureObj_GetContents, +// FPDFSignatureObj_GetReason, EPDFSig_GetLocation, etc. +// +// annot - handle to an annotation. Must be a Sig field widget +// (merged field/widget or child widget with Sig parent). +// +// Returns the FPDF_SIGNATURE handle on success, or NULL if the annotation +// is not a Sig field widget. The caller does not take ownership; the handle +// remains valid until FPDF_CloseDocument() is called. +FPDF_EXPORT FPDF_SIGNATURE FPDF_CALLCONV +EPDFSig_GetAnnotSignatureHandle(FPDF_ANNOTATION annot); + +// ---- Signature-Revision Bridge APIs ---- +// These functions cross both the signature and revision domains. +// Pure revision APIs are in fpdf_revision.h. + +// Experimental EmbedPDF Extension API. +// Get the signature associated with a revision, if any. +// Requires the document handle to enumerate signature fields. 
+// If multiple signatures map to the same revision, returns the first match. +// Returns NULL if the revision has no signature. +FPDF_EXPORT FPDF_SIGNATURE FPDF_CALLCONV +EPDFRevision_GetSignature(FPDF_DOCUMENT document, EPDF_REVISION revision); + +// Experimental EmbedPDF Extension API. +// Get the revision index that a signature belongs to. +// Returns -1 on error or if unmappable. +FPDF_EXPORT int FPDF_CALLCONV +EPDFSig_GetSignatureRevision(FPDF_DOCUMENT document, + FPDF_SIGNATURE signature); + +// DocMDP compliance status values. +#define EPDF_DOCMDP_COMPLIANT 0 +#define EPDF_DOCMDP_VIOLATED 1 +#define EPDF_DOCMDP_NOT_APPLICABLE 2 +#define EPDF_DOCMDP_UNSUPPORTED 3 +#define EPDF_DOCMDP_INDETERMINATE 4 + +// Experimental EmbedPDF Extension API. +// Check DocMDP compliance between the certified revision and a later revision. +// Automatically finds the certification signature and its permission from +// catalog /Perms/DocMDP. +// +// document - document handle. +// check_revision - revision to check (-1 means current/latest). +// +// Returns one of the EPDF_DOCMDP_* status values. +FPDF_EXPORT int FPDF_CALLCONV +EPDFSig_CheckDocMDPCompliance(FPDF_DOCUMENT document, + int check_revision); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus