diff --git a/.flake8 b/.flake8 deleted file mode 100644 index ba5db34c..00000000 --- a/.flake8 +++ /dev/null @@ -1,8 +0,0 @@ -[flake8] -per-file-ignores = __init__.py:F401 -max-line-length = 120 -exclude = test/* -max-complexity = 25 -docstring-convention = google -ignore = W503,E203,E741 -classmethod-decorators = classmethod,validator diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eda9dd6c..e8bda570 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,29 +1,16 @@ fail_fast: true repos: - - repo: local - hooks: - - id: black - name: Black - entry: uv run --no-sync black docling_core test - pass_filenames: false - language: system - files: '\.py$' - - repo: local - hooks: - - id: isort - name: isort - entry: uv run --no-sync isort docling_core test - pass_filenames: false - language: system - files: '\.py$' - - repo: local - hooks: - - id: autoflake - name: autoflake - entry: uv run --no-sync autoflake docling_core test - pass_filenames: false - language: system - files: '\.py$' + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.5 + hooks: + - id: ruff-format + name: "Ruff formatter" + args: [--config=pyproject.toml] + files: '^(docling_core|tests|docs/examples).*\.(py|ipynb)$' + - id: ruff + name: "Ruff linter" + args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml] + files: '^(docling_core|tests|docs/examples).*\.(py|ipynb)$' - repo: local hooks: - id: mypy @@ -32,14 +19,6 @@ repos: pass_filenames: false language: system files: '\.py$' - - repo: local - hooks: - - id: flake8 - name: Flake8 - entry: uv run --no-sync flake8 docling_core - pass_filenames: false - language: system - files: '\.py$' - repo: local hooks: - id: pytest diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1d81415a..f15059d6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,8 +47,7 @@ uv add [OPTIONS] > We use the following tools to enforce code style: -- isort, to sort imports -- Black, to format code +- Ruff, to format and lint code - Flake8, to lint code - autoflake, to remove unused variables and imports - [MyPy](https://mypy.readthedocs.io), as static type checker @@ -65,9 +64,6 @@ To run the checks on-demand, type: uv run pre-commit run --all-files ``` -Note: Checks like `Black` and `isort` will _fail_ if they modify files. This is because `pre-commit` doesn't like to see files modified by their hooks. In these cases, `git add` the modified files and `git commit` again. - - ### Documentation We use [JSON Schema for Humans](https://github.com/coveooss/json-schema-for-humans) to generate Markdown pages documenting the JSON schema of the Docling objects. diff --git a/docling_core/experimental/idoctags.py b/docling_core/experimental/idoctags.py index 7990f4cd..cb3143d4 100644 --- a/docling_core/experimental/idoctags.py +++ b/docling_core/experimental/idoctags.py @@ -147,7 +147,7 @@ def get_special_tokens( if include_location_tokens: # Adding dynamically generated location-tokens - for i in range(0, max(page_dimension[0], page_dimension[1])): + for i in range(max(page_dimension[0], page_dimension[1])): special_tokens.append(f"<{IDocTagsToken._LOC_PREFIX.value}{i}/>") return special_tokens @@ -294,11 +294,7 @@ def serialize( # as siblings at the same level (not wrapped in ). 
for subref in child.children: sub = subref.resolve(doc) - if ( - isinstance(sub, ListGroup) - and sub.self_ref not in my_visited - and sub.self_ref not in excluded - ): + if isinstance(sub, ListGroup) and sub.self_ref not in my_visited and sub.self_ref not in excluded: my_visited.add(sub.self_ref) sub_res = doc_serializer.serialize( item=sub, @@ -343,15 +339,9 @@ def serialize( texts = ( [ tmp - for key in ( - list(item.meta.__class__.model_fields) - + list(item.meta.get_custom_part()) - ) + for key in (list(item.meta.__class__.model_fields) + list(item.meta.get_custom_part())) if ( - ( - params.allowed_meta_names is None - or key in params.allowed_meta_names - ) + (params.allowed_meta_names is None or key in params.allowed_meta_names) and (key not in params.blocked_meta_names) and (tmp := self._serialize_meta_field(item.meta, key)) ) @@ -369,28 +359,16 @@ def serialize( def _serialize_meta_field(self, meta: BaseMeta, name: str) -> Optional[str]: if (field_val := getattr(meta, name)) is not None: - if name == MetaFieldName.SUMMARY and isinstance( - field_val, SummaryMetaField - ): + if name == MetaFieldName.SUMMARY and isinstance(field_val, SummaryMetaField): txt = f"{field_val.text}" - elif name == MetaFieldName.DESCRIPTION and isinstance( - field_val, DescriptionMetaField - ): + elif name == MetaFieldName.DESCRIPTION and isinstance(field_val, DescriptionMetaField): txt = f"{field_val.text}" - elif name == MetaFieldName.CLASSIFICATION and isinstance( - field_val, PictureClassificationMetaField - ): - class_name = self._humanize_text( - field_val.get_main_prediction().class_name - ) + elif name == MetaFieldName.CLASSIFICATION and isinstance(field_val, PictureClassificationMetaField): + class_name = self._humanize_text(field_val.get_main_prediction().class_name) txt = f"{class_name}" - elif name == MetaFieldName.MOLECULE and isinstance( - field_val, MoleculeMetaField - ): + elif name == MetaFieldName.MOLECULE and isinstance(field_val, MoleculeMetaField): txt = f"{field_val.smi}" - elif name == MetaFieldName.TABULAR_CHART and isinstance( - field_val, TabularChartMetaField - ): + elif name == MetaFieldName.TABULAR_CHART and isinstance(field_val, TabularChartMetaField): # suppressing tabular chart serialization return None # elif tmp := str(field_val or ""): @@ -419,7 +397,6 @@ def serialize( is_chart = False if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs): - if item.meta: meta_res = doc_serializer.serialize_meta(item=item, **kwargs) if meta_res.text: @@ -508,12 +485,8 @@ def serialize_doc( text_res = tmp - if self.params.pretty_indentation and ( - my_root := parseString(text_res).documentElement - ): + if self.params.pretty_indentation and (my_root := parseString(text_res).documentElement): text_res = my_root.toprettyxml(indent=self.params.pretty_indentation) - text_res = "\n".join( - [line for line in text_res.split("\n") if line.strip()] - ) + text_res = "\n".join([line for line in text_res.split("\n") if line.strip()]) return create_ser_result(text=text_res, span_source=parts) diff --git a/docling_core/search/json_schema_to_search_mapper.py b/docling_core/search/json_schema_to_search_mapper.py index f5644835..2d8819a2 100644 --- a/docling_core/search/json_schema_to_search_mapper.py +++ b/docling_core/search/json_schema_to_search_mapper.py @@ -269,9 +269,7 @@ def __suppress(d_: Any) -> Any: if suppress_key in d_ and d_[suppress_key] is True: return {} else: - return { - k: v for k, v in ((k, __suppress(v)) for k, v in d_.items()) - } + return {k: v for k, v in ((k, 
__suppress(v)) for k, v in d_.items())} return d_ return __suppress(doc) @@ -325,12 +323,7 @@ def __remove(d_: Any) -> Any: return [v for v in (__remove(v) for v in d_)] if isinstance(d_, dict): - return { - k: v - for k, v in ( - (k, __remove(v)) for k, v in d_.items() if not regx.match(k) - ) - } + return {k: v for k, v in ((k, __remove(v)) for k, v in d_.items() if not regx.match(k))} return d_ @@ -393,11 +386,7 @@ def _clean(d_: Any) -> Any: return [v for v in (_clean(v) for v in d_) if not _empty(v)] if isinstance(d_, dict): - return { - k: v - for k, v in ((k, _clean(v)) for k, v in d_.items()) - if not _empty(v) - } + return {k: v for k, v in ((k, _clean(v)) for k, v in d_.items()) if not _empty(v)} return d_ diff --git a/docling_core/search/meta.py b/docling_core/search/meta.py index 2b5ff926..20e1e415 100644 --- a/docling_core/search/meta.py +++ b/docling_core/search/meta.py @@ -78,12 +78,8 @@ def version_has_schema(cls, v): """Validate that the docling-core library is always set in version field.""" docling_core = [item for item in v if item.name == "docling-core"] if not docling_core: - raise ValueError( - "the version should include at least a valid docling-core package" - ) + raise ValueError("the version should include at least a valid docling-core package") elif len(docling_core) > 1: - raise ValueError( - "the version must not include more than 1 docling-core package" - ) + raise ValueError("the version must not include more than 1 docling-core package") else: return v diff --git a/docling_core/search/package.py b/docling_core/search/package.py index 1d1bf37a..cdffc3f9 100644 --- a/docling_core/search/package.py +++ b/docling_core/search/package.py @@ -22,8 +22,8 @@ class Package(BaseModel, extra="forbid"): """ name: StrictStr = "docling-core" - version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = ( - importlib.metadata.version("docling-core") + version: Annotated[str, StringConstraints(strict=True, pattern=VERSION_PATTERN)] = importlib.metadata.version( + "docling-core" ) def __hash__(self): diff --git a/docling_core/transforms/chunker/base.py b/docling_core/transforms/chunker/base.py index 24f32ea4..72f9731c 100644 --- a/docling_core/transforms/chunker/base.py +++ b/docling_core/transforms/chunker/base.py @@ -77,14 +77,7 @@ def contextualize(self, chunk: BaseChunk) -> str: for k in meta: if k not in chunk.meta.excluded_embed: if isinstance(meta[k], list): - items.append( - self.delim.join( - [ - d if isinstance(d, str) else json.dumps(d) - for d in meta[k] - ] - ) - ) + items.append(self.delim.join([d if isinstance(d, str) else json.dumps(d) for d in meta[k]])) else: items.append(json.dumps(meta[k])) items.append(chunk.text) diff --git a/docling_core/transforms/chunker/code_chunking/_language_code_chunkers.py b/docling_core/transforms/chunker/code_chunking/_language_code_chunkers.py index 5c0d3c7c..5b9cb810 100644 --- a/docling_core/transforms/chunker/code_chunking/_language_code_chunkers.py +++ b/docling_core/transforms/chunker/code_chunking/_language_code_chunkers.py @@ -149,9 +149,7 @@ def build_class_metadata( chunk_type=CodeChunkType.CLASS, ) - def build_preamble_metadata( - self, *, item: CodeItem, content: str, start_line: int, end_line: int - ) -> CodeDocMeta: + def build_preamble_metadata(self, *, item: CodeItem, content: str, start_line: int, end_line: int) -> CodeDocMeta: """Build metadata for preamble chunks.""" return CodeDocMeta( doc_items=[item], @@ -162,9 +160,7 @@ def build_preamble_metadata( 
chunk_type=CodeChunkType.PREAMBLE, ) - def calculate_line_numbers( - self, code: str, start_byte: int, end_byte: int - ) -> Tuple[int, int]: + def calculate_line_numbers(self, code: str, start_byte: int, end_byte: int) -> Tuple[int, int]: """Calculate line numbers from byte positions.""" start_line = code[:start_byte].count("\n") + 1 if end_byte > 0 and end_byte <= len(code): @@ -224,9 +220,7 @@ def build_class_chunk( ) return CodeChunk(text=content, meta=metadata, doc_items=[self.item]) - def build_preamble_chunk( - self, content: str, start_line: int, end_line: int - ) -> CodeChunk: + def build_preamble_chunk(self, content: str, start_line: int, end_line: int) -> CodeChunk: """Build a preamble chunk.""" metadata = self.metadata_builder.build_preamble_metadata( item=self.item, @@ -236,15 +230,11 @@ def build_preamble_chunk( ) return CodeChunk(text=content, meta=metadata, doc_items=[self.item]) - def process_orphan_chunks( - self, used_ranges: List[Tuple[int, int]], dl_doc - ) -> Iterator[CodeChunk]: + def process_orphan_chunks(self, used_ranges: List[Tuple[int, int]], dl_doc) -> Iterator[CodeChunk]: """Process orphan chunks (preamble) from unused code ranges.""" from docling_core.types.doc.labels import DocItemLabel - code = next( - (t.text for t in dl_doc.texts if t.label == DocItemLabel.CODE), None - ) + code = next((t.text for t in dl_doc.texts if t.label == DocItemLabel.CODE), None) if not code: return @@ -263,18 +253,14 @@ def process_orphan_chunks( first_start_byte = orphan_pieces[0][1] last_end_byte = orphan_pieces[-1][2] - start_line, end_line = self.metadata_builder.calculate_line_numbers( - code, first_start_byte, last_end_byte - ) + start_line, end_line = self.metadata_builder.calculate_line_numbers(code, first_start_byte, last_end_byte) yield self.build_preamble_chunk(merged_content, start_line, end_line) class _ChunkSizeProcessor: """Processes chunks to split large ones into smaller pieces.""" - def __init__( - self, tokenizer, max_tokens: int, min_chunk_size: int = 300, chunker=None - ): + def __init__(self, tokenizer, max_tokens: int, min_chunk_size: int = 300, chunker=None): """Initialize the chunk size processor with tokenizer and size constraints.""" self.tokenizer = tokenizer self.max_tokens = max_tokens @@ -369,11 +355,7 @@ def _split_function_chunk( continue new_meta = chunk.meta.model_copy() - new_meta.part_name = ( - f"{chunk.meta.part_name}_part_{i + 1}" - if len(chunks) > 1 - else chunk.meta.part_name - ) + new_meta.part_name = f"{chunk.meta.part_name}_part_{i + 1}" if len(chunks) > 1 else chunk.meta.part_name sub_chunk = CodeChunk(text=chunk_text, meta=new_meta) yield sub_chunk, ranges @@ -405,9 +387,10 @@ def _split_generic_chunk( if current_size + line_tokens > self.max_tokens and current_chunk_lines: chunk_text = "\n".join(current_chunk_lines) if self.tokenizer.count_tokens(chunk_text) >= self.min_chunk_size: - yield self._create_split_chunk( - chunk, chunk_text, chunk_number - ), ranges + yield ( + self._create_split_chunk(chunk, chunk_text, chunk_number), + ranges, + ) chunk_number += 1 current_chunk_lines = [line] @@ -421,9 +404,7 @@ def _split_generic_chunk( if self.tokenizer.count_tokens(chunk_text) >= self.min_chunk_size: yield self._create_split_chunk(chunk, chunk_text, chunk_number), ranges - def _create_split_chunk( - self, original_chunk: CodeChunk, text: str, chunk_number: int - ) -> CodeChunk: + def _create_split_chunk(self, original_chunk: CodeChunk, text: str, chunk_number: int) -> CodeChunk: """Create a new chunk from split text.""" new_meta = 
original_chunk.meta.model_copy() new_meta.part_name = f"{original_chunk.meta.part_name}_part_{chunk_number}" @@ -484,9 +465,7 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[CodeChunk]: module_variables = self._get_module_variables(tree) range_tracker = _RangeTracker() chunk_builder = _ChunkBuilder(item=item, origin=dl_doc.origin) - size_processor = _ChunkSizeProcessor( - self.tokenizer, self.max_tokens, self.min_chunk_size, chunker=self - ) + size_processor = _ChunkSizeProcessor(self.tokenizer, self.max_tokens, self.min_chunk_size, chunker=self) self._mark_copyright_comments(tree.root_node, range_tracker) @@ -508,32 +487,24 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[CodeChunk]: all_chunks.append((chunk, chunk_used_ranges)) if module_variables: - self._track_constructor_variables( - tree.root_node, module_variables, range_tracker - ) + self._track_constructor_variables(tree.root_node, module_variables, range_tracker) empty_classes = self._get_classes_no_methods(tree.root_node, "") for node in empty_classes: for ( chunk, chunk_used_ranges, - ) in self._yield_class_chunk_with_ranges( - node, import_nodes, chunk_builder - ): + ) in self._yield_class_chunk_with_ranges(node, import_nodes, chunk_builder): range_tracker.extend(chunk_used_ranges) all_chunks.append((chunk, chunk_used_ranges)) - for chunk in chunk_builder.process_orphan_chunks( - range_tracker.get_used_ranges(), dl_doc - ): + for chunk in chunk_builder.process_orphan_chunks(range_tracker.get_used_ranges(), dl_doc): all_chunks.append((chunk, [])) for chunk, _ in size_processor.process_chunks(all_chunks): yield chunk - def _mark_copyright_comments( - self, root_node: Node, range_tracker: _RangeTracker - ) -> None: + def _mark_copyright_comments(self, root_node: Node, range_tracker: _RangeTracker) -> None: """Mark copyright comments as used.""" comment_nodes = _get_children(root_node, self.docs_types) for node in comment_nodes: @@ -549,14 +520,9 @@ def _yield_function_chunks_with_ranges( chunk_builder: _ChunkBuilder, module_variables: Optional[Dict[str, Node]] = None, ) -> Iterator[Tuple[CodeChunk, List[Tuple[int, int]]]]: - docstring = self._get_docstring(node) - additional_context, additional_context_no_docstring = ( - self._build_additional_context(node, root_node) - ) - imports = self._build_imports( - import_nodes, node, additional_context_no_docstring - ) + additional_context, additional_context_no_docstring = self._build_additional_context(node, root_node) + imports = self._build_imports(import_nodes, node, additional_context_no_docstring) function_line_start, _ = node.start_point function_line_end, _ = node.end_point signature_line_end, _ = self._get_function_signature_end(node) @@ -583,12 +549,8 @@ def _yield_function_chunks_with_ranges( current_node = node while current_node.parent: if current_node.parent.type in self.class_definition_types: - used_ranges.append( - (current_node.parent.start_byte, current_node.parent.end_byte) - ) - used_ranges.extend( - self._get_class_member_ranges(current_node.parent) - ) + used_ranges.append((current_node.parent.start_byte, current_node.parent.end_byte)) + used_ranges.extend(self._get_class_member_ranges(current_node.parent)) break current_node = current_node.parent @@ -605,23 +567,23 @@ def _yield_function_chunks_with_ranges( module_variable_definitions += var_text + "\n" function_content = self._build_function(node) - function_no_docstring = ( - function_content.replace(docstring, "") if docstring else function_content - ) + function_no_docstring 
= function_content.replace(docstring, "") if docstring else function_content base_content = ( - f"{prefix}{imports}{module_variable_definitions}" - f"{additional_context_no_docstring}{function_no_docstring}" + f"{prefix}{imports}{module_variable_definitions}{additional_context_no_docstring}{function_no_docstring}" ) - yield chunk_builder.build_function_chunk( - base_content, - function_name, - docstring, - function_line_start, - function_line_end, - signature_line_end, - ), used_ranges + yield ( + chunk_builder.build_function_chunk( + base_content, + function_name, + docstring, + function_line_start, + function_line_end, + signature_line_end, + ), + used_ranges, + ) def _yield_class_chunk_with_ranges( self, node: Node, import_nodes: Dict[str, Node], chunk_builder: _ChunkBuilder @@ -643,9 +605,7 @@ def _yield_class_chunk_with_ranges( used_ranges.extend(class_ranges) if imports: - used_imports = self._find_used_imports_in_function( - import_nodes, node, function_content, None - ) + used_imports = self._find_used_imports_in_function(import_nodes, node, function_content, None) for import_name in sorted(used_imports): if import_name in import_nodes: import_node = import_nodes[import_name] @@ -655,27 +615,26 @@ def _yield_class_chunk_with_ranges( if prefix: used_ranges.extend(prefix_range) - function_no_docstring = ( - function_content.replace(docstring, "") if docstring else function_content - ) + function_no_docstring = function_content.replace(docstring, "") if docstring else function_content content_no_docstring = f"{prefix}{imports}{function_no_docstring}" if chunk_builder: - yield chunk_builder.build_class_chunk( - content_no_docstring, - class_name, - docstring, - function_line_start, - function_line_end, - ), used_ranges + yield ( + chunk_builder.build_class_chunk( + content_no_docstring, + class_name, + docstring, + function_line_start, + function_line_end, + ), + used_ranges, + ) def _file_prefix(self, root_node: Node) -> Tuple[str, List]: return "", [] def _get_function_body(self, node: Node) -> Optional[Node]: - return next( - (child for child in node.children if child.type == self.function_body), None - ) + return next((child for child in node.children if child.type == self.function_body), None) def _get_docstring(self, node: Node) -> str: if node.prev_named_sibling and node.prev_named_sibling.type in self.docs_types: @@ -708,10 +667,7 @@ def _get_classes_no_methods(self, node: Node, parent_type: str) -> List[Node]: def has_methods(class_node: Node) -> bool: return any( child.type in self.function_definition_types - or any( - grandchild.type in self.function_definition_types - for grandchild in child.children - ) + or any(grandchild.type in self.function_definition_types for grandchild in child.children) for child in class_node.children ) @@ -781,10 +737,7 @@ def _build_imports( used, set_imports = set(), set() def find_used_imports(node): - if ( - node.type in self.identifiers - and node.text.decode(self.utf8_encoding) in imports - ): + if node.type in self.identifiers and node.text.decode(self.utf8_encoding) in imports: used.add(node.text.decode(self.utf8_encoding)) for child in node.children: find_used_imports(child) @@ -818,10 +771,7 @@ def _find_used_imports_in_function( used = set() def find_used_imports(node): - if ( - node.type in self.identifiers - and node.text.decode(self.utf8_encoding) in imports - ): + if node.type in self.identifiers and node.text.decode(self.utf8_encoding) in imports: used.add(node.text.decode(self.utf8_encoding)) for child in node.children: 
find_used_imports(child) @@ -892,27 +842,19 @@ def _get_node_ranges_with_comments(self, node: Node) -> List[Tuple[int, int]]: return ranges - def _get_variable_ranges_with_comments( - self, var_node: Node - ) -> List[Tuple[int, int]]: + def _get_variable_ranges_with_comments(self, var_node: Node) -> List[Tuple[int, int]]: """Get variable ranges including any preceding comments.""" return self._get_node_ranges_with_comments(var_node) - def _get_import_ranges_with_comments( - self, import_node: Node - ) -> List[Tuple[int, int]]: + def _get_import_ranges_with_comments(self, import_node: Node) -> List[Tuple[int, int]]: """Get import ranges including any preceding comments.""" return self._get_node_ranges_with_comments(import_node) - def _get_class_ranges_with_comments( - self, class_node: Node - ) -> List[Tuple[int, int]]: + def _get_class_ranges_with_comments(self, class_node: Node) -> List[Tuple[int, int]]: """Get class ranges including any preceding comments and docstrings.""" return self._get_node_ranges_with_comments(class_node) - def _build_additional_context( - self, function_node: Node, root_node: Node - ) -> Tuple[str, str]: + def _build_additional_context(self, function_node: Node, root_node: Node) -> Tuple[str, str]: context = "" context_no_docstring = "" node = function_node @@ -938,9 +880,7 @@ def _get_imports(self, tree: Tree) -> Dict[str, Node]: """Get imports from the AST. Must be implemented by language-specific chunkers.""" raise NotImplementedError - def _build_class_context( - self, class_node: Node, root_node: Node - ) -> Tuple[str, str]: + def _build_class_context(self, class_node: Node, root_node: Node) -> Tuple[str, str]: class_indent = class_node.start_point.column start_byte = class_node.start_byte @@ -960,9 +900,7 @@ def _build_class_context( header_text = "" header = f"{' ' * class_indent}{header_text}\n" docstring = self._get_docstring(class_node) - header_with_docstring = ( - f"{header}{' ' * (class_indent + 4)}{docstring}\n" if docstring else header - ) + header_with_docstring = f"{header}{' ' * (class_indent + 4)}{docstring}\n" if docstring else header fields = [ _to_str(child) @@ -975,9 +913,7 @@ def _build_class_context( constructor_doc = self._get_docstring(constructor_node) constructor_text = self._build_function(constructor_node) constructor_text_no_doc = ( - constructor_text.replace(constructor_doc, "") - if constructor_doc - else constructor_text + constructor_text.replace(constructor_doc, "") if constructor_doc else constructor_text ) else: constructor_text = constructor_text_no_doc = "" @@ -991,9 +927,7 @@ def _find_constructor(self, body: Node) -> Optional[Node]: for child in body.children: definition_field = child.child_by_field_name(self.definition_field) if self._is_constructor(child) or ( - child.type == self.decorator_type - and definition_field - and self._is_constructor(definition_field) + child.type == self.decorator_type and definition_field and self._is_constructor(definition_field) ): return child return None @@ -1029,10 +963,7 @@ def _is_only_function_in_class(self, constructor_node: Node) -> bool: function_count = 0 for child in body_node.children: - if ( - child.type in self.function_definition_types - and child != constructor_node - ): + if child.type in self.function_definition_types and child != constructor_node: function_count += 1 return function_count == 0 @@ -1057,7 +988,6 @@ def _track_constructor_variables( class _PythonFunctionChunker(_CodeChunker): - language: CodeLanguageLabel = CodeLanguageLabel.PYTHON ts_language: Any = 
Field(default=None) parser: Any = Field(default=None) @@ -1129,15 +1059,10 @@ def _get_module_variables(self, tree: Tree) -> Dict[str, Node]: if child.type in self.expression_types and child.named_children: expr = child.named_children[0] if expr.type == "assignment": - if ( - expr.named_children - and expr.named_children[0].type in self.identifiers - ): + if expr.named_children and expr.named_children[0].type in self.identifiers: text = expr.named_children[0].text var_name = text.decode(self.utf8_encoding) if text else "" - extended_node = self._get_variable_with_comments( - child, tree.root_node - ) + extended_node = self._get_variable_with_comments(child, tree.root_node) variables[var_name] = extended_node return variables @@ -1183,10 +1108,7 @@ def _is_local_assignment(self, identifier_node: Node) -> bool: current = identifier_node.parent while current: if current.type == "assignment": - if ( - current.named_children - and current.named_children[0] == identifier_node - ): + if current.named_children and current.named_children[0] == identifier_node: return True current = current.parent return False @@ -1247,13 +1169,9 @@ def _get_imports(self, tree: Tree) -> Dict[str, Node]: if sub_child.type == self.named_imports: for spec in sub_child.children: if spec.type == self.import_specifier: - name_node = spec.child_by_field_name( - self.name_field - ) + name_node = spec.child_by_field_name(self.name_field) if name_node: - identifiers.append( - name_node.text.decode("utf8") - ) + identifiers.append(name_node.text.decode("utf8")) elif sub_child.type in self.identifiers: identifiers.append(sub_child.text.decode("utf8")) elif sub_child.type == self.namespace_import: @@ -1312,10 +1230,7 @@ def _is_docstring(self, node: Node) -> bool: def _get_docstring(self, node: Node) -> str: docstring = "" if node.prev_named_sibling and node.prev_named_sibling.type in self.docs_types: - while ( - node.prev_named_sibling - and node.prev_named_sibling.type in self.docs_types - ): + while node.prev_named_sibling and node.prev_named_sibling.type in self.docs_types: text = node.prev_named_sibling.text if text: docstring += text.decode(self.utf8_encoding) @@ -1343,12 +1258,8 @@ def _structs(node): if clean_name: structs[clean_name] = node elif node.type in [self.declaration]: - if _has_child( - node.child_by_field_name(self.declarator), self.declarator - ): - name = node.child_by_field_name( - self.declarator - ).child_by_field_name(self.declarator) + if _has_child(node.child_by_field_name(self.declarator), self.declarator): + name = node.child_by_field_name(self.declarator).child_by_field_name(self.declarator) else: name = node.child_by_field_name(self.declarator) if name: @@ -1356,12 +1267,8 @@ def _structs(node): if clean_name: structs[clean_name] = node elif node.type in self.function_declaration: - if _has_child( - node.child_by_field_name(self.type_field), self.name_field - ): - name = node.child_by_field_name( - self.type_field - ).child_by_field_name(self.name_field) + if _has_child(node.child_by_field_name(self.type_field), self.name_field): + name = node.child_by_field_name(self.type_field).child_by_field_name(self.name_field) else: name = node.child_by_field_name(self.type_field) if name: @@ -1432,7 +1339,6 @@ def collect_identifiers(node, depth=0): class _JavaFunctionChunker(_CodeChunker): - language: CodeLanguageLabel = CodeLanguageLabel.JAVA ts_language: Any = Field(default=None) parser: Any = Field(default=None) @@ -1514,16 +1420,12 @@ def _get_imports(self, tree: Tree) -> Dict[str, Node]: return 
import_dict @override - def _build_additional_context( - self, function_node: Node, root_node: Node - ) -> Tuple[str, str]: + def _build_additional_context(self, function_node: Node, root_node: Node) -> Tuple[str, str]: context: List[str] = [] context_no_doc: List[str] = [] while function_node.parent is not None: if function_node.type in self.object_declarations: - with_doc, without_doc = self._build_java_object_context( - function_node, root_node - ) + with_doc, without_doc = self._build_java_object_context(function_node, root_node) context.insert(0, with_doc) context_no_doc.insert(0, without_doc) function_node = function_node.parent @@ -1534,9 +1436,7 @@ def _build_additional_context( without_doc + ("" if without_doc else ""), ) - def _build_java_object_context( - self, obj_node: Node, root_node: Node - ) -> Tuple[str, str]: + def _build_java_object_context(self, obj_node: Node, root_node: Node) -> Tuple[str, str]: """Build context for Java objects (classes, enums, interfaces).""" obj_type = obj_node.type @@ -1549,9 +1449,7 @@ def _build_java_object_context( return ("", "") - def _build_java_class_like_context( - self, node: Node, root_node: Node, context_type: str - ) -> Tuple[str, str]: + def _build_java_class_like_context(self, node: Node, root_node: Node, context_type: str) -> Tuple[str, str]: """Unified context building for Java classes, enums, and interfaces.""" body = node.child_by_field_name(self.class_body_field) if not body: @@ -1560,56 +1458,30 @@ def _build_java_class_like_context( header = self._get_function_signature(node, root_node) doc = self._get_docstring(node) - header_with_doc = ( - f"{header}{' ' * (node.start_point.column + 4)}{doc}" if doc else header - ) + header_with_doc = f"{header}{' ' * (node.start_point.column + 4)}{doc}" if doc else header inner_parts = [] if context_type == "enum": - constants = [ - _to_str(child) - for child in body.children - if child.type == self.enum_constant - ] + constants = [_to_str(child) for child in body.children if child.type == self.enum_constant] const_block = (",".join(constants) + ";") if constants else "" inner_parts.append(const_block) decl = next( - ( - child - for child in body.children - if child.type == self.enum_body_declarations - ), + (child for child in body.children if child.type == self.enum_body_declarations), None, ) if decl: - decl_parts = [ - _to_str(child) - for child in decl.children - if child.type in self.enum_inner_types - ] + decl_parts = [_to_str(child) for child in decl.children if child.type in self.enum_inner_types] inner_parts.append("".join(decl_parts)) elif context_type == "interface": - constants = [ - _to_str(child) - for child in body.children - if child.type == self.constant_declaration - ] - methods = [ - _to_str(child) - for child in body.children - if child.type in self.function_definition_types - ] + constants = [_to_str(child) for child in body.children if child.type == self.constant_declaration] + methods = [_to_str(child) for child in body.children if child.type in self.function_definition_types] inner_parts.extend(["".join(constants), "".join(methods)]) else: - parts = [ - _to_str(child) - for child in body.children - if child.type in self.class_header_inner_types - ] + parts = [_to_str(child) for child in body.children if child.type in self.class_header_inner_types] inner_parts.extend(parts) ctor = self._find_constructor(body) @@ -1619,9 +1491,7 @@ def _build_java_class_like_context( inner = "".join(part for part in inner_parts if part.strip()) close = (" " * node.start_point.column) 
+ "}" - with_doc = ( - "\n\n".join(x for x in [header_with_doc, inner] if x).rstrip() + close - ) + with_doc = "\n\n".join(x for x in [header_with_doc, inner] if x).rstrip() + close without_doc = "\n\n".join(x for x in [header, inner] if x).rstrip() + close return with_doc, without_doc diff --git a/docling_core/transforms/chunker/code_chunking/_utils.py b/docling_core/transforms/chunker/code_chunking/_utils.py index aa971f3a..9f791585 100644 --- a/docling_core/transforms/chunker/code_chunking/_utils.py +++ b/docling_core/transforms/chunker/code_chunking/_utils.py @@ -98,9 +98,7 @@ def _get_function_name(language: CodeLanguageLabel, node: Node) -> Optional[str] return None -def _is_collectable_function( - language: CodeLanguageLabel, node: Node, constructor_name: str -) -> bool: +def _is_collectable_function(language: CodeLanguageLabel, node: Node, constructor_name: str) -> bool: """Check if a function should be collected for chunking.""" if language == CodeLanguageLabel.C: return True @@ -117,9 +115,7 @@ def _get_default_tokenizer() -> "BaseTokenizer": HuggingFaceTokenizer, ) - return HuggingFaceTokenizer.from_pretrained( - model_name="sentence-transformers/all-MiniLM-L6-v2" - ) + return HuggingFaceTokenizer.from_pretrained(model_name="sentence-transformers/all-MiniLM-L6-v2") def _has_child(node: Node, child_name: str) -> bool: diff --git a/docling_core/transforms/chunker/code_chunking/standard_code_chunking_strategy.py b/docling_core/transforms/chunker/code_chunking/standard_code_chunking_strategy.py index cdddb9ad..c4ebfb00 100644 --- a/docling_core/transforms/chunker/code_chunking/standard_code_chunking_strategy.py +++ b/docling_core/transforms/chunker/code_chunking/standard_code_chunking_strategy.py @@ -77,9 +77,7 @@ def chunk_code_item( if chunker := self._get_chunker(item.code_language): doc = DoclingDocument(name="", origin=doc.origin) - doc.add_code( - text=code_text, code_language=item.code_language, orig=code_text - ) + doc.add_code(text=code_text, code_language=item.code_language, orig=code_text) yield from chunker.chunk(doc, **kwargs) else: # if no inner chunker available for language, fall back to yielding a single code block chunk yield CodeChunk( diff --git a/docling_core/transforms/chunker/doc_chunk.py b/docling_core/transforms/chunker/doc_chunk.py index 74264560..43de0798 100644 --- a/docling_core/transforms/chunker/doc_chunk.py +++ b/docling_core/transforms/chunker/doc_chunk.py @@ -30,11 +30,9 @@ class DocMeta(BaseMeta): default="docling_core.transforms.chunker.DocMeta", alias=_KEY_SCHEMA_NAME, ) - version: Annotated[str, StringConstraints(pattern=VERSION_PATTERN, strict=True)] = ( - Field( - default=_VERSION, - alias=_KEY_VERSION, - ) + version: Annotated[str, StringConstraints(pattern=VERSION_PATTERN, strict=True)] = Field( + default=_VERSION, + alias=_KEY_VERSION, ) doc_items: list[DocItem] = Field( alias=_KEY_DOC_ITEMS, diff --git a/docling_core/transforms/chunker/hierarchical_chunker.py b/docling_core/transforms/chunker/hierarchical_chunker.py index 7b855b2d..888a6967 100644 --- a/docling_core/transforms/chunker/hierarchical_chunker.py +++ b/docling_core/transforms/chunker/hierarchical_chunker.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging -from typing import Any, Iterator, Optional +from typing import Any, Iterator from pydantic import ConfigDict, Field from typing_extensions import Annotated, override @@ -66,7 +66,6 @@ def serialize( if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs): table_df = item.export_to_dataframe(doc) if 
table_df.shape[0] >= 1 and table_df.shape[1] >= 2: - # copy header as first row and shift all rows by one table_df.loc[-1] = table_df.columns # type: ignore[call-overload] table_df.index = table_df.index + 1 @@ -126,7 +125,7 @@ class HierarchicalChunker(BaseChunker): model_config = ConfigDict(arbitrary_types_allowed=True) serializer_provider: BaseSerializerProvider = ChunkingSerializerProvider() - code_chunking_strategy: Optional[BaseCodeChunkingStrategy] = Field(default=None) + code_chunking_strategy: BaseCodeChunkingStrategy | None = Field(default=None) # deprecated: merge_list_items: Annotated[bool, Field(deprecated=True)] = True @@ -161,13 +160,8 @@ def chunk( for k in keys_to_del: heading_by_level.pop(k, None) continue - elif ( - isinstance(item, (ListGroup, InlineGroup, DocItem)) - and item.self_ref not in visited - ): - if self.code_chunking_strategy is not None and isinstance( - item, CodeItem - ): + elif isinstance(item, (ListGroup, InlineGroup, DocItem)) and item.self_ref not in visited: + if self.code_chunking_strategy is not None and isinstance(item, CodeItem): yield from self.code_chunking_strategy.chunk_code_item( item=item, doc=dl_doc, @@ -188,8 +182,7 @@ def chunk( text=ser_res.text, meta=DocMeta( doc_items=doc_items, - headings=[heading_by_level[k] for k in sorted(heading_by_level)] - or None, + headings=[heading_by_level[k] for k in sorted(heading_by_level)] or None, origin=dl_doc.origin, ), ) diff --git a/docling_core/transforms/chunker/hybrid_chunker.py b/docling_core/transforms/chunker/hybrid_chunker.py index ecffaccd..a016f886 100644 --- a/docling_core/transforms/chunker/hybrid_chunker.py +++ b/docling_core/transforms/chunker/hybrid_chunker.py @@ -41,9 +41,7 @@ def _get_default_tokenizer(): HuggingFaceTokenizer, ) - return HuggingFaceTokenizer.from_pretrained( - model_name="sentence-transformers/all-MiniLM-L6-v2" - ) + return HuggingFaceTokenizer.from_pretrained(model_name="sentence-transformers/all-MiniLM-L6-v2") class HybridChunker(BaseChunker): @@ -72,8 +70,7 @@ def _patch(cls, data: Any) -> Any: max_tokens = data.get("max_tokens") if not isinstance(tokenizer, BaseTokenizer) and ( # some legacy param passed: - tokenizer is not None - or max_tokens is not None + tokenizer is not None or max_tokens is not None ): from docling_core.transforms.chunker.tokenizer.huggingface import ( HuggingFaceTokenizer, @@ -91,12 +88,8 @@ def _patch(cls, data: Any) -> Any: model_name=tokenizer, max_tokens=max_tokens, ) - elif tokenizer is None or isinstance( - tokenizer, PreTrainedTokenizerBase - ): - kwargs = { - "tokenizer": tokenizer or _get_default_tokenizer().tokenizer - } + elif tokenizer is None or isinstance(tokenizer, PreTrainedTokenizerBase): + kwargs = {"tokenizer": tokenizer or _get_default_tokenizer().tokenizer} if max_tokens is not None: kwargs["max_tokens"] = max_tokens data["tokenizer"] = HuggingFaceTokenizer(**kwargs) @@ -158,19 +151,13 @@ def _make_chunk_from_doc_items( if len(doc_chunk.meta.doc_items) == 1 # TODO: merging should ideally be done by the serializer: else self.delim.join( - [ - res_text - for doc_item in doc_items - if (res_text := doc_serializer.serialize(item=doc_item).text) - ] + [res_text for doc_item in doc_items if (res_text := doc_serializer.serialize(item=doc_item).text)] ) ) new_chunk = DocChunk(text=window_text, meta=meta) return new_chunk - def _split_by_doc_items( - self, doc_chunk: DocChunk, doc_serializer: BaseDocSerializer - ) -> list[DocChunk]: + def _split_by_doc_items(self, doc_chunk: DocChunk, doc_serializer: BaseDocSerializer) -> 
list[DocChunk]: chunks = [] window_start = 0 window_end = 0 # an inclusive index @@ -224,9 +211,7 @@ def _split_using_plain_text( # How much room is there for text after subtracting out the headers and # captions: available_length = self.max_tokens - lengths.other_len - sem_chunker = semchunk.chunkerify( - self.tokenizer.get_tokenizer(), chunk_size=available_length - ) + sem_chunker = semchunk.chunkerify(self.tokenizer.get_tokenizer(), chunk_size=available_length) if available_length <= 0: warnings.warn( "Headers and captions for this chunk are longer than the total " @@ -267,10 +252,7 @@ def _merge_chunks_with_matching_metadata(self, chunks: list[DocChunk]): origin=chunk.meta.origin, ), ) - if ( - headings == current_headings - and self._count_chunk_tokens(doc_chunk=candidate) <= self.max_tokens - ): + if headings == current_headings and self._count_chunk_tokens(doc_chunk=candidate) <= self.max_tokens: # there is room to include the new chunk so add it to the window and # continue window_end += 1 @@ -311,11 +293,7 @@ def chunk( doc_serializer=my_doc_ser, **kwargs, ) # type: ignore - res = [ - x - for c in res - for x in self._split_by_doc_items(c, doc_serializer=my_doc_ser) - ] + res = [x for c in res for x in self._split_by_doc_items(c, doc_serializer=my_doc_ser)] res = [x for c in res for x in self._split_using_plain_text(c)] if self.merge_peers: res = self._merge_chunks_with_matching_metadata(res) diff --git a/docling_core/transforms/chunker/tokenizer/huggingface.py b/docling_core/transforms/chunker/tokenizer/huggingface.py index 5d23f73c..d7cec707 100644 --- a/docling_core/transforms/chunker/tokenizer/huggingface.py +++ b/docling_core/transforms/chunker/tokenizer/huggingface.py @@ -13,10 +13,7 @@ try: from transformers import AutoTokenizer, PreTrainedTokenizerBase except ImportError: - raise RuntimeError( - "Module requires 'chunking' extra; to install, run: " - "`pip install 'docling-core[chunking]'`" - ) + raise RuntimeError("Module requires 'chunking' extra; to install, run: `pip install 'docling-core[chunking]'`") class HuggingFaceTokenizer(BaseTokenizer): @@ -42,10 +39,7 @@ def _patch(self) -> Self: data = json.load(f) self.max_tokens = int(data["max_seq_length"]) except Exception as e: - raise RuntimeError( - "max_tokens could not be determined automatically; please set " - "explicitly." 
- ) from e + raise RuntimeError("max_tokens could not be determined automatically; please set explicitly.") from e return self def count_tokens(self, text: str): @@ -65,9 +59,7 @@ def from_pretrained( ) -> Self: """Create tokenizer from model name.""" my_kwargs = { - "tokenizer": AutoTokenizer.from_pretrained( - pretrained_model_name_or_path=model_name, **kwargs - ), + "tokenizer": AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name, **kwargs), } if max_tokens is not None: my_kwargs["max_tokens"] = max_tokens diff --git a/docling_core/transforms/chunker/tokenizer/openai.py b/docling_core/transforms/chunker/tokenizer/openai.py index 48ce944b..572229ba 100644 --- a/docling_core/transforms/chunker/tokenizer/openai.py +++ b/docling_core/transforms/chunker/tokenizer/openai.py @@ -8,8 +8,7 @@ import tiktoken except ImportError: raise RuntimeError( - "Module requires 'chunking-openai' extra; to install, run: " - "`pip install 'docling-core[chunking-openai]'`" + "Module requires 'chunking-openai' extra; to install, run: `pip install 'docling-core[chunking-openai]'`" ) diff --git a/docling_core/transforms/serializer/azure.py b/docling_core/transforms/serializer/azure.py index 674f90b8..385aca6a 100644 --- a/docling_core/transforms/serializer/azure.py +++ b/docling_core/transforms/serializer/azure.py @@ -74,9 +74,7 @@ def _bbox_to_polygon_coords( return [l, t, r, t, r, b, l, b] -def _bbox_to_polygon_for_item( - doc: DoclingDocument, item: DocItem -) -> Optional[list[float]]: +def _bbox_to_polygon_for_item(doc: DoclingDocument, item: DocItem) -> Optional[list[float]]: """Compute a TOPLEFT-origin polygon for the first provenance of the item.""" if not item.prov: return None @@ -113,7 +111,7 @@ class _AzureBoundingRegion(BaseModel): Matches Azure's schema; field names use camelCase by design. 
""" - pageNumber: int # noqa: N815 + pageNumber: int polygon: list[float] @@ -121,7 +119,7 @@ class _AzureParagraph(BaseModel): """Paragraph content with optional role and regions.""" content: str - boundingRegions: list["_AzureBoundingRegion"] # noqa: N815 + boundingRegions: list["_AzureBoundingRegion"] role: Optional[str] = None @@ -129,34 +127,34 @@ class _AzureTableCell(BaseModel): """Single table cell with position, span, and optional region.""" content: str - rowIndex: int # noqa: N815 - columnIndex: int # noqa: N815 - rowSpan: int = 1 # noqa: N815 - colSpan: int = 1 # noqa: N815 + rowIndex: int + columnIndex: int + rowSpan: int = 1 + colSpan: int = 1 kind: Optional[str] = None - boundingRegions: Optional[list[_AzureBoundingRegion]] = None # noqa: N815 + boundingRegions: Optional[list[_AzureBoundingRegion]] = None class _AzureTable(BaseModel): """Table with dimensions, regions, and cells.""" - rowCount: int # noqa: N815 - columnCount: int # noqa: N815 - boundingRegions: list[_AzureBoundingRegion] # noqa: N815 + rowCount: int + columnCount: int + boundingRegions: list[_AzureBoundingRegion] cells: list[_AzureTableCell] class _AzureImage(BaseModel): """Image/figure with bounding region and optional footnotes.""" - boundingRegions: list[_AzureBoundingRegion] # noqa: N815 + boundingRegions: list[_AzureBoundingRegion] footnotes: Optional[list[_AzureParagraph]] = None class _AzurePage(BaseModel): """Page metadata used in the Azure-like output.""" - pageNumber: int # noqa: N815 + pageNumber: int width: float height: float # Words are not currently emitted; keep as untyped list @@ -215,9 +213,7 @@ def serialize( if content != "" and polygon is not None: para = _AzureParagraph( content=content, - boundingRegions=[ - _AzureBoundingRegion(pageNumber=page_no, polygon=polygon) - ], + boundingRegions=[_AzureBoundingRegion(pageNumber=page_no, polygon=polygon)], role=role, ) @@ -266,9 +262,7 @@ def serialize( # For RichTableCell, get textual content via helper if isinstance(cell, RichTableCell): - content_text = cell._get_text( - doc=doc, doc_serializer=doc_serializer - ) + content_text = cell._get_text(doc=doc, doc_serializer=doc_serializer) else: content_text = cell.text @@ -280,9 +274,7 @@ def serialize( page_h = doc.pages[page_no].size.height if bbox.coord_origin != CoordOrigin.TOPLEFT: bbox = bbox.to_top_left_origin(page_height=page_h) - cell_poly = _bbox_to_polygon_coords( - l=bbox.l, t=bbox.t, r=bbox.r, b=bbox.b - ) + cell_poly = _bbox_to_polygon_coords(l=bbox.l, t=bbox.t, r=bbox.r, b=bbox.b) cell_obj = _AzureTableCell( content=content_text.strip(), @@ -290,15 +282,9 @@ def serialize( columnIndex=cell.start_col_offset_idx, rowSpan=max(cell.row_span, 1), colSpan=max(cell.col_span, 1), - kind=( - "columnHeader" - if cell.column_header - else ("rowHeader" if cell.row_header else None) - ), + kind=("columnHeader" if cell.column_header else ("rowHeader" if cell.row_header else None)), boundingRegions=( - [_AzureBoundingRegion(pageNumber=page_no, polygon=cell_poly)] - if cell_poly is not None - else None + [_AzureBoundingRegion(pageNumber=page_no, polygon=cell_poly)] if cell_poly is not None else None ), ) @@ -331,9 +317,7 @@ def serialize( if poly is None: return create_ser_result() - fig_obj = _AzureImage( - boundingRegions=[_AzureBoundingRegion(pageNumber=page_no, polygon=poly)] - ) + fig_obj = _AzureImage(boundingRegions=[_AzureBoundingRegion(pageNumber=page_no, polygon=poly)]) # Include picture footnotes if present foots = [] @@ -346,11 +330,7 @@ def serialize( foots.append( _AzureParagraph( 
content=tgt.text, - boundingRegions=[ - _AzureBoundingRegion( - pageNumber=tgt.prov[0].page_no, polygon=f_poly - ) - ], + boundingRegions=[_AzureBoundingRegion(pageNumber=tgt.prov[0].page_no, polygon=f_poly)], ) ) diff --git a/docling_core/transforms/serializer/common.py b/docling_core/transforms/serializer/common.py index b494eb0e..b790b89c 100644 --- a/docling_core/transforms/serializer/common.py +++ b/docling_core/transforms/serializer/common.py @@ -96,10 +96,7 @@ def _iterate_items( traverse_pictures=traverse_pictures, ): if add_page_breaks: - if ( - isinstance(item, (ListGroup, InlineGroup)) - and item.self_ref not in my_visited - ): + if isinstance(item, (ListGroup, InlineGroup)) and item.self_ref not in my_visited: # if group starts with new page, yield page break before group node my_visited.add(item.self_ref) for it, _ in _iterate_items( @@ -113,21 +110,27 @@ def _iterate_items( if isinstance(it, DocItem) and it.prov: page_no = it.prov[0].page_no if prev_page_nr is not None and page_no > prev_page_nr: - yield _PageBreakNode( - self_ref=f"#/pb/{page_break_i}", - prev_page=prev_page_nr, - next_page=page_no, - ), lvl + yield ( + _PageBreakNode( + self_ref=f"#/pb/{page_break_i}", + prev_page=prev_page_nr, + next_page=page_no, + ), + lvl, + ) break elif isinstance(item, DocItem) and item.prov: page_no = item.prov[0].page_no if prev_page_nr is None or page_no > prev_page_nr: if prev_page_nr is not None: # close previous range - yield _PageBreakNode( - self_ref=f"#/pb/{page_break_i}", - prev_page=prev_page_nr, - next_page=page_no, - ), lvl + yield ( + _PageBreakNode( + self_ref=f"#/pb/{page_break_i}", + prev_page=prev_page_nr, + next_page=page_no, + ), + lvl, + ) page_break_i += 1 prev_page_nr = page_no yield item, lvl @@ -138,11 +141,7 @@ def _get_annotation_text( ) -> Optional[str]: result = None if isinstance(annotation, PictureClassificationData): - predicted_class = ( - annotation.predicted_classes[0].class_name - if annotation.predicted_classes - else None - ) + predicted_class = annotation.predicted_classes[0].class_name if annotation.predicted_classes else None if predicted_class is not None: result = predicted_class.replace("_", " ") elif isinstance(annotation, DescriptionAnnotation): @@ -286,10 +285,7 @@ def get_excluded_refs(self, **kwargs: Any) -> set[str]: or item.content_layer not in params.layers or ( params.pages is not None - and ( - (not item.prov) - or item.prov[0].page_no not in params.pages - ) + and ((not item.prov) or item.prov[0].page_no not in params.pages) ) ) ) @@ -450,9 +446,7 @@ def serialize( ) parts.append(part) - return create_ser_result( - text=delim.join([p.text for p in parts if p.text]), span_source=parts - ) + return create_ser_result(text=delim.join([p.text for p in parts if p.text]), span_source=parts) # making some assumptions about the kwargs it can pass @override @@ -604,13 +598,9 @@ def serialize_meta( **(self.params.model_dump() | kwargs), ) else: - return create_ser_result( - text="", span_source=item if isinstance(item, DocItem) else [] - ) + return create_ser_result(text="", span_source=item if isinstance(item, DocItem) else []) else: - return create_ser_result( - text="", span_source=item if isinstance(item, DocItem) else [] - ) + return create_ser_result(text="", span_source=item if isinstance(item, DocItem) else []) # TODO deprecate @override @@ -639,10 +629,7 @@ def _get_applicable_pages(self) -> Optional[list[int]]: if ( isinstance(item, DocItem) and item.prov - and ( - self.params.pages is None - or item.prov[0].page_no in 
self.params.pages - ) + and (self.params.pages is None or item.prov[0].page_no in self.params.pages) and ix >= self.params.start_idx and ix < self.params.stop_idx ) @@ -672,17 +659,9 @@ def _should_use_legacy_annotations( return False with warnings.catch_warnings(record=True) as caught_warnings: warnings.simplefilter("ignore", DeprecationWarning) - if ( - incl_attr := getattr(params, "include_annotations", None) - ) is not None and not incl_attr: + if (incl_attr := getattr(params, "include_annotations", None)) is not None and not incl_attr: return False - use_legacy = bool( - [ - ann - for ann in item.annotations - if ((ann.kind == kind) if kind is not None else True) - ] - ) + use_legacy = bool([ann for ann in item.annotations if ((ann.kind == kind) if kind is not None else True)]) if use_legacy: for w in caught_warnings: warnings.warn(w.message, w.category) diff --git a/docling_core/transforms/serializer/doctags.py b/docling_core/transforms/serializer/doctags.py index beff6168..22485f84 100644 --- a/docling_core/transforms/serializer/doctags.py +++ b/docling_core/transforms/serializer/doctags.py @@ -107,13 +107,9 @@ def serialize( my_visited = visited if visited is not None else set() params = DocTagsParams(**kwargs) # Decide wrapping up-front so ListItem never gets wrapped here - wrap_tag_token: Optional[str] = ( - DocumentToken.create_token_name_from_doc_item_label( - label=item.label, - **( - {"level": item.level} if isinstance(item, SectionHeaderItem) else {} - ), - ) + wrap_tag_token: Optional[str] = DocumentToken.create_token_name_from_doc_item_label( + label=item.label, + **({"level": item.level} if isinstance(item, SectionHeaderItem) else {}), ) wrap_tag: Optional[str] = None if isinstance(item, ListItem) else wrap_tag_token parts: list[str] = [] @@ -137,9 +133,7 @@ def serialize( if ( item.text == "" and len(item.children) == 1 - and isinstance( - (child_group := item.children[0].resolve(doc)), InlineGroup - ) + and isinstance((child_group := item.children[0].resolve(doc)), InlineGroup) ): ser_res = doc_serializer.serialize(item=child_group, visited=my_visited) text_part = ser_res.text @@ -259,23 +253,15 @@ def serialize( predicted_class: Optional[str] = None if item.meta: if item.meta.classification: - predicted_class = ( - item.meta.classification.get_main_prediction().class_name - ) + predicted_class = item.meta.classification.get_main_prediction().class_name elif _should_use_legacy_annotations( params=params, item=item, kind=PictureClassificationData.model_fields["kind"].default, ): - if classifications := [ - ann - for ann in item.annotations - if isinstance(ann, PictureClassificationData) - ]: + if classifications := [ann for ann in item.annotations if isinstance(ann, PictureClassificationData)]: if classifications[0].predicted_classes: - predicted_class = ( - classifications[0].predicted_classes[0].class_name - ) + predicted_class = classifications[0].predicted_classes[0].class_name if predicted_class: body += DocumentToken.get_picture_classification_token(predicted_class) if predicted_class in [ @@ -299,11 +285,7 @@ def serialize( item=item, kind=PictureMoleculeData.model_fields["kind"].default, ): - if smiles_annotations := [ - ann - for ann in item.annotations - if isinstance(ann, PictureMoleculeData) - ]: + if smiles_annotations := [ann for ann in item.annotations if isinstance(ann, PictureMoleculeData)]: smi = smiles_annotations[0].smi if smi: body += _wrap(text=smi, wrap_tag=DocumentToken.SMILES.value) @@ -319,17 +301,13 @@ def serialize( 
kind=PictureTabularChartData.model_fields["kind"].default, ): if tabular_chart_annotations := [ - ann - for ann in item.annotations - if isinstance(ann, PictureTabularChartData) + ann for ann in item.annotations if isinstance(ann, PictureTabularChartData) ]: chart_data = tabular_chart_annotations[0].chart_data if chart_data and chart_data.table_cells: temp_doc = DoclingDocument(name="temp") temp_table = temp_doc.add_table(data=chart_data) - otsl_content = temp_table.export_to_otsl( - temp_doc, add_cell_location=False - ) + otsl_content = temp_table.export_to_otsl(temp_doc, add_cell_location=False) body += otsl_content res_parts.append(create_ser_result(text=body, span_source=item)) @@ -379,9 +357,7 @@ def serialize( # mapping from source_cell_id to a list of target_cell_ids source_to_targets: Dict[int, List[int]] = {} for link in item.graph.links: - source_to_targets.setdefault(link.source_cell_id, []).append( - link.target_cell_id - ) + source_to_targets.setdefault(link.source_cell_id, []).append(link.target_cell_id) for cell in item.graph.cells: cell_txt = "" @@ -468,11 +444,7 @@ def serialize( if parts: text_res = delim.join( - [ - t - for p in parts - if (t := _wrap(text=p.text, wrap_tag=DocumentToken.LIST_ITEM.value)) - ] + [t for p in parts if (t := _wrap(text=p.text, wrap_tag=DocumentToken.LIST_ITEM.value))] ) text_res = f"{text_res}{delim}" wrap_tag = ( @@ -492,7 +464,6 @@ class DocTagsInlineSerializer(BaseInlineSerializer): def _get_inline_location_tags( self, doc: DoclingDocument, item: InlineGroup, params: DocTagsParams ) -> SerializationResult: - prov: Optional[ProvenanceItem] = None boxes: list[BoundingBox] = [] doc_items: list[DocItem] = [] diff --git a/docling_core/transforms/serializer/html.py b/docling_core/transforms/serializer/html.py index 4c18ec3c..af66246a 100644 --- a/docling_core/transforms/serializer/html.py +++ b/docling_core/transforms/serializer/html.py @@ -8,7 +8,7 @@ from pathlib import Path from typing import Any, Optional, Union from urllib.parse import quote -from xml.etree.cElementTree import SubElement, tostring +from xml.etree.ElementTree import SubElement, tostring from xml.sax.saxutils import unescape import latex2mathml.converter @@ -164,12 +164,8 @@ def serialize( # Prepare the HTML based on item type if isinstance(item, (TitleItem, SectionHeaderItem)): - section_level = ( - min(item.level + 1, 6) if isinstance(item, SectionHeaderItem) else 1 - ) - text = get_html_tag_with_text_direction( - html_tag=f"h{section_level}", text=text - ) + section_level = min(item.level + 1, 6) if isinstance(item, SectionHeaderItem) else 1 + text = get_html_tag_with_text_direction(html_tag=f"h{section_level}", text=text) elif isinstance(item, FormulaItem): text = self._process_formula( @@ -183,11 +179,7 @@ def serialize( ) elif isinstance(item, CodeItem): - text = ( - f"{text}" - if is_inline_scope - else f"
{text}
" - ) + text = f"{text}" if is_inline_scope else f"
{text}
" elif isinstance(item, ListItem): # List items are handled by list serializer @@ -271,11 +263,7 @@ def _process_formula( and orig != "" and len(item.prov) > 0 and image_mode == ImageRefMode.EMBEDDED - and ( - img_fallback := self._get_formula_image_fallback( - item=item, orig=orig, doc=doc - ) - ) + and (img_fallback := self._get_formula_image_fallback(item=item, orig=orig, doc=doc)) ): return img_fallback @@ -284,12 +272,8 @@ def _process_formula( try: # Set display mode based on context display_mode = "inline" if is_inline_scope else "block" - mathml_element = latex2mathml.converter.convert_to_element( - text, display=display_mode - ) - annotation = SubElement( - mathml_element, "annotation", dict(encoding="TeX") - ) + mathml_element = latex2mathml.converter.convert_to_element(text, display=display_mode) + annotation = SubElement(mathml_element, "annotation", dict(encoding="TeX")) annotation.text = text mathml = unescape(tostring(mathml_element, encoding="unicode")) @@ -300,14 +284,8 @@ def _process_formula( return f"
{mathml}
" except Exception: - img_fallback = self._get_formula_image_fallback( - item=item, orig=orig, doc=doc - ) - if ( - image_mode == ImageRefMode.EMBEDDED - and len(item.prov) > 0 - and img_fallback - ): + img_fallback = self._get_formula_image_fallback(item=item, orig=orig, doc=doc) + if image_mode == ImageRefMode.EMBEDDED and len(item.prov) > 0 and img_fallback: return img_fallback elif text: return f"
{text}
" @@ -326,14 +304,12 @@ def _process_formula( return '
<div class="formula-not-decoded">Formula not decoded</div>
' - def _get_formula_image_fallback( - self, *, item: DocItem, orig: str, doc: DoclingDocument - ) -> Optional[str]: + def _get_formula_image_fallback(self, *, item: DocItem, orig: str, doc: DoclingDocument) -> Optional[str]: """Try to get an image fallback for a formula.""" item_image = item.get_image(doc=doc) if item_image is not None: img_ref = ImageRef.from_pil(item_image, dpi=72) - return "
" f'{orig}' "
" + return f'
{orig}
' return None @@ -362,7 +338,6 @@ def serialize( for i, row in enumerate(item.data.grid): body += "" for j, cell in enumerate(row): - rowspan, rowstart = ( cell.row_span, cell.start_row_offset_idx, @@ -378,9 +353,7 @@ def serialize( continue if isinstance(cell, RichTableCell): - ser_res = doc_serializer.serialize( - item=cell.ref.resolve(doc=doc), **kwargs - ) + ser_res = doc_serializer.serialize(item=cell.ref.resolve(doc=doc), **kwargs) content = ser_res.text span_source = [ser_res] else: @@ -448,7 +421,6 @@ def get_img_row(imgb64: str, ind: int) -> str: img_text = "" if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs): - if params.image_mode == ImageRefMode.EMBEDDED: # short-cut: we already have the image in base64 if ( @@ -458,10 +430,7 @@ def get_img_row(imgb64: str, ind: int) -> str: ): img_text = f'' elif len(item.prov) > 1: # more than 1 provenance - - img_text = ( - '\n' - ) + img_text = '
\n' for ind, prov in enumerate(item.prov): img = item.get_image(doc, prov_index=ind) @@ -485,8 +454,7 @@ def get_img_row(imgb64: str, ind: int) -> str: elif params.image_mode == ImageRefMode.REFERENCED: if isinstance(item.image, ImageRef) and not ( - isinstance(item.image.uri, AnyUrl) - and item.image.uri.scheme == "data" + isinstance(item.image.uri, AnyUrl) and item.image.uri.scheme == "data" ): img_text = f'' @@ -499,21 +467,13 @@ def get_img_row(imgb64: str, ind: int) -> str: kind=PictureTabularChartData.model_fields["kind"].default, ): # Check if picture has attached PictureTabularChartData - tabular_chart_annotations = [ - ann - for ann in item.annotations - if isinstance(ann, PictureTabularChartData) - ] + tabular_chart_annotations = [ann for ann in item.annotations if isinstance(ann, PictureTabularChartData)] if len(tabular_chart_annotations) > 0: temp_doc = DoclingDocument(name="temp") - temp_table = temp_doc.add_table( - data=tabular_chart_annotations[0].chart_data - ) + temp_table = temp_doc.add_table(data=tabular_chart_annotations[0].chart_data) html_table_content = temp_table.export_to_html(temp_doc) if len(html_table_content) > 0: - res_parts.append( - create_ser_result(text=html_table_content, span_source=item) - ) + res_parts.append(create_ser_result(text=html_table_content, span_source=item)) text_res = "".join([r.text for r in res_parts]) if text_res: @@ -537,30 +497,19 @@ def serialize( cell_map = {cell.cell_id: cell for cell in graph_data.cells} # Build relationship maps - child_links: dict[int, list[int]] = ( - {} - ) # source_id -> list of child_ids (to_child) + child_links: dict[int, list[int]] = {} # source_id -> list of child_ids (to_child) value_links: dict[int, list[int]] = {} # key_id -> list of value_ids (to_value) - parents: set[int] = ( - set() - ) # Set of all IDs that are targets of to_child (to find roots) + parents: set[int] = set() # Set of all IDs that are targets of to_child (to find roots) for link in graph_data.links: - if ( - link.source_cell_id not in cell_map - or link.target_cell_id not in cell_map - ): + if link.source_cell_id not in cell_map or link.target_cell_id not in cell_map: continue if link.label.value == "to_child": - child_links.setdefault(link.source_cell_id, []).append( - link.target_cell_id - ) + child_links.setdefault(link.source_cell_id, []).append(link.target_cell_id) parents.add(link.target_cell_id) elif link.label.value == "to_value": - value_links.setdefault(link.source_cell_id, []).append( - link.target_cell_id - ) + value_links.setdefault(link.source_cell_id, []).append(link.target_cell_id) # Find root cells (cells with no parent) root_ids = [cell_id for cell_id in cell_map.keys() if cell_id not in parents] @@ -624,7 +573,7 @@ def _render_cell_tree( cell_text = f"{cell_text}: {', '.join(value_texts)}" # If this cell has children, create a nested list - if cell_id in child_links and child_links[cell_id]: + if child_links.get(cell_id): children_html = [] children_html.append(f"
  • {cell_text}
  • ") children_html.append("
    ") @@ -1107,19 +1043,12 @@ def serialize_captions( if DocItemLabel.CAPTION in params.labels: for cap in item.captions: - if ( - isinstance(it := cap.resolve(self.doc), TextItem) - and it.self_ref not in excluded_refs - ): + if isinstance(it := cap.resolve(self.doc), TextItem) and it.self_ref not in excluded_refs: text_cap = it.text text_dir = get_text_direction(text_cap) dir_str = f' dir="{text_dir}"' if text_dir == "rtl" else "" cap_ser_res = create_ser_result( - text=( - f'
    ' - f"{html.escape(text_cap)}" - f"
    " - ), + text=(f'
    {html.escape(text_cap)}
    '), span_source=it, ) results.append(cap_ser_res) @@ -1157,15 +1086,11 @@ def _generate_head(self) -> str: else: head_parts.append("Docling Document") - head_parts.append( - '' - ) + head_parts.append('') # Add default styles or custom CSS if params.css_styles: - if params.css_styles.startswith("" - ): + if params.css_styles.startswith(""): head_parts.append(f"\n{params.css_styles}\n") else: head_parts.append(f"") diff --git a/docling_core/transforms/serializer/latex.py b/docling_core/transforms/serializer/latex.py index 10ff899b..69edc7bc 100644 --- a/docling_core/transforms/serializer/latex.py +++ b/docling_core/transforms/serializer/latex.py @@ -187,9 +187,7 @@ def serialize( ) lvl = item.level if lvl <= 0 or lvl >= 4: - raise ValueError( - "LaTeX serializer: SectionHeaderItem.level must be in [1, 3]" - ) + raise ValueError("LaTeX serializer: SectionHeaderItem.level must be in [1, 3]") cmd = {1: "section", 2: "subsection", 3: "subsubsection"}[lvl] text_part = f"\\{cmd}{{{text}}}" post_process = False @@ -267,9 +265,7 @@ def serialize( if len(lines) <= 1: comment_text = f"% annotation[{ann.kind}]: {ann_text}" else: - prefixed_lines = [f"% annotation[{ann.kind}]: {lines[0]}"] + [ - f"% {ln}" for ln in lines[1:] - ] + prefixed_lines = [f"% annotation[{ann.kind}]: {lines[0]}"] + [f"% {ln}" for ln in lines[1:]] comment_text = "\n".join(prefixed_lines) res_parts.append( create_ser_result( @@ -310,15 +306,9 @@ def serialize( body_row: list[str] = [] for cell in row: if isinstance(cell, RichTableCell): - cell_text = doc_serializer.serialize( - item=cell.ref.resolve(doc=doc), **kwargs - ).text + cell_text = doc_serializer.serialize(item=cell.ref.resolve(doc=doc), **kwargs).text else: - cell_text = ( - _escape_latex(cell.text) - if params.escape_latex - else cell.text - ) + cell_text = _escape_latex(cell.text) if params.escape_latex else cell.text body_row.append(cell_text.replace("\n", " ")) body_rows.append(body_row) @@ -348,9 +338,7 @@ def serialize( if table_text: content.append(table_text) content.append("\\end{table}") - res_parts.append( - create_ser_result(text="\n".join(content), span_source=item) - ) + res_parts.append(create_ser_result(text="\n".join(content), span_source=item)) return create_ser_result( text="\n\n".join([r.text for r in res_parts if r.text]), @@ -401,25 +389,15 @@ def serialize( fig_lines.append(ann_res.text) fig_lines.append("\\end{figure}") - res_parts.append( - create_ser_result(text="\n".join(fig_lines), span_source=item) - ) + res_parts.append(create_ser_result(text="\n".join(fig_lines), span_source=item)) # Optional chart data as a simple table after the figure if params.enable_chart_tables: - tabular_chart_annotations = [ - ann - for ann in item.annotations - if isinstance(ann, PictureTabularChartData) - ] + tabular_chart_annotations = [ann for ann in item.annotations if isinstance(ann, PictureTabularChartData)] if tabular_chart_annotations: temp_doc = DoclingDocument(name="temp") - temp_table = temp_doc.add_table( - data=tabular_chart_annotations[0].chart_data - ) - latex_table_content = ( - LaTeXDocSerializer(doc=temp_doc).serialize(item=temp_table).text - ) + temp_table = temp_doc.add_table(data=tabular_chart_annotations[0].chart_data) + latex_table_content = LaTeXDocSerializer(doc=temp_doc).serialize(item=temp_table).text if latex_table_content: res_parts.append( create_ser_result( @@ -450,7 +428,7 @@ def _serialize_image_part( return create_ser_result(text=image_placeholder, span_source=item) else: return create_ser_result( - 
text=f"\\includegraphics[width=\\linewidth]{{{str(item.image.uri)}}}", + text=f"\\includegraphics[width=\\linewidth]{{{item.image.uri!s}}}", span_source=item, ) else: # EMBEDDED not supported natively @@ -523,11 +501,7 @@ def serialize( env = "enumerate" if item.first_item_is_enumerated(doc) else "itemize" indent_str = " " * (list_level * params.indent) content = "\n".join([p.text for p in parts if p.text]) - text_res = ( - f"{indent_str}\\begin{{{env}}}\n{content}\n{indent_str}\\end{{{env}}}" - if content - else "" - ) + text_res = f"{indent_str}\\begin{{{env}}}\n{content}\n{indent_str}\\end{{{env}}}" if content else "" return create_ser_result(text=text_res, span_source=parts) @@ -682,11 +656,7 @@ def serialize_doc( if title_cmd: preamble_lines.append(title_cmd) - header = ( - "\n".join(preamble_lines + ["", "\\begin{document}"]) - if preamble_lines - else "\\begin{document}" - ) + header = "\n".join(preamble_lines + ["", "\\begin{document}"]) if preamble_lines else "\\begin{document}" footer = "\\end{document}" # Compose final document with optional \maketitle after begin{document} diff --git a/docling_core/transforms/serializer/markdown.py b/docling_core/transforms/serializer/markdown.py index 0702241b..bd371362 100644 --- a/docling_core/transforms/serializer/markdown.py +++ b/docling_core/transforms/serializer/markdown.py @@ -159,28 +159,22 @@ def serialize( if isinstance(item, ListItem): pieces: list[str] = [] - case_auto = ( - params.orig_list_item_marker_mode == OrigListItemMarkerMode.AUTO - and bool(re.search(r"[a-zA-Z0-9]", item.marker)) + case_auto = params.orig_list_item_marker_mode == OrigListItemMarkerMode.AUTO and bool( + re.search(r"[a-zA-Z0-9]", item.marker) ) case_already_valid = ( params.ensure_valid_list_item_marker - and params.orig_list_item_marker_mode - != OrigListItemMarkerMode.NEVER - and ( - item.marker in ["-", "*", "+"] - or re.fullmatch(r"\d+\.", item.marker) - ) + and params.orig_list_item_marker_mode != OrigListItemMarkerMode.NEVER + and (item.marker in ["-", "*", "+"] or re.fullmatch(r"\d+\.", item.marker)) ) # wrap with outer marker (if applicable) if params.ensure_valid_list_item_marker and not case_already_valid: - assert item.parent and isinstance( - (list_group := item.parent.resolve(doc)), ListGroup - ) + assert item.parent + list_group = item.parent.resolve(doc) + assert isinstance(list_group, ListGroup) if list_group.first_item_is_enumerated(doc) and ( - params.orig_list_item_marker_mode != OrigListItemMarkerMode.AUTO - or not item.marker + params.orig_list_item_marker_mode != OrigListItemMarkerMode.AUTO or not item.marker ): pos = -1 for i, child in enumerate(list_group.children): @@ -267,21 +261,11 @@ def serialize( text="\n\n".join( [ tmp - for key in ( - list(item.meta.__class__.model_fields) - + list(item.meta.get_custom_part()) - ) + for key in (list(item.meta.__class__.model_fields) + list(item.meta.get_custom_part())) if ( - ( - params.allowed_meta_names is None - or key in params.allowed_meta_names - ) + (params.allowed_meta_names is None or key in params.allowed_meta_names) and (key not in params.blocked_meta_names) - and ( - tmp := self._serialize_meta_field( - item.meta, key, params.mark_meta - ) - ) + and (tmp := self._serialize_meta_field(item.meta, key, params.mark_meta)) ) ] if item.meta @@ -291,9 +275,7 @@ def serialize( # NOTE for now using an empty span source for GroupItems ) - def _serialize_meta_field( - self, meta: BaseMeta, name: str, mark_meta: bool - ) -> Optional[str]: + def _serialize_meta_field(self, meta: BaseMeta, 
name: str, mark_meta: bool) -> Optional[str]: if (field_val := getattr(meta, name)) is not None: if isinstance(field_val, SummaryMetaField): txt = field_val.text @@ -315,9 +297,7 @@ def _serialize_meta_field( txt = tmp else: return None - return ( - f"[{self._humanize_text(name, title=True)}] {txt}" if mark_meta else txt - ) + return f"[{self._humanize_text(name, title=True)}] {txt}" if mark_meta else txt else: return None @@ -349,11 +329,7 @@ def serialize( if ann_text := _get_annotation_text(ann): ann_res = create_ser_result( text=( - ( - f'' - f"{ann_text}" - f"" - ) + (f'{ann_text}') if params.mark_annotations else ann_text ), @@ -390,9 +366,7 @@ def serialize( res_parts.append(cap_res) if item.self_ref not in doc_serializer.get_excluded_refs(**kwargs): - if _should_use_legacy_annotations(params=params, item=item): - ann_res = doc_serializer.serialize_annotations( item=item, **kwargs, @@ -405,9 +379,7 @@ def serialize( # make sure that md tables are not broken # due to newline chars in the text ( - doc_serializer.serialize( - item=col.ref.resolve(doc=doc), **kwargs - ).text + doc_serializer.serialize(item=col.ref.resolve(doc=doc), **kwargs).text if isinstance(col, RichTableCell) else col.text ).replace("\n", " ") @@ -483,21 +455,13 @@ def serialize( kind=PictureTabularChartData.model_fields["kind"].default, ): # Check if picture has attached PictureTabularChartData - tabular_chart_annotations = [ - ann - for ann in item.annotations - if isinstance(ann, PictureTabularChartData) - ] + tabular_chart_annotations = [ann for ann in item.annotations if isinstance(ann, PictureTabularChartData)] if len(tabular_chart_annotations) > 0: temp_doc = DoclingDocument(name="temp") - temp_table = temp_doc.add_table( - data=tabular_chart_annotations[0].chart_data - ) + temp_table = temp_doc.add_table(data=tabular_chart_annotations[0].chart_data) md_table_content = temp_table.export_to_markdown(temp_doc) if len(md_table_content) > 0: - res_parts.append( - create_ser_result(text=md_table_content, span_source=item) - ) + res_parts.append(create_ser_result(text=md_table_content, span_source=item)) text_res = "\n\n".join([r.text for r in res_parts if r.text]) return create_ser_result(text=text_res, span_source=res_parts) @@ -511,9 +475,7 @@ def _serialize_image_part( **kwargs: Any, ) -> SerializationResult: error_response = ( - "" + "" ) if image_mode == ImageRefMode.PLACEHOLDER: text_res = image_placeholder @@ -543,7 +505,7 @@ def _serialize_image_part( ): text_res = image_placeholder else: - text_res = f"![Image]({str(item.image.uri)})" + text_res = f"![Image]({item.image.uri!s})" else: text_res = image_placeholder @@ -739,7 +701,7 @@ def serialize_hyperlink( **kwargs: Any, ): """Apply Markdown-specific hyperlink serialization.""" - return f"[{text}]({str(hyperlink)})" + return f"[{text}]({hyperlink!s})" @classmethod def _escape_underscores(cls, text: str): diff --git a/docling_core/transforms/visualizer/key_value_visualizer.py b/docling_core/transforms/visualizer/key_value_visualizer.py index b0198455..7e22c89d 100644 --- a/docling_core/transforms/visualizer/key_value_visualizer.py +++ b/docling_core/transforms/visualizer/key_value_visualizer.py @@ -81,9 +81,7 @@ def _draw_key_value_layer( if cell.prov is None or cell.prov.page_no != page_no: continue # skip cells not on this page or without bbox - tl_bbox = cell.prov.bbox.to_top_left_origin( - page_height=doc.pages[page_no].size.height - ) + tl_bbox = cell.prov.bbox.to_top_left_origin(page_height=doc.pages[page_no].size.height) x0, y0, x1, y1 = 
tl_bbox.as_tuple() x0 *= scale_x x1 *= scale_x @@ -133,9 +131,7 @@ def _draw_key_value_layer( continue # only draw if both ends are on this page def _centre(bbox): - tl = bbox.to_top_left_origin( - page_height=doc.pages[page_no].size.height - ) + tl = bbox.to_top_left_origin(page_height=doc.pages[page_no].size.height) l, t, r, b = tl.as_tuple() return ((l + r) / 2 * scale_x, (t + b) / 2 * scale_y) @@ -162,9 +158,7 @@ def _centre(bbox): tgt_xy[0] - ux * arrow_len + px * arrow_len / 2, tgt_xy[1] - uy * arrow_len + py * arrow_len / 2, ) - draw.polygon( - [tgt_xy, head_base_left, head_base_right], fill=_LINK_COLOUR - ) + draw.polygon([tgt_xy, head_base_left, head_base_right], fill=_LINK_COLOUR) # --------------------------------------------------------------------- # Public API – BaseVisualizer implementation @@ -180,9 +174,7 @@ def get_visualization( ) -> dict[Optional[int], Image]: """Return page‑wise images with key/value overlay (incl. base layer).""" base_images = ( - self.base_visualizer.get_visualization( - doc=doc, included_content_layers=included_content_layers, **kwargs - ) + self.base_visualizer.get_visualization(doc=doc, included_content_layers=included_content_layers, **kwargs) if self.base_visualizer else None ) diff --git a/docling_core/transforms/visualizer/layout_visualizer.py b/docling_core/transforms/visualizer/layout_visualizer.py index 886ad8b4..190f6fd0 100644 --- a/docling_core/transforms/visualizer/layout_visualizer.py +++ b/docling_core/transforms/visualizer/layout_visualizer.py @@ -45,9 +45,7 @@ class Params(BaseModel): base_visualizer: Optional[BaseVisualizer] = None params: Params = Params() - def _draw_clusters( - self, image: Image, clusters: list[_TLCluster], scale_x: float, scale_y: float - ) -> None: + def _draw_clusters(self, image: Image, clusters: list[_TLCluster], scale_x: float, scale_y: float) -> None: """Draw clusters on an image.""" draw = ImageDraw.Draw(image, "RGBA") # Create a smaller font for the labels @@ -148,9 +146,7 @@ def _draw_doc_layout( prev_image = None prev_page_nr = None for idx, (elem, _) in enumerate( - doc.iterate_items( - included_content_layers=included_content_layers, traverse_pictures=True - ) + doc.iterate_items(included_content_layers=included_content_layers, traverse_pictures=True) ): if not isinstance(elem, DocItem): continue @@ -171,16 +167,12 @@ def _draw_doc_layout( self._draw_clusters( image=prev_image, clusters=clusters, - scale_x=prev_image.width - / doc.pages[prev_page_nr].size.width, - scale_y=prev_image.height - / doc.pages[prev_page_nr].size.height, + scale_x=prev_image.width / doc.pages[prev_page_nr].size.width, + scale_y=prev_image.height / doc.pages[prev_page_nr].size.height, ) clusters = [] - tlo_bbox = prov.bbox.to_top_left_origin( - page_height=doc.pages[prov.page_no].size.height - ) + tlo_bbox = prov.bbox.to_top_left_origin(page_height=doc.pages[prov.page_no].size.height) cluster = _TLCluster( id=idx, label=elem.label, @@ -211,11 +203,7 @@ def get_visualization( **kwargs, ) -> dict[Optional[int], Image]: """Get visualization of the document as images by page.""" - base_images = ( - self.base_visualizer.get_visualization(doc=doc, **kwargs) - if self.base_visualizer - else None - ) + base_images = self.base_visualizer.get_visualization(doc=doc, **kwargs) if self.base_visualizer else None return self._draw_doc_layout( doc=doc, images=base_images, diff --git a/docling_core/transforms/visualizer/reading_order_visualizer.py b/docling_core/transforms/visualizer/reading_order_visualizer.py index c012f22b..8f8f0803 
100644 --- a/docling_core/transforms/visualizer/reading_order_visualizer.py +++ b/docling_core/transforms/visualizer/reading_order_visualizer.py @@ -31,16 +31,12 @@ class Params(BaseModel): show_label: bool = True show_branch_numbering: bool = False - content_layers: set[ContentLayer] = { - cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND - } + content_layers: set[ContentLayer] = {cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND} base_visualizer: Optional[BaseVisualizer] = None params: Params = Params() - def _get_picture_context( - self, elem: DocItem, doc: DoclingDocument - ) -> Optional[str]: + def _get_picture_context(self, elem: DocItem, doc: DoclingDocument) -> Optional[str]: """Get the picture self_ref if element is nested inside a PictureItem, None otherwise.""" current = elem while current.parent is not None: @@ -112,14 +108,10 @@ def _draw_doc_reading_order( # Separate reading order paths for outside vs inside pictures # Key: (page_no, picture_ref_or_None) -> (x0, y0, element_index) # picture_ref is None for elements outside any picture, otherwise the picture's self_ref - reading_order_state: dict[ - tuple[int, Optional[str]], tuple[float, float, int] - ] = {} + reading_order_state: dict[tuple[int, Optional[str]], tuple[float, float, int]] = {} number_data_to_draw: dict[int, list[_NumberDrawingData]] = {} # Only int keys are used (from prov.page_no), even if input images has Optional[int] keys - my_images: dict[int, Image] = { - k: v for k, v in (images or {}).items() if k is not None - } + my_images: dict[int, Image] = {k: v for k, v in (images or {}).items() if k is not None} prev_page: Optional[int] = None element_index = 0 @@ -153,23 +145,16 @@ def _draw_doc_reading_order( if image is None: page_image = doc.pages[page_no].image - if ( - page_image is None - or (pil_img := page_image.pil_image) is None - ): - raise RuntimeError( - "Cannot visualize document without images" - ) + if page_image is None or (pil_img := page_image.pil_image) is None: + raise RuntimeError("Cannot visualize document without images") else: image = deepcopy(pil_img) my_images[page_no] = image draw = ImageDraw.Draw(image, "RGBA") - tlo_bbox = prov.bbox.to_top_left_origin( - page_height=doc.pages[prov.page_no].size.height - ) + tlo_bbox = prov.bbox.to_top_left_origin(page_height=doc.pages[prov.page_no].size.height) ro_bbox = tlo_bbox.normalized(doc.pages[prov.page_no].size) - ro_bbox.l = round(ro_bbox.l * image.width) # noqa: E741 + ro_bbox.l = round(ro_bbox.l * image.width) ro_bbox.r = round(ro_bbox.r * image.width) ro_bbox.t = round(ro_bbox.t * image.height) ro_bbox.b = round(ro_bbox.b * image.height) @@ -214,7 +199,6 @@ def _draw_doc_reading_order( draw = ImageDraw.Draw(image, "RGBA") for num_item in number_data_to_draw[page]: - text_bbox = draw.textbbox(num_item.xy, num_item.text, font) text_bg_padding = 5 draw.ellipse( @@ -247,11 +231,7 @@ def get_visualization( **kwargs, ) -> dict[Optional[int], Image]: """Get visualization of the document as images by page.""" - base_images = ( - self.base_visualizer.get_visualization(doc=doc, **kwargs) - if self.base_visualizer - else None - ) + base_images = self.base_visualizer.get_visualization(doc=doc, **kwargs) if self.base_visualizer else None return self._draw_doc_reading_order( doc=doc, images=base_images, diff --git a/docling_core/transforms/visualizer/table_visualizer.py b/docling_core/transforms/visualizer/table_visualizer.py index 0a722959..2b7b6813 100644 --- a/docling_core/transforms/visualizer/table_visualizer.py +++ 
b/docling_core/transforms/visualizer/table_visualizer.py @@ -57,20 +57,15 @@ def _draw_table_cells( for cell in table.data.table_cells: if cell.bbox is not None: - tl_bbox = cell.bbox.to_top_left_origin(page_height=page_height) cell_color = self.params.cell_color # Transparent black for cells cell_outline = self.params.cell_outline if cell.column_header: - cell_color = ( - self.params.col_header_color - ) # Transparent black for cells + cell_color = self.params.col_header_color # Transparent black for cells cell_outline = self.params.col_header_outline if cell.row_header: - cell_color = ( - self.params.row_header_color - ) # Transparent black for cells + cell_color = self.params.row_header_color # Transparent black for cells cell_outline = self.params.row_header_outline if cell.row_section: cell_color = self.params.row_header_color @@ -102,7 +97,6 @@ def _draw_table_rows( rows = table.data.get_row_bounding_boxes() for rid, bbox in rows.items(): - tl_bbox = bbox.to_top_left_origin(page_height=page_height) cx0, cy0, cx1, cy1 = tl_bbox.as_tuple() @@ -131,7 +125,6 @@ def _draw_table_cols( cols = table.data.get_column_bounding_boxes() for cid, bbox in cols.items(): - tl_bbox = bbox.to_top_left_origin(page_height=page_height) cx0, cy0, cx1, cy1 = tl_bbox.as_tuple() @@ -171,16 +164,13 @@ def _draw_doc_tables( image = deepcopy(pil_img) my_images[page_nr] = image - for idx, (elem, _) in enumerate( - doc.iterate_items(included_content_layers=included_content_layers) - ): + for idx, (elem, _) in enumerate(doc.iterate_items(included_content_layers=included_content_layers)): if not isinstance(elem, TableItem): continue if len(elem.prov) == 0: continue # Skip elements without provenances if len(elem.prov) == 1: - page_nr = elem.prov[0].page_no if page_nr in my_images: @@ -229,11 +219,7 @@ def get_visualization( **kwargs, ) -> dict[Optional[int], Image]: """Get visualization of the document as images by page.""" - base_images = ( - self.base_visualizer.get_visualization(doc=doc, **kwargs) - if self.base_visualizer - else None - ) + base_images = self.base_visualizer.get_visualization(doc=doc, **kwargs) if self.base_visualizer else None return self._draw_doc_tables( doc=doc, images=base_images, diff --git a/docling_core/types/base.py b/docling_core/types/base.py index 62460b4e..5eceab70 100644 --- a/docling_core/types/base.py +++ b/docling_core/types/base.py @@ -53,14 +53,10 @@ StrictDateTime = Annotated[ datetime, WrapValidator(validate_datetime), - PlainSerializer( - lambda x: x.astimezone(tz=timezone.utc).isoformat(), return_type=str - ), + PlainSerializer(lambda x: x.astimezone(tz=timezone.utc).isoformat(), return_type=str), ] -ACQUISITION_TYPE = Literal[ - "API", "FTP", "Download", "Link", "Web scraping/Crawling", "Other" -] +ACQUISITION_TYPE = Literal["API", "FTP", "Download", "Link", "Web scraping/Crawling", "Other"] class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"): @@ -68,16 +64,11 @@ class Identifier(AliasModel, Generic[IdentifierTypeT], extra="forbid"): type_: IdentifierTypeT = Field( alias="type", - description=( - "A string representing a collection or database that contains this " - "data object." - ), + description=("A string representing a collection or database that contains this data object."), json_schema_extra=es_field(type="keyword", ignore_above=8191), ) value: StrictStr = Field( - description=( - "The identifier value of the data object within a collection or database." 
- ), + description=("The identifier value of the data object within a collection or database."), json_schema_extra=es_field(type="keyword", ignore_above=8191), ) name: str = Field( @@ -103,8 +94,7 @@ def name_from_type_value(cls, v, info: ValidationInfo): and v != f"{info.data['type_'].lower()}#{info.data['value'].lower()}" ): raise ValueError( - "the _name field must be the concatenation of type and value in lower " - "case, separated by hash (#)" + "the _name field must be the concatenation of type and value in lower case, separated by hash (#)" ) return v @@ -134,9 +124,7 @@ class Log(AliasModel, extra="forbid"): description="A description of the task or any comments in natural language.", ) date: StrictDateTime = Field( - description=( - "A string representation of the task execution datetime in ISO 8601 format." - ) + description=("A string representation of the task execution datetime in ISO 8601 format.") ) @@ -149,18 +137,12 @@ class FileInfoObject(AliasModel): ) fileprov: Optional[StrictStr] = Field( default=None, - description=( - "The provenance of this data object, e.g. an archive file, a URL, or any" - " other repository." - ), + description=("The provenance of this data object, e.g. an archive file, a URL, or any other repository."), alias="filename-prov", json_schema_extra=es_field(type="keyword", ignore_above=8191), ) document_hash: StrictStr = Field( - description=( - "A unique identifier of this data object within a collection of a " - "Docling database" - ), + description=("A unique identifier of this data object within a collection of a Docling database"), alias="document-hash", json_schema_extra=es_field(type="keyword", ignore_above=8191), ) @@ -177,9 +159,7 @@ class CollectionTypeEnum(str, Enum): CollectionTypeT = TypeVar("CollectionTypeT", bound=CollectionTypeEnum) -class CollectionInfo( - BaseModel, Generic[CollectionNameTypeT, CollectionTypeT], extra="forbid" -): +class CollectionInfo(BaseModel, Generic[CollectionNameTypeT, CollectionTypeT], extra="forbid"): """Information of a collection.""" name: Optional[CollectionNameTypeT] = Field( @@ -192,9 +172,7 @@ class CollectionInfo( description="The collection type.", json_schema_extra=es_field(type="keyword", ignore_above=8191), ) - version: Optional[ - Annotated[str, StringConstraints(pattern=VERSION_PATTERN, strict=True)] - ] = Field( + version: Optional[Annotated[str, StringConstraints(pattern=VERSION_PATTERN, strict=True)]] = Field( default=None, description="The version of this collection model.", json_schema_extra=es_field(type="keyword", ignore_above=8191), @@ -231,9 +209,7 @@ class Acquisition(BaseModel, extra="forbid"): ) date: Optional[StrictDateTime] = Field( default=None, - description=( - "A string representation of the acquisition datetime in ISO 8601 format." 
- ), + description=("A string representation of the acquisition datetime in ISO 8601 format."), ) link: Optional[AnyUrl] = Field( default=None, diff --git a/docling_core/types/doc/base.py b/docling_core/types/doc/base.py index f4a020e4..d13a6b81 100644 --- a/docling_core/types/doc/base.py +++ b/docling_core/types/doc/base.py @@ -28,14 +28,10 @@ class PydanticSerCtxKey(str, Enum): CONFID_PREC = "confid_prec" # key for confidence values precision -def round_pydantic_float( - val: float, ctx: Any, precision_ctx_key: PydanticSerCtxKey -) -> float: +def round_pydantic_float(val: float, ctx: Any, precision_ctx_key: PydanticSerCtxKey) -> float: """Round float, provided the precision is available in the context.""" precision = ( - ctx.get(precision_ctx_key.value) - if isinstance(ctx, dict) - else getattr(ctx, precision_ctx_key.value, None) + ctx.get(precision_ctx_key.value) if isinstance(ctx, dict) else getattr(ctx, precision_ctx_key.value, None) ) return round(val, precision) if isinstance(precision, int) else val @@ -104,9 +100,7 @@ def scaled(self, scale: float): # same as before, but using the implementation above def normalized(self, page_size: Size): """normalized.""" - return self.scale_to_size( - old_size=page_size, new_size=Size(height=1.0, width=1.0) - ) + return self.scale_to_size(old_size=page_size, new_size=Size(height=1.0, width=1.0)) def expand_by_scale(self, x_scale: float, y_scale: float) -> "BoundingBox": """expand_to_size.""" @@ -190,9 +184,7 @@ def intersection_area_with(self, other: "BoundingBox") -> float: return width * height - def intersection_over_union( - self, other: "BoundingBox", eps: float = 1.0e-6 - ) -> float: + def intersection_over_union(self, other: "BoundingBox", eps: float = 1.0e-6) -> float: """intersection_over_union.""" intersection_area = self.intersection_area_with(other=other) @@ -204,9 +196,7 @@ def intersection_over_union( return intersection_area / (union_area + eps) - def intersection_over_self( - self, other: "BoundingBox", eps: float = 1.0e-6 - ) -> float: + def intersection_over_self(self, other: "BoundingBox", eps: float = 1.0e-6) -> float: """intersection_over_self.""" intersection_area = self.intersection_area_with(other=other) if self.area() > 0: @@ -244,17 +234,13 @@ def get_intersection_bbox(self, other: "BoundingBox") -> Optional["BoundingBox"] bottom = min(self.b, other.b) if right <= left or bottom <= top: return None - return BoundingBox( - l=left, t=top, r=right, b=bottom, coord_origin=self.coord_origin - ) + return BoundingBox(l=left, t=top, r=right, b=bottom, coord_origin=self.coord_origin) top = min(self.t, other.t) bottom = max(self.b, other.b) if right <= left or top <= bottom: return None - return BoundingBox( - l=left, t=top, r=right, b=bottom, coord_origin=self.coord_origin - ) + return BoundingBox(l=left, t=top, r=right, b=bottom, coord_origin=self.coord_origin) def to_top_left_origin(self, page_height: float) -> "BoundingBox": """to_top_left_origin. 
@@ -275,9 +261,7 @@ def to_top_left_origin(self, page_height: float) -> "BoundingBox": def overlaps(self, other: "BoundingBox") -> bool: """overlaps.""" - return self.overlaps_horizontally(other=other) and self.overlaps_vertically( - other=other - ) + return self.overlaps_horizontally(other=other) and self.overlaps_vertically(other=other) def overlaps_horizontally(self, other: "BoundingBox") -> bool: """Check if two bounding boxes overlap horizontally.""" @@ -296,13 +280,8 @@ def overlaps_vertically(self, other: "BoundingBox") -> bool: def overlaps_vertically_with_iou(self, other: "BoundingBox", iou: float) -> bool: """overlaps_y_with_iou.""" - if ( - self.coord_origin == CoordOrigin.BOTTOMLEFT - and other.coord_origin == CoordOrigin.BOTTOMLEFT - ): - + if self.coord_origin == CoordOrigin.BOTTOMLEFT and other.coord_origin == CoordOrigin.BOTTOMLEFT: if self.overlaps_vertically(other=other): - u0 = min(self.b, other.b) u1 = max(self.t, other.t) @@ -314,10 +293,7 @@ def overlaps_vertically_with_iou(self, other: "BoundingBox", iou: float) -> bool return False - elif ( - self.coord_origin == CoordOrigin.TOPLEFT - and other.coord_origin == CoordOrigin.TOPLEFT - ): + elif self.coord_origin == CoordOrigin.TOPLEFT and other.coord_origin == CoordOrigin.TOPLEFT: if self.overlaps_vertically(other=other): u0 = min(self.t, other.t) u1 = max(self.b, other.b) @@ -344,16 +320,10 @@ def is_strictly_left_of(self, other: "BoundingBox", eps: float = 0.001) -> bool: def is_above(self, other: "BoundingBox") -> bool: """is_above.""" - if ( - self.coord_origin == CoordOrigin.BOTTOMLEFT - and other.coord_origin == CoordOrigin.BOTTOMLEFT - ): + if self.coord_origin == CoordOrigin.BOTTOMLEFT and other.coord_origin == CoordOrigin.BOTTOMLEFT: return self.t > other.t - elif ( - self.coord_origin == CoordOrigin.TOPLEFT - and other.coord_origin == CoordOrigin.TOPLEFT - ): + elif self.coord_origin == CoordOrigin.TOPLEFT and other.coord_origin == CoordOrigin.TOPLEFT: return self.t < other.t else: @@ -363,16 +333,10 @@ def is_above(self, other: "BoundingBox") -> bool: def is_strictly_above(self, other: "BoundingBox", eps: float = 1.0e-3) -> bool: """is_strictly_above.""" - if ( - self.coord_origin == CoordOrigin.BOTTOMLEFT - and other.coord_origin == CoordOrigin.BOTTOMLEFT - ): + if self.coord_origin == CoordOrigin.BOTTOMLEFT and other.coord_origin == CoordOrigin.BOTTOMLEFT: return (self.b + eps) > other.t - elif ( - self.coord_origin == CoordOrigin.TOPLEFT - and other.coord_origin == CoordOrigin.TOPLEFT - ): + elif self.coord_origin == CoordOrigin.TOPLEFT and other.coord_origin == CoordOrigin.TOPLEFT: return (self.b + eps) < other.t else: @@ -380,9 +344,7 @@ def is_strictly_above(self, other: "BoundingBox", eps: float = 1.0e-3) -> bool: return False - def is_horizontally_connected( - self, elem_i: "BoundingBox", elem_j: "BoundingBox" - ) -> bool: + def is_horizontally_connected(self, elem_i: "BoundingBox", elem_j: "BoundingBox") -> bool: """is_horizontally_connected.""" if ( self.coord_origin == CoordOrigin.BOTTOMLEFT diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 414640d6..006d15b2 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -334,8 +334,7 @@ def from_dict_format(cls, data: Any) -> Any: # "bbox" not in data # or data["bbox"] is None # or isinstance(data["bbox"], BoundingBox) - "text" - in data + "text" in data ): return data text = data.get("bbox", {}).get("token", "") @@ -364,9 +363,7 @@ def _get_text(self, doc: 
Optional["DoclingDocument"] = None, **kwargs: Any) -> s from docling_core.transforms.serializer.markdown import MarkdownDocSerializer if doc is not None: - doc_serializer = kwargs.pop( - "doc_serializer", MarkdownDocSerializer(doc=doc) - ) + doc_serializer = kwargs.pop("doc_serializer", MarkdownDocSerializer(doc=doc)) ser_res = doc_serializer.serialize(item=self.ref.resolve(doc=doc), **kwargs) return ser_res.text else: @@ -421,9 +418,7 @@ def grid( return table_data - def remove_rows( - self, indices: List[int], doc: Optional["DoclingDocument"] = None - ) -> List[List[TableCell]]: + def remove_rows(self, indices: List[int], doc: Optional["DoclingDocument"] = None) -> List[List[TableCell]]: """Remove rows from the table by their indices. :param indices: List[int]: A list of indices of the rows to remove. (Starting from 0) @@ -486,9 +481,7 @@ def pop_row(self, doc: Optional["DoclingDocument"] = None) -> List[TableCell]: return self.remove_row(self.num_rows - 1, doc=doc) - def remove_row( - self, row_index: int, doc: Optional["DoclingDocument"] = None - ) -> List[TableCell]: + def remove_row(self, row_index: int, doc: Optional["DoclingDocument"] = None) -> List[TableCell]: """Remove a row from the table by its index. :param row_index: int: The index of the row to remove. (Starting from 0) @@ -497,9 +490,7 @@ def remove_row( """ return self.remove_rows([row_index], doc=doc)[0] - def insert_rows( - self, row_index: int, rows: List[List[str]], after: bool = False - ) -> None: + def insert_rows(self, row_index: int, rows: List[List[str]], after: bool = False) -> None: """Insert multiple new rows from a list of lists of strings before/after a specific index in the table. :param row_index: int: The index at which to insert the new rows. (Starting from 0) @@ -523,16 +514,12 @@ def insert_row(self, row_index: int, row: List[str], after: bool = False) -> Non :returns: None """ if len(row) != self.num_cols: - raise ValueError( - f"Row length {len(row)} does not match the number of columns {self.num_cols}." - ) + raise ValueError(f"Row length {len(row)} does not match the number of columns {self.num_cols}.") effective_index = row_index + (1 if after else 0) if effective_index < 0 or effective_index > self.num_rows: - raise IndexError( - f"Row index {row_index} is out of bounds for the current number of rows {self.num_rows}." 
- ) + raise IndexError(f"Row index {row_index} is out of bounds for the current number of rows {self.num_rows}.") new_row_cells = [ TableCell( @@ -604,12 +591,7 @@ def get_row_bounding_boxes(self) -> dict[int, BoundingBox]: # Collect all cells in this row that have bounding boxes for cell in self.table_cells: - - if ( - cell.bbox is not None - and cell.start_row_offset_idx <= row_idx < cell.end_row_offset_idx - ): - + if cell.bbox is not None and cell.start_row_offset_idx <= row_idx < cell.end_row_offset_idx: row_span = cell.end_row_offset_idx - cell.start_row_offset_idx if row_span in row_cells_with_bbox: row_cells_with_bbox[row_span].append(cell.bbox) @@ -619,9 +601,7 @@ def get_row_bounding_boxes(self) -> dict[int, BoundingBox]: # Calculate the enclosing bounding box for this row if len(row_cells_with_bbox) > 0: min_row_span = min(row_cells_with_bbox.keys()) - row_bbox: BoundingBox = BoundingBox.enclosing_bbox( - row_cells_with_bbox[min_row_span] - ) + row_bbox: BoundingBox = BoundingBox.enclosing_bbox(row_cells_with_bbox[min_row_span]) for rspan, bboxs in row_cells_with_bbox.items(): for bbox in bboxs: @@ -658,12 +638,7 @@ def get_column_bounding_boxes(self) -> dict[int, BoundingBox]: # Collect all cells in this row that have bounding boxes for cell in self.table_cells: - - if ( - cell.bbox is not None - and cell.start_col_offset_idx <= col_idx < cell.end_col_offset_idx - ): - + if cell.bbox is not None and cell.start_col_offset_idx <= col_idx < cell.end_col_offset_idx: col_span = cell.end_col_offset_idx - cell.start_col_offset_idx if col_span in col_cells_with_bbox: col_cells_with_bbox[col_span].append(cell.bbox) @@ -673,9 +648,7 @@ def get_column_bounding_boxes(self) -> dict[int, BoundingBox]: # Calculate the enclosing bounding box for this row if len(col_cells_with_bbox) > 0: min_col_span = min(col_cells_with_bbox.keys()) - col_bbox: BoundingBox = BoundingBox.enclosing_bbox( - col_cells_with_bbox[min_col_span] - ) + col_bbox: BoundingBox = BoundingBox.enclosing_bbox(col_cells_with_bbox[min_col_span]) for rspan, bboxs in col_cells_with_bbox.items(): for bbox in bboxs: @@ -701,10 +674,7 @@ def _dedupe_bboxes( """Return elements whose bounding boxes are unique within ``iou_threshold``.""" deduped: list[BoundingBox] = [] for element in elements: - if all( - element.intersection_over_union(kept) < iou_threshold - for kept in deduped - ): + if all(element.intersection_over_union(kept) < iou_threshold for kept in deduped): deduped.append(element) return deduped @@ -790,12 +760,8 @@ def span_from_merge( # 1) Add merged cells first (and mark their covered simple cells) for m in merges: - rspan = span_from_merge( - m, rows, axis="row", frac_threshold=row_overlap_threshold - ) - cspan = span_from_merge( - m, columns, axis="col", frac_threshold=col_overlap_threshold - ) + rspan = span_from_merge(m, rows, axis="row", frac_threshold=row_overlap_threshold) + cspan = span_from_merge(m, columns, axis="col", frac_threshold=col_overlap_threshold) if rspan is None or cspan is None: # Can't confidently map this merge to grid -> skip it continue @@ -846,10 +812,8 @@ def span_from_merge( if not inter: # In degenerate cases (big gaps), there might be no intersection; skip. 
continue - c_column_header, c_row_header, c_row_section = ( - cls._process_table_headers( - inter, col_headers, row_headers, row_sections - ) + c_column_header, c_row_header, c_row_section = cls._process_table_headers( + inter, col_headers, row_headers, row_sections ) cells.append( TableCell( @@ -886,47 +850,23 @@ def from_regions( default_containment_thresh = 0.5 rows.extend(row_sections) # use row sections to compensate for missing rows rows = cls._dedupe_bboxes( - [ - e - for e in rows - if e.intersection_over_self(table_bbox) >= default_containment_thresh - ] + [e for e in rows if e.intersection_over_self(table_bbox) >= default_containment_thresh] ) cols = cls._dedupe_bboxes( - [ - e - for e in cols - if e.intersection_over_self(table_bbox) >= default_containment_thresh - ] + [e for e in cols if e.intersection_over_self(table_bbox) >= default_containment_thresh] ) merges = cls._dedupe_bboxes( - [ - e - for e in merges - if e.intersection_over_self(table_bbox) >= default_containment_thresh - ] + [e for e in merges if e.intersection_over_self(table_bbox) >= default_containment_thresh] ) col_headers = cls._dedupe_bboxes( - [ - e - for e in col_headers - if e.intersection_over_self(table_bbox) >= default_containment_thresh - ] + [e for e in col_headers if e.intersection_over_self(table_bbox) >= default_containment_thresh] ) row_headers = cls._dedupe_bboxes( - [ - e - for e in row_headers - if e.intersection_over_self(table_bbox) >= default_containment_thresh - ] + [e for e in row_headers if e.intersection_over_self(table_bbox) >= default_containment_thresh] ) row_sections = cls._dedupe_bboxes( - [ - e - for e in row_sections - if e.intersection_over_self(table_bbox) >= default_containment_thresh - ] + [e for e in row_sections if e.intersection_over_self(table_bbox) >= default_containment_thresh] ) # Compute table cells from CVAT elements: rows, cols, merges @@ -1033,9 +973,7 @@ def parse_hex_string(cls, value): # Convert hex string to an integer hash_int = Uint64(value, 16) # Mask to fit within 64 bits (unsigned) - return ( - hash_int & 0xFFFFFFFFFFFFFFFF - ) # TODO be sure it doesn't clip uint64 max + return hash_int & 0xFFFFFFFFFFFFFFFF # TODO be sure it doesn't clip uint64 max except ValueError: raise ValueError(f"Invalid sha256 hexdigest: {value}") return value # If already an int, return it as is. 
@@ -1237,9 +1175,7 @@ def get_custom_part(self) -> dict[str, Any]: def _copy_without_extra(self) -> Self: """Create a copy without the extra fields.""" - return self.model_validate( - self.model_dump(exclude={ex for ex in self.get_custom_part()}) - ) + return self.model_validate(self.model_dump(exclude={ex for ex in self.get_custom_part()})) def _check_custom_field_format(self, key: str) -> None: parts = key.split(MetaUtils._META_FIELD_NAMESPACE_DELIMITER, maxsplit=1) @@ -1255,9 +1191,7 @@ def _validate_field_names(self) -> Self: if key in extra_dict: self._check_custom_field_format(key=key) elif MetaUtils._META_FIELD_NAMESPACE_DELIMITER in key: - raise ValueError( - f"Standard meta field name must not contain '__': {key}" - ) + raise ValueError(f"Standard meta field name must not contain '__': {key}") return self @@ -1332,18 +1266,14 @@ class PictureClassificationPrediction(BasePrediction): class PictureClassificationMetaField(_ExtraAllowingModel): """Picture classification metadata field.""" - predictions: list[PictureClassificationPrediction] = Field( - default_factory=list, min_length=1 - ) + predictions: list[PictureClassificationPrediction] = Field(default_factory=list, min_length=1) def get_main_prediction(self) -> PictureClassificationPrediction: """Get prediction with highest confidence (if confidence not available, first is used by convention).""" max_conf_pos: Optional[int] = None max_conf: Optional[float] = None for i, pred in enumerate(self.predictions): - if pred.confidence is not None and ( - max_conf is None or pred.confidence > max_conf - ): + if pred.confidence is not None and (max_conf is None or pred.confidence > max_conf): max_conf_pos = i max_conf = pred.confidence return self.predictions[max_conf_pos if max_conf_pos is not None else 0] @@ -1393,9 +1323,7 @@ def get_ref(self) -> RefItem: """get_ref.""" return RefItem(cref=self.self_ref) - def _get_parent_ref( - self, doc: "DoclingDocument", stack: list[int] - ) -> Optional[RefItem]: + def _get_parent_ref(self, doc: "DoclingDocument", stack: list[int]) -> Optional[RefItem]: """get_parent_ref.""" if len(stack) == 0: return self.parent @@ -1416,9 +1344,7 @@ def _delete_child(self, doc: "DoclingDocument", stack: list[int]) -> bool: return False - def _update_child( - self, doc: "DoclingDocument", stack: list[int], new_ref: RefItem - ) -> bool: + def _update_child(self, doc: "DoclingDocument", stack: list[int], new_ref: RefItem) -> bool: """Update child node in tree.""" if len(stack) == 1 and stack[0] < len(self.children): # ensure the parent is correct @@ -1433,12 +1359,9 @@ def _update_child( return False - def _add_child( - self, doc: "DoclingDocument", stack: list[int], new_ref: RefItem - ) -> bool: + def _add_child(self, doc: "DoclingDocument", stack: list[int], new_ref: RefItem) -> bool: """Append child to node identified by stack.""" if len(stack) == 0: - # ensure the parent is correct new_item = new_ref.resolve(doc=doc) new_item.parent = self.get_ref() @@ -1475,9 +1398,7 @@ def _add_sibling( return True elif len(stack) > 1 and stack[0] < len(self.children): item = self.children[stack[0]].resolve(doc) - return item._add_sibling( - doc=doc, stack=stack[1:], new_ref=new_ref, after=after - ) + return item._add_sibling(doc=doc, stack=stack[1:], new_ref=new_ref, after=after) return False @@ -1517,9 +1438,7 @@ def first_item_is_enumerated(self, doc: "DoclingDocument"): class OrderedList(GroupItem): """OrderedList.""" - label: typing.Literal[GroupLabel.ORDERED_LIST] = ( - GroupLabel.ORDERED_LIST # type: ignore[assignment] 
- ) + label: typing.Literal[GroupLabel.ORDERED_LIST] = GroupLabel.ORDERED_LIST # type: ignore[assignment] class InlineGroup(GroupItem): @@ -1528,9 +1447,7 @@ class InlineGroup(GroupItem): label: typing.Literal[GroupLabel.INLINE] = GroupLabel.INLINE -class DocItem( - NodeItem -): # Base type for any element that carries content, can be a leaf node +class DocItem(NodeItem): # Base type for any element that carries content, can be a leaf node """DocItem.""" label: DocItemLabel @@ -1564,9 +1481,7 @@ def get_location_tokens( return location - def get_image( - self, doc: "DoclingDocument", prov_index: int = 0 - ) -> Optional[PILImage.Image]: + def get_image(self, doc: "DoclingDocument", prov_index: int = 0) -> Optional[PILImage.Image]: """Returns the image of this DocItem. The function returns None if this DocItem has no valid provenance or @@ -1634,9 +1549,7 @@ class TextItem(DocItem): text: str # sanitized representation formatting: Optional[Formatting] = None - hyperlink: Optional[Union[AnyUrl, Path]] = Field( - union_mode="left_to_right", default=None - ) + hyperlink: Optional[Union[AnyUrl, Path]] = Field(union_mode="left_to_right", default=None) @deprecated("Use export_to_doctags() instead.") def export_to_document_tokens(self, *args, **kwargs): @@ -1683,17 +1596,13 @@ def export_to_doctags( class TitleItem(TextItem): """TitleItem.""" - label: typing.Literal[DocItemLabel.TITLE] = ( - DocItemLabel.TITLE # type: ignore[assignment] - ) + label: typing.Literal[DocItemLabel.TITLE] = DocItemLabel.TITLE # type: ignore[assignment] class SectionHeaderItem(TextItem): """SectionItem.""" - label: typing.Literal[DocItemLabel.SECTION_HEADER] = ( - DocItemLabel.SECTION_HEADER # type: ignore[assignment] - ) + label: typing.Literal[DocItemLabel.SECTION_HEADER] = DocItemLabel.SECTION_HEADER # type: ignore[assignment] level: LevelNumber = 1 @deprecated("Use export_to_doctags() instead.") @@ -1741,9 +1650,7 @@ def export_to_doctags( class ListItem(TextItem): """SectionItem.""" - label: typing.Literal[DocItemLabel.LIST_ITEM] = ( - DocItemLabel.LIST_ITEM # type: ignore[assignment] - ) + label: typing.Literal[DocItemLabel.LIST_ITEM] = DocItemLabel.LIST_ITEM # type: ignore[assignment] enumerated: bool = False marker: str = "-" # The bullet or number symbol that prefixes this list item @@ -1765,9 +1672,7 @@ def caption_text(self, doc: "DoclingDocument") -> str: text += cap.resolve(doc).text return text - def get_image( - self, doc: "DoclingDocument", prov_index: int = 0 - ) -> Optional[PILImage.Image]: + def get_image(self, doc: "DoclingDocument", prov_index: int = 0) -> Optional[PILImage.Image]: """Returns the image corresponding to this FloatingItem. This function returns the PIL image from self.image if one is available. 
@@ -1785,9 +1690,7 @@ def get_image( class CodeItem(FloatingItem, TextItem): """CodeItem.""" - label: typing.Literal[DocItemLabel.CODE] = ( - DocItemLabel.CODE # type: ignore[assignment] - ) + label: typing.Literal[DocItemLabel.CODE] = DocItemLabel.CODE # type: ignore[assignment] code_language: CodeLanguageLabel = CodeLanguageLabel.UNKNOWN @deprecated("Use export_to_doctags() instead.") @@ -1835,9 +1738,7 @@ def export_to_doctags( class FormulaItem(TextItem): """FormulaItem.""" - label: typing.Literal[DocItemLabel.FORMULA] = ( - DocItemLabel.FORMULA # type: ignore[assignment] - ) + label: typing.Literal[DocItemLabel.FORMULA] = DocItemLabel.FORMULA # type: ignore[assignment] class MetaUtils: @@ -1862,17 +1763,13 @@ def _create_migrated_meta_field_name( *, name: str, ) -> str: - return cls.create_meta_field_name( - namespace=cls._META_FIELD_LEGACY_NAMESPACE, name=name - ) + return cls.create_meta_field_name(namespace=cls._META_FIELD_LEGACY_NAMESPACE, name=name) class PictureItem(FloatingItem): """PictureItem.""" - label: typing.Literal[DocItemLabel.PICTURE, DocItemLabel.CHART] = ( - DocItemLabel.PICTURE - ) + label: typing.Literal[DocItemLabel.PICTURE, DocItemLabel.CHART] = DocItemLabel.PICTURE meta: Optional[PictureMeta] = None annotations: Annotated[ @@ -1920,12 +1817,8 @@ def _migrate_annotations_to_meta(self) -> Self: confidence=ann.confidence, created_by=ann.provenance, **{ - MetaUtils._create_migrated_meta_field_name( - name="segmentation" - ): ann.segmentation, - MetaUtils._create_migrated_meta_field_name( - name="class_name" - ): ann.class_name, + MetaUtils._create_migrated_meta_field_name(name="segmentation"): ann.segmentation, + MetaUtils._create_migrated_meta_field_name(name="class_name"): ann.class_name, }, ) elif isinstance(ann, PictureTabularChartData): @@ -1937,11 +1830,7 @@ def _migrate_annotations_to_meta(self) -> Self: self.meta.set_custom_field( namespace=MetaUtils._META_FIELD_LEGACY_NAMESPACE, name=ann.kind, - value=( - ann.content - if isinstance(ann, MiscAnnotation) - else ann.model_dump(mode="json") - ), + value=(ann.content if isinstance(ann, MiscAnnotation) else ann.model_dump(mode="json")), ) return self @@ -1952,9 +1841,7 @@ def _image_to_base64(self, pil_image, format="PNG"): buffered = BytesIO() pil_image.save(buffered, format=format) # Save the image to the byte stream img_bytes = buffered.getvalue() # Get the byte data - img_base64 = base64.b64encode(img_bytes).decode( - "utf-8" - ) # Encode to Base64 and decode to string + img_base64 = base64.b64encode(img_bytes).decode("utf-8") # Encode to Base64 and decode to string return img_base64 @staticmethod @@ -2109,7 +1996,6 @@ def _migrate_annotations_to_meta(self) -> Self: "Note that only the first available instance of each annotation type will be migrated." 
) for ann in self.annotations: - # ensure meta field is present if self.meta is None: self.meta = FloatingMeta() @@ -2123,23 +2009,15 @@ def _migrate_annotations_to_meta(self) -> Self: self.meta.set_custom_field( namespace=MetaUtils._META_FIELD_LEGACY_NAMESPACE, name=ann.kind, - value=( - ann.content - if isinstance(ann, MiscAnnotation) - else ann.model_dump(mode="json") - ), + value=(ann.content if isinstance(ann, MiscAnnotation) else ann.model_dump(mode="json")), ) return self - def export_to_dataframe( - self, doc: Optional["DoclingDocument"] = None - ) -> pd.DataFrame: + def export_to_dataframe(self, doc: Optional["DoclingDocument"] = None) -> pd.DataFrame: """Export the table as a Pandas DataFrame.""" if doc is None: - _logger.warning( - "Usage of TableItem.export_to_dataframe() without `doc` argument is deprecated." - ) + _logger.warning("Usage of TableItem.export_to_dataframe() without `doc` argument is deprecated.") if self.data.num_rows == 0 or self.data.num_cols == 0: return pd.DataFrame() @@ -2148,9 +2026,7 @@ def export_to_dataframe( num_headers = 0 for i, row in enumerate(self.data.grid): if len(row) == 0: - raise RuntimeError( - f"Invalid table. {len(row)=} but {self.data.num_cols=}." - ) + raise RuntimeError(f"Invalid table. {len(row)=} but {self.data.num_cols=}.") any_header = False for cell in row: @@ -2175,10 +2051,7 @@ def export_to_dataframe( columns[j] += col_name # Create table data - table_data = [ - [cell._get_text(doc=doc) for cell in row] - for row in self.data.grid[num_headers:] - ] + table_data = [[cell._get_text(doc=doc) for cell in row] for row in self.data.grid[num_headers:]] # Create DataFrame df = pd.DataFrame(table_data, columns=columns) @@ -2197,15 +2070,13 @@ def export_to_markdown(self, doc: Optional["DoclingDocument"] = None) -> str: return text else: _logger.warning( - "Usage of TableItem.export_to_markdown() without `doc` argument is " - "deprecated.", + "Usage of TableItem.export_to_markdown() without `doc` argument is deprecated.", ) table = [] for row in self.data.grid: tmp = [] for col in row: - # make sure that md tables are not broken # due to newline chars in the text text = col._get_text(doc=doc) @@ -2242,8 +2113,7 @@ def export_to_html( return text else: _logger.error( - "Usage of TableItem.export_to_html() without `doc` argument is " - "deprecated.", + "Usage of TableItem.export_to_html() without `doc` argument is deprecated.", ) return "" @@ -2290,9 +2160,7 @@ def export_to_otsl( for i in range(nrows): for j in range(ncols): cell: TableCell = self.data.grid[i][j] - content = cell._get_text( - doc=doc, doc_serializer=doc_serializer, **kwargs - ).strip() + content = cell._get_text(doc=doc, doc_serializer=doc_serializer, **kwargs).strip() rowspan, rowstart = ( cell.row_span, cell.start_row_offset_idx, @@ -2447,13 +2315,9 @@ def validate_links(cls, links, info): for link in links: if link.source_cell_id not in valid_cell_ids: - raise ValueError( - f"Invalid source_cell_id {link.source_cell_id} in GraphLink" - ) + raise ValueError(f"Invalid source_cell_id {link.source_cell_id} in GraphLink") if link.target_cell_id not in valid_cell_ids: - raise ValueError( - f"Invalid target_cell_id {link.target_cell_id} in GraphLink" - ) + raise ValueError(f"Invalid target_cell_id {link.target_cell_id} in GraphLink") return links @@ -2540,9 +2404,7 @@ class DoclingDocument(BaseModel): """DoclingDocument.""" schema_name: typing.Literal["DoclingDocument"] = "DoclingDocument" - version: Annotated[str, StringConstraints(pattern=VERSION_PATTERN, strict=True)] = ( - 
CURRENT_VERSION - ) + version: Annotated[str, StringConstraints(pattern=VERSION_PATTERN, strict=True)] = CURRENT_VERSION name: str # The working name of this document, without extensions # (could be taken from originating doc, or just "Untitled 1") origin: Optional[DocumentOrigin] = ( @@ -2559,9 +2421,7 @@ class DoclingDocument(BaseModel): body: GroupItem = GroupItem(name="_root_", self_ref="#/body") # List[RefItem] = [] groups: List[Union[ListGroup, InlineGroup, GroupItem]] = [] - texts: List[ - Union[TitleItem, SectionHeaderItem, ListItem, CodeItem, FormulaItem, TextItem] - ] = [] + texts: List[Union[TitleItem, SectionHeaderItem, ListItem, CodeItem, FormulaItem, TextItem]] = [] pictures: List[PictureItem] = [] tables: List[TableItem] = [] key_value_items: List[KeyValueItem] = [] @@ -2589,9 +2449,7 @@ def transform_to_content_layer(cls, data: Any) -> Any: # Public Manipulation methods # --------------------------- - def append_child_item( - self, *, child: NodeItem, parent: Optional[NodeItem] = None - ) -> None: + def append_child_item(self, *, child: NodeItem, parent: Optional[NodeItem] = None) -> None: """Adds an item.""" if len(child.children) > 0: raise ValueError("Can not append a child with children") @@ -2601,9 +2459,7 @@ def append_child_item( success, stack = self._get_stack_of_item(item=parent) if not success: - raise ValueError( - f"Could not resolve the parent node in the document tree: {parent}" - ) + raise ValueError(f"Could not resolve the parent node in the document tree: {parent}") # Append the item to the attributes of the doc self._append_item(item=child, parent_ref=parent.get_ref()) @@ -2616,15 +2472,11 @@ def append_child_item( self._pop_item(item=child) raise ValueError(f"Could not append child: {child} to parent: {parent}") - def insert_item_after_sibling( - self, *, new_item: NodeItem, sibling: NodeItem - ) -> None: + def insert_item_after_sibling(self, *, new_item: NodeItem, sibling: NodeItem) -> None: """Inserts an item, given its node_item instance, after other as a sibling.""" self._insert_item_at_refitem(item=new_item, ref=sibling.get_ref(), after=True) - def insert_item_before_sibling( - self, *, new_item: NodeItem, sibling: NodeItem - ) -> None: + def insert_item_before_sibling(self, *, new_item: NodeItem, sibling: NodeItem) -> None: """Inserts an item, given its node_item instance, before other as a sibling.""" self._insert_item_at_refitem(item=new_item, ref=sibling.get_ref(), after=False) @@ -2672,16 +2524,12 @@ def _get_stack_of_refitem(self, ref: RefItem) -> tuple[bool, list[int]]: return (True, stack) - def _insert_item_at_refitem( - self, item: NodeItem, ref: RefItem, after: bool - ) -> RefItem: + def _insert_item_at_refitem(self, item: NodeItem, ref: RefItem, after: bool) -> RefItem: """Insert node-item using the self-reference.""" success, stack = self._get_stack_of_refitem(ref=ref) if not success: - raise ValueError( - f"Could not insert at {ref.cref}: could not find the stack" - ) + raise ValueError(f"Could not insert at {ref.cref}: could not find the stack") return self._insert_item_at_stack(item=item, stack=stack, after=after) @@ -2780,17 +2628,13 @@ def _pop_item(self, *, item: NodeItem): item_label = path[1] item_index = int(path[2]) - if ( - len(self.__getattribute__(item_label)) == item_index + 1 - ): # we can only pop the last item + if len(self.__getattribute__(item_label)) == item_index + 1: # we can only pop the last item del self.__getattribute__(item_label)[item_index] else: msg = f"index:{item_index}, 
len:{len(self.__getattribute__(item_label))}" raise ValueError(f"Failed to pop: item is not last ({msg})") - def _insert_item_at_stack( - self, item: NodeItem, stack: list[int], after: bool - ) -> RefItem: + def _insert_item_at_stack(self, item: NodeItem, stack: list[int], after: bool) -> RefItem: """Insert node-item using the self-reference.""" parent_ref = self.body._get_parent_ref(doc=self, stack=stack) @@ -2799,16 +2643,12 @@ def _insert_item_at_stack( new_ref = self._append_item(item=item, parent_ref=parent_ref) - success = self.body._add_sibling( - doc=self, stack=stack, new_ref=new_ref, after=after - ) + success = self.body._add_sibling(doc=self, stack=stack, new_ref=new_ref, after=after) if not success: self._pop_item(item=item) - raise ValueError( - f"Could not insert item: {item} under parent: {parent_ref.resolve(doc=self)}" - ) + raise ValueError(f"Could not insert item: {item} under parent: {parent_ref.resolve(doc=self)}") return item.get_ref() @@ -2836,9 +2676,7 @@ def _delete_items(self, refs: list[RefItem]): to_be_deleted_items[tuple(stack)] = ref.cref if len(to_be_deleted_items) < len(refs): - raise ValueError( - f"Cannot find all provided RefItems in doc: {[r.cref for r in refs]}" - ) + raise ValueError(f"Cannot find all provided RefItems in doc: {[r.cref for r in refs]}") # Clean the tree, reverse the order to not have to update for stack_, ref_ in reversed(sorted(to_be_deleted_items.items())): @@ -2857,7 +2695,6 @@ def _delete_items(self, refs: list[RefItem]): for stack_, ref_ in to_be_deleted_items.items(): path = ref_.split("/") if len(path) == 3: - item_label = path[1] item_index = int(path[2]) @@ -2874,22 +2711,16 @@ def _delete_items(self, refs: list[RefItem]): _logger.debug(f"deleting item in doc for {item_label} for {item_index}") del self.__getattribute__(item_label)[item_index] - self._update_breadth_first_with_lookup( - node=self.body, refs_to_be_deleted=refs, lookup=lookup - ) + self._update_breadth_first_with_lookup(node=self.body, refs_to_be_deleted=refs, lookup=lookup) # Update the references - def _update_ref_with_lookup( - self, item_label: str, item_index: int, lookup: dict[str, dict[int, int]] - ) -> RefItem: + def _update_ref_with_lookup(self, item_label: str, item_index: int, lookup: dict[str, dict[int, int]]) -> RefItem: """Update ref with lookup.""" if item_label not in lookup: # Nothing to be done return RefItem(cref=f"#/{item_label}/{item_index}") # Count how many items have been deleted in front of you - delta = sum( - val if item_index >= key else 0 for key, val in lookup[item_label].items() - ) + delta = sum(val if item_index >= key else 0 for key, val in lookup[item_label].items()) new_index = item_index + delta return RefItem(cref=f"#/{item_label}/{new_index}") @@ -2903,10 +2734,7 @@ def _update_refitems_with_lookup( """Update refitems with lookup.""" new_refitems = [] for ref_item in ref_items: - - if ( - ref_item not in refs_to_be_deleted - ): # if ref_item is in ref, then delete/skip them + if ref_item not in refs_to_be_deleted: # if ref_item is in ref, then delete/skip them path = ref_item._split_ref_to_path() if len(path) == 3: new_refitems.append( @@ -2959,17 +2787,13 @@ def _update_breadth_first_with_lookup( if node.parent is not None: path = node.parent._split_ref_to_path() if len(path) == 3: - node.parent = self._update_ref_with_lookup( - item_label=path[1], item_index=int(path[2]), lookup=lookup - ) + node.parent = self._update_ref_with_lookup(item_label=path[1], item_index=int(path[2]), lookup=lookup) # Update the parent reference 
if node.self_ref is not None: path = node.self_ref.split("/") if len(path) == 3: - _ref = self._update_ref_with_lookup( - item_label=path[1], item_index=int(path[2]), lookup=lookup - ) + _ref = self._update_ref_with_lookup(item_label=path[1], item_index=int(path[2]), lookup=lookup) node.self_ref = _ref.cref # Update the child references @@ -2981,9 +2805,7 @@ def _update_breadth_first_with_lookup( for i, child_ref in enumerate(node.children): node = child_ref.resolve(self) - self._update_breadth_first_with_lookup( - node=node, refs_to_be_deleted=refs_to_be_deleted, lookup=lookup - ) + self._update_breadth_first_with_lookup(node=node, refs_to_be_deleted=refs_to_be_deleted, lookup=lookup) ################################### # TODO: refactor add* methods below @@ -3232,7 +3054,6 @@ def add_text( ) else: - if not parent: parent = self.body @@ -3601,9 +3422,7 @@ def add_form( # Node Item Insertion Methods # --------------------------- - def _get_insertion_stack_and_parent( - self, sibling: NodeItem - ) -> tuple[list[int], RefItem]: + def _get_insertion_stack_and_parent(self, sibling: NodeItem) -> tuple[list[int], RefItem]: """Get the stack and parent reference for inserting a new item at a sibling.""" # Get the stack of the sibling sibling_ref = sibling.get_ref() @@ -3611,9 +3430,7 @@ def _get_insertion_stack_and_parent( success, stack = self._get_stack_of_refitem(ref=sibling_ref) if not success: - raise ValueError( - f"Could not insert at {sibling_ref.cref}: could not find the stack" - ) + raise ValueError(f"Could not insert at {sibling_ref.cref}: could not find the stack") # Get the parent RefItem parent_ref = self.body._get_parent_ref(doc=self, stack=stack) @@ -3639,9 +3456,7 @@ def _insert_in_structure( new_ref = item.get_ref() - success = self.body._add_sibling( - doc=self, stack=stack, new_ref=new_ref, after=after - ) + success = self.body._add_sibling(doc=self, stack=stack, new_ref=new_ref, after=after) # Error handling can be determined here if not success: @@ -3650,9 +3465,7 @@ def _insert_in_structure( if created_parent: self.delete_items(node_items=[item.parent.resolve(self)]) - raise ValueError( - f"Could not insert item: {item} under parent: {item.parent.resolve(doc=self)}" - ) + raise ValueError(f"Could not insert item: {item} under parent: {item.parent.resolve(doc=self)}") def insert_list_group( self, @@ -3834,9 +3647,7 @@ def insert_list_item( if content_layer: list_item.content_layer = content_layer - self._insert_in_structure( - item=list_item, stack=stack, after=after, created_parent=set_parent - ) + self._insert_in_structure(item=list_item, stack=stack, after=after, created_parent=set_parent) return list_item @@ -4331,15 +4142,11 @@ def delete_items_range( :returns: None """ - start_parent_ref = ( - start.parent if start.parent is not None else self.body.get_ref() - ) + start_parent_ref = start.parent if start.parent is not None else self.body.get_ref() end_parent_ref = end.parent if end.parent is not None else self.body.get_ref() if start.parent != end.parent: - raise ValueError( - "Start and end NodeItems must have the same parent to delete a range." - ) + raise ValueError("Start and end NodeItems must have the same parent to delete a range.") start_ref = start.get_ref() end_ref = end.get_ref() @@ -4384,24 +4191,18 @@ def extract_items_range( :returns: DoclingDocument: A new document containing the extracted NodeItems and their children """ if not start.parent == end.parent: - raise ValueError( - "Start and end NodeItems must have the same parent to extract a range." 
- ) + raise ValueError("Start and end NodeItems must have the same parent to extract a range.") start_ref = start.get_ref() end_ref = end.get_ref() - start_parent_ref = ( - start.parent if start.parent is not None else self.body.get_ref() - ) + start_parent_ref = start.parent if start.parent is not None else self.body.get_ref() end_parent_ref = end.parent if end.parent is not None else self.body.get_ref() start_parent = start_parent_ref.resolve(doc=self) end_parent = end_parent_ref.resolve(doc=self) - start_index = start_parent.children.index(start_ref) + ( - 0 if start_inclusive else 1 - ) + start_index = start_parent.children.index(start_ref) + (0 if start_inclusive else 1) end_index = end_parent.children.index(end_ref) + (1 if end_inclusive else 0) if start_index > end_index: @@ -4442,9 +4243,7 @@ def insert_document( """ ref_items = doc.body.children node_items = [ref.resolve(doc) for ref in ref_items] - self.insert_node_items( - sibling=sibling, node_items=node_items, doc=doc, after=after - ) + self.insert_node_items(sibling=sibling, node_items=node_items, doc=doc, after=after) def add_document( self, @@ -4488,9 +4287,7 @@ def add_node_items( parent_ref = parent.get_ref() - new_refs = self._append_item_copies( - node_items=node_items, parent_ref=parent_ref, doc=doc - ) + new_refs = self._append_item_copies(node_items=node_items, parent_ref=parent_ref, doc=doc) # Add the new item refs in the document structure @@ -4519,17 +4316,13 @@ def insert_node_items( if not isinstance(parent, ListGroup): for item in node_items: if isinstance(item, ListItem): - raise ValueError( - "Cannot insert ListItem into a non-ListGroup parent." - ) + raise ValueError("Cannot insert ListItem into a non-ListGroup parent.") # Append the NodeItems to the document content parent_ref = parent.get_ref() - new_refs = self._append_item_copies( - node_items=node_items, parent_ref=parent_ref, doc=doc - ) + new_refs = self._append_item_copies(node_items=node_items, parent_ref=parent_ref, doc=doc) # Get the stack of the sibling @@ -4538,23 +4331,17 @@ def insert_node_items( success, stack = self._get_stack_of_refitem(ref=sibling_ref) if not success: - raise ValueError( - f"Could not insert at {sibling_ref.cref}: could not find the stack" - ) + raise ValueError(f"Could not insert at {sibling_ref.cref}: could not find the stack") # Insert the new item refs in the document structure reversed_new_refs = new_refs[::-1] for ref in reversed_new_refs: - success = self.body._add_sibling( - doc=self, stack=stack, new_ref=ref, after=after - ) + success = self.body._add_sibling(doc=self, stack=stack, new_ref=ref, after=after) if not success: - raise ValueError( - f"Could not insert item {ref.cref} at {sibling.get_ref().cref}" - ) + raise ValueError(f"Could not insert item {ref.cref} at {sibling.get_ref().cref}") def _append_item_copies( self, @@ -4605,8 +4392,7 @@ def validate_tree(self, root: NodeItem) -> bool: if isinstance(root, TableItem): for cell in root.data.table_cells: if isinstance(cell, RichTableCell) and ( - (par_ref := cell.ref.resolve(self).parent) is None - or par_ref.resolve(self) != root + (par_ref := cell.ref.resolve(self).parent) is None or par_ref.resolve(self) != root ): return False @@ -4641,11 +4427,7 @@ def _iterate_items_with_stack( _stack: Optional[list[int]] = None, ) -> typing.Iterable[Tuple[NodeItem, list[int]]]: # tuple of node and level """Iterate elements with stack.""" - my_layers = ( - included_content_layers - if included_content_layers is not None - else DEFAULT_CONTENT_LAYERS - ) + my_layers = 
included_content_layers if included_content_layers is not None else DEFAULT_CONTENT_LAYERS my_stack: list[int] = _stack if _stack is not None else [] if not root: @@ -4658,10 +4440,7 @@ def _iterate_items_with_stack( (not isinstance(root, GroupItem) or with_groups) and ( not isinstance(root, DocItem) - or ( - page_nrs is None - or any(prov.page_no in page_nrs for prov in root.prov) - ) + or (page_nrs is None or any(prov.page_no in page_nrs for prov in root.prov)) ) and root.content_layer in my_layers ) @@ -4672,9 +4451,7 @@ def _iterate_items_with_stack( my_stack.append(-1) allowed_pic_refs: set[str] = ( - {r.cref for r in root.captions} - if (root_is_picture := isinstance(root, PictureItem)) - else set() + {r.cref for r in root.captions} if (root_is_picture := isinstance(root, PictureItem)) else set() ) # Traverse children @@ -4737,12 +4514,8 @@ def _with_embedded_pictures(self) -> "DoclingDocument": for ix, (item, level) in enumerate(result.iterate_items(with_groups=True)): if isinstance(item, PictureItem): - if item.image is not None: - if ( - isinstance(item.image.uri, AnyUrl) - and item.image.uri.scheme == "file" - ): + if isinstance(item.image.uri, AnyUrl) and item.image.uri.scheme == "file": assert isinstance(item.image.uri.path, str) tmp_image = PILImage.open(str(unquote(item.image.uri.path))) item.image = ImageRef.from_pil(tmp_image, dpi=item.image.dpi) @@ -4774,7 +4547,6 @@ def _with_pictures_refs( if isinstance(item, PictureItem): img = item.get_image(doc=self) if img is not None: - hexhash = PictureItem._image_to_hexhash(img) # loc_path = image_dir / f"image_{img_count:06}.png" @@ -4792,9 +4564,7 @@ def _with_pictures_refs( if item.image is None: scale = img.size[0] / item.prov[0].bbox.width - item.image = ImageRef.from_pil( - image=img, dpi=round(72 * scale) - ) + item.image = ImageRef.from_pil(image=img, dpi=round(72 * scale)) item.image.uri = Path(obj_path) # if item.image._pil is not None: @@ -4821,7 +4591,7 @@ def print_element_tree(self): elif isinstance(item, TextItem): print( " " * level, - f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}", + f"{ix}: {item.label.value}: {item.text[: min(len(item.text), 100)]}", ) elif isinstance(item, DocItem): @@ -4838,14 +4608,9 @@ def export_to_element_tree(self) -> str: ) ): if isinstance(item, GroupItem): - texts.append( - " " * level + f"{ix}: {item.label.value} with name={item.name}" - ) + texts.append(" " * level + f"{ix}: {item.label.value} with name={item.name}") elif isinstance(item, TextItem): - texts.append( - " " * level - + f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}" - ) + texts.append(" " * level + f"{ix}: {item.label.value}: {item.text[: min(len(item.text), 100)]}") elif isinstance(item, DocItem): texts.append(" " * level + f"{ix}: {item.label.value}") @@ -4868,13 +4633,9 @@ def save_as_json( if image_mode == ImageRefMode.REFERENCED: os.makedirs(artifacts_dir, exist_ok=True) - new_doc = self._make_copy_with_refmode( - artifacts_dir, image_mode, page_no=None, reference_path=reference_path - ) + new_doc = self._make_copy_with_refmode(artifacts_dir, image_mode, page_no=None, reference_path=reference_path) - out = new_doc.export_to_dict( - coord_precision=coord_precision, confid_precision=confid_precision - ) + out = new_doc.export_to_dict(coord_precision=coord_precision, confid_precision=confid_precision) with open(filename, "w", encoding="utf-8") as fw: json.dump(out, fw, indent=indent) @@ -4911,13 +4672,9 @@ def save_as_yaml( if image_mode == ImageRefMode.REFERENCED: 
os.makedirs(artifacts_dir, exist_ok=True) - new_doc = self._make_copy_with_refmode( - artifacts_dir, image_mode, page_no=None, reference_path=reference_path - ) + new_doc = self._make_copy_with_refmode(artifacts_dir, image_mode, page_no=None, reference_path=reference_path) - out = new_doc.export_to_dict( - coord_precision=coord_precision, confid_precision=confid_precision - ) + out = new_doc.export_to_dict(coord_precision=coord_precision, confid_precision=confid_precision) with open(filename, "w", encoding="utf-8") as fw: yaml.dump(out, fw, default_flow_style=default_flow_style) @@ -4951,9 +4708,7 @@ def export_to_dict( context[PydanticSerCtxKey.COORD_PREC.value] = coord_precision if confid_precision is not None: context[PydanticSerCtxKey.CONFID_PREC.value] = confid_precision - out = self.model_dump( - mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context - ) + out = self.model_dump(mode=mode, by_alias=by_alias, exclude_none=exclude_none, context=context) return out @@ -4988,9 +4743,7 @@ def save_as_markdown( if image_mode == ImageRefMode.REFERENCED: os.makedirs(artifacts_dir, exist_ok=True) - new_doc = self._make_copy_with_refmode( - artifacts_dir, image_mode, page_no, reference_path=reference_path - ) + new_doc = self._make_copy_with_refmode(artifacts_dir, image_mode, page_no, reference_path=reference_path) md_out = new_doc.export_to_markdown( delim=delim, @@ -5015,7 +4768,7 @@ def save_as_markdown( with open(filename, "w", encoding="utf-8") as fw: fw.write(md_out) - def export_to_markdown( # noqa: C901 + def export_to_markdown( self, delim: str = "\n\n", from_element: int = 0, @@ -5101,11 +4854,7 @@ def export_to_markdown( # noqa: C901 ) my_labels = labels if labels is not None else DOCUMENT_TOKENS_EXPORT_LABELS - my_layers = ( - included_content_layers - if included_content_layers is not None - else DEFAULT_CONTENT_LAYERS - ) + my_layers = included_content_layers if included_content_layers is not None else DEFAULT_CONTENT_LAYERS if use_legacy_annotations is not None: warnings.warn( @@ -5149,7 +4898,7 @@ def export_to_markdown( # noqa: C901 return ser_res.text - def export_to_text( # noqa: C901 + def export_to_text( self, delim: str = "\n\n", from_element: int = 0, @@ -5194,9 +4943,7 @@ def save_as_html( if image_mode == ImageRefMode.REFERENCED: os.makedirs(artifacts_dir, exist_ok=True) - new_doc = self._make_copy_with_refmode( - artifacts_dir, image_mode, page_no, reference_path=reference_path - ) + new_doc = self._make_copy_with_refmode(artifacts_dir, image_mode, page_no, reference_path=reference_path) html_out = new_doc.export_to_html( from_element=from_element, @@ -5243,16 +4990,14 @@ def _make_copy_with_refmode( if image_mode == ImageRefMode.PLACEHOLDER: new_doc = self elif image_mode == ImageRefMode.REFERENCED: - new_doc = self._with_pictures_refs( - image_dir=artifacts_dir, page_no=page_no, reference_path=reference_path - ) + new_doc = self._with_pictures_refs(image_dir=artifacts_dir, page_no=page_no, reference_path=reference_path) elif image_mode == ImageRefMode.EMBEDDED: new_doc = self._with_embedded_pictures() else: raise ValueError("Unsupported ImageRefMode") return new_doc - def export_to_html( # noqa: C901 + def export_to_html( self, from_element: int = 0, to_element: int = sys.maxsize, @@ -5275,11 +5020,7 @@ def export_to_html( # noqa: C901 ) my_labels = labels if labels is not None else DOCUMENT_TOKENS_EXPORT_LABELS - my_layers = ( - included_content_layers - if included_content_layers is not None - else DEFAULT_CONTENT_LAYERS - ) + my_layers = 
included_content_layers if included_content_layers is not None else DEFAULT_CONTENT_LAYERS output_style = HTMLOutputStyle.SINGLE_COLUMN if split_page_view: @@ -5446,8 +5187,8 @@ def parse_key_value_item( re.DOTALL, ) - cells: List["GraphCell"] = [] - links: List["GraphLink"] = [] + cells: List[GraphCell] = [] + links: List[GraphLink] = [] raw_link_predictions = [] for cell_match in cell_pattern.finditer(tokens): @@ -5680,9 +5421,7 @@ def _add_text( if caption is not None and caption_bbox is not None: caption.prov.append( ProvenanceItem( - bbox=caption_bbox.resize_by_scale( - pg_width, pg_height - ), + bbox=caption_bbox.resize_by_scale(pg_width, pg_height), charspan=(0, len(caption.text)), page_no=page_no, ) @@ -5707,10 +5446,7 @@ def _add_text( chart_data=table_data, ) - if ( - pic_classification is not None - or pic_tabular_chart is not None - ): + if pic_classification is not None or pic_tabular_chart is not None: pic.meta = PictureMeta( classification=pic_classification, tabular_chart=pic_tabular_chart, @@ -5720,17 +5456,13 @@ def _add_text( # In case we don't have access to an binary of an image pic = doc.add_picture( parent=None, - prov=ProvenanceItem( - bbox=bbox, charspan=(0, 0), page_no=page_no - ), + prov=ProvenanceItem(bbox=bbox, charspan=(0, 0), page_no=page_no), ) # If there is a caption to an image, add it as well if caption is not None and caption_bbox is not None: caption.prov.append( ProvenanceItem( - bbox=caption_bbox.resize_by_scale( - pg_width, pg_height - ), + bbox=caption_bbox.resize_by_scale(pg_width, pg_height), charspan=(0, len(caption.text)), page_no=page_no, ) @@ -5742,23 +5474,17 @@ def _add_text( provenance="load_from_doctags", predicted_classes=[ # chart_type - PictureClassificationClass( - class_name=chart_type, confidence=1.0 - ) + PictureClassificationClass(class_name=chart_type, confidence=1.0) ], ) ) if table_data is not None: # Add chart data as PictureTabularChartData - pd = PictureTabularChartData( - chart_data=table_data, title=pic_title - ) + pd = PictureTabularChartData(chart_data=table_data, title=pic_title) pic.annotations.append(pd) elif tag_name == DocItemLabel.KEY_VALUE_REGION: - key_value_data, kv_item_prov = parse_key_value_item( - full_chunk, image - ) + key_value_data, kv_item_prov = parse_key_value_item(full_chunk, image) doc.add_key_values(graph=key_value_data, prov=kv_item_prov) elif tag_name in [ DocumentToken.ORDERED_LIST.value, @@ -5770,9 +5496,7 @@ def _add_text( if tag_name == DocumentToken.ORDERED_LIST.value: GroupLabel.ORDERED_LIST - list_item_pattern = ( - rf"<(?P{DocItemLabel.LIST_ITEM})>.*?" - ) + list_item_pattern = rf"<(?P{DocItemLabel.LIST_ITEM})>.*?" 
li_pattern = re.compile(list_item_pattern, re.DOTALL) # Add list group: new_list = doc.add_list_group(name="list") @@ -5865,7 +5589,7 @@ def export_to_document_tokens(self, *args, **kwargs): r"""Export to DocTags format.""" return self.export_to_doctags(*args, **kwargs) - def export_to_doctags( # noqa: C901 + def export_to_doctags( self, delim: str = "", # deprecated from_element: int = 0, @@ -5925,11 +5649,7 @@ def export_to_doctags( # noqa: C901 add_table_cell_location=add_table_cell_location, add_table_cell_text=add_table_cell_text, pages=pages, - mode=( - DocTagsParams.Mode.MINIFIED - if minified - else DocTagsParams.Mode.HUMAN_FRIENDLY - ), + mode=(DocTagsParams.Mode.MINIFIED if minified else DocTagsParams.Mode.HUMAN_FRIENDLY), ), ) ser_res = serializer.serialize() @@ -5945,7 +5665,6 @@ def _export_to_indented_text( result = [] def get_text(text: str, max_text_len: int): - middle = " ... " if max_text_len == -1: @@ -5960,48 +5679,34 @@ def get_text(text: str, max_text_len: int): for i, (item, level) in enumerate(self.iterate_items(with_groups=True)): if isinstance(item, GroupItem): - result.append( - indent * level - + f"item-{i} at level {level}: {item.label}: group {item.name}" - ) + result.append(indent * level + f"item-{i} at level {level}: {item.label}: group {item.name}") elif isinstance(item, TextItem) and item.label in [DocItemLabel.TITLE]: text = get_text(text=item.text, max_text_len=max_text_len) - result.append( - indent * level + f"item-{i} at level {level}: {item.label}: {text}" - ) + result.append(indent * level + f"item-{i} at level {level}: {item.label}: {text}") elif isinstance(item, SectionHeaderItem): text = get_text(text=item.text, max_text_len=max_text_len) - result.append( - indent * level + f"item-{i} at level {level}: {item.label}: {text}" - ) + result.append(indent * level + f"item-{i} at level {level}: {item.label}: {text}") elif isinstance(item, TextItem) and item.label in [DocItemLabel.CODE]: text = get_text(text=item.text, max_text_len=max_text_len) - result.append( - indent * level + f"item-{i} at level {level}: {item.label}: {text}" - ) + result.append(indent * level + f"item-{i} at level {level}: {item.label}: {text}") elif isinstance(item, ListItem) and item.label in [DocItemLabel.LIST_ITEM]: text = get_text(text=item.text, max_text_len=max_text_len) - result.append( - indent * level + f"item-{i} at level {level}: {item.label}: {text}" - ) + result.append(indent * level + f"item-{i} at level {level}: {item.label}: {text}") elif isinstance(item, TextItem): text = get_text(text=item.text, max_text_len=max_text_len) - result.append( - indent * level + f"item-{i} at level {level}: {item.label}: {text}" - ) + result.append(indent * level + f"item-{i} at level {level}: {item.label}: {text}") elif isinstance(item, TableItem): - result.append( indent * level + f"item-{i} at level {level}: {item.label} with " @@ -6011,9 +5716,7 @@ def get_text(text: str, max_text_len: int): for _ in item.captions: caption = _.resolve(self) result.append( - indent * (level + 1) - + f"item-{i} at level {level + 1}: {caption.label}: " - + f"{caption.text}" + indent * (level + 1) + f"item-{i} at level {level + 1}: {caption.label}: " + f"{caption.text}" ) if explicit_tables: @@ -6022,38 +5725,26 @@ def get_text(text: str, max_text_len: int): grid.append([]) for j, cell in enumerate(row): if j < 10: - text = get_text( - cell._get_text(doc=self), max_text_len=16 - ) + text = get_text(cell._get_text(doc=self), max_text_len=16) grid[-1].append(text) result.append("\n" + tabulate(grid) + 
"\n") elif isinstance(item, PictureItem): - - result.append( - indent * level + f"item-{i} at level {level}: {item.label}" - ) + result.append(indent * level + f"item-{i} at level {level}: {item.label}") for _ in item.captions: caption = _.resolve(self) result.append( - indent * (level + 1) - + f"item-{i} at level {level + 1}: {caption.label}: " - + f"{caption.text}" + indent * (level + 1) + f"item-{i} at level {level + 1}: {caption.label}: " + f"{caption.text}" ) elif isinstance(item, DocItem): - result.append( - indent * (level + 1) - + f"item-{i} at level {level}: {item.label}: ignored" - ) + result.append(indent * (level + 1) + f"item-{i} at level {level}: {item.label}: ignored") return "\n".join(result) - def add_page( - self, page_no: int, size: Size, image: Optional[ImageRef] = None - ) -> PageItem: + def add_page(self, page_no: int, size: Size, image: Optional[ImageRef] = None) -> PageItem: """add_page. :param page_no: int: @@ -6139,9 +5830,7 @@ def check_version_is_compatible(cls, v: str) -> str: or doc_match["major"] != sdk_match["major"] or doc_match["minor"] > sdk_match["minor"] ): - raise ValueError( - f"Doc version {v} incompatible with SDK schema version {CURRENT_VERSION}" - ) + raise ValueError(f"Doc version {v} incompatible with SDK schema version {CURRENT_VERSION}") else: return CURRENT_VERSION @@ -6151,9 +5840,7 @@ def validate_document(self) -> Self: with warnings.catch_warnings(): # ignore warning from deprecated furniture warnings.filterwarnings("ignore", category=DeprecationWarning) - if not self.validate_tree(self.body) or not self.validate_tree( - self.furniture - ): + if not self.validate_tree(self.body) or not self.validate_tree(self.furniture): raise ValueError("Document hierachy is inconsistent.") return self @@ -6170,8 +5857,7 @@ def validate_misplaced_list_items(self) -> Self: with_groups=True, # so that we can distinguish neighboring lists ): if isinstance(item, ListItem) and ( - item.parent is None - or not isinstance(item.parent.resolve(doc=self), ListGroup) + item.parent is None or not isinstance(item.parent.resolve(doc=self), ListGroup) ): if isinstance(prev, ListItem) and ( prev.parent is None or prev.parent.resolve(self) == self.body @@ -6182,7 +5868,6 @@ def validate_misplaced_list_items(self) -> Self: prev = item for curr_list_items in reversed(misplaced_list_items): - # add group new_group = ListGroup(self_ref="#") self.insert_item_before_sibling( @@ -6227,16 +5912,9 @@ class _DocIndex(BaseModel): def get_item_list(self, key: str) -> list[NodeItem]: return getattr(self, key) - def index( - self, doc: "DoclingDocument", page_nrs: Optional[set[int]] = None - ) -> None: - - if page_nrs is not None and ( - unavailable_page_nrs := page_nrs - set(doc.pages.keys()) - ): - raise ValueError( - f"The following page numbers are not present in the document: {unavailable_page_nrs}" - ) + def index(self, doc: "DoclingDocument", page_nrs: Optional[set[int]] = None) -> None: + if page_nrs is not None and (unavailable_page_nrs := page_nrs - set(doc.pages.keys())): + raise ValueError(f"The following page numbers are not present in the document: {unavailable_page_nrs}") orig_ref_to_new_ref: dict[str, str] = {} page_delta = self._max_page - min(doc.pages.keys()) + 1 if doc.pages else 0 @@ -6255,9 +5933,7 @@ def index( ): key = item.self_ref.split("/")[1] is_body = key == "body" - new_cref = ( - "#/body" if is_body else f"#/{key}/{len(self.get_item_list(key))}" - ) + new_cref = "#/body" if is_body else f"#/{key}/{len(self.get_item_list(key))}" # register cref mapping: 
orig_ref_to_new_ref[item.self_ref] = new_cref @@ -6281,15 +5957,10 @@ def index( # set item's parent new_parent_cref = orig_ref_to_new_ref.get(item.parent.cref) if new_parent_cref is None: - parent_ref = item.parent while new_parent_cref is None and parent_ref is not None: - parent_ref = RefItem( - cref=parent_ref.resolve(doc).parent.cref - ) - new_parent_cref = orig_ref_to_new_ref.get( - parent_ref.cref - ) + parent_ref = RefItem(cref=parent_ref.resolve(doc).parent.cref) + new_parent_cref = orig_ref_to_new_ref.get(parent_ref.cref) if new_parent_cref is not None: warnings.warn( @@ -6297,9 +5968,7 @@ def index( f"using ancestor {new_parent_cref} instead" ) else: - warnings.warn( - "No ancestor found in indexed nodes, using body as parent" - ) + warnings.warn("No ancestor found in indexed nodes, using body as parent") new_parent_cref = "#/body" new_item.parent = RefItem(cref=new_parent_cref) @@ -6316,27 +5985,20 @@ def index( if isinstance(parent_item, FloatingItem): for cap_it, cap in enumerate(parent_item.captions): if cap.cref == item.self_ref: - parent_item.captions[cap_it] = RefItem( - cref=new_cref - ) + parent_item.captions[cap_it] = RefItem(cref=new_cref) break # update rich table cells references: if isinstance(parent_item, TableItem): for cell in parent_item.data.table_cells: - if ( - isinstance(cell, RichTableCell) - and cell.ref.cref == item.self_ref - ): + if isinstance(cell, RichTableCell) and cell.ref.cref == item.self_ref: cell.ref.cref = new_cref break elif num_components == 2 and path_components[1] == "body": parent_item = self._body else: - raise RuntimeError( - f"Unsupported ref format: {new_parent_cref}" - ) + raise RuntimeError(f"Unsupported ref format: {new_parent_cref}") parent_item.children.append(RefItem(cref=new_cref)) # update pages @@ -6392,7 +6054,6 @@ def concatenate(cls, docs: Sequence["DoclingDocument"]) -> "DoclingDocument": return res_doc def _validate_rules(self, raise_on_error: bool = True): - def _handle(error: Exception): if raise_on_error: raise error @@ -6405,9 +6066,7 @@ def validate_furniture(doc: DoclingDocument): has_furniture_children = len(doc.furniture.children) > 0 if has_furniture_children: _handle( - ValueError( - f"Deprecated furniture node {doc.furniture.self_ref} has children" - ), + ValueError(f"Deprecated furniture node {doc.furniture.self_ref} has children"), ) def validate_list_group(doc: DoclingDocument, item: ListGroup): @@ -6427,15 +6086,11 @@ def validate_list_item(doc: DoclingDocument, item: ListItem): ) elif not isinstance(item.parent.resolve(doc), ListGroup): _handle( - ValueError( - f"ListItem {item.self_ref} has non-ListGroup parent: {item.parent.cref}" - ), + ValueError(f"ListItem {item.self_ref} has non-ListGroup parent: {item.parent.cref}"), ) def validate_group(doc: DoclingDocument, item: GroupItem): - if ( - item.parent and not item.children - ): # tolerate empty body, but not other groups + if item.parent and not item.children: # tolerate empty body, but not other groups _handle( ValueError(f"Group {item.self_ref} has no children"), ) @@ -6460,12 +6115,8 @@ def add_table_cell(self, table_item: TableItem, cell: TableCell) -> None: """Add a table cell to the table.""" if isinstance(cell, RichTableCell): item = cell.ref.resolve(doc=self) - if isinstance(item, NodeItem) and ( - (not item.parent) or item.parent.cref != table_item.self_ref - ): - raise ValueError( - f"Trying to add cell with another parent {item.parent} to {table_item.self_ref}" - ) + if isinstance(item, NodeItem) and ((not item.parent) or item.parent.cref != 
table_item.self_ref): + raise ValueError(f"Trying to add cell with another parent {item.parent} to {table_item.self_ref}") table_item.data.table_cells.append(cell) diff --git a/docling_core/types/doc/labels.py b/docling_core/types/doc/labels.py index 835cea42..76223a5e 100644 --- a/docling_core/types/doc/labels.py +++ b/docling_core/types/doc/labels.py @@ -74,9 +74,7 @@ class GroupLabel(str, Enum): """GroupLabel.""" UNSPECIFIED = "unspecified" - LIST = ( - "list" # group label for list container (not the list-items) (e.g. HTML
<ul>) - ) + LIST = "list" # group label for list container (not the list-items) (e.g. HTML <ul>
        ) ORDERED_LIST = "ordered_list" # deprecated CHAPTER = "chapter" SECTION = "section" diff --git a/docling_core/types/doc/page.py b/docling_core/types/doc/page.py index 7fc67ddf..0c4dfda9 100644 --- a/docling_core/types/doc/page.py +++ b/docling_core/types/doc/page.py @@ -332,9 +332,7 @@ class PdfCellRenderingMode(int, Enum): class PdfTextCell(TextCell): """Specialized text cell for PDF documents with font information.""" - rendering_mode: ( - PdfCellRenderingMode # Turn into enum (PDF32000 Text Rendering Mode) - ) + rendering_mode: PdfCellRenderingMode # Turn into enum (PDF32000 Text Rendering Mode) widget: bool # Determines if this belongs to fillable PDF field. font_key: str @@ -347,9 +345,7 @@ class PdfTextCell(TextCell): def update_ltr_property(cls, data: dict) -> dict: """Update text direction property from left_to_right flag.""" if "left_to_right" in data: - data["text_direction"] = ( - "left_to_right" if data["left_to_right"] else "right_to_left" - ) + data["text_direction"] = "left_to_right" if data["left_to_right"] else "right_to_left" # if "ordering" in data: # data["index"] = data["ordering"] return data @@ -395,7 +391,7 @@ def iterate_segments( self, ) -> Iterator[Tuple[Coord2D, Coord2D]]: """Iterate through line segments defined by consecutive point pairs.""" - for k in range(0, len(self.points) - 1): + for k in range(len(self.points) - 1): yield (self.points[k], self.points[k + 1]) def to_bottom_left_origin(self, page_height: float): @@ -619,9 +615,7 @@ def load_from_json(cls, filename: Union[str, Path]) -> "SegmentedPdfPage": with open(filename, "r", encoding="utf-8") as f: return cls.model_validate_json(f.read()) - def crop_text( - self, cell_unit: TextCellUnit, bbox: BoundingBox, eps: float = 1.0 - ) -> str: + def crop_text(self, cell_unit: TextCellUnit, bbox: BoundingBox, eps: float = 1.0) -> str: """Extract text from cells within the specified bounding box. 
Args: @@ -633,16 +627,9 @@ def crop_text( """ selection = [] for page_cell in self.iterate_cells(cell_unit): - cell_bbox = page_cell.rect.to_bottom_left_origin( - page_height=self.dimension.height - ).to_bounding_box() - - if ( - bbox.l <= cell_bbox.l - and cell_bbox.r <= bbox.r - and bbox.b <= cell_bbox.b - and cell_bbox.t <= bbox.t - ): + cell_bbox = page_cell.rect.to_bottom_left_origin(page_height=self.dimension.height).to_bounding_box() + + if bbox.l <= cell_bbox.l and cell_bbox.r <= bbox.r and bbox.b <= cell_bbox.b and cell_bbox.t <= bbox.t: selection.append(page_cell.copy()) selection = sorted(selection, key=lambda x: x.index) @@ -654,10 +641,7 @@ def crop_text( else: prev = selection[i - 1] - if ( - abs(cell.rect.r_x0 - prev.rect.r_x1) < eps - and abs(cell.rect.r_y0 - prev.rect.r_y1) < eps - ): + if abs(cell.rect.r_x0 - prev.rect.r_x1) < eps and abs(cell.rect.r_y0 - prev.rect.r_y1) < eps: text += cell.text else: text += " " @@ -801,9 +785,7 @@ def render_as_image( page_height = page_bbox.height # Create a blank white image with RGBA mode - result = PILImage.new( - "RGBA", (round(page_width), round(page_height)), (255, 255, 255, 255) - ) + result = PILImage.new("RGBA", (round(page_width), round(page_height)), (255, 255, 255, 255)) draw = ImageDraw.Draw(result) # Draw each rectangle by connecting its four points @@ -817,9 +799,7 @@ def render_as_image( ) if draw_cells_text: - result = self._render_cells_text( - cell_unit=cell_unit, img=result, page_height=page_height - ) + result = self._render_cells_text(cell_unit=cell_unit, img=result, page_height=page_height) elif draw_cells_bbox: self._render_cells_bbox( @@ -902,16 +882,10 @@ def _render_bitmap_resources( Updated ImageDraw object """ for bitmap_resource in self.bitmap_resources: - poly = bitmap_resource.rect.to_top_left_origin( - page_height=page_height - ).to_polygon() + poly = bitmap_resource.rect.to_top_left_origin(page_height=page_height).to_polygon() - fill = self._get_rgba( - name=bitmap_resources_fill, alpha=bitmap_resources_alpha - ) - outline = self._get_rgba( - name=bitmap_resources_outline, alpha=bitmap_resources_alpha - ) + fill = self._get_rgba(name=bitmap_resources_fill, alpha=bitmap_resources_alpha) + outline = self._get_rgba(name=bitmap_resources_outline, alpha=bitmap_resources_alpha) draw.polygon(poly, outline=outline, fill=fill) @@ -944,9 +918,7 @@ def _render_cells_bbox( # Draw each rectangle by connecting its four points for page_cell in self.iterate_cells(unit_type=cell_unit): - poly = page_cell.rect.to_top_left_origin( - page_height=page_height - ).to_polygon() + poly = page_cell.rect.to_top_left_origin(page_height=page_height).to_polygon() draw.polygon(poly, outline=outline, fill=fill) return draw @@ -995,9 +967,7 @@ def _draw_text_in_rectangle( _, _, text_width, text_height = tmp_draw.textbbox((0, 0), text=text, font=font) # Create a properly sized temporary image - text_img = PILImage.new( - "RGBA", (round(text_width), round(text_height)), (255, 255, 255, 255) - ) + text_img = PILImage.new("RGBA", (round(text_width), round(text_height)), (255, 255, 255, 255)) text_draw = ImageDraw.Draw(text_img) text_draw.text((0, 0), text, font=font, fill=(0, 0, 0, 255)) @@ -1017,9 +987,7 @@ def _draw_text_in_rectangle( return img - def _render_cells_text( - self, cell_unit: TextCellUnit, img: PILImage.Image, page_height: float - ) -> PILImage.Image: + def _render_cells_text(self, cell_unit: TextCellUnit, img: PILImage.Image, page_height: float) -> PILImage.Image: """Render text content of cells on the image. 
Args: @@ -1070,9 +1038,7 @@ def _draw_cells_bl( # Draw each rectangle by connecting its four points for page_cell in self.iterate_cells(unit_type=cell_unit): - poly = page_cell.rect.to_top_left_origin( - page_height=page_height - ).to_polygon() + poly = page_cell.rect.to_top_left_origin(page_height=page_height).to_polygon() # Define the bounding box for the dot dot_bbox = [ (poly[0][0] - cell_bl_radius, poly[0][1] - cell_bl_radius), @@ -1113,9 +1079,7 @@ def _draw_cells_tr( # Draw each rectangle by connecting its four points for page_cell in self.iterate_cells(unit_type=cell_unit): - poly = page_cell.rect.to_top_left_origin( - page_height=page_height - ).to_polygon() + poly = page_cell.rect.to_top_left_origin(page_height=page_height).to_polygon() # Define the bounding box for the dot dot_bbox = [ (poly[0][0] - cell_tr_radius, poly[0][1] - cell_tr_radius), @@ -1181,9 +1145,7 @@ def initialise(self): for _ in matches: namespace_open, tag_open, content, namespace_close, tag_close = _ if namespace_open == namespace_close and tag_open == tag_close: - _logger.debug( - f"Namespace: {namespace_open}, Tag: {tag_open}, Content: {content}" - ) + _logger.debug(f"Namespace: {namespace_open}, Tag: {tag_open}, Content: {content}") self.data[tag_open] = content @@ -1253,8 +1215,7 @@ def iterate_pages( Returns: Iterator of (page number, page) tuples """ - for page_no, page in self.pages.items(): - yield (page_no, page) + yield from self.pages.items() def export_to_dict( self, diff --git a/docling_core/types/doc/tokens.py b/docling_core/types/doc/tokens.py index 6b4f6919..81d831ba 100644 --- a/docling_core/types/doc/tokens.py +++ b/docling_core/types/doc/tokens.py @@ -206,7 +206,7 @@ def get_special_tokens( special_tokens.extend(TableToken.get_special_tokens()) # Adding dynamically generated location-tokens - for i in range(0, max(page_dimension[0], page_dimension[1])): + for i in range(max(page_dimension[0], page_dimension[1])): special_tokens.append(f"<{_LOC_PREFIX}{i}>") return special_tokens @@ -265,9 +265,7 @@ def get_code_language_token(code_language: str, self_closing: bool = False) -> s return _CodeLanguageToken(f"<_{code_language}_>").value @staticmethod - def get_location_token( - val: float, rnorm: int = 500, self_closing: bool = False - ): # TODO review + def get_location_token(val: float, rnorm: int = 500, self_closing: bool = False): # TODO review """Function to get location tokens.""" val_ = round(rnorm * val) val_ = max(val_, 0) @@ -292,18 +290,10 @@ def get_location( x1 = bbox[2] / page_w y1 = bbox[3] / page_h - x0_tok = DocumentToken.get_location_token( - val=min(x0, x1), rnorm=xsize, self_closing=self_closing - ) - y0_tok = DocumentToken.get_location_token( - val=min(y0, y1), rnorm=ysize, self_closing=self_closing - ) - x1_tok = DocumentToken.get_location_token( - val=max(x0, x1), rnorm=xsize, self_closing=self_closing - ) - y1_tok = DocumentToken.get_location_token( - val=max(y0, y1), rnorm=ysize, self_closing=self_closing - ) + x0_tok = DocumentToken.get_location_token(val=min(x0, x1), rnorm=xsize, self_closing=self_closing) + y0_tok = DocumentToken.get_location_token(val=min(y0, y1), rnorm=ysize, self_closing=self_closing) + x1_tok = DocumentToken.get_location_token(val=max(x0, x1), rnorm=xsize, self_closing=self_closing) + y1_tok = DocumentToken.get_location_token(val=max(y0, y1), rnorm=ysize, self_closing=self_closing) loc_str = f"{x0_tok}{y0_tok}{x1_tok}{y1_tok}" diff --git a/docling_core/types/doc/utils.py b/docling_core/types/doc/utils.py index c4e517a5..a105ae14 100644 --- 
a/docling_core/types/doc/utils.py +++ b/docling_core/types/doc/utils.py @@ -53,9 +53,7 @@ def relative_path(src: Path, target: Path) -> Path: return Path(*up_segments, *down_segments) -def get_html_tag_with_text_direction( - html_tag: str, text: str, attrs: Optional[dict] = None -) -> str: +def get_html_tag_with_text_direction(html_tag: str, text: str, attrs: Optional[dict] = None) -> str: """Form the HTML element with tag, text, and optional dir attribute.""" my_attrs = attrs or {} if (dir := my_attrs.get("dir")) is not None and dir != "ltr": @@ -63,10 +61,7 @@ def get_html_tag_with_text_direction( pieces: list[str] = [html_tag] if my_attrs: attrs_str = " ".join( - [ - f'{html.escape(k, quote=False)}="{html.escape(my_attrs[k], quote=False)}"' - for k in my_attrs - ] + [f'{html.escape(k, quote=False)}="{html.escape(my_attrs[k], quote=False)}"' for k in my_attrs] ) pieces.append(attrs_str) return f"<{' '.join(pieces)}>{text}" @@ -80,12 +75,7 @@ def get_text_direction(text: str) -> str: rtl_scripts = {"R", "AL"} rtl_chars = sum(unicodedata.bidirectional(c) in rtl_scripts for c in text) - return ( - "rtl" - if unicodedata.bidirectional(text[0]) in rtl_scripts - or rtl_chars > len(text) / 2 - else "ltr" - ) + return "rtl" if unicodedata.bidirectional(text[0]) in rtl_scripts or rtl_chars > len(text) / 2 else "ltr" def otsl_extract_tokens_and_text(s: str) -> Tuple[List[str], List[str]]: @@ -128,9 +118,7 @@ def otsl_extract_tokens_and_text(s: str) -> Tuple[List[str], List[str]]: return tokens, text_parts -def otsl_parse_texts( - texts: List[str], tokens: List[str] -) -> Tuple[List["TableCell"], List[List[str]]]: +def otsl_parse_texts(texts: List[str], tokens: List[str]) -> Tuple[List["TableCell"], List[List[str]]]: """Parse OTSL texts and tokens into table cells.""" from docling_core.types.doc.document import TableCell @@ -151,19 +139,13 @@ def otsl_parse_texts( ]: clean_tokens.append(t) tokens = clean_tokens - split_row_tokens = [ - list(y) - for x, y in itertools.groupby(tokens, lambda z: z == split_word) - if not x - ] + split_row_tokens = [list(y) for x, y in itertools.groupby(tokens, lambda z: z == split_word) if not x] table_cells = [] r_idx = 0 c_idx = 0 - def count_right( - tokens: List[List[str]], c_idx: int, r_idx: int, which_tokens: List[str] - ) -> int: + def count_right(tokens: List[List[str]], c_idx: int, r_idx: int, which_tokens: List[str]) -> int: span = 0 c_idx_iter = c_idx while tokens[r_idx][c_idx_iter] in which_tokens: @@ -173,9 +155,7 @@ def count_right( return span return span - def count_down( - tokens: List[List[str]], c_idx: int, r_idx: int, which_tokens: List[str] - ) -> int: + def count_down(tokens: List[List[str]], c_idx: int, r_idx: int, which_tokens: List[str]) -> int: span = 0 r_idx_iter = r_idx while tokens[r_idx_iter][c_idx] in which_tokens: diff --git a/docling_core/types/gen/generic.py b/docling_core/types/gen/generic.py index 7596bf4e..3fa5a7a5 100644 --- a/docling_core/types/gen/generic.py +++ b/docling_core/types/gen/generic.py @@ -21,8 +21,6 @@ class Generic(AliasModel): file_info: FileInfoObject = Field( title="Document information", - description=( - "Minimal identification information of the document within a collection." 
- ), + description=("Minimal identification information of the document within a collection."), alias="file-info", ) diff --git a/docling_core/types/legacy_doc/base.py b/docling_core/types/legacy_doc/base.py index 6b9a2ee6..16e08338 100644 --- a/docling_core/types/legacy_doc/base.py +++ b/docling_core/types/legacy_doc/base.py @@ -48,18 +48,14 @@ class S3Data(AliasModel): pdf_images: Optional[list[S3Resource]] = Field(default=None, alias="pdf-images") json_document: Optional[S3Resource] = Field(default=None, alias="json-document") json_meta: Optional[S3Resource] = Field(default=None, alias="json-meta") - glm_json_document: Optional[S3Resource] = Field( - default=None, alias="glm-json-document" - ) + glm_json_document: Optional[S3Resource] = Field(default=None, alias="glm-json-document") figures: Optional[list[S3Resource]] = None class S3Reference(AliasModel): """References an s3 resource.""" - ref_s3_data: StrictStr = Field( - alias="__ref_s3_data", examples=["#/_s3_data/figures/0"] - ) + ref_s3_data: StrictStr = Field(alias="__ref_s3_data", examples=["#/_s3_data/figures/0"]) class Prov(AliasModel): @@ -84,9 +80,7 @@ class BitmapObject(AliasModel): """Bitmap object.""" obj_type: str = Field(alias="type") - bounding_box: BoundingBoxContainer = Field( - json_schema_extra=es_field(suppress=True) - ) + bounding_box: BoundingBoxContainer = Field(json_schema_extra=es_field(suppress=True)) prov: Prov @@ -111,31 +105,19 @@ class GlmTableCell(TableCell): """Glm Table cell.""" col: Optional[int] = Field(default=None, json_schema_extra=es_field(suppress=True)) - col_header: bool = Field( - default=False, alias="col-header", json_schema_extra=es_field(suppress=True) - ) - col_span: Optional[Span] = Field( - default=None, alias="col-span", json_schema_extra=es_field(suppress=True) - ) + col_header: bool = Field(default=False, alias="col-header", json_schema_extra=es_field(suppress=True)) + col_span: Optional[Span] = Field(default=None, alias="col-span", json_schema_extra=es_field(suppress=True)) row: Optional[int] = Field(default=None, json_schema_extra=es_field(suppress=True)) - row_header: bool = Field( - default=False, alias="row-header", json_schema_extra=es_field(suppress=True) - ) - row_span: Optional[Span] = Field( - default=None, alias="row-span", json_schema_extra=es_field(suppress=True) - ) + row_header: bool = Field(default=False, alias="row-header", json_schema_extra=es_field(suppress=True)) + row_span: Optional[Span] = Field(default=None, alias="row-span", json_schema_extra=es_field(suppress=True)) class BaseCell(AliasModel): """Base cell.""" prov: Optional[list[Prov]] = None - text: Optional[str] = Field( - default=None, json_schema_extra=es_field(term_vector="with_positions_offsets") - ) - obj_type: str = Field( - alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + text: Optional[str] = Field(default=None, json_schema_extra=es_field(term_vector="with_positions_offsets")) + obj_type: str = Field(alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)) payload: Optional[dict] = None def get_location_tokens( @@ -153,7 +135,6 @@ def get_location_tokens( location = "" for prov in self.prov: - page_i = -1 if add_page_index: page_i = prov.page @@ -247,8 +228,8 @@ def export_to_html(self) -> str: for j in range(ncols): cell: TableCell = self.data[i][j] - rowspan, rowstart, rowend = self._get_tablecell_span(cell, 0) - colspan, colstart, colend = self._get_tablecell_span(cell, 1) + rowspan, rowstart, _ = self._get_tablecell_span(cell, 0) + colspan, 
colstart, _ = self._get_tablecell_span(cell, 1) if rowstart is not None and rowstart != i: continue @@ -318,7 +299,6 @@ def export_to_document_tokens( for i, row in enumerate(self.data): body += f"" for j, col in enumerate(row): - text = "" if add_cell_text: text = col.text.strip() @@ -339,11 +319,7 @@ def export_to_document_tokens( ysize=ysize, page_i=self.prov[0].page, ) - elif ( - col.bbox is not None - and add_cell_location - and not add_page_index - ): + elif col.bbox is not None and add_cell_location and not add_page_index: cell_loc = DocumentToken.get_location( bbox=col.bbox, page_w=page_w, @@ -354,11 +330,7 @@ def export_to_document_tokens( ) cell_label = "" - if ( - add_cell_label - and col.obj_type is not None - and len(col.obj_type) > 0 - ): + if add_cell_label and col.obj_type is not None and len(col.obj_type) > 0: cell_label = f"<{col.obj_type}>" body += f"{cell_loc}{cell_label}{text}" @@ -419,9 +391,7 @@ class BaseText(BaseCell): """Base model for text objects.""" # FIXME: do we need these ??? - name: Optional[StrictStr] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + name: Optional[StrictStr] = Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) font: Optional[str] = None def export_to_document_tokens( @@ -438,9 +408,7 @@ def export_to_document_tokens( """Export text element to document tokens format.""" body = f"<{self.obj_type}>" - assert DocumentToken.is_known_token( - body - ), f"failed DocumentToken.is_known_token({body})" + assert DocumentToken.is_known_token(body), f"failed DocumentToken.is_known_token({body})" if add_location: body += self.get_location_tokens( diff --git a/docling_core/types/legacy_doc/doc_raw.py b/docling_core/types/legacy_doc/doc_raw.py index 5b177196..1428341c 100644 --- a/docling_core/types/legacy_doc/doc_raw.py +++ b/docling_core/types/legacy_doc/doc_raw.py @@ -153,9 +153,7 @@ class Page(AliasModel): cells: list[Cell] paths: list[Path] vertical_lines: Optional[list[VerticalLine]] = Field(..., alias="vertical-lines") - horizontal_lines: Optional[list[HorizontalLine]] = Field( - ..., alias="horizontal-lines" - ) + horizontal_lines: Optional[list[HorizontalLine]] = Field(..., alias="horizontal-lines") ignored_cells: list[IgnoredCell] = Field(..., alias="ignored-cells") images: list[Image] fonts: dict[str, FontInfo] diff --git a/docling_core/types/legacy_doc/document.py b/docling_core/types/legacy_doc/document.py index 91b4c2ac..c289d986 100644 --- a/docling_core/types/legacy_doc/document.py +++ b/docling_core/types/legacy_doc/document.py @@ -61,12 +61,8 @@ class CCSFileInfoObject(FileInfoObject, extra="forbid"): alias="collection-name", json_schema_extra=es_field(type="keyword", ignore_above=8191), ) - description: Optional[CCSFileInfoDescription] = Field( - default=None, json_schema_extra=es_field(suppress=True) - ) - page_hashes: Optional[list[PageReference]] = Field( - default=None, alias="page-hashes" - ) + description: Optional[CCSFileInfoDescription] = Field(default=None, json_schema_extra=es_field(suppress=True)) + page_hashes: Optional[list[PageReference]] = Field(default=None, alias="page-hashes") class Affiliation(BaseModel, extra="forbid"): @@ -85,12 +81,8 @@ class Affiliation(BaseModel, extra="forbid"): }, ), ) - id: Optional[str] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) - source: Optional[str] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + id: Optional[str] = 
Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) + source: Optional[str] = Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) class Author(BaseModel, extra="forbid"): @@ -110,12 +102,8 @@ class Author(BaseModel, extra="forbid"): }, ), ) - id: Optional[str] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) - source: Optional[str] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + id: Optional[str] = Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) + source: Optional[str] = Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) affiliations: Optional[list[Affiliation]] = None @@ -166,9 +154,7 @@ class Publication(BaseModel, Generic[IdentifierTypeT], extra="forbid"): class DescriptionLicense(BaseModel, extra="forbid"): """Licence in document description.""" - code: Optional[StrictStr] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + code: Optional[StrictStr] = Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) text: Optional[StrictStr] = None @@ -190,13 +176,9 @@ class CCSDocumentDescription( affiliations: Optional[list[Affiliation]] = None subjects: Optional[list[str]] = Field( default=None, - json_schema_extra=es_field( - fields={"keyword": {"ignore_above": 8191, "type": "keyword"}} - ), - ) - keywords: Optional[list[str]] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) + json_schema_extra=es_field(fields={"keyword": {"ignore_above": 8191, "type": "keyword"}}), ) + keywords: Optional[list[str]] = Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) publication_date: Optional[datetime] = None languages: Optional[list[LanguageT]] = Field( default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) @@ -205,9 +187,7 @@ class CCSDocumentDescription( publishers: Optional[list[StrictStr]] = Field( default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) ) - url_refs: Optional[list[str]] = Field( - default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + url_refs: Optional[list[str]] = Field(default=None, json_schema_extra=es_field(type="keyword", ignore_above=8191)) references: Optional[list[Identifier[IdentifierTypeT]]] = None publication: Optional[list[Publication]] = Field( default=None, description="List of publication journals or venues." @@ -240,10 +220,7 @@ class CCSDocumentDescription( ) acquisition: Optional[Acquisition] = Field( default=None, - description=( - "Information on how the document was obtained, for data governance" - " purposes." 
- ), + description=("Information on how the document was obtained, for data governance purposes."), ) @@ -269,9 +246,7 @@ class MinimalDocument( CollectionNameTypeT, ] file_info: FileInfoObject = Field(alias="file-info") - main_text: Optional[list[Union[Ref, BaseText]]] = Field( - default=None, alias="main-text" - ) + main_text: Optional[list[Union[Ref, BaseText]]] = Field(default=None, alias="main-text") figures: Optional[list[Figure]] = None tables: Optional[list[Table]] = None @@ -297,9 +272,7 @@ class CCSDocument( default=None, alias="main-text", ) - page_dimensions: Optional[list[PageDimensions]] = Field( - default=None, alias="page-dimensions" - ) + page_dimensions: Optional[list[PageDimensions]] = Field(default=None, alias="page-dimensions") page_footers: Optional[list[BaseText]] = Field(default=None, alias="page-footers") page_headers: Optional[list[BaseText]] = Field(default=None, alias="page-headers") s3_data: Optional[S3Data] = Field(default=None, alias="_s3_data") @@ -370,12 +343,8 @@ class ExportedCCSDocument( CollectionNameTypeT, ] file_info: CCSFileInfoObject = Field(alias="file-info") - main_text: Optional[list[Union[Ref, BaseText]]] = Field( - default=None, alias="main-text" - ) - page_dimensions: Optional[list[PageDimensions]] = Field( - default=None, alias="page-dimensions" - ) + main_text: Optional[list[Union[Ref, BaseText]]] = Field(default=None, alias="main-text") + page_dimensions: Optional[list[PageDimensions]] = Field(default=None, alias="page-dimensions") page_footers: Optional[list[BaseText]] = Field(default=None, alias="page-footers") page_headers: Optional[list[BaseText]] = Field(default=None, alias="page-headers") s3_data: Optional[S3Data] = Field(default=None, alias="_s3_data") @@ -433,7 +402,7 @@ def export_to_dict(self) -> Dict[str, Any]: """export_to_dict.""" return self.model_dump(mode="json", by_alias=True, exclude_none=True) - def export_to_markdown( # noqa: C901 + def export_to_markdown( self, delim: str = "\n\n", main_text_start: int = 0, @@ -480,30 +449,18 @@ def export_to_markdown( # noqa: C901 # to avoid repeating them embedded_captions = set() for orig_item in self.main_text[main_text_start:main_text_stop]: - item = ( - self._resolve_ref(orig_item) - if isinstance(orig_item, Ref) - else orig_item - ) + item = self._resolve_ref(orig_item) if isinstance(orig_item, Ref) else orig_item if item is None: continue - if ( - isinstance(item, (Table, Figure)) - and item.text - and item.obj_type in main_text_labels - ): + if isinstance(item, (Table, Figure)) and item.text and item.obj_type in main_text_labels: embedded_captions.add(item.text) # serialize document to markdown for orig_item in self.main_text[main_text_start:main_text_stop]: markdown_text = "" - item = ( - self._resolve_ref(orig_item) - if isinstance(orig_item, Ref) - else orig_item - ) + item = self._resolve_ref(orig_item) if isinstance(orig_item, Ref) else orig_item if item is None: continue @@ -531,9 +488,7 @@ def export_to_markdown( # noqa: C901 has_title = True # secondary titles - elif item_type in {"title", "subtitle-level-1"} or ( - has_title and item_type == "title" - ): + elif item_type in {"title", "subtitle-level-1"} or (has_title and item_type == "title"): if strict_text: markdown_text = f"{text}" else: @@ -543,12 +498,7 @@ def export_to_markdown( # noqa: C901 else: markdown_text = text - elif ( - isinstance(item, Table) - and (item.data or item.text) - and item_type in main_text_labels - ): - + elif isinstance(item, Table) and (item.data or item.text) and item_type in 
main_text_labels: md_table = "" table = [] if item.data is not None: @@ -560,9 +510,7 @@ def export_to_markdown( # noqa: C901 if len(table) > 1 and len(table[0]) > 0: try: - md_table = tabulate( - table[1:], headers=table[0], tablefmt="github" - ) + md_table = tabulate(table[1:], headers=table[0], tablefmt="github") except ValueError: md_table = tabulate( table[1:], @@ -575,19 +523,14 @@ def export_to_markdown( # noqa: C901 if item.text: markdown_text = item.text if not strict_text: - markdown_text += ( - "\n\n" if len(markdown_text) > 0 else "" - ) + md_table + markdown_text += ("\n\n" if len(markdown_text) > 0 else "") + md_table elif isinstance(item, Figure) and item_type in main_text_labels: - markdown_text = "" if item.text: markdown_text = item.text if not strict_text: - markdown_text += ( - "\n" if len(markdown_text) > 0 else "" - ) + image_placeholder + markdown_text += ("\n" if len(markdown_text) > 0 else "") + image_placeholder if markdown_text: md_texts.append(markdown_text) @@ -636,12 +579,7 @@ def export_to_document_tokens( if self.main_text is not None: for orig_item in self.main_text[main_text_start:main_text_stop]: - - item = ( - self._resolve_ref(orig_item) - if isinstance(orig_item, Ref) - else orig_item - ) + item = self._resolve_ref(orig_item) if isinstance(orig_item, Ref) else orig_item if item is None: continue @@ -652,13 +590,7 @@ def export_to_document_tokens( page_w = 0.0 page_h = 0.0 - if ( - add_location - and self.page_dimensions is not None - and prov is not None - and len(prov) > 0 - ): - + if add_location and self.page_dimensions is not None and prov is not None and len(prov) > 0: page_i = prov[0].page page_dim = self.page_dimensions[page_i - 1] @@ -667,7 +599,6 @@ def export_to_document_tokens( item_type = item.obj_type if isinstance(item, BaseText) and (item_type in main_text_labels): - doctags += item.export_to_document_tokens( new_line=new_line, page_w=page_w, @@ -680,7 +611,6 @@ def export_to_document_tokens( ) elif isinstance(item, Table) and (item_type in main_text_labels): - doctags += item.export_to_document_tokens( new_line=new_line, page_w=page_w, @@ -697,7 +627,6 @@ def export_to_document_tokens( ) elif isinstance(item, Figure) and (item_type in main_text_labels): - doctags += item.export_to_document_tokens( new_line=new_line, page_w=page_w, diff --git a/docling_core/types/legacy_doc/tokens.py b/docling_core/types/legacy_doc/tokens.py index 3936ecad..3ac6f0fc 100644 --- a/docling_core/types/legacy_doc/tokens.py +++ b/docling_core/types/legacy_doc/tokens.py @@ -99,10 +99,10 @@ def get_special_tokens( special_tokens = [token.value for token in cls] # Adding dynamically generated row and col tokens - for i in range(0, max_rows + 1): + for i in range(max_rows + 1): special_tokens += [f"", f""] - for i in range(0, max_cols + 1): + for i in range(max_cols + 1): special_tokens += [f"", f""] for i in range(6): @@ -113,12 +113,12 @@ def get_special_tokens( special_tokens += [f"", f""] # Adding dynamically generated page-tokens - for i in range(0, max_pages + 1): + for i in range(max_pages + 1): special_tokens.append(f"") special_tokens.append(f"") # Adding dynamically generated location-tokens - for i in range(0, max(page_dimension[0] + 1, page_dimension[1] + 1)): + for i in range(max(page_dimension[0] + 1, page_dimension[1] + 1)): special_tokens.append(f"") return special_tokens diff --git a/docling_core/types/nlp/qa.py b/docling_core/types/nlp/qa.py index 6f9159cf..3d7e1e3d 100644 --- a/docling_core/types/nlp/qa.py +++ b/docling_core/types/nlp/qa.py @@ 
-13,18 +13,11 @@ class QAPair(BaseModel, Generic[DescriptionAdvancedT]): """A representation of a question-answering (QA) pair.""" context: StrictStr = Field( - description=( - "A single string containing the context of the question enabling the" - " presentation of the answer." - ) + description=("A single string containing the context of the question enabling the presentation of the answer.") ) question: StrictStr = Field(description="A question on the given context.") - answer: StrictStr = Field( - description="The answer to the question from the context." - ) - short_answer: Optional[StrictStr] = Field( - default=None, description="Alternative and concise answer." - ) + answer: StrictStr = Field(description="The answer to the question from the context.") + short_answer: Optional[StrictStr] = Field(default=None, description="Alternative and concise answer.") retrieved_context: Optional[StrictBool] = Field( default=False, description="Whether the context was retrieved from the question.", @@ -35,14 +28,10 @@ class QAPair(BaseModel, Generic[DescriptionAdvancedT]): generated_answer: Optional[StrictBool] = Field( default=False, description="Whether the answer was generated by an AI model." ) - created: StrictDateTime = Field( - description="Datetime when the QA pair was created ." - ) + created: StrictDateTime = Field(description="Datetime when the QA pair was created .") user: Optional[StrictStr] = Field( default=None, - description=( - "Unique identifier of the user that created or curated this QA pair." - ), + description=("Unique identifier of the user that created or curated this QA pair."), json_schema_extra=es_field(type="keyword", ignore_above=8191), ) model: Optional[StrictStr] = Field( @@ -51,20 +40,12 @@ class QAPair(BaseModel, Generic[DescriptionAdvancedT]): json_schema_extra=es_field(type="keyword", ignore_above=8191), ) paths: UniqueList[StrictStr] = Field( - description=( - "One or more references to a document that identify the provenance of the" - " QA pair context." - ), - examples=[ - "badce7c84d0ba7ba0fb5e94492b0d91e2506a7cb48e4524ad572c546a35f768e#/" - "main-text/4" - ], + description=("One or more references to a document that identify the provenance of the QA pair context."), + examples=["badce7c84d0ba7ba0fb5e94492b0d91e2506a7cb48e4524ad572c546a35f768e#/main-text/4"], json_schema_extra=es_field(type="keyword", ignore_above=8191), ) advanced: Optional[DescriptionAdvancedT] = Field( default=None, description="Document metadata to provide more details on the context.", ) - labels: Optional[QALabelling] = Field( - default=None, description="QApair labelling axes." 
- ) + labels: Optional[QALabelling] = Field(default=None, description="QApair labelling axes.") diff --git a/docling_core/types/rec/attribute.py b/docling_core/types/rec/attribute.py index 95d77fcc..b6b8d74b 100644 --- a/docling_core/types/rec/attribute.py +++ b/docling_core/types/rec/attribute.py @@ -31,13 +31,11 @@ class Attribute( ): """Attribute model that describes a list of characteristics.""" - conf: Annotated[float, Field(strict=True, ge=0.0, le=1.0, allow_inf_nan=False)] = ( - Field( - ..., - title="Confidence", - description="The confidence level of this attribute characteristics.", - json_schema_extra=es_field(type="float"), - ) + conf: Annotated[float, Field(strict=True, ge=0.0, le=1.0, allow_inf_nan=False)] = Field( + ..., + title="Confidence", + description="The confidence level of this attribute characteristics.", + json_schema_extra=es_field(type="float"), ) prov: Optional[list[ProvenanceItem[IdentifierTypeT, ProvenanceTypeT]]] = Field( @@ -46,6 +44,6 @@ class Attribute( description="The sources of this attribute characteristics.", ) - predicates: list[ - Predicate[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT] - ] = Field(..., description="A list of characteristics (type, value, and name).") + predicates: list[Predicate[PredicateValueTypeT, PredicateKeyNameT, PredicateKeyTypeT]] = Field( + ..., description="A list of characteristics (type, value, and name)." + ) diff --git a/docling_core/types/rec/base.py b/docling_core/types/rec/base.py index 0d1af762..557166e6 100644 --- a/docling_core/types/rec/base.py +++ b/docling_core/types/rec/base.py @@ -10,19 +10,14 @@ from docling_core.utils.alias import AliasModel -class ProvenanceItem( - AliasModel, Generic[IdentifierTypeT, ProvenanceTypeT], extra="forbid" -): +class ProvenanceItem(AliasModel, Generic[IdentifierTypeT, ProvenanceTypeT], extra="forbid"): """A representation of an object provenance.""" type_: Optional[ProvenanceTypeT] = Field( default=None, alias="type", title="The provenance type", - description=( - "Any string representing the type of provenance, e.g. `sentence`, " - "`table`, or `doi`." - ), + description=("Any string representing the type of provenance, e.g. `sentence`, `table`, or `doi`."), json_schema_extra=es_field(type="keyword", ignore_above=8191), ) @@ -30,8 +25,7 @@ class ProvenanceItem( default=None, title="Evidence of the provenance", description=( - "A text representing the evidence of the provenance, e.g. the sentence " - "text or the content of a table cell" + "A text representing the evidence of the provenance, e.g. 
the sentence text or the content of a table cell" ), json_schema_extra=es_field(type="keyword", ignore_above=8191), ) @@ -56,15 +50,10 @@ class ProvenanceItem( json_schema_extra=es_field(type="keyword", ignore_above=8191), ) - span: Optional[Annotated[List[StrictInt], Field(min_length=2, max_length=2)]] = ( - Field( - default=None, - title="The location of the item in the text/table", - description=( - "location of the item in the text/table referenced by the `path`," - " e.g., `[34, 67]`" - ), - ) + span: Optional[Annotated[List[StrictInt], Field(min_length=2, max_length=2)]] = Field( + default=None, + title="The location of the item in the text/table", + description=("location of the item in the text/table referenced by the `path`, e.g., `[34, 67]`"), ) diff --git a/docling_core/types/rec/predicate.py b/docling_core/types/rec/predicate.py index c632c33b..983a8129 100644 --- a/docling_core/types/rec/predicate.py +++ b/docling_core/types/rec/predicate.py @@ -29,17 +29,13 @@ class NumericalValue(BaseModel, extra="forbid"): max: StrictFloat = Field(..., json_schema_extra=es_field(type="float")) val: StrictFloat = Field(..., json_schema_extra=es_field(type="float")) err: StrictFloat = Field(..., json_schema_extra=es_field(type="float")) - unit: StrictStr = Field( - ..., json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + unit: StrictStr = Field(..., json_schema_extra=es_field(type="keyword", ignore_above=8191)) class NominalValue(BaseModel, extra="forbid"): """Model for nominal (categorical) values.""" - value: StrictStr = Field( - ..., json_schema_extra=es_field(type="keyword", ignore_above=8191) - ) + value: StrictStr = Field(..., json_schema_extra=es_field(type="keyword", ignore_above=8191)) class TextValue(BaseModel, extra="forbid"): @@ -79,9 +75,7 @@ def validate_coordinates(cls, v): return v -class PredicateKey( - AliasModel, Generic[PredicateKeyNameT, PredicateKeyTypeT], extra="forbid" -): +class PredicateKey(AliasModel, Generic[PredicateKeyNameT, PredicateKeyTypeT], extra="forbid"): """Model for the key (unique identifier) of a predicate.""" name: PredicateKeyNameT = Field( diff --git a/docling_core/types/rec/record.py b/docling_core/types/rec/record.py index a63a4373..a78e7e48 100644 --- a/docling_core/types/rec/record.py +++ b/docling_core/types/rec/record.py @@ -28,25 +28,18 @@ class RecordDescription(BaseModel, Generic[CollectionNameTypeT]): """Additional record metadata, including optional collection-specific fields.""" - logs: list[Log] = Field( - description="Logs that describe the ETL tasks applied to this record." - ) + logs: list[Log] = Field(description="Logs that describe the ETL tasks applied to this record.") publication_date: Optional[StrictDateTime] = Field( default=None, title="Publication date", - description=( - "The date that best represents the last publication time of a record." - ), + description=("The date that best represents the last publication time of a record."), ) collection: Optional[CollectionRecordInfo[CollectionNameTypeT]] = Field( default=None, description="The collection information of this record." ) acquisition: Optional[Acquisition] = Field( default=None, - description=( - "Information on how the document was obtained, for data governance" - " purposes." 
- ), + description=("Information on how the document was obtained, for data governance purposes."), ) diff --git a/docling_core/types/rec/subject.py b/docling_core/types/rec/subject.py index 45655308..15f550f5 100644 --- a/docling_core/types/rec/subject.py +++ b/docling_core/types/rec/subject.py @@ -48,16 +48,12 @@ class Subject( type_: SubjectTypeT = Field( alias="type", description=( - "Main subject type. For instance, `material`, `material-class`, " - "`material-device`, `company`, or `person`." + "Main subject type. For instance, `material`, `material-class`, `material-device`, `company`, or `person`." ), json_schema_extra=es_field(type="keyword", ignore_above=8191), ) names: list[SubjectNameIdentifier[SubjectNameTypeT]] = Field( - description=( - "List of given names for this subject. They may not be unique across " - "different subjects." - ) + description=("List of given names for this subject. They may not be unique across different subjects.") ) identifiers: Optional[list[Identifier[IdentifierTypeT]]] = Field( default=None, diff --git a/docling_core/utils/file.py b/docling_core/utils/file.py index 78d2eb2a..f98be74d 100644 --- a/docling_core/utils/file.py +++ b/docling_core/utils/file.py @@ -11,7 +11,7 @@ from pydantic import AnyHttpUrl, TypeAdapter, ValidationError from typing_extensions import deprecated -from docling_core.types.doc.utils import relative_path # noqa +from docling_core.types.doc.utils import relative_path from docling_core.types.io import DocumentStream diff --git a/docling_core/utils/generate_docs.py b/docling_core/utils/generate_docs.py index 61ba4eb8..509491ff 100644 --- a/docling_core/utils/generate_docs.py +++ b/docling_core/utils/generate_docs.py @@ -43,9 +43,7 @@ def generate_collection_jsonschema(folder: str): """ for item in MODELS: json_schema = generate_json_schema(item) - with open( - os.path.join(folder, f"{item}.json"), mode="w", encoding="utf8" - ) as json_file: + with open(os.path.join(folder, f"{item}.json"), mode="w", encoding="utf8") as json_file: json.dump(json_schema, json_file, ensure_ascii=False, indent=2) @@ -54,10 +52,7 @@ def main() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( "directory", - help=( - "Directory to generate files. If it exists, any existing content will be" - " removed." - ), + help=("Directory to generate files. 
If it exists, any existing content will be removed."), ) argparser.add_argument( "--clean", diff --git a/docling_core/utils/generate_jsonschema.py b/docling_core/utils/generate_jsonschema.py index a06328de..af51f71b 100644 --- a/docling_core/utils/generate_jsonschema.py +++ b/docling_core/utils/generate_jsonschema.py @@ -43,15 +43,11 @@ def generate_json_schema(class_reference: str) -> Union[dict, None]: def main() -> None: """Print the JSON Schema of a model.""" argparser = argparse.ArgumentParser() - argparser.add_argument( - "class_ref", help="Class reference, e.g., doc.document.TableCell" - ) + argparser.add_argument("class_ref", help="Class reference, e.g., doc.document.TableCell") args = argparser.parse_args() json_schema = generate_json_schema(args.class_ref) - print( - json.dumps(json_schema, ensure_ascii=False, indent=2).encode("utf-8").decode() - ) + print(json.dumps(json_schema, ensure_ascii=False, indent=2).encode("utf-8").decode()) if __name__ == "__main__": diff --git a/docling_core/utils/legacy.py b/docling_core/utils/legacy.py index 6f8fdf99..41436506 100644 --- a/docling_core/utils/legacy.py +++ b/docling_core/utils/legacy.py @@ -139,7 +139,6 @@ def docling_document_to_legacy(doc: DoclingDocument, fallback_filaname: str = "f embedded_captions = set() for ix, (item, level) in enumerate(doc.iterate_items(doc.body)): - if isinstance(item, (TableItem, PictureItem)) and len(item.captions) > 0: caption = item.caption_text(doc) if caption: @@ -150,7 +149,6 @@ def docling_document_to_legacy(doc: DoclingDocument, fallback_filaname: str = "f item_type = item.label if isinstance(item, (TextItem, ListItem, SectionHeaderItem)): - if isinstance(item, ListItem) and item.marker: text = f"{item.marker} {item.text}" else: @@ -249,9 +247,7 @@ def _make_spans(cell: TableCell, table_item: TableItem): table_data[i][j] = GlmTableCell( text=cell._get_text(doc=doc), bbox=( - cell.bbox.as_tuple() - if cell.bbox is not None - else None + cell.bbox.as_tuple() if cell.bbox is not None else None ), # check if this is bottom-left spans=spans, obj_type=celltype, @@ -322,8 +318,7 @@ def _make_spans(cell: TableCell, table_item: TableItem): ) page_dimensions = [ - PageDimensions(page=p.page_no, height=p.size.height, width=p.size.width) - for p in doc.pages.values() + PageDimensions(page=p.page_no, height=p.size.height, width=p.size.width) for p in doc.pages.values() ] legacy_doc: DsDocument = DsDocument( @@ -362,9 +357,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: prov = ProvenanceItem( page_no=int(item.prov[0].page), charspan=tuple(item.prov[0].span), - bbox=BoundingBox.from_tuple( - tuple(item.prov[0].bbox), origin=CoordOrigin.BOTTOMLEFT - ), + bbox=BoundingBox.from_tuple(tuple(item.prov[0].bbox), origin=CoordOrigin.BOTTOMLEFT), ) return prov @@ -415,9 +408,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: if text_item.text is None: continue prov = _transform_prov(text_item) - doc.add_text( - label=DocItemLabel.FOOTNOTE, text=text_item.text, parent=doc.furniture - ) + doc.add_text(label=DocItemLabel.FOOTNOTE, text=text_item.text, parent=doc.furniture) # main-text content if legacy_doc.main_text is not None: @@ -427,11 +418,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: # to avoid repeating them embedded_captions: Dict[str, int] = {} for ix, orig_item in enumerate(legacy_doc.main_text): - item = ( - legacy_doc._resolve_ref(orig_item) - if isinstance(orig_item, Ref) - else orig_item - ) + item = legacy_doc._resolve_ref(orig_item) if 
isinstance(orig_item, Ref) else orig_item if item is None: continue @@ -441,21 +428,14 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: # build lookup from floating objects to their caption item floating_to_caption: Dict[int, BaseText] = {} for ix, orig_item in enumerate(legacy_doc.main_text): - item = ( - legacy_doc._resolve_ref(orig_item) - if isinstance(orig_item, Ref) - else orig_item - ) + item = legacy_doc._resolve_ref(orig_item) if isinstance(orig_item, Ref) else orig_item if item is None: continue item_type = item.obj_type.lower() if ( isinstance(item, BaseText) - and ( - item_type == "caption" - or (item.name is not None and item.name.lower() == "caption") - ) + and (item_type == "caption" or (item.name is not None and item.name.lower() == "caption")) and item.text in embedded_captions ): floating_ix = embedded_captions[item.text] @@ -464,11 +444,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: # main loop iteration current_list: Optional[GroupItem] = None for ix, orig_item in enumerate(legacy_doc.main_text): - item = ( - legacy_doc._resolve_ref(orig_item) - if isinstance(orig_item, Ref) - else orig_item - ) + item = legacy_doc._resolve_ref(orig_item) if isinstance(orig_item, Ref) else orig_item if item is None: continue @@ -476,9 +452,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: item_type = item.obj_type.lower() # if a group is needed, add it - if isinstance(item, BaseText) and ( - item_type in "list-item-level-1" or item.name in {"list", "list-item"} - ): + if isinstance(item, BaseText) and (item_type in "list-item-level-1" or item.name in {"list", "list-item"}): if current_list is None: current_list = doc.add_list_group(name="list") else: @@ -514,9 +488,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: "list-item", }: # TODO: Infer if this is a numbered or a bullet list item - doc.add_list_item( - text=text, enumerated=False, prov=prov, parent=current_list - ) + doc.add_list_item(text=text, enumerated=False, prov=prov, parent=current_list) # normal text else: @@ -530,13 +502,11 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: doc.add_text(label=label, text=text, prov=prov) elif isinstance(item, DsSchemaTable): - table_data = TableData(num_cols=item.num_cols, num_rows=item.num_rows) if item.data is not None: seen_spans = set() for row_ix, row in enumerate(item.data): for col_ix, orig_cell_data in enumerate(row): - cell_bbox: Optional[BoundingBox] = ( BoundingBox.from_tuple( tuple(orig_cell_data.bbox), @@ -559,9 +529,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: if orig_cell_data.spans is not None: # convert to a tuple of tuples for hashing - spans_tuple = tuple( - tuple(span) for span in orig_cell_data.spans - ) + spans_tuple = tuple(tuple(span) for span in orig_cell_data.spans) # skip repeated spans if spans_tuple in seen_spans: @@ -569,25 +537,13 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: seen_spans.add(spans_tuple) - cell.start_row_offset_idx = min( - s[0] for s in spans_tuple - ) - cell.end_row_offset_idx = ( - max(s[0] for s in spans_tuple) + 1 - ) - cell.start_col_offset_idx = min( - s[1] for s in spans_tuple - ) - cell.end_col_offset_idx = ( - max(s[1] for s in spans_tuple) + 1 - ) + cell.start_row_offset_idx = min(s[0] for s in spans_tuple) + cell.end_row_offset_idx = max(s[0] for s in spans_tuple) + 1 + cell.start_col_offset_idx = min(s[1] for s in spans_tuple) + cell.end_col_offset_idx = max(s[1] for s in spans_tuple) 
+ 1 - cell.row_span = ( - cell.end_row_offset_idx - cell.start_row_offset_idx - ) - cell.col_span = ( - cell.end_col_offset_idx - cell.start_col_offset_idx - ) + cell.row_span = cell.end_row_offset_idx - cell.start_row_offset_idx + cell.col_span = cell.end_col_offset_idx - cell.start_col_offset_idx table_data.table_cells.append(cell) @@ -617,11 +573,7 @@ def _transform_prov(item: BaseCell) -> Optional[ProvenanceItem]: new_item.captions.append(caption.get_ref()) # equations - elif ( - isinstance(item, BaseCell) - and item.text is not None - and item_type in {"formula", "equation"} - ): + elif isinstance(item, BaseCell) and item.text is not None and item_type in {"formula", "equation"}: doc.add_text(label=DocItemLabel.FORMULA, text=item.text, prov=prov) return doc diff --git a/docling_core/utils/validate.py b/docling_core/utils/validate.py index 51f6baf9..cc9668c0 100644 --- a/docling_core/utils/validate.py +++ b/docling_core/utils/validate.py @@ -17,13 +17,9 @@ def parse_arguments(): """Parse the arguments from the command line.""" argparser = argparse.ArgumentParser(description="validate example-file with schema") - argparser.add_argument( - "-f", "--format", required=True, help="format of the file [RAW, ANN, OCR]" - ) + argparser.add_argument("-f", "--format", required=True, help="format of the file [RAW, ANN, OCR]") - argparser.add_argument( - "-i", "--input-file", required=True, help="JSON filename to be validated" - ) + argparser.add_argument("-i", "--input-file", required=True, help="JSON filename to be validated") pargs = argparser.parse_args() @@ -54,7 +50,7 @@ def run(): if result[0]: logger.info("Done!") else: - logger.error("invalid schema: {}".format(result[1])) + logger.error(f"invalid schema: {result[1]}") def main(): diff --git a/docling_core/utils/validators.py b/docling_core/utils/validators.py index 7c7178b7..8a576d99 100644 --- a/docling_core/utils/validators.py +++ b/docling_core/utils/validators.py @@ -32,11 +32,7 @@ def validate_raw_schema(file_: dict) -> tuple[bool, str]: """Validate a RAW file.""" logger.debug("validate RAW schema ... ") - schema_txt = ( - resources.files("docling_core") - .joinpath("resources/schemas/legacy_doc/RAW.json") - .read_text("utf-8") - ) + schema_txt = resources.files("docling_core").joinpath("resources/schemas/legacy_doc/RAW.json").read_text("utf-8") schema = json.loads(schema_txt) return validate_schema(file_, schema) @@ -46,11 +42,7 @@ def validate_ann_schema(file_: dict) -> tuple[bool, str]: """Validate an annotated (ANN) file.""" logger.debug("validate ANN schema ... ") - schema_txt = ( - resources.files("docling_core") - .joinpath("resources/schemas/legacy_doc/ANN.json") - .read_text("utf-8") - ) + schema_txt = resources.files("docling_core").joinpath("resources/schemas/legacy_doc/ANN.json").read_text("utf-8") schema = json.loads(schema_txt) return validate_schema(file_, schema) @@ -61,9 +53,7 @@ def validate_ocr_schema(file_: dict) -> tuple[bool, str]: logger.debug("validate OCR schema ... 
") schema_txt = ( - resources.files("docling_core") - .joinpath("resources/schemas/legacy_doc/OCR-output.json") - .read_text("utf-8") + resources.files("docling_core").joinpath("resources/schemas/legacy_doc/OCR-output.json").read_text("utf-8") ) schema = json.loads(schema_txt) diff --git a/examples/rich_table_cells.ipynb b/examples/rich_table_cells.ipynb index 2ccc7926..3beaebac 100644 --- a/examples/rich_table_cells.ipynb +++ b/examples/rich_table_cells.ipynb @@ -7,7 +7,13 @@ "metadata": {}, "outputs": [], "source": [ - "from docling_core.types.doc import DoclingDocument, TableData, TableCell, RichTableCell, DocItemLabel\n", + "from docling_core.types.doc import (\n", + " DoclingDocument,\n", + " TableData,\n", + " TableCell,\n", + " RichTableCell,\n", + " DocItemLabel,\n", + ")\n", "\n", "doc = DoclingDocument(name=\"\")\n", "doc.add_text(label=DocItemLabel.TITLE, text=\"Rich tables\")\n", @@ -226,7 +232,7 @@ } ], "source": [ - "print(doc.tables[0].export_to_doctags(doc=doc))\n" + "print(doc.tables[0].export_to_doctags(doc=doc))" ] }, { diff --git a/examples/table_annotations.ipynb b/examples/table_annotations.ipynb index f8a9017c..3c4fef91 100644 --- a/examples/table_annotations.ipynb +++ b/examples/table_annotations.ipynb @@ -34,9 +34,14 @@ "from rich.console import Console\n", "from rich.panel import Panel\n", "\n", + "\n", "def print_excerpt(\n", - " txt: str, *, limit: int = 2000, title: Optional[str] = None, min_width: int = 80,\n", - " table_end: str = \"--|\"\n", + " txt: str,\n", + " *,\n", + " limit: int = 2000,\n", + " title: Optional[str] = None,\n", + " min_width: int = 80,\n", + " table_end: str = \"--|\",\n", "):\n", " excerpt = txt[:limit]\n", " width = max(\n", @@ -44,7 +49,7 @@ " min_width,\n", " )\n", " console = Console(width=width)\n", - " console.print(Panel(f\"{excerpt}{'...' if len(txt)>limit else ''}\", title=title))" + " console.print(Panel(f\"{excerpt}{'...' 
if len(txt) > limit else ''}\", title=title))" ] }, { @@ -205,6 +210,7 @@ "from docling_core.transforms.serializer.markdown import MarkdownAnnotationSerializer\n", "from docling_core.types.doc.document import MiscAnnotation, DocItem\n", "\n", + "\n", "class CustomAnnotationSerializer(MarkdownAnnotationSerializer):\n", " def serialize(\n", " self,\n", diff --git a/pyproject.toml b/pyproject.toml index 7c78cd18..abf36ac4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ dev = [ "pytest~=8.3", "pytest-cov>=6.1.1", "python-semantic-release~=7.32", + "ruff>=0.14.8", ] [tool.uv] @@ -124,20 +125,81 @@ namespaces = true [tool.setuptools.package-data] "*" = ["*.json"] -[tool.black] -line-length = 88 -target-version = ["py39", "py310"] -include = '\.pyi?$' -preview = true +[tool.ruff] +target-version = "py39" +line-length = 120 +respect-gitignore = true +exclude = [ + "test/data/**" +] + +[tool.ruff.format] +skip-magic-trailing-comma = false + +[tool.ruff.lint] +select = [ + "C", # flake8-comprehensions + "C9", # mccabe + "E", # pycodestyle errors (default) + "F", # pyflakes (default) + "I", # isort + "PD", # pandas-vet + "PIE", # pie + "Q", # flake8-quotes + "RUF", # Enable all ruff-specific checks + "S307", # eval + "W", # pycodestyle warnings + "ASYNC", # async + "UP", # pyupgrade +] + +ignore = [ + "C403", # Unnecessary `list()` call (rewrite as a literal) + "C408", # Unnecessary `dict()` call (rewrite as a literal) + "C413", # Unnecessary `reversed()` call around `sorted()` + "C416", # Unnecessary set comprehension (rewrite using `set()`) + "E501", # Line too long, handled by ruff formatter + "E203", # whitespace-before-punctuation + "E741", # Ambiguous variable name: `l` + "D107", # "Missing docstring in __init__", + "F401", # imported but unused; consider using `importlib.util.find_spec` to test for " + "F811", # "redefinition of the same function" + "PL", # Pylint + "PD901", # Avoid using the generic variable name `df` for DataFrames + "RUF002", # Docstring contains ambiguous `‑` (NON-BREAKING HYPHEN). + "RUF003", # Comment contains ambiguous `‑` (NON-BREAKING HYPHEN). + "RUF005", # Consider ... 
instead of concatenation + "RUF012", # Mutable Class Attributes + "RUF034", # Useless `if`-`else` condition + "UP006", # List vs list, etc + "UP007", # Option and Union + "UP015", # Unnecessary mode argument + "UP035", # `typing.Set` is deprecated, use `set` instead" + "UP045", # Use `X | None` for type annotations +] + +[tool.ruff.lint.pep8-naming] +classmethod-decorators = [ + "classmethod", + "validator", + "pydantic.validator", +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] +"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests +"*.ipynb" = ["I"] # Disable import sorting for notebooks + +[tool.ruff.lint.mccabe] +max-complexity = 30 -[tool.isort] -profile = "black" -line_length = 88 -py_version = 39 -multi_line_output = 3 -include_trailing_comma = true +[tool.ruff.lint.isort] +combine-as-imports = false -[tool.autoflake] +[toolruff.lint.autoflake] in-place = true ignore-init-module-imports = true remove-all-unused-imports = true @@ -147,8 +209,6 @@ recursive = true [tool.mypy] pretty = true -# strict = true -# disallow_untyped_defs = true no_implicit_optional = true namespace_packages = true show_error_codes = true diff --git a/test/conftest.py b/test/conftest.py index 7dadfc5c..6af24335 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -35,9 +35,7 @@ def _construct_doc() -> DoclingDocument: leading_list = doc.add_list_group(parent=None) doc.add_list_item(parent=leading_list, text="item of leading list", marker="■") - title = doc.add_title( - text="Title of the Document" - ) # can be done if such information is present, or ommitted. + title = doc.add_title(text="Title of the Document") # can be done if such information is present, or ommitted. # group, heading, paragraph, table, figure, title, list, provenance doc.add_text(parent=title, label=DocItemLabel.TEXT, text="Author 1\nAffiliation 1") @@ -92,9 +90,7 @@ def _construct_doc() -> DoclingDocument: doc.add_list_item(parent=mylist_level_1, text="list item 4", marker="■") - tab_caption = doc.add_text( - label=DocItemLabel.CAPTION, text="This is the caption of table 1." - ) + tab_caption = doc.add_text(label=DocItemLabel.CAPTION, text="This is the caption of table 1.") # Make some table cells table_cells = [] @@ -166,9 +162,7 @@ def _construct_doc() -> DoclingDocument: table_data = TableData(num_rows=3, num_cols=3, table_cells=table_cells) doc.add_table(data=table_data, caption=tab_caption) - fig_caption_1 = doc.add_text( - label=DocItemLabel.CAPTION, text="This is the caption of figure 1." - ) + fig_caption_1 = doc.add_text(label=DocItemLabel.CAPTION, text="This is the caption of figure 1.") doc.add_picture(caption=fig_caption_1) size = (64, 64) @@ -189,12 +183,8 @@ def _construct_doc() -> DoclingDocument: # Draw the red square # draw.rectangle([x1, y1, x2, y2], fill="red") - fig_caption_2 = doc.add_text( - label=DocItemLabel.CAPTION, text="This is the caption of figure 2." 
- ) - doc.add_picture( - image=ImageRef.from_pil(image=fig2_image, dpi=72), caption=fig_caption_2 - ) + fig_caption_2 = doc.add_text(label=DocItemLabel.CAPTION, text="This is the caption of figure 2.") + doc.add_picture(image=ImageRef.from_pil(image=fig2_image, dpi=72), caption=fig_caption_2) g0 = doc.add_list_group(parent=None) doc.add_list_item(text="item 1 of list", parent=g0, marker="■") @@ -225,9 +215,7 @@ def _construct_doc() -> DoclingDocument: parent=inline1, code_language=CodeLanguageLabel.PYTHON, ) - doc.add_text( - label=DocItemLabel.TEXT, text="(to be displayed inline)", parent=inline1 - ) + doc.add_text(label=DocItemLabel.TEXT, text="(to be displayed inline)", parent=inline1) g2_subgroup_li_2 = doc.add_list_item(text="", parent=g2_subgroup, marker="□") inline2 = doc.add_inline_group(parent=g2_subgroup_li_2) @@ -237,14 +225,10 @@ def _construct_doc() -> DoclingDocument: parent=inline2, ) doc.add_text(label=DocItemLabel.FORMULA, text="E=mc^2", parent=inline2) - doc.add_text( - label=DocItemLabel.TEXT, text="(to be displayed inline)", parent=inline2 - ) + doc.add_text(label=DocItemLabel.TEXT, text="(to be displayed inline)", parent=inline2) doc.add_text(label=DocItemLabel.TEXT, text="Here a code block:", parent=None) - doc.add_code( - text='print("Hello world")', parent=None, code_language=CodeLanguageLabel.PYTHON - ) + doc.add_code(text='print("Hello world")', parent=None, code_language=CodeLanguageLabel.PYTHON) doc.add_text(label=DocItemLabel.TEXT, text="Here a formula block:", parent=None) doc.add_text(label=DocItemLabel.FORMULA, text="E=mc^2", parent=None) @@ -279,9 +263,7 @@ def _construct_doc() -> DoclingDocument: doc.add_form(graph=graph) inline_fmt = doc.add_inline_group() - doc.add_text( - label=DocItemLabel.TEXT, text="Some formatting chops:", parent=inline_fmt - ) + doc.add_text(label=DocItemLabel.TEXT, text="Some formatting chops:", parent=inline_fmt) doc.add_text( label=DocItemLabel.TEXT, text="bold", @@ -341,21 +323,13 @@ def _construct_doc() -> DoclingDocument: ) parent_A = doc.add_list_group(name="list A") - doc.add_list_item( - text="Item 1 in A", enumerated=True, marker="(i)", parent=parent_A - ) - doc.add_list_item( - text="Item 2 in A", enumerated=True, marker="(ii)", parent=parent_A - ) - item_A_3 = doc.add_list_item( - text="Item 3 in A", enumerated=True, marker="(iii)", parent=parent_A - ) + doc.add_list_item(text="Item 1 in A", enumerated=True, marker="(i)", parent=parent_A) + doc.add_list_item(text="Item 2 in A", enumerated=True, marker="(ii)", parent=parent_A) + item_A_3 = doc.add_list_item(text="Item 3 in A", enumerated=True, marker="(iii)", parent=parent_A) parent_B = doc.add_list_group(parent=item_A_3, name="list B") doc.add_list_item(text="Item 1 in B", enumerated=True, parent=parent_B) - item_B_2 = doc.add_list_item( - text="Item 2 in B", enumerated=True, marker="42.", parent=parent_B - ) + item_B_2 = doc.add_list_item(text="Item 2 in B", enumerated=True, marker="42.", parent=parent_B) parent_C = doc.add_list_group(parent=item_B_2, name="list C") doc.add_list_item(text="Item 1 in C", enumerated=True, parent=parent_C) @@ -363,9 +337,7 @@ def _construct_doc() -> DoclingDocument: doc.add_list_item(text="Item 3 in B", enumerated=True, parent=parent_B) - doc.add_list_item( - text="Item 4 in A", enumerated=True, marker="(iv)", parent=parent_A - ) + doc.add_list_item(text="Item 4 in A", enumerated=True, marker="(iv)", parent=parent_A) with pytest.warns(DeprecationWarning, match="list group"): doc.add_list_item(text="List item without parent list group") @@ 
-407,9 +379,7 @@ def _rich_table_doc() -> DoclingDocument: doc.add_list_item(parent=rich_item_2, text="list item 1") doc.add_list_item(parent=rich_item_2, text="list item 2") - rich_item_3 = doc.add_table( - data=TableData(num_rows=2, num_cols=3), parent=table_item - ) + rich_item_3 = doc.add_table(data=TableData(num_rows=2, num_cols=3), parent=table_item) rich_item_4 = doc.add_group(parent=table_item, label=GroupLabel.UNSPECIFIED) doc.add_text( @@ -417,9 +387,7 @@ def _rich_table_doc() -> DoclingDocument: text="Some text in a generic group.", label=DocItemLabel.TEXT, ) - doc.add_text( - parent=rich_item_4, text="More text in the group.", label=DocItemLabel.TEXT - ) + doc.add_text(parent=rich_item_4, text="More text in the group.", label=DocItemLabel.TEXT) for i in range(rich_item_3.data.num_rows): for j in range(rich_item_3.data.num_cols): diff --git a/test/test_azure_serializer.py b/test/test_azure_serializer.py index 3b91d968..8d076729 100644 --- a/test/test_azure_serializer.py +++ b/test/test_azure_serializer.py @@ -47,9 +47,7 @@ def _assert_json_like_equal(a: Any, b: Any, eps: float = 1e-3, path: str = "$") # If either is float, compare with tolerance; if both int, exact match if isinstance(a, float) or isinstance(b, float): diff = abs(float(a) - float(b)) - assert ( - diff <= eps - ), f"Float mismatch at {path}: {a} != {b} (diff={diff}, eps={eps})" + assert diff <= eps, f"Float mismatch at {path}: {a} != {b} (diff={diff}, eps={eps})" else: assert a == b, f"Int mismatch at {path}: {a} != {b}" return @@ -116,9 +114,7 @@ def _ensure_prov(item, l=10.0, t=10.0, r=200.0, b=40.0): item.prov = [ ProvenanceItem( page_no=min(sample_doc.pages.keys()), - bbox=BoundingBox( - l=l, t=t, r=r, b=b, coord_origin=CoordOrigin.TOPLEFT - ), + bbox=BoundingBox(l=l, t=t, r=r, b=b, coord_origin=CoordOrigin.TOPLEFT), charspan=(0, 0), ) ] @@ -146,9 +142,7 @@ def _ensure_prov(item, l=10.0, t=10.0, r=200.0, b=40.0): # Basic structure check data = json.loads(actual_json) assert isinstance(data, dict) - assert ( - "pages" in data and isinstance(data["pages"], list) and len(data["pages"]) >= 1 - ) + assert "pages" in data and isinstance(data["pages"], list) and len(data["pages"]) >= 1 assert "paragraphs" in data and isinstance(data["paragraphs"], list) exp_file = Path("./test/data/doc/constructed.gt.azure.json") diff --git a/test/test_base.py b/test/test_base.py index 806e7fa6..d9476604 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -27,9 +27,7 @@ def test_identifier(): # dict(): important to set by_alias=True, if the model has aliases assert data.model_dump(by_alias=True) == gold_dict - assert data.model_dump_json(by_alias=True, indent=2) == json.dumps( - gold_dict, indent=2 - ) + assert data.model_dump_json(by_alias=True, indent=2) == json.dumps(gold_dict, indent=2) # schema_json(): no need to set by_alias since it is True by the default with open("test/data/json_schemas/base_identifier.json", encoding="utf-8") as tf: @@ -75,9 +73,7 @@ def test_log(): comment="UCMI 3.10", date="2021-11-03T04:42:54.844631+00:00", ) - data = Log( - task=None, agent="CXS", type="parsing", date="2021-11-03T04:42:54.844631+00:00" - ) + data = Log(task=None, agent="CXS", type="parsing", date="2021-11-03T04:42:54.844631+00:00") gold_dict = { "agent": "CXS", @@ -93,20 +89,13 @@ def test_log(): # Models that inherit from AliasModel will generate data with alias field names assert Log(**gold_dict).model_dump(exclude_unset=True) == gold_dict # ***Best practice***: exclude_unset=True, exclude_none=True, by_alias=True - assert ( - 
Log(**gold_dict).model_dump( - exclude_unset=True, exclude_none=True, by_alias=True - ) - == gold_dict - ) + assert Log(**gold_dict).model_dump(exclude_unset=True, exclude_none=True, by_alias=True) == gold_dict with open("test/data/json_schemas/base_log.json", encoding="utf-8") as tf: gold_json_schema = json.load(tf) assert Log.model_json_schema() == gold_json_schema - with pytest.raises( - ValidationError, match="Value type must be a datetime or a non-numeric string" - ): + with pytest.raises(ValidationError, match="Value type must be a datetime or a non-numeric string"): Log(agent="CXS", type="annotation", date=123456789) @@ -124,9 +113,7 @@ def test_file_info_object(): gold_dict.pop("filename-prov") gold_json = json.dumps(gold_dict) - FileInfoObject(**gold_dict).model_dump_json( - exclude_unset=True, exclude_none=True - ) == gold_json + FileInfoObject(**gold_dict).model_dump_json(exclude_unset=True, exclude_none=True) == gold_json # creating an instance with input variables requires the use of field names. Since # document-hash is an invalid function parameter name, 'populate_by_name' needs to @@ -166,19 +153,10 @@ def test_collection_info(): } clean_dict = {"name": "patent USPTO", "type": "Document", "version": "3.2.0"} data = CollectionInfo(**input_dict) - assert ( - data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) - != input_dict - ) - assert ( - data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) - == clean_dict - ) + assert data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) != input_dict + assert data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) == clean_dict data = CollectionInfo(**clean_dict) - assert ( - data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) - == clean_dict - ) + assert data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) == clean_dict def test_collection_document_info(): @@ -190,10 +168,7 @@ def test_collection_document_info(): "alias": ["patent"], } data = CollectionDocumentInfo(**gold_dict) - assert ( - data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) - == gold_dict - ) + assert data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) == gold_dict # within dictionary desc_dict = { @@ -214,7 +189,7 @@ def test_collection_document_info(): CCSDocumentDescription(**desc_dict) desc_dict["collection"]["type"] = "Record" - with pytest.raises(ValidationError, match="collection.type"): + with pytest.raises(ValidationError, match="collection\\.type"): CCSDocumentDescription(**desc_dict) @@ -227,10 +202,7 @@ def test_collection_record_info(): "alias": ["chemical", "Material Sciences"], } data = CollectionRecordInfo(**gold_dict) - assert ( - data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) - == gold_dict - ) + assert data.model_dump(by_alias=True, exclude_unset=True, exclude_none=True) == gold_dict # within dictionary desc_dict = { @@ -251,11 +223,11 @@ def test_collection_record_info(): RecordDescription(**desc_dict) desc_dict["collection"]["type"] = "Document" - with pytest.raises(ValidationError, match="collection.type"): + with pytest.raises(ValidationError, match="collection\\.type"): RecordDescription(**desc_dict) desc_dict["collection"]["type"] = "record" - with pytest.raises(ValidationError, match="collection.type"): + with pytest.raises(ValidationError, match="collection\\.type"): RecordDescription(**desc_dict) diff --git a/test/test_code_chunker.py b/test/test_code_chunker.py index 
e90a3d58..5b89d801 100644 --- a/test/test_code_chunker.py +++ b/test/test_code_chunker.py @@ -53,16 +53,9 @@ def create_documents_from_repository( all_files = [] for extension in all_extensions: - all_files.extend( - [ - f - for f in sorted( - glob.glob(f"{file_dir}/**/*{extension}", recursive=True) - ) - ] - ) + all_files.extend([f for f in sorted(glob.glob(f"{file_dir}/**/*{extension}", recursive=True))]) - all_files = sorted(list(set(all_files))) + all_files = sorted(set(all_files)) for file_path in all_files: with open(file_path, "r", encoding="utf-8") as f: @@ -72,11 +65,7 @@ def create_documents_from_repository( origin = DocumentOrigin( filename=file_relative, - uri=( - f"{repo_url}/blob/{commit_id}/{file_relative}" - if commit_id - else f"{repo_url}/{file_relative}" - ), + uri=(f"{repo_url}/blob/{commit_id}/{file_relative}" if commit_id else f"{repo_url}/{file_relative}"), mimetype="text/plain", binary_hash=_create_hash(file_content), ) @@ -97,41 +86,31 @@ def create_documents_from_repository( "Java", "/test/data/chunker_repo/repos/acmeair", "https://github.com/acmeair/acmeair", - lambda: HierarchicalChunker( - code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000) - ), + lambda: HierarchicalChunker(code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000)), ), ( "TypeScript", "/test/data/chunker_repo/repos/outline", "https://github.com/outline/outline", - lambda: HierarchicalChunker( - code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000) - ), + lambda: HierarchicalChunker(code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000)), ), ( "JavaScript", "/test/data/chunker_repo/repos/jquery", "https://github.com/jquery/jquery", - lambda: HierarchicalChunker( - code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000) - ), + lambda: HierarchicalChunker(code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000)), ), ( "Python", "/test/data/chunker_repo/repos/docling", "https://github.com/docling-project/docling", - lambda: HierarchicalChunker( - code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000) - ), + lambda: HierarchicalChunker(code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000)), ), ( "C", "/test/data/chunker_repo/repos/json-c", "https://github.com/json-c/json-c", - lambda: HierarchicalChunker( - code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000) - ), + lambda: HierarchicalChunker(code_chunking_strategy=StandardCodeChunkingStrategy(max_tokens=5000)), ), ] @@ -150,7 +129,6 @@ def _dump_or_assert(act_data: dict, out_path: pathlib.Path): @pytest.mark.parametrize("name,local_path,repo_url,chunker_factory", REPO_SPECS) def test_function_chunkers_repo(name, local_path, repo_url, chunker_factory): - local_path_full = os.getcwd() + local_path if not os.path.isdir(local_path_full): @@ -162,11 +140,7 @@ def test_function_chunkers_repo(name, local_path, repo_url, chunker_factory): language=CodeLanguageLabel(name), commit_id="abc123def456", ) - docs = [ - doc - for doc in docs - if any(text.label == DocItemLabel.CODE and text.text for text in doc.texts) - ] + docs = [doc for doc in docs if any(text.label == DocItemLabel.CODE and text.text for text in doc.texts)] if not docs: pytest.skip(f"No documents found in {local_path_full} for {name}.") diff --git a/test/test_code_chunking_strategy.py b/test/test_code_chunking_strategy.py index 783746cd..c8234b56 100644 --- a/test/test_code_chunking_strategy.py +++ b/test/test_code_chunking_strategy.py @@ -49,9 +49,7 @@ def 
factorial(n): format_code_blocks=False, ), ) - chunks = list( - strategy.chunk_code_item(item=code_item, doc=doc, doc_serializer=doc_ser) - ) + chunks = list(strategy.chunk_code_item(item=code_item, doc=doc, doc_serializer=doc_ser)) assert len(chunks) > 0 for chunk in chunks: @@ -77,12 +75,8 @@ def fibonacci(n): text="Here's some Python code:", orig="Here's some Python code:", ) - doc.add_code( - text=python_code, code_language=CodeLanguageLabel.PYTHON, orig=python_code - ) - doc.origin = DocumentOrigin( - filename="test.py", mimetype="text/x-python", binary_hash=12345 - ) + doc.add_code(text=python_code, code_language=CodeLanguageLabel.PYTHON, orig=python_code) + doc.origin = DocumentOrigin(filename="test.py", mimetype="text/x-python", binary_hash=12345) strategy = StandardCodeChunkingStrategy(min_chunk_size=50, max_tokens=1000) chunker_with_strategy = HierarchicalChunker(code_chunking_strategy=strategy) @@ -114,9 +108,7 @@ def test_hybrid_chunker_with_code_files(test_data_dir): pytest.skip("Python test file not found") doc = DoclingDocument(name="sample.py") - doc.origin = DocumentOrigin( - filename="sample.py", mimetype="text/x-python", binary_hash=12345 - ) + doc.origin = DocumentOrigin(filename="sample.py", mimetype="text/x-python", binary_hash=12345) with open(python_file, "r", encoding="utf-8") as f: content = f.read() @@ -141,9 +133,7 @@ def test_unsupported_language_fallback(test_data_dir): go_file = test_data_dir / "sample.go" if go_file.exists(): doc = DoclingDocument(name="sample.go") - doc.origin = DocumentOrigin( - filename="sample.go", mimetype="text/plain", binary_hash=12345 - ) + doc.origin = DocumentOrigin(filename="sample.go", mimetype="text/plain", binary_hash=12345) with open(go_file, "r", encoding="utf-8") as f: content = f.read() @@ -162,9 +152,7 @@ def test_unsupported_language_fallback(test_data_dir): md_file = test_data_dir / "sample.md" if md_file.exists(): doc = DoclingDocument(name="sample.md") - doc.origin = DocumentOrigin( - filename="sample.md", mimetype="text/plain", binary_hash=12345 - ) + doc.origin = DocumentOrigin(filename="sample.md", mimetype="text/plain", binary_hash=12345) with open(md_file, "r", encoding="utf-8") as f: content = f.read() @@ -191,9 +179,7 @@ def test_repository_processing(test_data_dir): all_chunks = [] for file_path in test_data_dir.glob("sample.*"): doc = DoclingDocument(name=file_path.name) - doc.origin = DocumentOrigin( - filename=file_path.name, mimetype="text/plain", binary_hash=12345 - ) + doc.origin = DocumentOrigin(filename=file_path.name, mimetype="text/plain", binary_hash=12345) with open(file_path, "r", encoding="utf-8") as f: content = f.read() diff --git a/test/test_collection.py b/test/test_collection.py index cc9d46d4..ab307847 100644 --- a/test/test_collection.py +++ b/test/test_collection.py @@ -56,15 +56,12 @@ def test_table_export_to_tokens(): doc = Document.model_validate_json(file_json) if doc.tables is not None and doc.page_dimensions is not None: - pagedims = doc.get_map_to_page_dimensions() if doc.tables is not None: for i, table in enumerate(doc.tables): page = table.prov[0].page - out = table.export_to_document_tokens( - page_w=pagedims[page][0], page_h=pagedims[page][1] - ) + out = table.export_to_document_tokens(page_w=pagedims[page][0], page_h=pagedims[page][1]) fname = f"{filename}_table_{i}.dt.txt" if GENERATE: @@ -81,13 +78,10 @@ def test_table_export_to_tokens(): break elif doc.tables is not None and doc.page_dimensions is None: - if doc.tables is not None: for i, table in enumerate(doc.tables): 
page = table.prov[0].page - out = table.export_to_document_tokens( - add_table_location=False, add_cell_location=False - ) + out = table.export_to_document_tokens(add_table_location=False, add_cell_location=False) fname = f"{filename}_table_{i}.dt.txt" if GENERATE: @@ -113,9 +107,7 @@ def test_document_export_to_md(): md = doc.export_to_markdown() if GENERATE: - with open( - "test/data/legacy_doc/doc-export.md", "w", encoding="utf-8" - ) as gold_obj: + with open("test/data/legacy_doc/doc-export.md", "w", encoding="utf-8") as gold_obj: gold_obj.write(md) with open("test/data/legacy_doc/doc-export.md", encoding="utf-8") as gold_obj: @@ -133,14 +125,10 @@ def test_document_export_to_tokens(): xml = doc.export_to_document_tokens(delim=True) if GENERATE: - with open( - "test/data/legacy_doc/doc-export.dt.txt", "w", encoding="utf-8" - ) as gold_obj: + with open("test/data/legacy_doc/doc-export.dt.txt", "w", encoding="utf-8") as gold_obj: gold_obj.write(xml) - with open( - "test/data/legacy_doc/doc-export.dt.txt", "r", encoding="utf-8" - ) as gold_obj: + with open("test/data/legacy_doc/doc-export.dt.txt", "r", encoding="utf-8") as gold_obj: gold_data = gold_obj.read().strip() assert xml == gold_data diff --git a/test/test_doc_schema.py b/test/test_doc_schema.py index 9776e791..0efc0f5c 100644 --- a/test/test_doc_schema.py +++ b/test/test_doc_schema.py @@ -44,9 +44,9 @@ def test_ccs_document(): except ValidationError as e: for error in e.errors(): # print(type(error)) - assert all( - item in error["loc"] for item in ("description", "logs") - ), f"Data in file {filename} should fail in logs" + assert all(item in error["loc"] for item in ("description", "logs")), ( + f"Data in file {filename} should fail in logs" + ) # check doc-error-2 is invalid for missing page-hashes with ( @@ -90,9 +90,7 @@ def test_description_advanced_t(): # any dictionary is valid, since it is not parametrized CCSDocumentDescription(**desc, advanced={"serial": "CXS12345"}) CCSDocumentDescription(**desc, advanced={0: "CXS12345"}) - with pytest.raises( - ValidationError, match="should be a valid dictionary or instance of BaseModel" - ): + with pytest.raises(ValidationError, match="should be a valid dictionary or instance of BaseModel"): CCSDocumentDescription(**desc, advanced=False) class MyAdvanced(BaseModel): diff --git a/test/test_doc_schema_extractor.py b/test/test_doc_schema_extractor.py index afa874eb..7871343e 100644 --- a/test/test_doc_schema_extractor.py +++ b/test/test_doc_schema_extractor.py @@ -20,7 +20,7 @@ def test_ccs_document_update(): doc = CCSDocument.model_validate(raw_doc) if doc.description.abstract: - assert False, f"Abstract should not be present" + assert False, "Abstract should not be present" except ValidationError as e: print(f"Validation error in file {filename}:\n{e.json()}") diff --git a/test/test_docling_doc.py b/test/test_docling_doc.py index 25ecddff..1250dabe 100644 --- a/test/test_docling_doc.py +++ b/test/test_docling_doc.py @@ -85,7 +85,6 @@ def test_overlaps_horizontally(): def test_overlaps_vertically(): - page_height = 300 # Same CoordOrigin (TOPLEFT) @@ -230,27 +229,19 @@ def test_y_overlap_with(): def test_union_area_with(): # Overlapping (TOPLEFT) - bbox1 = BoundingBox( - l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT - ) # Area 100 - bbox2 = BoundingBox( - l=5, t=5, r=15, b=15, coord_origin=CoordOrigin.TOPLEFT - ) # Area 100 + bbox1 = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT) # Area 100 + bbox2 = BoundingBox(l=5, t=5, r=15, b=15, 
coord_origin=CoordOrigin.TOPLEFT) # Area 100 # Intersection area 25 # Union area = 100 + 100 - 25 = 175 assert abs(bbox1.union_area_with(bbox2) - 175.0) < 1.0e-3 # Non-overlapping (TOPLEFT) - bbox3 = BoundingBox( - l=20, t=0, r=30, b=10, coord_origin=CoordOrigin.TOPLEFT - ) # Area 100 + bbox3 = BoundingBox(l=20, t=0, r=30, b=10, coord_origin=CoordOrigin.TOPLEFT) # Area 100 # Union area = 100 + 100 - 0 = 200 assert abs(bbox1.union_area_with(bbox3) - 200.0) < 1.0e-3 # Touching edges (TOPLEFT) - bbox4 = BoundingBox( - l=10, t=0, r=20, b=10, coord_origin=CoordOrigin.TOPLEFT - ) # Area 100 + bbox4 = BoundingBox(l=10, t=0, r=20, b=10, coord_origin=CoordOrigin.TOPLEFT) # Area 100 # Union area = 100 + 100 - 0 = 200 assert abs(bbox1.union_area_with(bbox4) - 200.0) < 1.0e-3 @@ -260,12 +251,8 @@ def test_union_area_with(): assert abs(bbox1.union_area_with(bbox5) - 100.0) < 1.0e-3 # Overlapping (BOTTOMLEFT) - bbox6 = BoundingBox( - l=0, b=0, r=10, t=10, coord_origin=CoordOrigin.BOTTOMLEFT - ) # Area 100 - bbox7 = BoundingBox( - l=5, b=5, r=15, t=15, coord_origin=CoordOrigin.BOTTOMLEFT - ) # Area 100 + bbox6 = BoundingBox(l=0, b=0, r=10, t=10, coord_origin=CoordOrigin.BOTTOMLEFT) # Area 100 + bbox7 = BoundingBox(l=5, b=5, r=15, t=15, coord_origin=CoordOrigin.BOTTOMLEFT) # Area 100 # Intersection area 25 # Union area = 100 + 100 - 25 = 175 assert abs(bbox6.union_area_with(bbox7) - 175.0) < 1.0e-3 @@ -307,7 +294,6 @@ def test_x_union_with(): def test_y_union_with(): - bbox1_tl = BoundingBox(l=0, t=0, r=10, b=10, coord_origin=CoordOrigin.TOPLEFT) bbox2_tl = BoundingBox(l=0, t=5, r=10, b=15, coord_origin=CoordOrigin.TOPLEFT) # y_union = max(10, 15) - min(0, 5) = 15 - 0 = 15 @@ -355,7 +341,6 @@ def test_y_union_with(): def test_orientation(): - page_height = 300 # Same CoordOrigin (TOPLEFT) @@ -380,12 +365,9 @@ def test_orientation(): def test_docitems(): - # Iterative function to find all subclasses def find_all_subclasses_iterative(base_class): - subclasses = deque( - [base_class] - ) # Use a deque for efficient popping from the front + subclasses = deque([base_class]) # Use a deque for efficient popping from the front all_subclasses = [] while subclasses: @@ -400,15 +382,11 @@ def serialise(obj): return yaml.safe_dump(obj.model_dump(mode="json", by_alias=True)) def write(name: str, serialisation: str): - with open( - f"./test/data/docling_document/unit/{name}.yaml", "w", encoding="utf-8" - ) as fw: + with open(f"./test/data/docling_document/unit/{name}.yaml", "w", encoding="utf-8") as fw: fw.write(serialisation) def read(name: str): - with open( - f"./test/data/docling_document/unit/{name}.yaml", "r", encoding="utf-8" - ) as fr: + with open(f"./test/data/docling_document/unit/{name}.yaml", "r", encoding="utf-8") as fr: gold = fr.read() return yaml.safe_load(gold) @@ -428,7 +406,6 @@ def verify(dc, obj): # Iterate over the derived classes of the BaseClass derived_classes = find_all_subclasses_iterative(DocItem) for dc in derived_classes: - if dc is TextItem: obj = dc( text="whatever", @@ -454,7 +431,6 @@ def verify(dc, obj): verify(dc, obj) elif dc is KeyValueItem: - graph = GraphData( cells=[ GraphCell( @@ -476,9 +452,7 @@ def verify(dc, obj): source_cell_id=0, target_cell_id=1, ), - GraphLink( - label=GraphLinkLabel.TO_KEY, source_cell_id=1, target_cell_id=0 - ), + GraphLink(label=GraphLinkLabel.TO_KEY, source_cell_id=1, target_cell_id=0), ], ) @@ -490,7 +464,6 @@ def verify(dc, obj): verify(dc, obj) elif dc is FormItem: - graph = GraphData( cells=[ GraphCell( @@ -512,9 +485,7 @@ def verify(dc, obj): 
source_cell_id=0, target_cell_id=1, ), - GraphLink( - label=GraphLinkLabel.TO_KEY, source_cell_id=1, target_cell_id=0 - ), + GraphLink(label=GraphLinkLabel.TO_KEY, source_cell_id=1, target_cell_id=0), ], ) @@ -578,7 +549,6 @@ def verify(dc, obj): def test_reference_doc(): - filename = "test/data/doc/dummy_doc.yaml" # Read YAML file of manual reference doc @@ -598,9 +568,7 @@ def test_reference_doc(): obj = doc.texts[2] # Text item with parent parent = obj.parent.resolve(doc=doc) # it is a figure - obj2 = parent.children[0].resolve( - doc=doc - ) # Child of figure must be the same as obj + obj2 = parent.children[0].resolve(doc=doc) # Child of figure must be the same as obj assert obj == obj2 assert obj is obj2 @@ -619,7 +587,6 @@ def test_reference_doc(): def test_parse_doc(): - filename = "test/data/doc/2206.01062.yaml" with open(filename, "r", encoding="utf-8") as fp: @@ -633,7 +600,6 @@ def test_parse_doc(): def test_construct_doc(sample_doc): - filename = "test/data/doc/constructed_document.yaml" assert sample_doc.validate_tree(sample_doc.body) @@ -647,7 +613,6 @@ def test_construct_doc(sample_doc): def test_construct_bad_doc(): - filename = "test/data/doc/bad_doc.yaml" doc = _construct_bad_doc() @@ -685,17 +650,13 @@ def _verify_regression_test(pred: str, filename: str, ext: str): with open(filename + f".{ext}", "r", encoding="utf-8") as fr: gt_true = fr.read().rstrip() - assert ( - gt_true == pred - ), f"Does not pass regression-test for {filename}.{ext}\n\n{gt_true}\n\n{pred}" + assert gt_true == pred, f"Does not pass regression-test for {filename}.{ext}\n\n{gt_true}\n\n{pred}" else: with open(filename + f".{ext}", "w", encoding="utf-8") as fw: fw.write(f"{pred}\n") -def _test_export_methods( - doc: DoclingDocument, filename: str, page_break_placeholder: Optional[str] = None -): +def _test_export_methods(doc: DoclingDocument, filename: str, page_break_placeholder: Optional[str] = None): # Iterate all elements et_pred = doc.export_to_element_tree() _verify_regression_test(et_pred, filename=filename, ext="et") @@ -777,7 +738,6 @@ def test_pil_image(): def test_image_ref(): - data_uri = { "dpi": 72, "mimetype": "image/png", @@ -816,7 +776,6 @@ class ContentOutput(BaseModel): def test_version_doc(): - # default version doc = DoclingDocument(name="Untitled 1") assert doc.version == CURRENT_VERSION @@ -874,9 +833,7 @@ def test_formula_with_missing_fallback(): prov = ProvenanceItem(page_no=1, bbox=bbox, charspan=(0, 2)) doc.add_text(label=DocItemLabel.FORMULA, text="", orig="(II.24) 2 Imar", prov=prov) - doc.export_to_html( - formula_to_mathml=True, html_head="", image_mode=ImageRefMode.EMBEDDED - ) + doc.export_to_html(formula_to_mathml=True, html_head="", image_mode=ImageRefMode.EMBEDDED) expected = """ @@ -926,17 +883,10 @@ def test_docitem_get_image(): doc_item = DocItem( self_ref="#", label=DocItemLabel.TEXT, - prov=[ - ProvenanceItem( - page_no=1, bbox=BoundingBox(l=2, t=4, r=4, b=8), charspan=(1, 2) - ) - ], + prov=[ProvenanceItem(page_no=1, bbox=BoundingBox(l=2, t=4, r=4, b=8), charspan=(1, 2))], ) returned_doc_item_image = doc_item.get_image(doc=doc) - assert ( - returned_doc_item_image is not None - and returned_doc_item_image.tobytes() == doc_item_image.tobytes() - ) + assert returned_doc_item_image is not None and returned_doc_item_image.tobytes() == doc_item_image.tobytes() def test_floatingitem_get_image(): @@ -959,20 +909,14 @@ def test_floatingitem_get_image(): floating_item = FloatingItem( self_ref="#", label=DocItemLabel.PICTURE, - prov=[ - ProvenanceItem( - page_no=1, 
bbox=BoundingBox(l=2, t=4, r=6, b=12), charspan=(1, 2) - ) - ], + prov=[ProvenanceItem(page_no=1, bbox=BoundingBox(l=2, t=4, r=6, b=12), charspan=(1, 2))], image=ImageRef.from_pil(image=new_image, dpi=72), ) retured_image = floating_item.get_image(doc=doc) assert retured_image is not None and retured_image.tobytes() == new_image.tobytes() # FloatingItem without explicit image and no provenance - floating_item = FloatingItem( - self_ref="#", label=DocItemLabel.PICTURE, prov=[], image=None - ) + floating_item = FloatingItem(self_ref="#", label=DocItemLabel.PICTURE, prov=[], image=None) assert floating_item.get_image(doc=doc) is None # FloatingItem without explicit image on invalid page @@ -997,24 +941,15 @@ def test_floatingitem_get_image(): floating_item = FloatingItem( self_ref="#", label=DocItemLabel.PICTURE, - prov=[ - ProvenanceItem( - page_no=1, bbox=BoundingBox(l=2, t=4, r=4, b=8), charspan=(1, 2) - ) - ], + prov=[ProvenanceItem(page_no=1, bbox=BoundingBox(l=2, t=4, r=4, b=8), charspan=(1, 2))], image=None, ) retured_image = floating_item.get_image(doc=doc) - assert ( - retured_image is not None - and retured_image.tobytes() == floating_item_image.tobytes() - ) + assert retured_image is not None and retured_image.tobytes() == floating_item_image.tobytes() def test_save_pictures(sample_doc): - new_doc = sample_doc._with_pictures_refs( - image_dir=Path("./test/data/constructed_images/"), page_no=None - ) + new_doc = sample_doc._with_pictures_refs(image_dir=Path("./test/data/constructed_images/"), page_no=None) img_paths = new_doc._list_images_on_disk() assert len(img_paths) == 1, "len(img_paths)!=1" @@ -1034,31 +969,24 @@ def test_save_pictures_with_page(): image=ImageRef.from_pil(image=image, dpi=72), prov=ProvenanceItem( page_no=2, - bbox=BoundingBox( - b=0, l=0, r=200, t=400, coord_origin=CoordOrigin.BOTTOMLEFT - ), + bbox=BoundingBox(b=0, l=0, r=200, t=400, coord_origin=CoordOrigin.BOTTOMLEFT), charspan=(1, 2), ), ) # When - with_ref = doc._with_pictures_refs( - image_dir=Path("./test/data/constructed_images/"), page_no=1 - ) + with_ref = doc._with_pictures_refs(image_dir=Path("./test/data/constructed_images/"), page_no=1) # Then n_images = len(with_ref._list_images_on_disk()) assert n_images == 0 # When - with_ref = with_ref._with_pictures_refs( - image_dir=Path("./test/data/constructed_images/"), page_no=2 - ) + with_ref = with_ref._with_pictures_refs(image_dir=Path("./test/data/constructed_images/"), page_no=2) n_images = len(with_ref._list_images_on_disk()) # Then assert n_images == 1 def _normalise_string_wrt_filepaths(instr: str, paths: List[Path]): - for p in paths: instr = instr.replace(str(p), str(p.name)) @@ -1066,7 +994,6 @@ def _normalise_string_wrt_filepaths(instr: str, paths: List[Path]): def _verify_saved_output(filename: Union[str, Path], paths: List[Path]): - pred = "" with open(filename, "r", encoding="utf-8") as fr: pred = fr.read() @@ -1095,14 +1022,11 @@ def _verify_loaded_output(filename: Path, pred=None): pred = pred or DoclingDocument.load_from_json(Path(filename)) assert isinstance(pred, DoclingDocument) - assert ( - pred.export_to_dict() == gt.export_to_dict() - ), f"pred.export_to_dict() != gt.export_to_dict() for {filename}" + assert pred.export_to_dict() == gt.export_to_dict(), f"pred.export_to_dict() != gt.export_to_dict() for {filename}" assert pred == gt, f"pred!=gt for {filename}" def test_save_to_disk(sample_doc): - test_dir = Path("./test/data/doc") image_dir = Path("constructed_images/") # will be relative to test_dir @@ -1118,41 +1042,29 @@ def 
test_save_to_disk(sample_doc): ### MarkDown filename: Path = test_dir / "constructed_doc.placeholder.md" - sample_doc.save_as_markdown( - filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.PLACEHOLDER - ) + sample_doc.save_as_markdown(filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.PLACEHOLDER) _verify_saved_output(filename=filename, paths=paths) filename = test_dir / "constructed_doc.embedded.md" - sample_doc.save_as_markdown( - filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.EMBEDDED - ) + sample_doc.save_as_markdown(filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.EMBEDDED) _verify_saved_output(filename=filename, paths=paths) filename = test_dir / "constructed_doc.referenced.md" - sample_doc.save_as_markdown( - filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.REFERENCED - ) + sample_doc.save_as_markdown(filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.REFERENCED) _verify_saved_output(filename=filename, paths=paths) ### HTML filename = test_dir / "constructed_doc.placeholder.html" - sample_doc.save_as_html( - filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.PLACEHOLDER - ) + sample_doc.save_as_html(filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.PLACEHOLDER) _verify_saved_output(filename=filename, paths=paths) filename = test_dir / "constructed_doc.embedded.html" - sample_doc.save_as_html( - filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.EMBEDDED - ) + sample_doc.save_as_html(filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.EMBEDDED) _verify_saved_output(filename=filename, paths=paths) filename = test_dir / "constructed_doc.referenced.html" - sample_doc.save_as_html( - filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.REFERENCED - ) + sample_doc.save_as_html(filename=filename, artifacts_dir=image_dir, image_mode=ImageRefMode.REFERENCED) _verify_saved_output(filename=filename, paths=paths) ### Document Tokens @@ -1207,7 +1119,6 @@ def test_save_to_disk(sample_doc): def test_document_stack_operations(sample_doc): - # _print(document=doc) ref = RefItem(cref="#/texts/12") @@ -1226,7 +1137,6 @@ def test_document_stack_operations(sample_doc): def test_document_manipulation(sample_doc: DoclingDocument) -> None: - def _resolve(document: DoclingDocument, cref: str) -> NodeItem: ref = RefItem(cref=cref) return ref.resolve(doc=document) @@ -1246,9 +1156,7 @@ def _verify( DoclingDocument.load_from_json(filename=_gt_filename(filename=filename)) # test if the document is the same as the stored GT - _verify_loaded_output( - filename=filename, pred=DoclingDocument.model_validate(document) - ) + _verify_loaded_output(filename=filename, pred=DoclingDocument.model_validate(document)) image_dir = Path("./test/data/doc/constructed_images/") @@ -1326,11 +1234,7 @@ def _verify( label=DocItemLabel.TEXT, text="foo", orig="foo", - children=[ - _resolve( - document=deepcopy(sample_doc), cref=text_item_4.self_ref - ).get_ref() - ], + children=[_resolve(document=deepcopy(sample_doc), cref=text_item_4.self_ref).get_ref()], ), parent=sample_doc.body, ) @@ -1353,12 +1257,8 @@ def _verify( node = _resolve(document=sample_doc, cref="#/texts/45") - last_node = sample_doc.insert_list_group( - sibling=node, name="Inserted List Group", after=True - ) - group_node = sample_doc.insert_inline_group( - sibling=node, name="Inserted Inline Group", after=False - ) + last_node = sample_doc.insert_list_group(sibling=node, 
name="Inserted List Group", after=True) + group_node = sample_doc.insert_inline_group(sibling=node, name="Inserted Inline Group", after=False) sample_doc.insert_group( sibling=node, label=GroupLabel.LIST, @@ -1430,16 +1330,12 @@ def _verify( ) ) - table_data = TableData( - table_cells=table_cells, num_rows=num_rows, num_cols=num_cols - ) + table_data = TableData(table_cells=table_cells, num_rows=num_rows, num_cols=num_cols) sample_doc.insert_table(sibling=node, data=table_data, after=False) size = (64, 64) img = PILImage.new("RGB", size, "black") - sample_doc.insert_picture( - sibling=node, image=ImageRef.from_pil(image=img, dpi=72), after=True - ) + sample_doc.insert_picture(sibling=node, image=ImageRef.from_pil(image=img, dpi=72), after=True) sample_doc.insert_title(sibling=node, text="Inserted Title", after=False) sample_doc.insert_code(sibling=node, text="Inserted Code", after=True) @@ -1480,12 +1376,8 @@ def _verify( # Test the handling of list items in insert_* methods, both with and without parent groups with pytest.warns(DeprecationWarning, match="ListItem parent must be a ListGroup"): - li_sibling = sample_doc.insert_list_item( - sibling=node, text="Inserted List Item, Incorrect Parent", after=False - ) - sample_doc.insert_list_item( - sibling=li_sibling, text="Inserted List Item, Correct Parent", after=True - ) + li_sibling = sample_doc.insert_list_item(sibling=node, text="Inserted List Item, Incorrect Parent", after=False) + sample_doc.insert_list_item(sibling=li_sibling, text="Inserted List Item, Correct Parent", after=True) sample_doc.insert_text( sibling=li_sibling, label=DocItemLabel.LIST_ITEM, @@ -1518,9 +1410,7 @@ def _verify( label=DocItemLabel.TEXT, ) - sample_doc.add_node_items( - node_items=[text_item_6, text_item_7], doc=sample_doc, parent=group_node - ) + sample_doc.add_node_items(node_items=[text_item_6, text_item_7], doc=sample_doc, parent=group_node) filename = Path("test/data/doc/constructed_doc.bulk_item_addition.json") _verify(filename=filename, document=sample_doc, generate=GEN_TEST_DATA) @@ -1540,9 +1430,7 @@ def _verify( label=DocItemLabel.TEXT, ) - sample_doc.insert_node_items( - sibling=node, node_items=[text_item_8, text_item_9], doc=sample_doc, after=False - ) + sample_doc.insert_node_items(sibling=node, node_items=[text_item_8, text_item_9], doc=sample_doc, after=False) filename = Path("test/data/doc/constructed_doc.bulk_item_insertion.json") _verify(filename=filename, document=sample_doc, generate=GEN_TEST_DATA) @@ -1574,9 +1462,7 @@ def _verify( with pytest.raises(ValueError): extracted_doc = sample_doc.extract_items_range(start=li_sibling, end=node) - extracted_doc = sample_doc.extract_items_range( - start=group_node, end=node, end_inclusive=False, delete=True - ) + extracted_doc = sample_doc.extract_items_range(start=group_node, end=node, end_inclusive=False, delete=True) filename = Path("test/data/doc/constructed_doc.extracted_with_deletion.json") _verify(filename=filename, document=sample_doc, generate=GEN_TEST_DATA) @@ -1642,9 +1528,7 @@ def test_concatenate(): docs = [DoclingDocument.load_from_json(filename=f) for f in files] doc = DoclingDocument.concatenate(docs=docs) - html_data = doc.export_to_html( - image_mode=ImageRefMode.EMBEDDED, split_page_view=True - ) + html_data = doc.export_to_html(image_mode=ImageRefMode.EMBEDDED, split_page_view=True) exp_json_file = Path("test/data/doc/concatenated.json") exp_html_file = exp_json_file.with_suffix(".html") @@ -1675,9 +1559,7 @@ def test_list_group_with_non_list_items(): bad_doc = 
DoclingDocument(name="") l1 = bad_doc.add_list_group() bad_doc.add_list_item(text="ListItem 1", parent=l1) - bad_doc.add_text( - text="non-ListItem in ListGroup", label=DocItemLabel.TEXT, parent=l1 - ) + bad_doc.add_text(text="non-ListItem in ListGroup", label=DocItemLabel.TEXT, parent=l1) with pytest.raises(ValueError): bad_doc._validate_rules() @@ -1811,7 +1693,6 @@ def test_invalid_rich_table_doc(): def test_rich_table_item_insertion_normalization(): - doc = DoclingDocument(name="") doc.add_text(label=DocItemLabel.TITLE, text="Rich tables") @@ -1888,9 +1769,7 @@ def test_filter_pages(): orig_doc = DoclingDocument.load_from_json(src) doc = orig_doc.filter(page_nrs={2, 3, 5}) - html_data = doc.export_to_html( - image_mode=ImageRefMode.EMBEDDED, split_page_view=True - ) + html_data = doc.export_to_html(image_mode=ImageRefMode.EMBEDDED, split_page_view=True) exp_json_file = src.with_name(f"{src.stem}_p2_p3_p5.gt.json") exp_html_file = exp_json_file.with_suffix(".html") @@ -1911,27 +1790,20 @@ def test_filter_pages(): def _create_doc_for_filtering(): doc = DoclingDocument( name="", - pages={ - i: PageItem(page_no=i, size=Size(width=100, height=100), image=None) - for i in range(1, 3) - }, + pages={i: PageItem(page_no=i, size=Size(width=100, height=100), image=None) for i in range(1, 3)}, ) p1_text = doc.add_text( text="Text 1", parent=doc.body, label=DocItemLabel.TEXT, - prov=ProvenanceItem( - page_no=1, bbox=BoundingBox(l=0, t=0, r=100, b=100), charspan=(0, 1) - ), + prov=ProvenanceItem(page_no=1, bbox=BoundingBox(l=0, t=0, r=100, b=100), charspan=(0, 1)), ) doc.add_group(parent=p1_text) doc.add_text( text="Text 2", parent=doc.body, label=DocItemLabel.TEXT, - prov=ProvenanceItem( - page_no=2, bbox=BoundingBox(l=0, t=0, r=100, b=100), charspan=(0, 1) - ), + prov=ProvenanceItem(page_no=2, bbox=BoundingBox(l=0, t=0, r=100, b=100), charspan=(0, 1)), ) return doc @@ -1950,9 +1822,7 @@ def test_filter_invalid_pages(): doc = _create_doc_for_filtering() with pytest.raises( ValueError, - match=re.escape( - "The following page numbers are not present in the document: {3}" - ), + match=re.escape("The following page numbers are not present in the document: {3}"), ): doc.filter(page_nrs={3}) diff --git a/test/test_doctags_load.py b/test/test_doctags_load.py index 5355c2d1..37cfe190 100644 --- a/test/test_doctags_load.py +++ b/test/test_doctags_load.py @@ -22,11 +22,7 @@ def verify(exp_file: Path, actual: dict): # as the test was flaky due to URIs def strip_image_uris(d): if isinstance(d, dict): - return { - k: strip_image_uris(v) - for k, v in d.items() - if k not in {"uri", "image_uri"} - } + return {k: strip_image_uris(v) for k, v in d.items() if k not in {"uri", "image_uri"}} elif isinstance(d, list): return [strip_image_uris(x) for x in d] else: @@ -34,9 +30,7 @@ def strip_image_uris(d): expected_stripped = strip_image_uris(expected) actual_stripped = strip_image_uris(actual) - assert ( - expected_stripped == actual_stripped - ), "Dicts differ (ignoring image URIs)" + assert expected_stripped == actual_stripped, "Dicts differ (ignoring image URIs)" if "data:image/png;base64" in str(expected): # check if the image URIs are the same @@ -44,7 +38,6 @@ def strip_image_uris(d): def test_doctags_load_from_files(): - doctags_doc = DocTagsDocument.from_doctags_and_image_pairs( [Path("test/data/doc/page_with_pic.dt")], [Path("test/data/doc/page_with_pic.png")], @@ -59,7 +52,6 @@ def test_doctags_load_from_files(): def test_doctags_load_from_memory(): - with Path("test/data/doc/page_with_pic.dt").open() as 
file: doctags = file.read() image = PILImage.open(Path("test/data/doc/page_with_pic.png")) @@ -155,11 +147,7 @@ def test_doctags_inline(): doctags_doc = DocTagsDocument.from_multipage_doctags_and_images( doctags=doctags, - images=[ - pil_img - for p in doc.pages - if (img_ref := doc.pages[p].image) and (pil_img := img_ref.pil_image) - ], + images=[pil_img for p in doc.pages if (img_ref := doc.pages[p].image) and (pil_img := img_ref.pil_image)], ) deser_doc = DoclingDocument.load_from_doctags(doctags_doc) diff --git a/test/test_hierarchical_chunker.py b/test/test_hierarchical_chunker.py index 8e09a9f9..46c65d39 100644 --- a/test/test_hierarchical_chunker.py +++ b/test/test_hierarchical_chunker.py @@ -32,9 +32,7 @@ def test_chunk(): merge_list_items=True, ) chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str="test/data/chunker/0_out_chunks.json", @@ -59,9 +57,7 @@ def get_serializer(self, doc: DoclingDocument): ) chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str="test/data/chunker/0b_out_chunks.json", diff --git a/test/test_hybrid_chunker.py b/test/test_hybrid_chunker.py index 41075c75..21818c8c 100644 --- a/test/test_hybrid_chunker.py +++ b/test/test_hybrid_chunker.py @@ -53,9 +53,7 @@ def test_chunk_merge_peers(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -79,9 +77,7 @@ def test_chunk_with_model_name(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -102,9 +98,7 @@ def test_chunk_deprecated_max_tokens(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -160,9 +154,7 @@ def test_chunk_no_merge_peers(): ) chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -183,9 +175,7 @@ def test_chunk_deprecated_explicit_hf_obj(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -208,9 +198,7 @@ def test_ignore_deprecated_param_if_new_tokenizer_passed(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = 
list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -232,9 +220,7 @@ def test_deprecated_no_max_tokens(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -300,9 +286,7 @@ def get_serializer(self, doc: DoclingDocument): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -325,9 +309,7 @@ def test_chunk_openai(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -345,9 +327,7 @@ def test_chunk_default(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, @@ -370,9 +350,7 @@ def test_chunk_explicit(): chunk_iter = chunker.chunk(dl_doc=dl_doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=EXPECTED_OUT_FILE, diff --git a/test/test_json_schema_to_search_mapper.py b/test/test_json_schema_to_search_mapper.py index 9a6acbe4..6061ebef 100644 --- a/test/test_json_schema_to_search_mapper.py +++ b/test/test_json_schema_to_search_mapper.py @@ -41,16 +41,14 @@ def test_json_schema_to_search_mapper_0(): assert index_def is not None - filename = os.path.abspath( - os.path.join(os.path.dirname(__file__), "data/json_schemas/document-ref.json") - ) + filename = os.path.abspath(os.path.join(os.path.dirname(__file__), "data/json_schemas/document-ref.json")) index_ref = _load(filename) diff = jsondiff.diff(index_ref, index_def) # print(json.dumps(index_def, indent=2)) - assert ( - index_def == index_ref - ), f"Error in search mappings of ExportedCCSDocument. Difference:\n{json.dumps(diff, indent=2)}" + assert index_def == index_ref, ( + f"Error in search mappings of ExportedCCSDocument. Difference:\n{json.dumps(diff, indent=2)}" + ) def test_json_schema_to_search_mapper_1(): @@ -90,13 +88,9 @@ def test_json_schema_to_search_mapper_1(): assert index_def is not None - filename = os.path.abspath( - os.path.join(os.path.dirname(__file__), "data/json_schemas/dbrecord-ref.json") - ) + filename = os.path.abspath(os.path.join(os.path.dirname(__file__), "data/json_schemas/dbrecord-ref.json")) index_ref = _load(filename) diff = jsondiff.diff(index_ref, index_def) # print(json.dumps(index_def, indent=2)) - assert ( - index_def == index_ref - ), f"Error in search mappings of Record. 
Difference:\n{json.dumps(diff, indent=2)}" + assert index_def == index_ref, f"Error in search mappings of Record. Difference:\n{json.dumps(diff, indent=2)}" diff --git a/test/test_metadata.py b/test/test_metadata.py index e73144c6..e739d497 100644 --- a/test/test_metadata.py +++ b/test/test_metadata.py @@ -41,9 +41,7 @@ def test_metadata_usage() -> None: # add a custom metadata object to the item value = CustomCoordinates(longitude=47.3769, latitude=8.5417) - target_name = example_item.meta.set_custom_field( - namespace="my_corp", name="coords", value=value - ) + target_name = example_item.meta.set_custom_field(namespace="my_corp", name="coords", value=value) assert target_name == "my_corp__coords" # save the document @@ -77,42 +75,24 @@ def test_namespace_absence_raises(): def _create_doc_with_group_with_metadata() -> DoclingDocument: doc = DoclingDocument(name="") - doc.body.meta = BaseMeta( - summary=SummaryMetaField(text="This document talks about various topics.") - ) + doc.body.meta = BaseMeta(summary=SummaryMetaField(text="This document talks about various topics.")) grp1 = doc.add_group(name="1", label=GroupLabel.CHAPTER) - grp1.meta = BaseMeta( - summary=SummaryMetaField(text="This chapter discusses foo and bar.") - ) - doc.add_text( - text="This is some introductory text.", label=DocItemLabel.TEXT, parent=grp1 - ) + grp1.meta = BaseMeta(summary=SummaryMetaField(text="This chapter discusses foo and bar.")) + doc.add_text(text="This is some introductory text.", label=DocItemLabel.TEXT, parent=grp1) grp1a = doc.add_group(parent=grp1, name="1a", label=GroupLabel.SECTION) - grp1a.meta = BaseMeta( - summary=SummaryMetaField(text="This section talks about foo.") - ) - grp1a.meta.set_custom_field( - namespace="my_corp", name="test_1", value="custom field value 1" - ) + grp1a.meta = BaseMeta(summary=SummaryMetaField(text="This section talks about foo.")) + grp1a.meta.set_custom_field(namespace="my_corp", name="test_1", value="custom field value 1") txt1 = doc.add_text(text="Regarding foo...", label=DocItemLabel.TEXT, parent=grp1a) - txt1.meta = BaseMeta( - summary=SummaryMetaField(text="This paragraph provides more details about foo.") - ) + txt1.meta = BaseMeta(summary=SummaryMetaField(text="This paragraph provides more details about foo.")) lst1a = doc.add_list_group(parent=grp1a) - lst1a.meta = BaseMeta( - summary=SummaryMetaField(text="Here some foo specifics are listed.") - ) + lst1a.meta = BaseMeta(summary=SummaryMetaField(text="Here some foo specifics are listed.")) doc.add_list_item(text="lorem", parent=lst1a, enumerated=True) doc.add_list_item(text="ipsum", parent=lst1a, enumerated=True) grp1b = doc.add_group(parent=grp1, name="1b", label=GroupLabel.SECTION) - grp1b.meta = BaseMeta( - summary=SummaryMetaField(text="This section talks about bar.") - ) - grp1b.meta.set_custom_field( - namespace="my_corp", name="test_2", value="custom field value 2" - ) + grp1b.meta = BaseMeta(summary=SummaryMetaField(text="This section talks about bar.")) + grp1b.meta.set_custom_field(namespace="my_corp", name="test_2", value="custom field value 2") doc.add_text(text="Regarding bar...", label=DocItemLabel.TEXT, parent=grp1b) return doc @@ -231,9 +211,7 @@ def test_md_ser_without_non_meta(): def test_ser_custom_meta_serializer(): - class SummaryMarkdownMetaSerializer(MarkdownMetaSerializer): - @override def serialize( self, @@ -249,15 +227,8 @@ def serialize( text="\n\n".join( [ f"{' ' * (level or 0)}[{item.self_ref}] [{item.__class__.__name__}:{item.label.value}] {tmp}" # type:ignore[attr-defined] 
- for key in ( - list(item.meta.__class__.model_fields) - + list(item.meta.get_custom_part()) - ) - if ( - tmp := self._serialize_meta_field( - item.meta, key, params.mark_meta - ) - ) + for key in (list(item.meta.__class__.model_fields) + list(item.meta.get_custom_part())) + if (tmp := self._serialize_meta_field(item.meta, key, params.mark_meta)) ] if item.meta else [] @@ -265,18 +236,10 @@ def serialize( span_source=item if isinstance(item, DocItem) else [], ) - def _serialize_meta_field( - self, meta: BaseMeta, name: str, mark_meta: bool - ) -> Optional[str]: - if (field_val := getattr(meta, name)) is not None and isinstance( - field_val, SummaryMetaField - ): + def _serialize_meta_field(self, meta: BaseMeta, name: str, mark_meta: bool) -> Optional[str]: + if (field_val := getattr(meta, name)) is not None and isinstance(field_val, SummaryMetaField): txt = field_val.text - return ( - f"[{self._humanize_text(name, title=True)}] {txt}" - if mark_meta - else txt - ) + return f"[{self._humanize_text(name, title=True)}] {txt}" if mark_meta else txt else: return None @@ -286,9 +249,7 @@ def _serialize_meta_field( params = MarkdownParams( include_non_meta=False, ) - ser = MarkdownDocSerializer( - doc=doc, params=params, meta_serializer=SummaryMarkdownMetaSerializer() - ) + ser = MarkdownDocSerializer(doc=doc, params=params, meta_serializer=SummaryMarkdownMetaSerializer()) ser_res = ser.serialize() actual = ser_res.text exp_file = Path("test/data/doc/group_with_metadata_summaries.md") diff --git a/test/test_otsl_table_export.py b/test/test_otsl_table_export.py index 84dd5005..54bbb520 100644 --- a/test/test_otsl_table_export.py +++ b/test/test_otsl_table_export.py @@ -2,7 +2,6 @@ def test_table_export_to_otsl(): - data_table_cells = [] num_cols = 6 num_rows = 5 @@ -271,9 +270,7 @@ def test_table_export_to_otsl(): data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=data_table_cells) doc.add_table(data=data) - otsl_string = doc.tables[0].export_to_otsl( - add_cell_location=False, add_cell_text=False, doc=doc - ) + otsl_string = doc.tables[0].export_to_otsl(add_cell_location=False, add_cell_text=False, doc=doc) otsl_string.split("") # print("OTSL out:") diff --git a/test/test_page.py b/test/test_page.py index 14b141da..72e50e00 100644 --- a/test/test_page.py +++ b/test/test_page.py @@ -207,8 +207,6 @@ (R_315_TL, 7 * np.pi / 4, 315), ], ) -def test_bounding_rectangle_angle( - rectangle: BoundingRectangle, expected_angle: float, expected_angle_360: int -): +def test_bounding_rectangle_angle(rectangle: BoundingRectangle, expected_angle: float, expected_angle_360: int): assert pytest.approx(rectangle.angle, abs=1e-6) == expected_angle assert pytest.approx(rectangle.angle_360, abs=1e-6) == expected_angle_360 diff --git a/test/test_page_chunker.py b/test/test_page_chunker.py index de280493..46f16f26 100644 --- a/test/test_page_chunker.py +++ b/test/test_page_chunker.py @@ -27,9 +27,7 @@ def test_page_chunks(): chunk_iter = chunker.chunk(dl_doc=doc) chunks = list(chunk_iter) - act_data = dict( - root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] - ) + act_data = dict(root=[DocChunk.model_validate(n).export_json_dict() for n in chunks]) _process( act_data=act_data, exp_path_str=src.parent / f"{src.stem}_chunks.json", diff --git a/test/test_rec_schema.py b/test/test_rec_schema.py index 3d62b825..83ac93ce 100644 --- a/test/test_rec_schema.py +++ b/test/test_rec_schema.py @@ -37,7 +37,7 @@ def test_predicates_wrong(self): filename = "test/data/rec/error-predicate-02.json" with 
( - pytest.raises(ValidationError, match="geopoint_value.conf"), + pytest.raises(ValidationError, match="geopoint_value\\.conf"), open(filename, encoding="utf-8") as file_obj, ): file_json = file_obj.read() @@ -78,9 +78,7 @@ def test_subjects(self): def test_subjects2(self): """Validate data with Subject schema.""" # IdentifierTypeT, SubjectTypeT, SubjectNameTypeT - subject = Subject[ - Literal["db"], Literal["material"], Literal["chemical_name", "sum_formula"] - ] + subject = Subject[Literal["db"], Literal["material"], Literal["chemical_name", "sum_formula"]] for filename in glob.glob("test/data/rec/subject-*.json"): try: with open(filename, encoding="utf-8") as file_obj: @@ -93,9 +91,7 @@ def test_subjects2(self): def test_subjects_wrong(self): """Validate data with Subject schema.""" # IdentifierTypeT, SubjectTypeT, SubjectNameTypeT - subject = Subject[ - Literal["db_"], Literal["material"], Literal["chemical_name", "sum_formula"] - ] + subject = Subject[Literal["db_"], Literal["material"], Literal["chemical_name", "sum_formula"]] for filename in glob.glob("test/data/rec/subject-*.json"): with ( self.assertRaises(ValidationError), @@ -103,9 +99,7 @@ def test_subjects_wrong(self): ): file_json = file_obj.read() subject.model_validate_json(file_json) - subject = Subject[ - Literal["db"], Literal["material_"], Literal["chemical_name", "sum_formula"] - ] + subject = Subject[Literal["db"], Literal["material_"], Literal["chemical_name", "sum_formula"]] for filename in glob.glob("test/data/rec/subject-*.json"): with ( self.assertRaises(ValidationError), diff --git a/test/test_regions_to_table.py b/test/test_regions_to_table.py index 309e39fd..28611bd3 100644 --- a/test/test_regions_to_table.py +++ b/test/test_regions_to_table.py @@ -67,11 +67,11 @@ def test_regions_to_table_convert(): assert table_data.table_cells[0].bbox.b == 25.0 assert table_data.table_cells[0].col_span == 2 - assert table_data.table_cells[0].column_header == True - assert table_data.table_cells[1].column_header == True + assert table_data.table_cells[0].column_header + assert table_data.table_cells[1].column_header - assert table_data.table_cells[10].row_header == True - assert table_data.table_cells[12].row_section == True + assert table_data.table_cells[10].row_header + assert table_data.table_cells[12].row_section assert table_data.table_cells[17].bbox.l == 75.0 assert table_data.table_cells[17].bbox.t == 100.0 diff --git a/test/test_search_meta.py b/test/test_search_meta.py index d5b39daa..5904452e 100644 --- a/test/test_search_meta.py +++ b/test/test_search_meta.py @@ -12,9 +12,7 @@ def test_meta(): """Validate data with Meta schema.""" taxonomy = Literal["Public", "PI"] - domain = Literal[ - "Science", "Technology", "History", "Art", "Literature", "Geography" - ] + domain = Literal["Science", "Technology", "History", "Art", "Literature", "Geography"] for filename in glob.glob("test/data/search/meta-*.json"): try: diff --git a/test/test_serialization.py b/test/test_serialization.py index a783c410..a2ebb4f4 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -43,7 +43,7 @@ def verify(exp_file: Path, actual: str): # Normalize platform-dependent quote escaping for DocTags outputs name = exp_file.name - if name.endswith(".dt") or name.endswith(".idt.xml"): + if name.endswith((".dt", ".idt.xml")): def _normalize_quotes(s: str) -> str: return s.replace(""", '"').replace(""", '"') @@ -209,7 +209,6 @@ def test_md_list_item_markers(sample_doc): root_dir = Path("./test/data/doc") for mode in 
OrigListItemMarkerMode: for valid in [False, True]: - ser = MarkdownDocSerializer( doc=sample_doc, params=MarkdownParams( @@ -219,8 +218,7 @@ def test_md_list_item_markers(sample_doc): ) actual = ser.serialize().text verify( - root_dir - / f"constructed_mode_{str(mode.value).lower()}_valid_{str(valid).lower()}.gt.md", + root_dir / f"constructed_mode_{str(mode.value).lower()}_valid_{str(valid).lower()}.gt.md", actual=actual, ) @@ -265,9 +263,7 @@ def test_md_legacy_annotations_mark_true(sample_doc): exp_file = Path("./test/data/doc/constructed_legacy_annot_mark_true.gt.md") with pytest.warns(DeprecationWarning): sample_doc.tables[0].annotations.append( - DescriptionAnnotation( - text="This is a description of table 1.", provenance="foo" - ) + DescriptionAnnotation(text="This is a description of table 1.", provenance="foo") ) ser = MarkdownDocSerializer( doc=sample_doc, @@ -286,9 +282,7 @@ def test_md_legacy_annotations_mark_false(sample_doc): exp_file = Path("./test/data/doc/constructed_legacy_annot_mark_false.gt.md") with pytest.warns(DeprecationWarning): sample_doc.tables[0].annotations.append( - DescriptionAnnotation( - text="This is a description of table 1.", provenance="foo" - ) + DescriptionAnnotation(text="This is a description of table 1.", provenance="foo") ) ser = MarkdownDocSerializer( doc=sample_doc, @@ -521,7 +515,6 @@ def test_html_include_annotations_true(): def test_html_list_item_markers(sample_doc): root_dir = Path("./test/data/doc") for orig in [False, True]: - ser = HTMLDocSerializer( doc=sample_doc, params=HTMLParams( diff --git a/test/test_visualization.py b/test/test_visualization.py index ae74fa04..12e0ae8b 100644 --- a/test/test_visualization.py +++ b/test/test_visualization.py @@ -12,7 +12,7 @@ def verify(exp_file: Path, actual: PIL.Image.Image): if GEN_TEST_DATA: - with open(exp_file, "w", encoding="utf-8") as f: + with open(exp_file, "w", encoding="utf-8"): actual.save(exp_file) else: with PIL.Image.open(exp_file) as expected: @@ -72,14 +72,11 @@ def test_table_visualization_for_rows_and_cols(): src = Path("./test/data/doc/2408.09869v3_enriched.json") doc = DoclingDocument.load_from_json(src) - visualizer = TableVisualizer( - params=TableVisualizer.Params(show_cells=False, show_rows=True, show_cols=True) - ) + visualizer = TableVisualizer(params=TableVisualizer.Params(show_cells=False, show_rows=True, show_cols=True)) viz_pages = visualizer.get_visualization(doc=doc) verify( - exp_file=VIZ_TEST_DATA_PATH - / f"{src.stem}_table_viz_wout_lbl_p5_rows_and_cols.png", + exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_table_viz_wout_lbl_p5_rows_and_cols.png", actual=viz_pages[5], ) @@ -92,6 +89,6 @@ def test_cross_page_lists_with_branch_nums(): for i in range(2): verify( - exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_p{i+1}.png", + exp_file=VIZ_TEST_DATA_PATH / f"{src.stem}_p{i + 1}.png", actual=viz_pages[i + 1], ) diff --git a/uv.lock b/uv.lock index 30f766a2..04452c23 100644 --- a/uv.lock +++ b/uv.lock @@ -783,6 +783,7 @@ dev = [ { name = "pytest" }, { name = "pytest-cov" }, { name = "python-semantic-release" }, + { name = "ruff" }, { name = "types-setuptools" }, ] @@ -835,6 +836,7 @@ dev = [ { name = "pytest", specifier = "~=8.3" }, { name = "pytest-cov", specifier = ">=6.1.1" }, { name = "python-semantic-release", specifier = "~=7.32" }, + { name = "ruff", specifier = ">=0.14.8" }, { name = "types-setuptools", specifier = "~=70.3" }, ] @@ -3281,6 +3283,32 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ed/d2/4a73b18821fd4669762c855fd1f4e80ceb66fb72d71162d14da58444a763/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5d0145edba8abd3db0ab22b5300c99dc152f5c9021fab861be0f0544dc3cbc5f", size = 552199, upload-time = "2025-10-22T22:24:26.54Z" }, ] +[[package]] +name = "ruff" +version = "0.14.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/d9/f7a0c4b3a2bf2556cd5d99b05372c29980249ef71e8e32669ba77428c82c/ruff-0.14.8.tar.gz", hash = "sha256:774ed0dd87d6ce925e3b8496feb3a00ac564bea52b9feb551ecd17e0a23d1eed", size = 5765385, upload-time = "2025-12-04T15:06:17.669Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/b8/9537b52010134b1d2b72870cc3f92d5fb759394094741b09ceccae183fbe/ruff-0.14.8-py3-none-linux_armv6l.whl", hash = "sha256:ec071e9c82eca417f6111fd39f7043acb53cd3fde9b1f95bbed745962e345afb", size = 13441540, upload-time = "2025-12-04T15:06:14.896Z" }, + { url = "https://files.pythonhosted.org/packages/24/00/99031684efb025829713682012b6dd37279b1f695ed1b01725f85fd94b38/ruff-0.14.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8cdb162a7159f4ca36ce980a18c43d8f036966e7f73f866ac8f493b75e0c27e9", size = 13669384, upload-time = "2025-12-04T15:06:51.809Z" }, + { url = "https://files.pythonhosted.org/packages/72/64/3eb5949169fc19c50c04f28ece2c189d3b6edd57e5b533649dae6ca484fe/ruff-0.14.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:2e2fcbefe91f9fad0916850edf0854530c15bd1926b6b779de47e9ab619ea38f", size = 12806917, upload-time = "2025-12-04T15:06:08.925Z" }, + { url = "https://files.pythonhosted.org/packages/c4/08/5250babb0b1b11910f470370ec0cbc67470231f7cdc033cee57d4976f941/ruff-0.14.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d70721066a296f45786ec31916dc287b44040f553da21564de0ab4d45a869b", size = 13256112, upload-time = "2025-12-04T15:06:23.498Z" }, + { url = "https://files.pythonhosted.org/packages/78/4c/6c588e97a8e8c2d4b522c31a579e1df2b4d003eddfbe23d1f262b1a431ff/ruff-0.14.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2c87e09b3cd9d126fc67a9ecd3b5b1d3ded2b9c7fce3f16e315346b9d05cfb52", size = 13227559, upload-time = "2025-12-04T15:06:33.432Z" }, + { url = "https://files.pythonhosted.org/packages/23/ce/5f78cea13eda8eceac71b5f6fa6e9223df9b87bb2c1891c166d1f0dce9f1/ruff-0.14.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d62cb310c4fbcb9ee4ac023fe17f984ae1e12b8a4a02e3d21489f9a2a5f730c", size = 13896379, upload-time = "2025-12-04T15:06:02.687Z" }, + { url = "https://files.pythonhosted.org/packages/cf/79/13de4517c4dadce9218a20035b21212a4c180e009507731f0d3b3f5df85a/ruff-0.14.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1af35c2d62633d4da0521178e8a2641c636d2a7153da0bac1b30cfd4ccd91344", size = 15372786, upload-time = "2025-12-04T15:06:29.828Z" }, + { url = "https://files.pythonhosted.org/packages/00/06/33df72b3bb42be8a1c3815fd4fae83fa2945fc725a25d87ba3e42d1cc108/ruff-0.14.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25add4575ffecc53d60eed3f24b1e934493631b48ebbc6ebaf9d8517924aca4b", size = 14990029, upload-time = "2025-12-04T15:06:36.812Z" }, + { url = "https://files.pythonhosted.org/packages/64/61/0f34927bd90925880394de0e081ce1afab66d7b3525336f5771dcf0cb46c/ruff-0.14.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c943d847b7f02f7db4201a0600ea7d244d8a404fbb639b439e987edcf2baf9a", size = 
14407037, upload-time = "2025-12-04T15:06:39.979Z" }, + { url = "https://files.pythonhosted.org/packages/96/bc/058fe0aefc0fbf0d19614cb6d1a3e2c048f7dc77ca64957f33b12cfdc5ef/ruff-0.14.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb6e8bf7b4f627548daa1b69283dac5a296bfe9ce856703b03130732e20ddfe2", size = 14102390, upload-time = "2025-12-04T15:06:46.372Z" }, + { url = "https://files.pythonhosted.org/packages/af/a4/e4f77b02b804546f4c17e8b37a524c27012dd6ff05855d2243b49a7d3cb9/ruff-0.14.8-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:7aaf2974f378e6b01d1e257c6948207aec6a9b5ba53fab23d0182efb887a0e4a", size = 14230793, upload-time = "2025-12-04T15:06:20.497Z" }, + { url = "https://files.pythonhosted.org/packages/3f/52/bb8c02373f79552e8d087cedaffad76b8892033d2876c2498a2582f09dcf/ruff-0.14.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e5758ca513c43ad8a4ef13f0f081f80f08008f410790f3611a21a92421ab045b", size = 13160039, upload-time = "2025-12-04T15:06:49.06Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ad/b69d6962e477842e25c0b11622548df746290cc6d76f9e0f4ed7456c2c31/ruff-0.14.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f74f7ba163b6e85a8d81a590363bf71618847e5078d90827749bfda1d88c9cdf", size = 13205158, upload-time = "2025-12-04T15:06:54.574Z" }, + { url = "https://files.pythonhosted.org/packages/06/63/54f23da1315c0b3dfc1bc03fbc34e10378918a20c0b0f086418734e57e74/ruff-0.14.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:eed28f6fafcc9591994c42254f5a5c5ca40e69a30721d2ab18bb0bb3baac3ab6", size = 13469550, upload-time = "2025-12-04T15:05:59.209Z" }, + { url = "https://files.pythonhosted.org/packages/70/7d/a4d7b1961e4903bc37fffb7ddcfaa7beb250f67d97cfd1ee1d5cddb1ec90/ruff-0.14.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:21d48fa744c9d1cb8d71eb0a740c4dd02751a5de9db9a730a8ef75ca34cf138e", size = 14211332, upload-time = "2025-12-04T15:06:06.027Z" }, + { url = "https://files.pythonhosted.org/packages/5d/93/2a5063341fa17054e5c86582136e9895db773e3c2ffb770dde50a09f35f0/ruff-0.14.8-py3-none-win32.whl", hash = "sha256:15f04cb45c051159baebb0f0037f404f1dc2f15a927418f29730f411a79bc4e7", size = 13151890, upload-time = "2025-12-04T15:06:11.668Z" }, + { url = "https://files.pythonhosted.org/packages/02/1c/65c61a0859c0add13a3e1cbb6024b42de587456a43006ca2d4fd3d1618fe/ruff-0.14.8-py3-none-win_amd64.whl", hash = "sha256:9eeb0b24242b5bbff3011409a739929f497f3fb5fe3b5698aba5e77e8c833097", size = 14537826, upload-time = "2025-12-04T15:06:26.409Z" }, + { url = "https://files.pythonhosted.org/packages/6d/63/8b41cea3afd7f58eb64ac9251668ee0073789a3bc9ac6f816c8c6fef986d/ruff-0.14.8-py3-none-win_arm64.whl", hash = "sha256:965a582c93c63fe715fd3e3f8aa37c4b776777203d8e1d8aa3cc0c14424a4b99", size = 13634522, upload-time = "2025-12-04T15:06:43.212Z" }, +] + [[package]] name = "safetensors" version = "0.6.2"