fix: Use model name instead of local path for AutoProcessor.from_pretrained

b-g-d · b-g-d · commit b3bbd02b7ae3 · 2025-11-25T17:39:53.000-05:00
Fixes the AttributeError raised when do_formula_enrichment=True. When a local Path is passed to AutoProcessor.from_pretrained(), transformers loads the config as a dict but then tries to access .model_type as an attribute, which raises the error. Passing the model name ('docling-project/CodeFormulaV2') instead allows transformers to properly load the config as an object, while still using the cached model automatically. Fixes #2681.
diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py
@@ -105,8 +105,11 @@ def __init__(
             else:
                 artifacts_path = artifacts_path / self._model_repo_folder
 
+            # Use the model name instead of the local path to avoid a transformers bug
+            # where the config is loaded as a dict but accessed as an object attribute.
+            # NOTE(review): artifacts_path is not passed here; verify cache lookup.
             self._processor = AutoProcessor.from_pretrained(
-                artifacts_path,
+                "docling-project/CodeFormulaV2",
             )
             self._model_max_length = self._processor.tokenizer.model_max_length
             self._model = AutoModelForImageTextToText.from_pretrained(