diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py
index 8190e3cd9..91084907d 100644
--- a/docling/backend/html_backend.py
+++ b/docling/backend/html_backend.py
@@ -40,6 +40,7 @@
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import InputDocument
from docling.exceptions import OperationNotAllowed
+from docling.utils.http_client import request_with_retry
_log = logging.getLogger(__name__)
@@ -1256,9 +1257,9 @@ def _load_image_data(self, src_loc: str) -> Optional[bytes]:
"Fetching remote resources is only allowed when set explicitly. "
"Set options.enable_remote_fetch=True."
)
- response = requests.get(src_loc, stream=True)
- response.raise_for_status()
- return response.content
+ with request_with_retry("GET", src_loc, stream=True) as response:
+ response.raise_for_status()
+ return response.content
elif src_loc.startswith("data:"):
data = re.sub(r"^data:image/.+;base64,", "", src_loc)
return base64.b64decode(data)
diff --git a/docling/utils/api_image_request.py b/docling/utils/api_image_request.py
index d998467bc..948386a5c 100644
--- a/docling/utils/api_image_request.py
+++ b/docling/utils/api_image_request.py
@@ -2,14 +2,14 @@
import json
import logging
from io import BytesIO
-from typing import Dict, List, Optional, Tuple
+from typing import Optional, Tuple
-import requests
from PIL import Image
from pydantic import AnyUrl
from docling.datamodel.base_models import OpenAiApiResponse, VlmStopReason
from docling.models.utils.generation_utils import GenerationStopper
+from docling.utils.http_client import request_with_retry
_log = logging.getLogger(__name__)
@@ -63,7 +63,8 @@ def api_image_request(
headers = headers or {}
- r = requests.post(
+ r = request_with_retry(
+ "POST",
str(url),
headers=headers,
json=payload,
@@ -142,8 +143,13 @@ def api_image_request_streaming(
hdrs["X-Temperature"] = str(params["temperature"])
# Stream the HTTP response
- with requests.post(
- str(url), headers=hdrs, json=payload, timeout=timeout, stream=True
+ with request_with_retry(
+ "POST",
+ str(url),
+ headers=hdrs,
+ json=payload,
+ timeout=timeout,
+ stream=True,
) as r:
if not r.ok:
_log.error(
diff --git a/docling/utils/http_client.py b/docling/utils/http_client.py
new file mode 100644
index 000000000..e65661223
--- /dev/null
+++ b/docling/utils/http_client.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+from typing import Collection, Iterable
+
+import requests
+from requests import Response, Session
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+# Status codes worth retrying because they are transient or throttling related:
+# 408 Request Timeout, 425 Too Early, 429 Too Many Requests, 500 Internal
+# Server Error, 502 Bad Gateway, 503 Service Unavailable, 504 Gateway Timeout.
+_DEFAULT_STATUS_FORCELIST = (408, 425, 429, 500, 502, 503, 504)
+
+# Methods for which retries are enabled by default; treated as safe or
+# idempotent enough for retries in our usage.
+# NOTE(review): POST and PATCH are not idempotent under HTTP semantics, so a
+# retried request may be processed more than once by the server. Presumably
+# that is acceptable for the endpoints this project calls -- confirm.
+_DEFAULT_ALLOWED_METHODS = frozenset(
+ ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"]
+)
+
+
+def _build_retry(
+    *,
+    total: int = 5,
+    backoff_factor: float = 0.2,
+    status_forcelist: Collection[int] = _DEFAULT_STATUS_FORCELIST,
+    allowed_methods: Iterable[str] | None = _DEFAULT_ALLOWED_METHODS,
+) -> Retry:
+    """Assemble the urllib3 ``Retry`` policy used by the retry-enabled sessions.
+
+    Args:
+        total: Retry budget; the same number is applied to the overall, read,
+            connect and status-based counters.
+        backoff_factor: Forwarded unchanged to ``Retry``.
+        status_forcelist: HTTP status codes that should trigger a retry.
+        allowed_methods: HTTP verbs eligible for retries. Any falsy value
+            (``None`` or an empty collection) is forwarded as ``None``, which
+            urllib3 documents as "retry on any verb".
+
+    Returns:
+        A ``Retry`` instance created with ``raise_on_status=False``.
+    """
+    # Anything falsy collapses to None; otherwise freeze the verbs into a set.
+    if allowed_methods:
+        methods = frozenset(allowed_methods)
+    else:
+        methods = None
+
+    return Retry(
+        allowed_methods=methods,
+        status_forcelist=status_forcelist,
+        backoff_factor=backoff_factor,
+        raise_on_status=False,
+        # A single budget is shared by every per-category counter.
+        status=total,
+        connect=total,
+        read=total,
+        total=total,
+    )
+
+
+def create_retry_session(
+    *,
+    total: int = 5,
+    backoff_factor: float = 0.2,
+    status_forcelist: Collection[int] = _DEFAULT_STATUS_FORCELIST,
+    allowed_methods: Iterable[str] | None = _DEFAULT_ALLOWED_METHODS,
+) -> Session:
+    """Create a new ``requests`` Session whose adapters retry with backoff.
+
+    All keyword arguments are handed to ``_build_retry`` unchanged; the
+    resulting policy is attached through one ``HTTPAdapter`` that is mounted
+    for both the ``http://`` and ``https://`` prefixes.
+
+    Returns:
+        A fresh ``Session``; every call builds a new one.
+    """
+    session = requests.Session()
+    policy = _build_retry(
+        total=total,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+        allowed_methods=allowed_methods,
+    )
+    retrying_adapter = HTTPAdapter(max_retries=policy)
+    # The same adapter instance serves plain and TLS traffic.
+    for prefix in ("http://", "https://"):
+        session.mount(prefix, retrying_adapter)
+    return session
+
+
+# Module-wide Session shared by callers that do not supply their own; it is
+# created on first use by get_retry_session().
+_DEFAULT_SESSION: Session | None = None
+
+
+def get_retry_session() -> Session:
+    """Return the shared retry-enabled Session, creating it on first use.
+
+    The instance is stored in the module-level ``_DEFAULT_SESSION`` and is
+    built with the defaults of ``create_retry_session``.
+    """
+    global _DEFAULT_SESSION
+    cached = _DEFAULT_SESSION
+    if cached is not None:
+        return cached
+    # First call: build the session and remember it for later callers.
+    cached = create_retry_session()
+    _DEFAULT_SESSION = cached
+    return cached
+
+
+def request_with_retry(
+    method: str,
+    url: str,
+    *,
+    session: Session | None = None,
+    **kwargs,
+) -> Response:
+    """Send an HTTP request through a retry-enabled Session.
+
+    Args:
+        method: HTTP verb, passed to ``Session.request``.
+        url: Target URL.
+        session: Session to use; when omitted (or falsy) the shared default
+            from ``get_retry_session`` is used.
+        **kwargs: Forwarded verbatim to ``Session.request`` (for example
+            ``headers``, ``json``, ``timeout``, ``stream``).
+
+    Returns:
+        The ``Response`` returned by ``Session.request``.
+    """
+    if session:
+        active_session = session
+    else:
+        active_session = get_retry_session()
+    response = active_session.request(method=method, url=url, **kwargs)
+    return response
diff --git a/docling/utils/utils.py b/docling/utils/utils.py
index 6425820c0..bf3f95298 100644
--- a/docling/utils/utils.py
+++ b/docling/utils/utils.py
@@ -4,9 +4,10 @@
from pathlib import Path
from typing import List, Union
-import requests
from tqdm import tqdm
+from docling.utils.http_client import request_with_retry
+
def chunkify(iterator, chunk_size):
"""Yield successive chunks of chunk_size from the iterable."""
@@ -46,7 +47,7 @@ def create_hash(string: str):
def download_url_with_progress(url: str, progress: bool = False) -> BytesIO:
buf = BytesIO()
- with requests.get(url, stream=True, allow_redirects=True) as response:
+ with request_with_retry("GET", url, stream=True, allow_redirects=True) as response:
total_size = int(response.headers.get("content-length", 0))
progress_bar = tqdm(
total=total_size,