diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py
index 8190e3cd9..91084907d 100644
--- a/docling/backend/html_backend.py
+++ b/docling/backend/html_backend.py
@@ -40,6 +40,7 @@
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import InputDocument
 from docling.exceptions import OperationNotAllowed
+from docling.utils.http_client import request_with_retry
 
 _log = logging.getLogger(__name__)
 
@@ -1256,9 +1257,9 @@ def _load_image_data(self, src_loc: str) -> Optional[bytes]:
                     "Fetching remote resources is only allowed when set explicitly. "
                     "Set options.enable_remote_fetch=True."
                 )
-            response = requests.get(src_loc, stream=True)
-            response.raise_for_status()
-            return response.content
+            with request_with_retry("GET", src_loc, stream=True) as response:
+                response.raise_for_status()
+                return response.content
         elif src_loc.startswith("data:"):
             data = re.sub(r"^data:image/.+;base64,", "", src_loc)
             return base64.b64decode(data)
diff --git a/docling/utils/api_image_request.py b/docling/utils/api_image_request.py
index d998467bc..948386a5c 100644
--- a/docling/utils/api_image_request.py
+++ b/docling/utils/api_image_request.py
@@ -2,14 +2,14 @@
 import json
 import logging
 from io import BytesIO
-from typing import Dict, List, Optional, Tuple
+from typing import Optional, Tuple
 
-import requests
 from PIL import Image
 from pydantic import AnyUrl
 
 from docling.datamodel.base_models import OpenAiApiResponse, VlmStopReason
 from docling.models.utils.generation_utils import GenerationStopper
+from docling.utils.http_client import request_with_retry
 
 _log = logging.getLogger(__name__)
 
@@ -63,7 +63,8 @@ def api_image_request(
 
             headers = headers or {}
 
-            r = requests.post(
+            r = request_with_retry(
+                "POST",
                 str(url),
                 headers=headers,
                 json=payload,
@@ -142,8 +143,13 @@ def api_image_request_streaming(
         hdrs["X-Temperature"] = str(params["temperature"])
 
     # Stream the HTTP response
-    with requests.post(
-        str(url), headers=hdrs, json=payload, timeout=timeout, stream=True
+    with request_with_retry(
+        "POST",
+        str(url),
+        headers=hdrs,
+        json=payload,
+        timeout=timeout,
+        stream=True,
     ) as r:
         if not r.ok:
             _log.error(
diff --git a/docling/utils/http_client.py b/docling/utils/http_client.py
new file mode 100644
index 000000000..e65661223
--- /dev/null
+++ b/docling/utils/http_client.py
@@ -0,0 +1,129 @@
+"""Shared HTTP helpers that add urllib3 retry/backoff handling to ``requests``."""
+
+from __future__ import annotations
+
+from typing import Collection, Iterable
+
+import requests
+from requests import Response, Session
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+# Status codes worth retrying because they are transient or throttling related.
+_DEFAULT_STATUS_FORCELIST = (408, 425, 429, 500, 502, 503, 504)
+
+# Methods that are safe or idempotent enough for retries in our usage.
+# NOTE(review): POST and PATCH are not idempotent in general, so a retried
+# request may be processed twice by the server; confirm that is acceptable for
+# every endpoint reached through this module.
+_DEFAULT_ALLOWED_METHODS = frozenset(
+    ["DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT"]
+)
+
+
+def _build_retry(
+    *,
+    total: int = 5,
+    backoff_factor: float = 0.2,
+    status_forcelist: Collection[int] = _DEFAULT_STATUS_FORCELIST,
+    allowed_methods: Iterable[str] | None = _DEFAULT_ALLOWED_METHODS,
+) -> Retry:
+    """Build the urllib3 ``Retry`` policy used by retry-enabled sessions.
+
+    Args:
+        total: Retry budget; also applied to the read, connect and status
+            counters individually.
+        backoff_factor: Backoff factor forwarded to ``Retry``.
+        status_forcelist: HTTP status codes that trigger a retry.
+        allowed_methods: HTTP verbs eligible for retries, in any letter case.
+            ``None`` or an empty iterable is forwarded as ``None``, which
+            urllib3 treats as "retry on any verb".
+
+    Returns:
+        A ``Retry`` with ``raise_on_status=False``: when status retries are
+        exhausted the last response is returned rather than raising, leaving
+        the caller to check it (``raise_for_status()`` or ``ok``).
+    """
+    # urllib3 matches the upper-cased request verb against this set, so a
+    # lower-case entry such as "get" would otherwise never match.
+    methods = frozenset(verb.upper() for verb in (allowed_methods or ()))
+    return Retry(
+        total=total,
+        read=total,
+        connect=total,
+        status=total,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+        allowed_methods=methods or None,
+        raise_on_status=False,
+    )
+
+
+def create_retry_session(
+    *,
+    total: int = 5,
+    backoff_factor: float = 0.2,
+    status_forcelist: Collection[int] = _DEFAULT_STATUS_FORCELIST,
+    allowed_methods: Iterable[str] | None = _DEFAULT_ALLOWED_METHODS,
+) -> Session:
+    """Return a new requests Session configured with retry/backoff handling.
+
+    The keyword arguments are passed unchanged to ``_build_retry``. A single
+    ``HTTPAdapter`` carrying the resulting policy is mounted for both the
+    ``http://`` and ``https://`` prefixes.
+    """
+    adapter = HTTPAdapter(
+        max_retries=_build_retry(
+            total=total,
+            backoff_factor=backoff_factor,
+            status_forcelist=status_forcelist,
+            allowed_methods=allowed_methods,
+        )
+    )
+    session = requests.Session()
+    for prefix in ("http://", "https://"):
+        session.mount(prefix, adapter)
+    return session
+
+
+_DEFAULT_SESSION: Session | None = None
+
+
+def get_retry_session() -> Session:
+    """Return the lazily-created default retry-enabled Session.
+
+    The session is built with default settings on the first call and cached
+    in a module-level global; later calls return that same object.
+
+    NOTE(review): creation is not guarded by a lock, so threads racing on the
+    first call could each build a session; confirm that is acceptable.
+    """
+    global _DEFAULT_SESSION
+    if _DEFAULT_SESSION is None:
+        _DEFAULT_SESSION = create_retry_session()
+    return _DEFAULT_SESSION
+
+
+def request_with_retry(
+    method: str,
+    url: str,
+    *,
+    session: Session | None = None,
+    **kwargs,
+) -> Response:
+    """Perform an HTTP request using a retry-enabled Session.
+
+    Args:
+        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
+        url: Target URL.
+        session: Session to send the request with; the shared default session
+            is used when omitted.
+        **kwargs: Forwarded unchanged to ``Session.request`` (for example
+            ``headers``, ``json``, ``timeout``, ``stream``). No default
+            timeout is added here.
+
+    Returns:
+        The ``Response`` produced by ``Session.request``.
+    """
+    active_session = session or get_retry_session()
+    return active_session.request(method=method, url=url, **kwargs)
diff --git a/docling/utils/utils.py b/docling/utils/utils.py
index 6425820c0..bf3f95298 100644
--- a/docling/utils/utils.py
+++ b/docling/utils/utils.py
@@ -4,9 +4,10 @@
 from pathlib import Path
 from typing import List, Union
 
-import requests
 from tqdm import tqdm
 
+from docling.utils.http_client import request_with_retry
+
 
 def chunkify(iterator, chunk_size):
     """Yield successive chunks of chunk_size from the iterable."""
@@ -46,7 +47,7 @@ def create_hash(string: str):
 
 def download_url_with_progress(url: str, progress: bool = False) -> BytesIO:
     buf = BytesIO()
-    with requests.get(url, stream=True, allow_redirects=True) as response:
+    with request_with_retry("GET", url, stream=True, allow_redirects=True) as response:
         total_size = int(response.headers.get("content-length", 0))
         progress_bar = tqdm(
             total=total_size,