diff --git a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt
index bc7bdf1885c..c92a0267622 100644
--- a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt
+++ b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt
@@ -49,7 +49,11 @@ elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
     add_definitions(-DDONT_COMPILE_ABSEIL) # Define DONT_COMPILE_ABSEIL preprocessor variable
 else()
     message("Release, RelWithDebInfo, or MinSizeRel mode: using abseil (DD_COMPILE_ABSEIL unset or not 0/false)")
-    FetchContent_Declare(absl URL "https://github.com/abseil/abseil-cpp/archive/refs/tags/20250127.1.zip")
+    FetchContent_Declare(
+        absl
+        URL "https://github.com/abseil/abseil-cpp/archive/refs/tags/20250127.1.zip"
+        TIMEOUT 180
+        INACTIVITY_TIMEOUT 120 DOWNLOAD_EXTRACT_TIMESTAMP TRUE)
     FetchContent_MakeAvailable(absl)
 endif()
diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt
index 9c3f3ed170c..84d770e1c41 100644
--- a/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt
+++ b/ddtrace/appsec/_iast/_taint_tracking/tests/CMakeLists.txt
@@ -1,7 +1,11 @@
 cmake_minimum_required(VERSION 3.19)
 
 include(FetchContent)
-FetchContent_Declare(googletest URL https://github.com/google/googletest/archive/refs/tags/v1.16.0.zip)
+FetchContent_Declare(
+    googletest
+    URL https://github.com/google/googletest/archive/refs/tags/v1.16.0.zip
+    TIMEOUT 180
+    INACTIVITY_TIMEOUT 120)
 FetchContent_MakeAvailable(googletest)
 
 enable_testing()
diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt
index ade703ca4ad..1a2fd6bc748 100644
--- a/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt
+++ b/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt
@@ -2,7 +2,9 @@ include(FetchContent)
 FetchContent_Declare(
     googletest
     GIT_REPOSITORY https://github.com/google/googletest.git
-    GIT_TAG v1.15.2)
+    GIT_TAG v1.15.2
+    TIMEOUT 180
+    INACTIVITY_TIMEOUT 120)
 set(gtest_force_shared_crt
     ON
     CACHE BOOL "" FORCE)
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt
index ea2bc39ef9d..be287aebaea 100644
--- a/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt
+++ b/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt
@@ -1,7 +1,9 @@
 FetchContent_Declare(
     googletest
     GIT_REPOSITORY https://github.com/google/googletest.git
-    GIT_TAG v1.15.2)
+    GIT_TAG v1.15.2
+    TIMEOUT 180
+    INACTIVITY_TIMEOUT 120)
 set(gtest_force_shared_crt
     ON
     CACHE BOOL "" FORCE)
diff --git a/docs/build_system.rst b/docs/build_system.rst
index 6b181ca1e59..9c98c76e09f 100644
--- a/docs/build_system.rst
+++ b/docs/build_system.rst
@@ -224,3 +224,38 @@ These environment variables modify aspects of the build process.
 
     version_added:
       v3.10.0:
+
+  DD_DOWNLOAD_MAX_RETRIES:
+    type: Integer
+    default: 10
+
+    description: |
+      Maximum number of retry attempts for transient download failures from GitHub.
+      Retries are triggered by HTTP 429 (rate limit), 502/503/504 (server errors),
+      and network timeouts. Uses exponential backoff with jitter between retries.
+
+    version_added:
+      v4.1.0:
+
+  DD_DOWNLOAD_INITIAL_DELAY:
+    type: Float
+    default: 1.0
+
+    description: |
+      Initial delay in seconds before the first retry attempt.
+      Delay increases exponentially with backoff_factor=1.618 (Fibonacci-like).
+      Useful for tuning retry behavior in different environments.
+
+    version_added:
+      v4.1.0:
+
+  DD_DOWNLOAD_MAX_DELAY:
+    type: Float
+    default: 120
+
+    description: |
+      Maximum delay in seconds between retry attempts.
+      Prevents excessive wait times during exponential backoff.
+
+    version_added:
+      v4.1.0:
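For a sense of how these three defaults interact: the setup.py change below computes each delay as min(initial_delay * backoff_factor**attempt, max_delay) and adds up to 10% random jitter. A minimal sketch reproducing that schedule (illustrative only; the helper name is not part of the patch):

    import random

    def retry_delays(max_retries=10, initial=1.0, factor=1.618, cap=120.0):
        """Yield the base-plus-jitter delay for each retry attempt."""
        for attempt in range(max_retries):
            delay = min(initial * factor**attempt, cap)
            yield delay + random.uniform(0, delay * 0.1)

    # Base delays with the defaults: 1.0, 1.6, 2.6, 4.2, 6.9, 11.1, 17.9,
    # 29.0, 47.0, 76.0 seconds. Only the first max_retries - 1 sleeps actually
    # happen (the final failure re-raises immediately), so the worst case waits
    # about 121 seconds before giving up; the 120 s cap only bites beyond that.
    print([round(d, 1) for d in retry_delays()])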
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 254b8e05f05..5161021023d 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -40,6 +40,7 @@ AWS
 ARN
 backend
 backends
+backoff
 backport
 backported
 backporting
diff --git a/setup.py b/setup.py
index 88b679aaa03..3718142aa97 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,7 @@
 from itertools import chain
 import os
 import platform
+import random
 import re
 import shutil
 import subprocess
@@ -41,7 +42,9 @@
     "https://ddtrace.readthedocs.io/en/stable/installation_quickstart.html"
 )
 
+from functools import wraps
 from urllib.error import HTTPError
+from urllib.error import URLError
 from urllib.request import urlretrieve
 
 
@@ -84,6 +87,11 @@
 SCCACHE_COMPILE = os.getenv("DD_USE_SCCACHE", "0").lower() in ("1", "yes", "on", "true")
 
+# Retry configuration for downloads (handles GitHub API failures like 503, 429)
+DOWNLOAD_MAX_RETRIES = int(os.getenv("DD_DOWNLOAD_MAX_RETRIES", "10"))
+DOWNLOAD_INITIAL_DELAY = float(os.getenv("DD_DOWNLOAD_INITIAL_DELAY", "1.0"))
+DOWNLOAD_MAX_DELAY = float(os.getenv("DD_DOWNLOAD_MAX_DELAY", "120"))
+
 IS_PYSTON = hasattr(sys, "pyston_version_info")
 
 IS_EDITABLE = False  # Set to True if the package is being installed in editable mode
@@ -139,6 +147,71 @@ def interpose_sccache():
         os.environ["CXX"] = str(sccache_path) + " " + str(cxx_path)
 
 
+def retry_download(
+    max_attempts=DOWNLOAD_MAX_RETRIES,
+    initial_delay=DOWNLOAD_INITIAL_DELAY,
+    max_delay=DOWNLOAD_MAX_DELAY,
+    backoff_factor=1.618,
+):
+    """
+    Decorator to retry an operation with exponential backoff and jitter.
+    Retriable errors: HTTP 429 (rate limit), 502/503/504 (server errors),
+    network timeouts and other OSErrors, and subprocess failures (e.g. cargo install).
+    """
+
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            for attempt in range(max_attempts):
+                try:
+                    return func(*args, **kwargs)
+                except (HTTPError, URLError, TimeoutError, OSError, subprocess.CalledProcessError) as e:
+                    # Check if it's a retriable error
+                    is_retriable = False
+                    if isinstance(e, HTTPError):
+                        # Retry on 429 (rate limit), 502/503/504 (server errors)
+                        is_retriable = e.code in (429, 502, 503, 504)
+                        error_code = f"HTTP {e.code}"
+                    elif isinstance(e, (URLError, TimeoutError)):
+                        # Retry on network errors and timeouts
+                        is_retriable = True
+                        error_code = type(e).__name__
+                    elif isinstance(e, OSError):
+                        # Retry on connection errors
+                        is_retriable = True
+                        error_code = type(e).__name__
+                    elif isinstance(e, subprocess.CalledProcessError):
+                        # Retry on subprocess errors (e.g., cargo install network failures)
+                        # These often indicate temporary network issues
+                        is_retriable = True
+                        error_code = f"subprocess exit code {e.returncode}"
+                    else:
+                        error_code = type(e).__name__
+
+                    if not is_retriable:
+                        print(f"ERROR: Operation failed (non-retriable {error_code}): {e}")
+                        raise
+
+                    if attempt == max_attempts - 1:
+                        print(f"ERROR: Operation failed after {max_attempts} attempts (last error: {error_code})")
+                        raise
+
+                    # Calculate delay with jitter
+                    delay = min(initial_delay * (backoff_factor**attempt), max_delay)
+                    jitter = random.uniform(0, delay * 0.1)
+                    total_delay = delay + jitter
+
+                    print(f"WARNING: Operation failed (attempt {attempt + 1}/{max_attempts}): {error_code} - {e}")
+                    print(f"    Retrying in {total_delay:.1f} seconds...")
+                    time.sleep(total_delay)
+
+            return func(*args, **kwargs)  # only reached if max_attempts <= 0
+
+        return wrapper
+
+    return decorator
+
+
 def verify_checksum_from_file(sha256_filename, filename):
     # sha256 File format is ``checksum`` followed by two whitespaces, then ``filename`` then ``\n``
     expected_checksum, expected_filename = list(filter(None, open(sha256_filename, "r").read().strip().split(" ")))
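The decorator above composes like any other Python decorator. A minimal, self-contained usage sketch (the function name and URL are hypothetical, not part of the patch; assumes retry_download is in scope):

    from urllib.request import urlretrieve

    @retry_download(max_attempts=3, initial_delay=0.5)
    def fetch_archive(url, dest):
        """Retried on HTTP 429/502/503/504, URLError/TimeoutError/OSError,
        and CalledProcessError; re-raises after the final attempt."""
        return urlretrieve(url, dest)

    # fetch_archive("https://example.com/pkg.tar.gz", "/tmp/pkg.tar.gz")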
+ """ + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + for attempt in range(max_attempts): + try: + return func(*args, **kwargs) + except (HTTPError, URLError, TimeoutError, OSError, subprocess.CalledProcessError) as e: + # Check if it's a retriable error + is_retriable = False + if isinstance(e, HTTPError): + # Retry on 429 (rate limit), 502/503/504 (server errors) + is_retriable = e.code in (429, 502, 503, 504) + error_code = f"HTTP {e.code}" + elif isinstance(e, (URLError, TimeoutError)): + # Retry on network errors and timeouts + is_retriable = True + error_code = type(e).__name__ + elif isinstance(e, OSError): + # Retry on connection errors + is_retriable = True + error_code = type(e).__name__ + elif isinstance(e, subprocess.CalledProcessError): + # Retry on subprocess errors (e.g., cargo install network failures) + # These often indicate temporary network issues + is_retriable = True + error_code = f"subprocess exit code {e.returncode}" + else: + error_code = type(e).__name__ + + if not is_retriable: + print(f"ERROR: Operation failed (non-retriable {error_code}): {e}") + raise + + if attempt == max_attempts - 1: + print(f"ERROR: Operation failed after {max_attempts} attempts (last error: {error_code})") + raise + + # Calculate delay with jitter + delay = min(initial_delay * (backoff_factor**attempt), max_delay) + jitter = random.uniform(0, delay * 0.1) + total_delay = delay + jitter + + print(f"WARNING: Operation failed (attempt {attempt + 1}/{max_attempts}): {error_code} - {e}") + print(f" Retrying in {total_delay:.1f} seconds...") + time.sleep(total_delay) + + return func(*args, **kwargs) + + return wrapper + + return decorator + + def verify_checksum_from_file(sha256_filename, filename): # sha256 File format is ``checksum`` followed by two whitespaces, then ``filename`` then ``\n`` expected_checksum, expected_filename = list(filter(None, open(sha256_filename, "r").read().strip().split(" "))) @@ -298,18 +371,24 @@ def is_installed(self, bin_file): def install_dedup_headers(self): """Install dedup_headers if not already installed.""" if not self.is_installed("dedup_headers"): - subprocess.run( - [ - "cargo", - "install", - "--git", - "https://github.com/DataDog/libdatadog", - "--bin", - "dedup_headers", - "tools", - ], - check=True, - ) + # Create retry-wrapped cargo install function + @retry_download(max_attempts=DOWNLOAD_MAX_RETRIES, initial_delay=2.0) + def cargo_install_with_retry(): + """Run cargo install with retry on network failures.""" + subprocess.run( + [ + "cargo", + "install", + "--git", + "https://github.com/DataDog/libdatadog", + "--bin", + "dedup_headers", + "tools", + ], + check=True, + ) + + cargo_install_with_retry() def run(self): """Run the build process with additional post-processing.""" @@ -411,16 +490,20 @@ def download_artifacts(cls): if not (cls.USE_CACHE and download_dest.exists()): print(f"Downloading {archive_name} to {download_dest}") start_ns = time.time_ns() - try: - filename, _ = urlretrieve(download_address, str(download_dest)) - except HTTPError as e: - print("No archive found for dynamic library {}: {}".format(cls.name, archive_dir)) - raise e + + # Create retry-wrapped download function + @retry_download() + def download_file(url, dest): + """Download file with automatic retry on transient errors.""" + return urlretrieve(url, str(dest)) + + filename, _ = download_file(download_address, download_dest) # Verify checksum of downloaded file if cls.expected_checksums is None: sha256_address = download_address + ".sha256" - 
diff --git a/src/native/.cargo/config.toml b/src/native/.cargo/config.toml
index c9882cd5c14..4e6b83c218e 100644
--- a/src/native/.cargo/config.toml
+++ b/src/native/.cargo/config.toml
@@ -11,3 +11,16 @@ rustflags = ["-C", "target-feature=-crt-static"]
 
 [target.aarch64-unknown-linux-musl]
 rustflags = ["-C", "target-feature=-crt-static"]
+
+[net]
+# Increase retries for GitHub API failures (default is 3)
+# Handles HTTP 503, 429, and other transient errors
+retry = 10
+
+# Use system git for better reliability and retry handling
+git-fetch-with-cli = true
+
+[http]
+# Timeout for HTTP operations (3 minutes)
+# Prevents hanging on slow/unresponsive servers
+timeout = 180
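Cargo also reads these settings from environment variables (CARGO_NET_RETRY, CARGO_NET_GIT_FETCH_WITH_CLI, CARGO_HTTP_TIMEOUT), which can be useful in CI jobs that cannot edit config.toml. An illustrative sketch, assuming a build script that shells out to cargo:

    import os
    import subprocess

    # Mirror the [net]/[http] settings above via cargo's env-var equivalents.
    env = dict(
        os.environ,
        CARGO_NET_RETRY="10",                 # [net] retry = 10
        CARGO_NET_GIT_FETCH_WITH_CLI="true",  # [net] git-fetch-with-cli = true
        CARGO_HTTP_TIMEOUT="180",             # [http] timeout = 180
    )
    subprocess.run(["cargo", "build", "--release"], check=True, env=env)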