diff --git a/ddtrace/appsec/_constants.py b/ddtrace/appsec/_constants.py index b75d760e28d..4349151e499 100644 --- a/ddtrace/appsec/_constants.py +++ b/ddtrace/appsec/_constants.py @@ -132,6 +132,7 @@ class APPSEC(metaclass=Constant_Class): TELEMETRY_DEBUG_NAME = "DEBUG" TELEMETRY_MANDATORY_NAME = "MANDATORY" TELEMETRY_INFORMATION_NAME = "INFORMATION" +IAST_TRUNCATION_MAX_VALUE_LENGTH_DEFAULT = 250 TELEMETRY_DEBUG_VERBOSITY = 10 TELEMETRY_INFORMATION_VERBOSITY = 20 @@ -153,6 +154,9 @@ class IAST(metaclass=Constant_Class): ) DD_IAST_MAX_CONCURRENT_REQUESTS: Literal["DD_IAST_MAX_CONCURRENT_REQUESTS"] = "DD_IAST_MAX_CONCURRENT_REQUESTS" ENV_TELEMETRY_REPORT_LVL: Literal["DD_IAST_TELEMETRY_VERBOSITY"] = "DD_IAST_TELEMETRY_VERBOSITY" + ENV_DD_IAST_TRUNCATION_MAX_VALUE_LENGTH: Literal["DD_IAST_TRUNCATION_MAX_VALUE_LENGTH"] = ( + "DD_IAST_TRUNCATION_MAX_VALUE_LENGTH" + ) LAZY_TAINT: Literal["_DD_IAST_LAZY_TAINT"] = "_DD_IAST_LAZY_TAINT" JSON: Literal["_dd.iast.json"] = "_dd.iast.json" STRUCT: Literal["iast"] = "iast" diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp index f0d4490dc0f..b973f92a874 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp @@ -26,7 +26,7 @@ add_aspect(PyObject* result_o, } const auto& to_candidate_text = get_tainted_object(candidate_text, tx_taint_map); - if (to_candidate_text and to_candidate_text->get_ranges().size() >= TaintedObject::TAINT_RANGE_LIMIT) { + if (to_candidate_text and !to_candidate_text->has_free_tainted_ranges_space()) { const auto& res_new_id = new_pyobject_id(result_o); Py_DECREF(result_o); // If left side is already at the maximum taint ranges, we just reuse its diff --git a/ddtrace/appsec/_iast/_taint_tracking/native.cpp b/ddtrace/appsec/_iast/_taint_tracking/native.cpp index 1d30d03299f..f40e6b97f74 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/native.cpp @@ -19,7 +19,9 @@ #include "aspects/aspects_exports.h" #include "constants.h" #include "context/_taint_engine_context.h" +#include "taint_tracking/source.h" #include "taint_tracking/taint_tracking.h" +#include "taint_tracking/tainted_object.h" #include "tainted_ops/tainted_ops.h" #include "utils/generic_utils.h" @@ -246,6 +248,18 @@ PYBIND11_MODULE(_native, m) "Normally called automatically at module load, but can be called manually " "from Python for explicit initialization control."); + // Export testing utilities + m.def("reset_taint_range_limit_cache", + &reset_taint_range_limit_cache, + "Reset the cached taint range limit for testing purposes. " + "This forces get_taint_range_limit() to re-read DD_IAST_MAX_RANGE_COUNT environment variable."); + + m.def("reset_source_truncation_cache", + &reset_source_truncation_cache, + "Reset the cached source truncation length for testing purposes. " + "This forces get_source_truncation_max_length() to re-read DD_IAST_TRUNCATION_MAX_VALUE_LENGTH environment " + "variable."); + // Note: the order of these definitions matter. For example, // stacktrace_element definitions must be before the ones of the // classes inheriting from it. diff --git a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.cpp b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.cpp index b933f37e198..555d31347f9 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include "source.h" @@ -6,16 +8,67 @@ using namespace std; namespace py = pybind11; using namespace pybind11::literals; +// Default truncation length if environment variable is not set +constexpr size_t DEFAULT_TRUNCATION_LENGTH = 250; + +// Static variables for caching the truncation length +namespace { +size_t g_cached_truncation_length = 0; +} + +// Get the truncation max length from environment variable +size_t +get_source_truncation_max_length() +{ + if (g_cached_truncation_length == 0) { + const char* env_value = std::getenv("DD_IAST_TRUNCATION_MAX_VALUE_LENGTH"); + if (env_value != nullptr) { + try { + long parsed_value = std::strtol(env_value, nullptr, 10); + if (parsed_value > 0) { + g_cached_truncation_length = static_cast(parsed_value); + } else { + g_cached_truncation_length = DEFAULT_TRUNCATION_LENGTH; + } + } catch (...) { + g_cached_truncation_length = DEFAULT_TRUNCATION_LENGTH; + } + } else { + g_cached_truncation_length = DEFAULT_TRUNCATION_LENGTH; + } + } + + return g_cached_truncation_length; +} + +// Reset the cached truncation length (for testing purposes only) +void +reset_source_truncation_cache() +{ + g_cached_truncation_length = 0; +} + +// Truncate value string if it exceeds the max length +string +truncate_source_value(string value) +{ + size_t max_length = get_source_truncation_max_length(); + if (value.length() > max_length) { + return value.substr(0, max_length); + } + return value; +} + Source::Source(string name, string value, OriginType origin) : name(std::move(name)) - , value(std::move(value)) + , value(truncate_source_value(std::move(value))) , origin(origin) { } Source::Source(int name, string value, const OriginType origin) : name(origin_to_str(OriginType{ name })) - , value(std::move(value)) + , value(truncate_source_value(std::move(value))) , origin(origin) { } diff --git a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.h b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.h index 86cbf6536ea..00ceabf9dd2 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.h +++ b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/source.h @@ -30,6 +30,18 @@ enum class TagMappingMode Mapper_Replace }; +// Helper function to get truncation max length from environment variable +size_t +get_source_truncation_max_length(); + +// Reset the cached truncation length (for testing purposes only) +void +reset_source_truncation_cache(); + +// Helper function to truncate value string if needed +string +truncate_source_value(string value); + struct Source { Source(string, string, OriginType); @@ -44,7 +56,7 @@ struct Source void set_values(string name_ = "", string value_ = "", OriginType origin_ = OriginType()) { name = std::move(name_); - value = std::move(value_); + value = truncate_source_value(std::move(value_)); origin = origin_; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp index 81c2d1d0cc7..2068f70e14b 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp @@ -1,8 +1,50 @@ #include "api/safe_initializer.h" #include "initializer/initializer.h" +#include namespace py = pybind11; +// Default max range count if environment variable is not set +constexpr int DEFAULT_MAX_RANGE_COUNT = 30; + +// Static variables for caching the taint range limit +namespace { +int g_cached_limit = 0; +bool g_limit_initialized = false; +} + +// Get the max range count from environment variable +int +get_taint_range_limit() +{ + if (g_cached_limit == 0) { + const char* env_value = std::getenv("DD_IAST_MAX_RANGE_COUNT"); + if (env_value != nullptr) { + try { + long parsed_value = std::strtol(env_value, nullptr, 10); + if (parsed_value > 0) { + g_cached_limit = static_cast(parsed_value); + } else { + g_cached_limit = DEFAULT_MAX_RANGE_COUNT; + } + } catch (...) { + g_cached_limit = DEFAULT_MAX_RANGE_COUNT; + } + } else { + g_cached_limit = DEFAULT_MAX_RANGE_COUNT; + } + } + + return g_cached_limit; +} + +// Reset the cached taint range limit (for testing purposes only) +void +reset_taint_range_limit_cache() +{ + g_cached_limit = 0; +} + /** * This function allocates a new taint range with the given offset and maximum length. * @@ -74,7 +116,7 @@ TaintedObject::add_ranges_shifted(TaintRangeRefs ranges, const RANGE_LENGTH max_length, const RANGE_START orig_offset) { - if (const auto to_add = static_cast(min(ranges.size(), TAINT_RANGE_LIMIT - ranges_.size())); + if (const auto to_add = static_cast(min(ranges.size(), get_free_tainted_ranges_space())); !ranges.empty() and to_add > 0) { ranges_.reserve(ranges_.size() + to_add); if (offset == 0 and max_length == -1) { diff --git a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.h b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.h index 26e2bcfed06..2189e7398f8 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.h +++ b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.h @@ -2,6 +2,14 @@ #include "taint_tracking/taint_range.h" #include +// Helper function to get max range count from environment variable +int +get_taint_range_limit(); + +// Reset the cached taint range limit (for testing purposes only) +void +reset_taint_range_limit_cache(); + class TaintedObject { friend class Initializer; @@ -10,7 +18,6 @@ class TaintedObject TaintRangeRefs ranges_; public: - constexpr static int TAINT_RANGE_LIMIT = 100; constexpr static int RANGES_INITIAL_RESERVE = 16; TaintedObject() { ranges_.reserve(RANGES_INITIAL_RESERVE); }; @@ -35,6 +42,21 @@ class TaintedObject [[nodiscard]] TaintRangeRefs get_ranges_copy() const { return ranges_; } + [[nodiscard]] bool has_free_tainted_ranges_space() const + { + const int range_limit = get_taint_range_limit(); + return ranges_.size() < static_cast(range_limit); + } + + [[nodiscard]] size_t get_free_tainted_ranges_space() const + { + const int range_limit = get_taint_range_limit(); + if (ranges_.size() >= static_cast(range_limit)) { + return 0; + } + return static_cast(range_limit) - ranges_.size(); + } + void add_ranges_shifted(TaintedObjectPtr tainted_object, RANGE_START offset, RANGE_LENGTH max_length = -1, diff --git a/ddtrace/appsec/_iast/reporter.py b/ddtrace/appsec/_iast/reporter.py index 63f571ba11c..b2573684b40 100644 --- a/ddtrace/appsec/_iast/reporter.py +++ b/ddtrace/appsec/_iast/reporter.py @@ -20,6 +20,7 @@ from ddtrace.appsec._iast.constants import VULN_WEAK_CIPHER_TYPE from ddtrace.appsec._iast.constants import VULN_WEAK_RANDOMNESS from ddtrace.internal.logger import get_logger +from ddtrace.internal.settings.asm import config as asm_config log = get_logger(__name__) @@ -27,6 +28,19 @@ ATTRS_TO_SKIP = frozenset({"_ranges", "_evidences_with_no_sources", "dialect"}) EVIDENCES_WITH_NO_SOURCES = [VULN_INSECURE_HASHING_TYPE, VULN_WEAK_CIPHER_TYPE, VULN_WEAK_RANDOMNESS] +# Default truncation length if environment variable is not set +DEFAULT_EVIDENCE_TRUNCATION_LENGTH = 250 + + +def _truncate_evidence_value(value: Optional[str]) -> Optional[str]: + """Truncate evidence value if it exceeds the max length.""" + if value is None: + return None + max_length = asm_config._iast_truncation_max_value_length + if len(value) > max_length: + return value[:max_length] + return value + class NotNoneDictable: def _to_dict(self): @@ -258,7 +272,7 @@ def _from_dict(self, data: Dict[str, Any]): if "ranges" in i["evidence"]: evidence._ranges = i["evidence"]["ranges"] if "value" in i["evidence"]: - evidence.value = i["evidence"]["value"] + evidence.value = _truncate_evidence_value(i["evidence"]["value"]) if "valueParts" in i["evidence"]: evidence.valueParts = i["evidence"]["valueParts"] if "dialect" in i["evidence"]: @@ -342,6 +356,10 @@ def build_and_scrub_value_parts(self) -> Dict[str, Any]: ) if scrubbing_result: redacted_value_parts = scrubbing_result["redacted_value_parts"] + # Truncate each value in redacted_value_parts + for part in redacted_value_parts: + if "value" in part: + part["value"] = _truncate_evidence_value(part["value"]) redacted_sources = scrubbing_result["redacted_sources"] i = 0 for source in self.sources: @@ -373,18 +391,21 @@ def get_unredacted_value_parts(self, evidence_value: str, ranges: List[dict], so for range_ in ranges: if from_index < range_["start"]: - value_parts.append({"value": evidence_value[from_index : range_["start"]]}) + value_parts.append({"value": _truncate_evidence_value(evidence_value[from_index : range_["start"]])}) source_index = _get_source_index(sources, range_["source"]) value_parts.append( - {"value": evidence_value[range_["start"] : range_["end"]], "source": source_index} # type: ignore[dict-item] + { + "value": _truncate_evidence_value(evidence_value[range_["start"] : range_["end"]]), + "source": source_index, # type: ignore[dict-item] + } ) from_index = range_["end"] if from_index < len(evidence_value): - value_parts.append({"value": evidence_value[from_index:]}) + value_parts.append({"value": _truncate_evidence_value(evidence_value[from_index:])}) return value_parts diff --git a/ddtrace/internal/settings/asm.py b/ddtrace/internal/settings/asm.py index d8064418630..a6abf0b01fa 100644 --- a/ddtrace/internal/settings/asm.py +++ b/ddtrace/internal/settings/asm.py @@ -11,6 +11,7 @@ from ddtrace.appsec._constants import DEFAULT from ddtrace.appsec._constants import EXPLOIT_PREVENTION from ddtrace.appsec._constants import IAST +from ddtrace.appsec._constants import IAST_TRUNCATION_MAX_VALUE_LENGTH_DEFAULT from ddtrace.appsec._constants import LOGIN_EVENTS_MODE from ddtrace.appsec._constants import TELEMETRY_INFORMATION_NAME from ddtrace.constants import APPSEC_ENV @@ -81,6 +82,9 @@ class ASMConfig(DDConfig): _iast_debug = DDConfig.var(bool, IAST.ENV_DEBUG, default=False, private=True) _iast_propagation_debug = DDConfig.var(bool, IAST.ENV_PROPAGATION_DEBUG, default=False, private=True) _iast_telemetry_report_lvl = DDConfig.var(str, IAST.ENV_TELEMETRY_REPORT_LVL, default=TELEMETRY_INFORMATION_NAME) + _iast_truncation_max_value_length = DDConfig.var( + int, IAST.ENV_DD_IAST_TRUNCATION_MAX_VALUE_LENGTH, default=IAST_TRUNCATION_MAX_VALUE_LENGTH_DEFAULT + ) _apm_tracing_enabled = DDConfig.var(bool, APPSEC.APM_TRACING_ENV, default=True) _use_metastruct_for_triggers = True _use_metastruct_for_iast = True @@ -219,6 +223,7 @@ class ASMConfig(DDConfig): "_iast_security_controls", "_iast_is_testing", "_iast_use_root_span", + "_iast_truncation_max_value_length", "_ep_enabled", "_use_metastruct_for_triggers", "_use_metastruct_for_iast", diff --git a/tests/appsec/iast/aspects/test_join_aspect_memory_optimizations.py b/tests/appsec/iast/aspects/test_join_aspect_memory_optimizations.py new file mode 100644 index 00000000000..44f2298ca86 --- /dev/null +++ b/tests/appsec/iast/aspects/test_join_aspect_memory_optimizations.py @@ -0,0 +1,338 @@ +""" +Tests for IAST memory optimizations: +- DD_IAST_TRUNCATION_MAX_VALUE_LENGTH: Truncate Source.value to limit memory +- DD_IAST_MAX_RANGE_COUNT: Limit number of TaintRange objects per TaintedObject +""" + +import pytest + +from ddtrace.appsec._iast._taint_tracking import OriginType +from ddtrace.appsec._iast._taint_tracking import get_ranges +from ddtrace.appsec._iast._taint_tracking._native import reset_source_truncation_cache +from ddtrace.appsec._iast._taint_tracking._native import reset_taint_range_limit_cache +from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject +from ddtrace.appsec._iast._taint_tracking.aspects import join_aspect + + +@pytest.fixture(autouse=True) +def reset_cache_after_test(): + """Reset both caches after each test to ensure clean state.""" + yield + reset_taint_range_limit_cache() + reset_source_truncation_cache() + + +@pytest.fixture +def set_max_range_count(monkeypatch): + """Fixture to set DD_IAST_MAX_RANGE_COUNT and reset the cache.""" + + def _set_value(value): + monkeypatch.setenv("DD_IAST_MAX_RANGE_COUNT", str(value)) + reset_taint_range_limit_cache() + + return _set_value + + +@pytest.fixture +def set_truncation_max_length(monkeypatch): + """Fixture to set DD_IAST_TRUNCATION_MAX_VALUE_LENGTH and reset the cache.""" + + def _set_value(value): + monkeypatch.setenv("DD_IAST_TRUNCATION_MAX_VALUE_LENGTH", str(value)) + reset_source_truncation_cache() + + return _set_value + + +def taint_string(s, name="test_input"): + """Helper to taint a string for testing.""" + return taint_pyobject( + pyobject=s, + source_name=name, + source_value=s, + source_origin=OriginType.PARAMETER, + ) + + +class TestSourceValueTruncation: + """Test DD_IAST_TRUNCATION_MAX_VALUE_LENGTH environment variable.""" + + @pytest.mark.parametrize( + "string_length,max_length,expected_length", + [ + (10, 250, 10), # Small string - no truncation needed (default limit) + (100, 250, 100), # Medium string - no truncation (default limit) + (500, 50, 50), # Large string - should be truncated to 50 + (1000, 50, 50), # Very large string - should be truncated to 50 + (10000, 50, 50), # Huge string - should be truncated to 50 + ], + ) + def test_source_value_truncation(self, string_length, max_length, expected_length, set_truncation_max_length): + """Test that Source.value is truncated according to DD_IAST_TRUNCATION_MAX_VALUE_LENGTH.""" + set_truncation_max_length(max_length) + + test_string = "x" * string_length + tainted = taint_string(test_string, "truncation_test") + ranges = get_ranges(tainted) + + assert ranges is not None, "Tainted string should have ranges" + assert len(ranges) > 0, "Should have at least one range" + + source_value = ranges[0].source.value + assert len(source_value) <= max_length, f"Source.value length {len(source_value)} exceeds max {max_length}" + assert len(source_value) == expected_length, ( + f"Expected source.value length {expected_length}, got {len(source_value)}" + ) + + @pytest.mark.parametrize( + "string_value,max_length,expected", + [ + ("a", 250, "a"), # Single char + ("hello", 250, "hello"), # Short string + ("x" * 250, 250, "x" * 250), # Exactly at limit (default 250) + ("x" * 250, 50, "x" * 50), # 250 chars truncated to 50 + ], + ) + def test_source_value_exact_preservation(self, string_value, max_length, expected, set_truncation_max_length): + """Test that short strings are preserved exactly or truncated correctly.""" + set_truncation_max_length(max_length) + + tainted = taint_string(string_value, "exact_test") + ranges = get_ranges(tainted) + + assert ranges is not None + assert len(ranges) > 0 + + source_value = ranges[0].source.value + assert source_value == expected, f"Expected '{expected}', got '{source_value}'" + + def test_source_value_empty_string(self): + """Test that empty strings are handled correctly.""" + tainted = taint_string("", "empty_test") + ranges = get_ranges(tainted) + + # Empty string may or may not have ranges depending on implementation + if ranges is not None and len(ranges) > 0: + source_value = ranges[0].source.value + assert source_value == "" + + def test_source_value_truncation_join_aspect(self, set_truncation_max_length): + """Test that Source.value truncation works with join_aspect.""" + max_length = 50 + set_truncation_max_length(max_length) + + # Create items with large strings + large_string = "y" * 1000 + items = [large_string for _ in range(5)] + separator = "," + + tainted_sep = taint_string(separator, "sep") + tainted_items = [taint_string(item, f"item_{i}") for i, item in enumerate(items)] + + result = join_aspect("".join, 1, tainted_sep, tainted_items) + result_ranges = get_ranges(result) + + assert result_ranges is not None + assert len(result_ranges) > 0 + + # Check that all source values are truncated + for i, range_obj in enumerate(result_ranges): + source_value_len = len(range_obj.source.value) + assert source_value_len <= max_length, ( + f"Range {i}: source.value length {source_value_len} exceeds max {max_length}" + ) + + +class TestRangeCountLimiting: + """Test DD_IAST_MAX_RANGE_COUNT environment variable.""" + + @pytest.mark.parametrize( + "num_items,max_ranges,expected_ranges", + [ + (3, 30, 5), # Few items - should create all ranges (3 items + 2 separators = 5) + (5, 30, 9), # Some items - should create all ranges (5 items + 4 separators = 9) + (10, 10, 10), # Many items - should be limited by max (10 items + 9 sep = 19, limited to 10) + (20, 10, 10), # Many items - should be limited by max (20 items + 19 sep = 39, limited to 10) + (50, 10, 10), # Many items - should be limited by max (50 items + 49 sep = 99, limited to 10) + (1000, 10, 10), # Many items - should be limited by max (50 items + 49 sep = 99, limited to 10) + ], + ) + def test_range_count_limiting_join(self, num_items, max_ranges, expected_ranges, set_max_range_count): + """Test that TaintRange count is limited by DD_IAST_MAX_RANGE_COUNT.""" + set_max_range_count(max_ranges) + + separator = "," + items = [f"item_{i}" for i in range(num_items)] + + tainted_sep = taint_string(separator, "sep") + tainted_items = [taint_string(item, f"item_{i}") for i, item in enumerate(items)] + + result = join_aspect("".join, 1, tainted_sep, tainted_items) + result_ranges = get_ranges(result) + + assert result_ranges is not None + + actual_ranges = len(result_ranges) + assert actual_ranges <= max_ranges, f"Range count {actual_ranges} exceeds max {max_ranges}" + assert actual_ranges == expected_ranges, f"Expected {expected_ranges} ranges, got {actual_ranges}" + + @pytest.mark.parametrize( + "string_length,num_items,max_ranges", + [ + (10, 5, 30), # Small strings, few items + (100, 10, 30), # Medium strings, more items + (1000, 20, 10), # Large strings, many items + ], + ) + def test_range_limiting_with_large_strings(self, string_length, num_items, max_ranges, set_max_range_count): + """Test that range limiting works independently of string size.""" + set_max_range_count(max_ranges) + + separator = "-" + items = ["x" * string_length for _ in range(num_items)] + + tainted_sep = taint_string(separator, "sep") + tainted_items = [taint_string(item, f"item_{i}") for i, item in enumerate(items)] + + result = join_aspect("".join, 1, tainted_sep, tainted_items) + result_ranges = get_ranges(result) + + assert result_ranges is not None + assert len(result_ranges) <= max_ranges, f"Range count {len(result_ranges)} exceeds max {max_ranges}" + + def test_range_limiting_repeated_operations(self, set_max_range_count): + """Test that range limiting persists across multiple operations.""" + max_ranges = 10 + set_max_range_count(max_ranges) + + # Perform multiple join operations + for iteration in range(10): + separator = "," + items = [f"iter{iteration}_item{i}" for i in range(20)] + + tainted_sep = taint_string(separator, f"sep_{iteration}") + tainted_items = [taint_string(item, f"item_{i}") for i, item in enumerate(items)] + + result = join_aspect("".join, 1, tainted_sep, tainted_items) + result_ranges = get_ranges(result) + + assert result_ranges is not None + assert len(result_ranges) <= max_ranges, ( + f"Iteration {iteration}: range count {len(result_ranges)} exceeds max {max_ranges}" + ) + + +class TestCombinedOptimizations: + """Test that both optimizations work together.""" + + def test_both_optimizations_active(self, set_max_range_count, set_truncation_max_length): + """Test that truncation and range limiting both work in the same operation.""" + max_length = 50 + max_ranges = 10 + set_max_range_count(max_ranges) + set_truncation_max_length(max_length) + + # Create scenario with large strings and many items + large_string = "z" * 5000 + num_items = 30 + items = [large_string for _ in range(num_items)] + separator = "," + + tainted_sep = taint_string(separator, "sep") + tainted_items = [taint_string(item, f"item_{i}") for i, item in enumerate(items)] + + result = join_aspect("".join, 1, tainted_sep, tainted_items) + result_ranges = get_ranges(result) + + assert result_ranges is not None + + # Check range count limiting + assert len(result_ranges) <= max_ranges, f"Range count {len(result_ranges)} exceeds max {max_ranges}" + + # Check source value truncation + for i, range_obj in enumerate(result_ranges): + source_value_len = len(range_obj.source.value) + assert source_value_len <= max_length, ( + f"Range {i}: source.value length {source_value_len} exceeds max {max_length}" + ) + + def test_current_configuration(self, set_max_range_count, set_truncation_max_length): + """Test that current environment configuration is respected.""" + # Set specific values for testing + actual_max_length = 50 + actual_max_ranges = 10 + set_max_range_count(actual_max_ranges) + set_truncation_max_length(actual_max_length) + + # Test with large strings and many items + string_size = 5000 + num_items = 30 + test_string = "x" * string_size + items = [test_string for _ in range(num_items)] + separator = "," + + tainted_sep = taint_string(separator, "sep") + tainted_items = [taint_string(item, f"item_{i}") for i, item in enumerate(items)] + + result = join_aspect("".join, 1, tainted_sep, tainted_items) + result_ranges = get_ranges(result) + + assert result_ranges is not None + + # Verify that current configuration is being used + assert len(result_ranges) <= actual_max_ranges, ( + f"Range count {len(result_ranges)} exceeds configured max {actual_max_ranges}" + ) + + for i, range_obj in enumerate(result_ranges): + source_value_len = len(range_obj.source.value) + assert source_value_len <= actual_max_length, ( + f"Range {i}: source.value length {source_value_len} exceeds configured max {actual_max_length}" + ) + + +class TestMemoryScaling: + """Test that memory scales with range count, not string size.""" + + def test_memory_independent_of_string_size(self, set_max_range_count): + """Test that different string sizes produce similar range counts.""" + max_ranges = 10 + set_max_range_count(max_ranges) + + num_items = 20 + separator = "," + + # Test with different string sizes + for string_size in [10, 100, 1000, 10000]: + items = ["x" * string_size for _ in range(num_items)] + + tainted_sep = taint_string(separator, "sep") + tainted_items = [taint_string(item, f"item_{i}") for i, item in enumerate(items)] + + result = join_aspect("".join, 1, tainted_sep, tainted_items) + result_ranges = get_ranges(result) + + assert result_ranges is not None + # Range count should be the same regardless of string size + assert len(result_ranges) <= max_ranges + # The actual count should be consistent across different sizes + # (all should hit the limit since we have 20 items = 39 theoretical ranges) + + def test_source_value_storage_bounded(self, set_truncation_max_length): + """Test that source value storage is bounded regardless of input size.""" + max_length = 50 + set_truncation_max_length(max_length) + + # Create very large strings + huge_string = "y" * 100000 # 100KB string + tainted = taint_string(huge_string, "huge_test") + ranges = get_ranges(tainted) + + assert ranges is not None + assert len(ranges) > 0 + + # Even though input is 100KB, stored value should be limited + source_value = ranges[0].source.value + assert len(source_value) <= max_length + # Memory used for source.value should be minimal (not 100KB) + assert len(source_value) == max_length # Should be exactly the max length diff --git a/tests/appsec/iast/conftest.py b/tests/appsec/iast/conftest.py index 566c996c321..fd97af5039c 100644 --- a/tests/appsec/iast/conftest.py +++ b/tests/appsec/iast/conftest.py @@ -14,6 +14,8 @@ from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_free_slots_number from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size +from ddtrace.appsec._iast._taint_tracking._native import reset_source_truncation_cache +from ddtrace.appsec._iast._taint_tracking._native import reset_taint_range_limit_cache from ddtrace.appsec._iast.taint_sinks.code_injection import patch as code_injection_patch from ddtrace.appsec._iast.taint_sinks.header_injection import patch as header_injection_patch from ddtrace.appsec._iast.taint_sinks.untrusted_serialization import patch as unstrusted_serialization_patch @@ -82,11 +84,15 @@ class MockSpan: weak_hash_unpatch() _testing_unpatch_iast() _end_iast_context_and_oce(span) + reset_taint_range_limit_cache() + reset_source_truncation_cache() @pytest.fixture def iast_context_defaults(): - yield from iast_context(dict(DD_IAST_ENABLED="true")) + yield from iast_context( + dict(DD_IAST_ENABLED="true", DD_IAST_MAX_RANGE_COUNT="5000", DD_IAST_TRUNCATION_MAX_VALUE_LENGTH="10000") + ) @pytest.fixture diff --git a/tests/appsec/iast/taint_sinks/test_code_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_code_injection_redacted.py index 1291402ca10..6bfa17a53f3 100644 --- a/tests/appsec/iast/taint_sinks/test_code_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_code_injection_redacted.py @@ -18,7 +18,7 @@ list(get_parametrize(VULN_CODE_INJECTION, ignore_list=_ignore_list)), ) def test_code_injection_redaction_suite( - evidence_input, sources_expected, vulnerabilities_expected, iast_context_defaults, element + iast_context_defaults, evidence_input, sources_expected, vulnerabilities_expected, element ): tainted_object = evidence_input_value = evidence_input.get("value", "") if evidence_input_value: diff --git a/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py index 3533e613527..5285036a0d1 100644 --- a/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_command_injection_redacted.py @@ -21,7 +21,7 @@ "evidence_input,sources_expected,vulnerabilities_expected,element", list(get_parametrize(VULN_CMDI)) ) def test_cmdi_redaction_suite( - evidence_input, sources_expected, vulnerabilities_expected, iast_context_defaults, element + iast_context_defaults, evidence_input, sources_expected, vulnerabilities_expected, element ): tainted_object = _taint_pyobject_multiranges( evidence_input["value"], diff --git a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py index 66f0e049770..fdf2b7c7d59 100644 --- a/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_header_injection_redacted.py @@ -97,7 +97,7 @@ def test_common_django_header_injection_redact(header_name, header_value, value_ list(get_parametrize(VULN_HEADER_INJECTION)), ) def test_header_injection_redaction_suite( - evidence_input, sources_expected, vulnerabilities_expected, iast_context_defaults, element + iast_context_defaults, evidence_input, sources_expected, vulnerabilities_expected, element ): tainted_object = _taint_pyobject_multiranges( evidence_input["value"], diff --git a/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py b/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py index bc64d16b5a6..6e9c7dcb635 100644 --- a/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_path_traversal_redacted.py @@ -136,7 +136,7 @@ def test_path_traversal_redact_abs_paths(iast_context_defaults): list(get_parametrize(VULN_PATH_TRAVERSAL)), ) def test_path_traversal_redaction_suite( - evidence_input, sources_expected, vulnerabilities_expected, iast_context_defaults, element + iast_context_defaults, evidence_input, sources_expected, vulnerabilities_expected, element ): tainted_object = _taint_pyobject_multiranges( evidence_input["value"], diff --git a/tests/appsec/iast/taint_sinks/test_unvalidated_redirect_redacted.py b/tests/appsec/iast/taint_sinks/test_unvalidated_redirect_redacted.py index 2ab96a6c388..71e333bbc72 100644 --- a/tests/appsec/iast/taint_sinks/test_unvalidated_redirect_redacted.py +++ b/tests/appsec/iast/taint_sinks/test_unvalidated_redirect_redacted.py @@ -20,7 +20,7 @@ "evidence_input,sources_expected,vulnerabilities_expected,element", list(get_parametrize(VULN_UNVALIDATED_REDIRECT)) ) def test_unvalidated_redirect_redaction_suite( - evidence_input, sources_expected, vulnerabilities_expected, iast_context_defaults, element + iast_context_defaults, evidence_input, sources_expected, vulnerabilities_expected, element ): tainted_object = evidence_input_value = evidence_input.get("value", "") if evidence_input_value: diff --git a/tests/appsec/iast/test_iast_propagation_path.py b/tests/appsec/iast/test_iast_propagation_path.py index fea0e4195d4..652c7bdd23f 100644 --- a/tests/appsec/iast/test_iast_propagation_path.py +++ b/tests/appsec/iast/test_iast_propagation_path.py @@ -235,7 +235,7 @@ def test_propagation_path_2_origins_3_propagation(origin1, origin2, iast_context (b"taintsource1", bytearray(b"taintsource2")), ], ) -def test_propagation_path_2_origins_5_propagation(origin1, origin2, iast_context_defaults): +def test_propagation_path_2_origins_5_propagation(iast_context_defaults, origin1, origin2): mod = _iast_patched_module("tests.appsec.iast.fixtures.propagation_path") tainted_string_1 = taint_pyobject(origin1, source_name="path1", source_value=origin1, source_origin=OriginType.PATH) diff --git a/tests/appsec/iast/test_reporter.py b/tests/appsec/iast/test_reporter.py index 4068b87adae..560cde14139 100644 --- a/tests/appsec/iast/test_reporter.py +++ b/tests/appsec/iast/test_reporter.py @@ -1,7 +1,13 @@ +import os + +import pytest + from ddtrace.appsec._iast.reporter import Evidence from ddtrace.appsec._iast.reporter import Location from ddtrace.appsec._iast.reporter import Source from ddtrace.appsec._iast.reporter import Vulnerability +from ddtrace.appsec._iast.reporter import _truncate_evidence_value +from ddtrace.internal.settings.asm import config as asm_config def _do_assert_hash(e, f, g, e2): @@ -70,3 +76,99 @@ def test_source_hash_and_equality(): _do_assert_hash(e, f, g, e2) _do_assert_equality(e, f, g, e2) + + +class TestEvidenceTruncation: + """Tests for DD_IAST_TRUNCATION_MAX_VALUE_LENGTH in Evidence values.""" + + def test_get_truncation_length_from_config(self): + """Test getting truncation length from configuration.""" + max_length = asm_config._iast_truncation_max_value_length + # Should be either the env var value or default (250) + assert max_length > 0 + assert isinstance(max_length, int) + + @pytest.mark.parametrize( + "value,expected_length", + [ + ("short", 5), # Short string - no truncation + ("x" * 100, 100), # Medium string - may or may not be truncated + ("x" * 500, None), # Long string - will be truncated to max + ], + ) + def test_truncate_evidence_value(self, value, expected_length): + """Test that _truncate_evidence_value truncates correctly.""" + max_length = asm_config._iast_truncation_max_value_length + + result = _truncate_evidence_value(value) + + assert result is not None + assert len(result) <= max_length + + if expected_length is None: + # Long string - should be truncated to max + assert len(result) == max_length + else: + # Short/medium string - should be preserved or truncated to max + assert len(result) == min(expected_length, max_length) + + def test_truncate_none_value(self): + """Test that None values remain None.""" + result = _truncate_evidence_value(None) + assert result is None + + def test_truncate_empty_string(self): + """Test that empty strings remain empty.""" + result = _truncate_evidence_value("") + assert result == "" + + @pytest.mark.parametrize( + "string_length", + [ + 1, + 10, + 50, + 100, + 250, + 500, + 1000, + 10000, + ], + ) + def test_truncation_preserves_prefix(self, string_length): + """Test that truncation preserves the beginning of the string.""" + max_length = asm_config._iast_truncation_max_value_length + + original = "x" * string_length + truncated = _truncate_evidence_value(original) + + assert truncated is not None + + # Truncated value should match the prefix of original + expected_length = min(string_length, max_length) + assert truncated == original[:expected_length] + + def test_truncation_with_unicode(self): + """Test truncation with unicode characters.""" + max_length = asm_config._iast_truncation_max_value_length + + # Mix of ASCII and unicode + unicode_string = "Hello 🌍 World " * 50 + truncated = _truncate_evidence_value(unicode_string) + + assert truncated is not None + assert len(truncated) <= max_length + # Should preserve the beginning + assert truncated == unicode_string[: len(truncated)] + + def test_default_truncation_length(self): + """Test that default truncation length is 250 when env var not set.""" + # Get the current max length + max_length = asm_config._iast_truncation_max_value_length + + # Default should be 250 unless overridden by env var + env_value = os.environ.get("DD_IAST_TRUNCATION_MAX_VALUE_LENGTH") + if env_value: + assert max_length == int(env_value) + else: + assert max_length == 250 diff --git a/tests/appsec/iast_memcheck/test_iast_mem_check.py b/tests/appsec/iast_memcheck/test_iast_mem_check.py index c80b7d9466d..65f0b175640 100644 --- a/tests/appsec/iast_memcheck/test_iast_mem_check.py +++ b/tests/appsec/iast_memcheck/test_iast_mem_check.py @@ -284,3 +284,187 @@ def test_slice_memory_check_repeated_operations(iast_context_defaults): ) _iast_finish_request() + + +@pytest.mark.limit_leaks("2.0 KB", filter_fn=IASTFilter()) +def test_slice_memory_check_repeated_operations_iast_disable(): + from ddtrace.appsec._iast._taint_tracking.aspects import slice_aspect + + # Taint a test string + test_string = "abcdefghijklmnopqrstuvwxyz0123456789" + tainted_string = taint_pyobject( + test_string, source_name="test_input", source_value=test_string, source_origin=OriginType.PARAMETER + ) + + # Get baseline + initial_context_size = debug_context_array_size() + + # Perform many slice operations (simulating benchmark workload) + for i in range(100): + # Various slice operations + _ = slice_aspect(tainted_string, 0, 10, 1) + _ = slice_aspect(tainted_string, 5, 15, 1) + _ = slice_aspect(tainted_string, 10, 20, 2) + _ = slice_aspect(tainted_string, 1, -1, 1) + + # Context array size should not have grown significantly + final_context_size = debug_context_array_size() + + # Allow small variation but no significant growth + # With the old buggy code, this would grow proportionally to iterations + assert final_context_size <= initial_context_size + 10, ( + f"Context size grew from {initial_context_size} to {final_context_size}" + ) + + +@pytest.mark.limit_leaks("2.0 KB", filter_fn=IASTFilter()) +@pytest.mark.parametrize( + "separator, items", + [ + (",", ["a", "b", "c", "d", "e", "f"]), + ("-", ["foo", "bar", "baz"]), + ("", ["x", "y", "z"]), + (" ", ["hello", "world", "test"]), + (b",", [b"a", b"b", b"c", b"d", b"e", b"f"]), + (b"-", [b"foo", b"bar", b"baz"]), + (bytearray(b","), [bytearray(b"a"), bytearray(b"b"), bytearray(b"c")]), + (bytearray(b" "), [bytearray(b"hello"), bytearray(b"world")]), + ], +) +def test_join_memory_check(separator, items, iast_context_defaults): + """Test that join_aspect doesn't leak memory. + + This test verifies that join_aspect properly manages memory when joining + multiple tainted strings. The implementation should not create excessive + intermediate data structures. + """ + from ddtrace.appsec._iast._taint_tracking.aspects import join_aspect + + _num_objects_tainted = 0 + _debug_context_array_size = 0 + _iast_finish_request() + + for iteration in range(LOOPS): + _iast_start_request() + + # Taint the separator + tainted_separator = taint_pyobject( + separator, source_name="separator", source_value=separator, source_origin=OriginType.PARAMETER + ) + + # Taint the items to join + tainted_items = [ + taint_pyobject(item, source_name=f"item_{i}", source_value=item, source_origin=OriginType.PARAMETER) + for i, item in enumerate(items) + ] + + # Perform join operation + result = join_aspect("".join, 1, tainted_separator, tainted_items) + + # Verify the result is properly tainted + tainted_ranges = get_tainted_ranges(result) + assert len(tainted_ranges) > 0 + + # Track memory metrics + _num_objects_tainted = _num_objects_tainted_in_request() + assert _num_objects_tainted > 0 + + _debug_context_array_size = debug_context_array_size() + assert _debug_context_array_size > 0 + + # Verify no memory leak - context array size should remain stable + assert _debug_context_array_size == debug_context_array_size() + + _iast_finish_request() + + +@pytest.mark.limit_leaks("2.0 KB", filter_fn=IASTFilter()) +def test_join_memory_check_repeated_operations(iast_context_defaults): + """Test that repeated join operations don't accumulate memory. + + This simulates scenarios where join_aspect is called many times + on similar data. The implementation should remain memory-stable + and not accumulate intermediate structures. + """ + from ddtrace.appsec._iast._taint_tracking.aspects import join_aspect + + _iast_finish_request() + _iast_start_request() + + # Taint separator and test items + separator = "," + tainted_separator = taint_pyobject( + separator, source_name="separator", source_value=separator, source_origin=OriginType.PARAMETER + ) + + items = ["item1", "item2", "item3", "item4", "item5"] + tainted_items = [ + taint_pyobject(item, source_name=f"item_{i}", source_value=item, source_origin=OriginType.PARAMETER) + for i, item in enumerate(items) + ] + + # Get baseline + initial_context_size = debug_context_array_size() + assert initial_context_size > 0 + + # Perform many join operations (simulating benchmark workload) + for i in range(100): + # Various join operations + result1 = join_aspect("".join, 1, tainted_separator, tainted_items[:3]) + result2 = join_aspect("".join, 1, tainted_separator, tainted_items[2:]) + result3 = join_aspect("".join, 1, tainted_separator, tainted_items) + result4 = join_aspect("".join, 1, tainted_separator, [tainted_items[0], tainted_items[-1]]) + + # Verify results are tainted + assert len(get_tainted_ranges(result1)) > 0 + assert len(get_tainted_ranges(result2)) > 0 + assert len(get_tainted_ranges(result3)) > 0 + assert len(get_tainted_ranges(result4)) > 0 + + # Context array size should not have grown significantly + final_context_size = debug_context_array_size() + + # Allow small variation but no significant growth + # With buggy code, this would grow proportionally to iterations + assert final_context_size <= initial_context_size + 10, ( + f"Context size grew from {initial_context_size} to {final_context_size}" + ) + + _iast_finish_request() + + +@pytest.mark.limit_leaks("2.0 KB", filter_fn=IASTFilter()) +def test_join_memory_check_repeated_operations_iast_disable(): + from ddtrace.appsec._iast._taint_tracking.aspects import join_aspect + + # Taint separator and test items + separator = "," + tainted_separator = taint_pyobject( + separator, source_name="separator", source_value=separator, source_origin=OriginType.PARAMETER + ) + + items = ["item1", "item2", "item3", "item4", "item5"] + tainted_items = [ + taint_pyobject(item, source_name=f"item_{i}", source_value=item, source_origin=OriginType.PARAMETER) + for i, item in enumerate(items) + ] + + # Get baseline + initial_context_size = debug_context_array_size() + + # Perform many join operations (simulating benchmark workload) + for i in range(100): + # Various join operations + _ = join_aspect("".join, 1, tainted_separator, tainted_items[:3]) + _ = join_aspect("".join, 1, tainted_separator, tainted_items[2:]) + _ = join_aspect("".join, 1, tainted_separator, tainted_items) + _ = join_aspect("".join, 1, tainted_separator, [tainted_items[0], tainted_items[-1]]) + + # Context array size should not have grown significantly + final_context_size = debug_context_array_size() + + # Allow small variation but no significant growth + # With buggy code, this would grow proportionally to iterations + assert final_context_size <= initial_context_size + 10, ( + f"Context size grew from {initial_context_size} to {final_context_size}" + ) diff --git a/tests/telemetry/test_writer.py b/tests/telemetry/test_writer.py index 914ab46a4bc..ca4c9d05ca5 100644 --- a/tests/telemetry/test_writer.py +++ b/tests/telemetry/test_writer.py @@ -274,6 +274,7 @@ def test_app_started_event_configuration_override(test_agent_session, run_python {"name": "DD_IAST_SECURITY_CONTROLS_CONFIGURATION", "origin": "default", "value": ""}, {"name": "DD_IAST_STACK_TRACE_ENABLED", "origin": "default", "value": True}, {"name": "DD_IAST_TELEMETRY_VERBOSITY", "origin": "default", "value": "INFORMATION"}, + {"name": "DD_IAST_TRUNCATION_MAX_VALUE_LENGTH", "origin": "default", "value": 250}, {"name": "DD_IAST_VULNERABILITIES_PER_REQUEST", "origin": "default", "value": 2}, {"name": "DD_INJECTION_ENABLED", "origin": "env_var", "value": "tracer"}, {"name": "DD_INJECT_FORCE", "origin": "env_var", "value": True},