Commit 01850f2

feat(openai): handle file and image prompt variables types (#15359)
## Description

Adds support for mixed input types (text, images, files) in OpenAI reusable prompt tracking.

**Key Changes:**

- `_normalize_prompt_variables()`: extracts values from all OpenAI SDK response objects ([ResponseInputText](https://github.com/openai/openai-python/blob/41ee03ffe985a7362f0275c7f500080cb1d58cdd/src/openai/types/responses/response_input_text.py), [ResponseInputImage](https://github.com/openai/openai-python/blob/41ee03ffe985a7362f0275c7f500080cb1d58cdd/src/openai/types/responses/response_input_image.py#L11), [ResponseInputFile](https://github.com/openai/openai-python/blob/41ee03ffe985a7362f0275c7f500080cb1d58cdd/src/openai/types/responses/response_input_file.py)).
- `_extract_chat_template_from_instructions()`: generates templates using `{{variable_name}}` when values are available, and falls back to `[image]`/`[file]` markers when OpenAI strips them.
- **Refactoring**: extracted helper functions (`_extract_image_reference`, `_extract_file_reference`, `_extract_content_item_text`) to eliminate code duplication, and added constants for magic strings.

👉 **Design details:** see [Prompt Tracking with Mixed Input Types](https://docs.google.com/document/d/1qLgC45fU0ZSKhxucHFnLXydBM5pa_i74--9RVBzxgcA/edit?usp=sharing).

## Testing

- `test_response_with_mixed_input_prompt_tracking`: text + images + file with the `include` parameter.

## Risks

:shrug:

## Additional Notes

The implementation is consistent with dd-trace-js ([#6941](DataDog/dd-trace-js#6941)).
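For context, a minimal usage sketch of the scenario this change targets. The prompt id, variable names, and image/file references are placeholders, and the snippet assumes the Responses API's reusable-prompt `prompt` parameter with content-item variables; it is not part of this commit.

```python
# Hedged sketch: an OpenAI Responses call with mixed-type prompt variables,
# traced by ddtrace LLM Observability. All ids/values are placeholders.
from ddtrace.llmobs import LLMObs
from openai import OpenAI

LLMObs.enable(ml_app="my-app")  # ml_app name is a placeholder

client = OpenAI()
response = client.responses.create(
    # model/input omitted; the stored prompt configuration is assumed to supply them
    prompt={
        "id": "pmpt_example",  # hypothetical reusable prompt id
        "variables": {
            "question": "What breed is this cat?",                                        # text
            "photo": {"type": "input_image", "image_url": "https://example.com/cat.png"},  # image
            "report": {"type": "input_file", "file_id": "file-123"},                       # file
        },
    },
)
# With this commit, the tracked prompt on the LLMObs span carries normalized string
# variables and a chat_template using {{question}}/{{photo}} placeholders, with
# [image]/[file] fallback markers when OpenAI strips the underlying values.
```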
1 parent 22fc114 commit 01850f2

File tree

8 files changed: +880 −67 lines

ddtrace/llmobs/_constants.py

Lines changed: 9 additions & 0 deletions
```diff
@@ -116,3 +116,12 @@
 EXPERIMENTS_INPUT = "_ml_obs.meta.input"
 EXPERIMENTS_OUTPUT = "_ml_obs.meta.output"
 DEFAULT_PROJECT_NAME = "default-project"
+
+# Fallback markers for prompt tracking when OpenAI strips values
+IMAGE_FALLBACK_MARKER = "[image]"
+FILE_FALLBACK_MARKER = "[file]"
+
+# OpenAI input types
+INPUT_TYPE_IMAGE = "input_image"
+INPUT_TYPE_FILE = "input_file"
+INPUT_TYPE_TEXT = "input_text"
```

ddtrace/llmobs/_integrations/utils.py

Lines changed: 84 additions & 35 deletions
```diff
@@ -15,9 +15,14 @@
 from ddtrace.internal.utils.formats import format_trace_id
 from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
 from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
+from ddtrace.llmobs._constants import FILE_FALLBACK_MARKER
+from ddtrace.llmobs._constants import IMAGE_FALLBACK_MARKER
 from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_PROMPT
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
+from ddtrace.llmobs._constants import INPUT_TYPE_FILE
+from ddtrace.llmobs._constants import INPUT_TYPE_IMAGE
+from ddtrace.llmobs._constants import INPUT_TYPE_TEXT
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import OAI_HANDOFF_TOOL_ARG
```
```diff
@@ -581,6 +586,12 @@ def _openai_parse_input_response_messages(
             for content in item["content"]:
                 processed_item_content += str(content.get("text", "") or "")
                 processed_item_content += str(content.get("refusal", "") or "")
+
+                item_type = content.get("type", None)
+                if item_type == INPUT_TYPE_IMAGE:
+                    processed_item_content += _extract_image_reference(content)
+                elif item_type == INPUT_TYPE_FILE:
+                    processed_item_content += _extract_file_reference(content)
         else:
             processed_item_content = item["content"]
         if processed_item_content:
```
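A minimal sketch of the fallback behavior this hunk relies on. The content dicts and values are invented for illustration, and the import path assumes this commit's module layout; `_get_attr` is assumed to accept both dicts and SDK objects, as it does elsewhere in this file.

```python
from ddtrace.llmobs._integrations.utils import _extract_file_reference
from ddtrace.llmobs._integrations.utils import _extract_image_reference

# Hypothetical content items as they might appear in item["content"]
image_with_url = {"type": "input_image", "image_url": "https://example.com/cat.png"}
image_stripped = {"type": "input_image", "image_url": None, "file_id": None}
file_by_id = {"type": "input_file", "file_url": None, "file_id": "file-123"}

print(_extract_image_reference(image_with_url))  # "https://example.com/cat.png"
print(_extract_image_reference(image_stripped))  # "[image]" fallback marker
print(_extract_file_reference(file_by_id))       # "file-123"
```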
```diff
@@ -771,34 +782,74 @@ def openai_get_metadata_from_response(
     return metadata
 
 
+def _extract_image_reference(obj: Any) -> str:
+    """Extract image reference with fallback priority: image_url → file_id → [image]."""
+    return _get_attr(obj, "image_url", None) or _get_attr(obj, "file_id", None) or IMAGE_FALLBACK_MARKER
+
+
+def _extract_file_reference(obj: Any) -> str:
+    """Extract file reference with fallback priority: file_url → file_id → filename → [file]."""
+    return (
+        _get_attr(obj, "file_url", None)
+        or _get_attr(obj, "file_id", None)
+        or _get_attr(obj, "filename", None)
+        or FILE_FALLBACK_MARKER
+    )
+
+
+def _extract_content_item_text(content_item: Any) -> str:
+    """Extract text representation from a content item (text/image/file)."""
+    item_type = _get_attr(content_item, "type", None)
+    if item_type == INPUT_TYPE_IMAGE:
+        return _extract_image_reference(content_item)
+    elif item_type == INPUT_TYPE_FILE:
+        return _extract_file_reference(content_item)
+    elif item_type == INPUT_TYPE_TEXT or item_type is None:
+        text = _get_attr(content_item, "text", "")
+        return str(text) if text else ""
+
+    return ""
+
+
+def _normalize_prompt_variables(variables: Dict[str, Any]) -> Dict[str, Any]:
+    """Converts OpenAI SDK response objects or dicts into simple key-value pairs.
+
+    Example:
+        Input: {"msg": ResponseInputText(text="Hello"), "doc": ResponseInputFile(file_id="file-123")}
+        Output: {"msg": "Hello", "doc": "file-123"}
+    """
+    if not variables or not isinstance(variables, dict):
+        return {}
+
+    return {key: _extract_content_item_text(value) or value for key, value in variables.items()}
+
+
 def _extract_chat_template_from_instructions(
     instructions: List[Any], variables: Dict[str, Any]
 ) -> List[Dict[str, str]]:
     """
     Extract a chat template from OpenAI response instructions by replacing variable values with placeholders.
 
+    Uses {{variable_name}} when values are available. Falls back to [image]/[file] markers when
+    OpenAI strips the values (e.g., by default URL stripping behavior).
+
     Args:
         instructions: List of instruction messages from the OpenAI response
         variables: Dictionary of variables used in the prompt
 
     Returns:
-        List of chat template messages with placeholders (e.g., {{variable_name}})
+        List of chat template messages with placeholders (e.g., {{variable_name}}, [image], [file])
     """
     chat_template = []
 
-    # Create a mapping of variable values to placeholder names
+    # Build value:placeholder map - exclude fallback markers so they remain as-is in the template
     value_to_placeholder = {}
     for var_name, var_value in variables.items():
-        if hasattr(var_value, "text"):  # ResponseInputText
-            value_str = str(var_value.text)
-        else:
-            value_str = str(var_value)
-
-        # Skip empty values
-        if not value_str:
+        if var_value is None:
             continue
-
-        value_to_placeholder[value_str] = f"{{{{{var_name}}}}}"
+        value_str = str(var_value)
+        if value_str and value_str not in (IMAGE_FALLBACK_MARKER, FILE_FALLBACK_MARKER):
+            value_to_placeholder[value_str] = f"{{{{{var_name}}}}}"
 
     # Sort by length (longest first) to handle overlapping values correctly
     sorted_values = sorted(value_to_placeholder.keys(), key=len, reverse=True)
```
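To make the normalization step concrete, here is a hedged usage sketch. The variable names and values are invented, the import path assumes this commit's module layout, and dict-shaped variables stand in for the SDK's `ResponseInputText`/`ResponseInputImage`/`ResponseInputFile` objects (which `_get_attr` also handles).

```python
from ddtrace.llmobs._integrations.utils import _normalize_prompt_variables

variables = {
    "question": {"type": "input_text", "text": "What breed is this cat?"},
    "photo": {"type": "input_image", "image_url": "https://example.com/cat.png"},
    "report": {"type": "input_file", "file_id": "file-123"},
    "stripped": {"type": "input_image", "image_url": None},  # value stripped by OpenAI
}

normalized = _normalize_prompt_variables(variables)
# Expected, given the helpers above:
# {"question": "What breed is this cat?",
#  "photo": "https://example.com/cat.png",
#  "report": "file-123",
#  "stripped": "[image]"}
print(normalized)
```

`_extract_chat_template_from_instructions` then replaces each of these string values found in the response instructions with `{{question}}`, `{{photo}}`, and so on, while the `[image]`/`[file]` fallback markers are deliberately excluded from the value-to-placeholder map so they appear verbatim in the template.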
```diff
@@ -812,18 +863,14 @@ def _extract_chat_template_from_instructions(
         if not content_items:
             continue
 
-        text_parts = []
-        for content_item in content_items:
-            text = _get_attr(content_item, "text", "")
-            if text:
-                text_parts.append(str(text))
+        text_parts = [_extract_content_item_text(item) for item in content_items]
+        text_parts = [part for part in text_parts if part]
 
         if not text_parts:
             continue
 
+        # Combine text and replace variable values with placeholders (longest first)
         full_text = "".join(text_parts)
-
-        # Replace variable values with placeholders (longest first)
         for value_str in sorted_values:
             placeholder = value_to_placeholder[value_str]
             full_text = full_text.replace(value_str, placeholder)
```
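A standalone illustration of why the substitution loop runs longest-first: when one variable's value is a substring of another's, replacing the shorter value first would corrupt the longer match. The variable names and text below are made up for this example.

```python
# Longest-first placeholder substitution, as in the loop above.
value_to_placeholder = {
    "Jane": "{{first_name}}",
    "Jane Doe": "{{full_name}}",
}
full_text = "Write a short bio for Jane Doe."

# "Jane Doe" is replaced before "Jane", so the longer match wins.
for value_str in sorted(value_to_placeholder, key=len, reverse=True):
    full_text = full_text.replace(value_str, value_to_placeholder[value_str])

print(full_text)  # "Write a short bio for {{full_name}}."
# Shortest-first would instead yield "Write a short bio for {{first_name}} Doe."
```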
```diff
@@ -857,24 +904,26 @@ def openai_set_meta_tags_from_response(
         }
     )
 
-    if "prompt" in kwargs:
-        prompt_data = kwargs.get("prompt")
-        if prompt_data:
-            try:
-                # Extract chat_template from response instructions if available
-                if response and not prompt_data.get("chat_template") and not prompt_data.get("template"):
-                    instructions = _get_attr(response, "instructions", None)
+    prompt_data = kwargs.get("prompt")
+    if prompt_data:
+        try:
+            prompt_data = dict(prompt_data)  # Make a copy to avoid modifying the original
+
+            # Extract chat_template from response instructions if not already provided
+            if response and not prompt_data.get("chat_template") and not prompt_data.get("template"):
+                instructions = _get_attr(response, "instructions", None)
+                if instructions:
                     variables = prompt_data.get("variables", {})
-                    if instructions and variables:
-                        chat_template = _extract_chat_template_from_instructions(instructions, variables)
-                        if chat_template:
-                            prompt_data = dict(prompt_data)  # Make a copy to avoid modifying the original
-                            prompt_data["chat_template"] = chat_template
-
-                validated_prompt = _validate_prompt(prompt_data, strict_validation=False)
-                span._set_ctx_item(INPUT_PROMPT, validated_prompt)
-            except (TypeError, ValueError, AttributeError) as e:
-                logger.debug("Failed to validate prompt for OpenAI response: %s", e)
+                    normalized_variables = _normalize_prompt_variables(variables)
+                    chat_template = _extract_chat_template_from_instructions(instructions, normalized_variables)
+                    if chat_template:
+                        prompt_data["chat_template"] = chat_template
+                    prompt_data["variables"] = normalized_variables
+
+            validated_prompt = _validate_prompt(prompt_data, strict_validation=False)
+            span._set_ctx_item(INPUT_PROMPT, validated_prompt)
+        except (TypeError, ValueError, AttributeError) as e:
+            logger.debug("Failed to validate prompt for OpenAI response: %s", e)
 
     if span.error or not response:
         span._set_ctx_item(OUTPUT_MESSAGES, [Message(content="")])
```
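As a rough illustration of what reaches `_validate_prompt()` after this change: a hypothetical `prompt_data` dict is shown below. The prompt id, version, values, roles, and message keys are assumptions for illustration (the diff only confirms the `variables` and `chat_template` keys), and the exact validated shape stored on the span may differ.

```python
# Hypothetical prompt_data just before _validate_prompt(), assuming the
# Responses API returned instructions containing the resolved variable values.
prompt_data = {
    "id": "pmpt_example",   # made-up reusable prompt id
    "version": "3",
    "variables": {          # normalized by _normalize_prompt_variables
        "question": "What breed is this cat?",
        "photo": "https://example.com/cat.png",
        "report": "[file]",  # fallback marker: OpenAI stripped the file reference
    },
    "chat_template": [       # built by _extract_chat_template_from_instructions
        {"role": "system", "content": "You are a vet assistant."},
        {"role": "user", "content": "{{question}} {{photo}} [file]"},
    ],
}
```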
