Skip to content

Commit ddb3870

Browse files
authored
feat(llmobs): emitting reasoning tokens metric for openai integration (#15478)
## Description Emit the reasoning tokens metric for the OpenAI integration, and remove reasoning token counts from span metadata (they are now reported as a metric instead). MLOB-4264 ## Testing Covered by updated integration tests and a new VCR cassette recording a `gpt-5-mini` Responses API call that returns `output_tokens_details.reasoning_tokens`. ## Risks None. ## Additional Notes Chat Completions reports this under `completion_tokens_details`, while the Responses API reports it under `output_tokens_details`; both are handled.
1 parent 2656bf2 commit ddb3870

File tree

7 files changed

+218
-23
lines changed

7 files changed

+218
-23
lines changed

ddtrace/llmobs/_constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
CACHE_WRITE_INPUT_TOKENS_METRIC_KEY = "cache_write_input_tokens"
4444
CACHE_READ_INPUT_TOKENS_METRIC_KEY = "cache_read_input_tokens"
4545
BILLABLE_CHARACTER_COUNT_METRIC_KEY = "billable_character_count"
46+
REASONING_OUTPUT_TOKENS_METRIC_KEY = "reasoning_output_tokens"
4647

4748
EVAL_ENDPOINT = "/api/intake/llm-obs/v2/eval-metric"
4849
SPAN_ENDPOINT = "/api/v2/llmobs"

ddtrace/llmobs/_integrations/openai.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
2020
from ddtrace.llmobs._constants import OUTPUT_VALUE
2121
from ddtrace.llmobs._constants import PROXY_REQUEST
22+
from ddtrace.llmobs._constants import REASONING_OUTPUT_TOKENS_METRIC_KEY
2223
from ddtrace.llmobs._constants import SPAN_KIND
2324
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
2425
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
@@ -221,6 +222,13 @@ def _extract_llmobs_metrics_tags(
221222
cached_tokens = _get_attr(prompt_tokens_details, "cached_tokens", None)
222223
if cached_tokens is not None:
223224
metrics[CACHE_READ_INPUT_TOKENS_METRIC_KEY] = cached_tokens
225+
# Chat completion returns `completion_tokens_details` while responses api returns `output_tokens_details`
226+
reasoning_output_tokens_details = _get_attr(token_usage, "completion_tokens_details", {}) or _get_attr(
227+
token_usage, "output_tokens_details", {}
228+
)
229+
reasoning_output_tokens = _get_attr(reasoning_output_tokens_details, "reasoning_tokens", None)
230+
if reasoning_output_tokens is not None:
231+
metrics[REASONING_OUTPUT_TOKENS_METRIC_KEY] = reasoning_output_tokens
224232
return metrics
225233
elif kwargs.get("stream") and resp is not None:
226234
prompt_tokens = _compute_prompt_tokens(kwargs.get("prompt", None), kwargs.get("messages", None))

ddtrace/llmobs/_integrations/utils.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -768,11 +768,6 @@ def openai_get_metadata_from_response(
768768
if value is not None:
769769
metadata[field] = load_data_value(value)
770770

771-
usage = getattr(response, "usage", None)
772-
output_tokens_details = getattr(usage, "output_tokens_details", None)
773-
reasoning_tokens = getattr(output_tokens_details, "reasoning_tokens", 0)
774-
metadata["reasoning_tokens"] = reasoning_tokens
775-
776771
return metadata
777772

778773

@@ -1181,6 +1176,13 @@ def llmobs_metrics(self) -> Optional[Dict[str, Any]]:
11811176
metrics["output_tokens"] = usage.output_tokens
11821177
if hasattr(usage, "total_tokens"):
11831178
metrics["total_tokens"] = usage.total_tokens
1179+
# Chat completion returns `completion_tokens_details` while responses api returns `output_tokens_details`
1180+
reasoning_output_tokens_details = _get_attr(usage, "completion_tokens_details", {}) or _get_attr(
1181+
usage, "output_tokens_details", {}
1182+
)
1183+
reasoning_output_tokens = _get_attr(reasoning_output_tokens_details, "reasoning_tokens", None)
1184+
if reasoning_output_tokens is not None:
1185+
metrics["reasoning_output_tokens"] = reasoning_output_tokens
11841186

11851187
return metrics if metrics else None
11861188

@@ -1202,9 +1204,6 @@ def llmobs_metadata(self) -> Optional[Dict[str, Any]]:
12021204
if hasattr(self.response, "text") and self.response.text:
12031205
metadata["text"] = load_data_value(self.response.text)
12041206

1205-
if hasattr(self.response, "usage") and hasattr(self.response.usage, "output_tokens_details"):
1206-
metadata["reasoning_tokens"] = self.response.usage.output_tokens_details.reasoning_tokens
1207-
12081207
if self.span_type == "custom" and hasattr(self._raw_oai_span.span_data, "data"):
12091208
custom_data = getattr(self._raw_oai_span.span_data, "data", None)
12101209
if custom_data:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
features:
3+
- |
4+
LLM Observability: Reasoning token counts are now captured from OpenAI and OpenAI Agents responses.
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
interactions:
2+
- request:
3+
body: '{"input":[{"content":"You are a helpful assistant.","role":"system"},{"content":"Who
4+
won the world series in 2020?","role":"user"},{"content":"The Los Angeles Dodgers
5+
won the World Series in 2020.","role":"assistant"},{"content":"Where was it
6+
played?","role":"user"}],"max_output_tokens":500,"model":"gpt-5-mini","user":"ddtrace-test"}'
7+
headers:
8+
accept:
9+
- application/json
10+
accept-encoding:
11+
- gzip, deflate
12+
connection:
13+
- keep-alive
14+
content-length:
15+
- '335'
16+
content-type:
17+
- application/json
18+
host:
19+
- api.openai.com
20+
user-agent:
21+
- OpenAI/Python 2.3.0
22+
x-stainless-arch:
23+
- arm64
24+
x-stainless-async:
25+
- 'false'
26+
x-stainless-lang:
27+
- python
28+
x-stainless-os:
29+
- MacOS
30+
x-stainless-package-version:
31+
- 2.3.0
32+
x-stainless-read-timeout:
33+
- '600'
34+
x-stainless-retry-count:
35+
- '0'
36+
x-stainless-runtime:
37+
- CPython
38+
x-stainless-runtime-version:
39+
- 3.12.10
40+
method: POST
41+
uri: https://api.openai.com/v1/responses
42+
response:
43+
body:
44+
string: !!binary |
45+
H4sIAAAAAAAAAwAAAP//dFTBjts4DL3PVwg6tcCkkJ3EsedW7GKLBRboYYvdQ1EYtERntCNLgkRN
46+
Jyjy7wvLiWO3Mzebj6Qe+Uj+uGOMa8UfGA8YfSsQVQ2laOoSDyBLUVRNqfp9uce+ruuiUYBNqapt
47+
01UHhG134PdjCtf9h5KuaZyNONllQCBULYxYcah2VdHs94eMRQJKcYyRbvAGCdUU1IF8OgaX7Mir
48+
BxNxMmtjtD3yB/bjjjHGuIcThjFe4TMa5zHwO8bO2RlDcCNmkzHZoO31lVYhgTZxjUYKSZJ2dmUf
49+
4KV1iXyiltwTZnAvxIyRc6aVYNbZBqfQjMSOnjb7zaCt3pSi3G9EvRHXjuWs/IF9zcVMJd3EiG9L
50+
UZWykKMUXS8O0O12qtiJcretc+KchE4eJzEgOjs2bYZiGgYIp/Hhb9l2vn+NwBCPbzNododKZAYK
51+
5LbZV1W/3WJZil8ZDBgjHHHx/huqZ1A6S2hvXVkSW6W9aoIvNEdnB7DWEVx1/PptBRp39MF1ryA5
52+
0QPjXx6RlaIU7F8XjGJ/Y9AY2XeIzBs4oWJoSQc0JwbEPhnXIftL98j+0GgU05Z9DOOMkrP37Au+
53+
QGTvgFlMFMBsoiZkEALYIw5oiXUoIUVkrmf0iOy3z//8+fumaJgHq3DQ8v0HPpM8X75m3jw4k3sB
54+
MepIYGlyHh2zE/cQwBg06ymlkKZ98gGftUuxva5sm6Wfp9gHN3hqJchHbJ/w9CYWcBRNO7v0uI3e
55+
vK/Y9y7QNBVKp+Gi3GIkx+h5hSP0SKdWqzF5r3G1zhHDs5bYkr6egB6SmYaBR3IBV7eDcPAYgFK2
56+
Fx/ExZpVv9DrXRjg9r+Ytuy3XBb+jKFzUdNpUc1MfGr3o9Ny0ieR4zNwGz5OzreLkRSz0S85hmQl
57+
XLrLlY7QmeuhTHm15gK0Xd+p3f2v9sXxm8vMKqpboFiV+vP5K6rda8hriecZWESX9So7OQJzg8ui
58+
ntuY4kVaRQEkbgjjRd8BCRQQjG+d787/AwAA//8DAIgarZTFBgAA
59+
headers:
60+
CF-RAY:
61+
- 9a7533cccb62a16e-BOS
62+
Connection:
63+
- keep-alive
64+
Content-Encoding:
65+
- gzip
66+
Content-Type:
67+
- application/json
68+
Date:
69+
- Mon, 01 Dec 2025 20:06:02 GMT
70+
Server:
71+
- cloudflare
72+
Set-Cookie:
73+
- __cf_bm=2kGXnalD2OsHSa2nTDfcC.x2Ika0f2_xb7jheZf7DU4-1764619562-1.0.1.1-Au.Yl85NNbXOu16kuncX5kBMQzP4RpRl0PV6BVnlPTErwQVRCI0DupekNakHl7Axhdd.qH_fm2I0r0zPzdZKWnuDTN0VPhzRIJ4F0baZOQ0;
74+
path=/; expires=Mon, 01-Dec-25 20:36:02 GMT; domain=.api.openai.com; HttpOnly;
75+
Secure; SameSite=None
76+
- _cfuvid=HDI66hWV9Becq_pRyYlPYhGoaajJRBY_ADJKn2yxQxA-1764619562018-0.0.1.1-604800000;
77+
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
78+
Strict-Transport-Security:
79+
- max-age=31536000; includeSubDomains; preload
80+
Transfer-Encoding:
81+
- chunked
82+
X-Content-Type-Options:
83+
- nosniff
84+
alt-svc:
85+
- h3=":443"; ma=86400
86+
cf-cache-status:
87+
- DYNAMIC
88+
openai-organization:
89+
- datadog-staging
90+
openai-processing-ms:
91+
- '4080'
92+
openai-project:
93+
- proj_gt6TQZPRbZfoY2J9AQlEJMpd
94+
openai-version:
95+
- '2020-10-01'
96+
x-envoy-upstream-service-time:
97+
- '4083'
98+
x-ratelimit-limit-requests:
99+
- '30000'
100+
x-ratelimit-limit-tokens:
101+
- '180000000'
102+
x-ratelimit-remaining-requests:
103+
- '29999'
104+
x-ratelimit-remaining-tokens:
105+
- '180000000'
106+
x-ratelimit-reset-requests:
107+
- 2ms
108+
x-ratelimit-reset-tokens:
109+
- 0s
110+
x-request-id:
111+
- req_47f608c3045b489ea39bcb3e4ad37cb8
112+
status:
113+
code: 200
114+
message: OK
115+
version: 1

0 commit comments

Comments
 (0)