Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ddtrace/llmobs/_integrations/google_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ddtrace.llmobs._constants import CACHE_READ_INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import REASONING_OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._utils import _get_attr
from ddtrace.llmobs._utils import safe_json
Expand Down Expand Up @@ -150,6 +151,8 @@ def extract_generation_metrics_google_genai(response) -> Dict[str, Any]:
usage[CACHE_READ_INPUT_TOKENS_METRIC_KEY] = cached_tokens
if total_tokens is not None:
usage[TOTAL_TOKENS_METRIC_KEY] = total_tokens
if thought_tokens is not None:
usage[REASONING_OUTPUT_TOKENS_METRIC_KEY] = thought_tokens

return usage

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
features:
- |
LLM Observability: Reasoning token counts are now captured from Google GenAI responses.
10 changes: 10 additions & 0 deletions tests/contrib/google_genai/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from tests.contrib.google_genai.utils import MOCK_EMBED_CONTENT_RESPONSE
from tests.contrib.google_genai.utils import MOCK_GENERATE_CONTENT_RESPONSE
from tests.contrib.google_genai.utils import MOCK_GENERATE_CONTENT_RESPONSE_STREAM
from tests.contrib.google_genai.utils import MOCK_GENERATE_CONTENT_RESPONSE_WITH_REASONING
from tests.contrib.google_genai.utils import MOCK_TOOL_CALL_RESPONSE
from tests.contrib.google_genai.utils import MOCK_TOOL_CALL_RESPONSE_STREAM
from tests.contrib.google_genai.utils import MOCK_TOOL_FINAL_RESPONSE
Expand Down Expand Up @@ -109,6 +110,15 @@ def _fake_generate_content(self, *, model: str, contents, config=None):
yield


@pytest.fixture
def mock_generate_content_with_reasoning(genai):
    """Patch the GenAI client so every generate_content call returns the canned
    response whose usage metadata includes thought (reasoning) tokens."""

    def _stubbed_generate_content(self, *, model: str, contents, config=None):
        # All arguments are ignored; the fixture always yields the same response.
        return MOCK_GENERATE_CONTENT_RESPONSE_WITH_REASONING

    patcher = mock_patch.object(genai.models.Models, "_generate_content", _stubbed_generate_content)
    with patcher:
        yield


@pytest.fixture
def mock_async_generate_content(genai):
async def _fake_async_generate_content(self, *, model: str, contents, config=None):
Expand Down
36 changes: 36 additions & 0 deletions tests/contrib/google_genai/test_google_genai_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ def test_generate_content(self, genai_client, llmobs_events, mock_tracer, mock_g
assert len(llmobs_events) == 1
assert llmobs_events[0] == expected_llmobs_span_event(span)

def test_generate_content_with_reasoning_tokens(
    self, genai_client, llmobs_events, mock_tracer, mock_generate_content_with_reasoning
):
    """A generate_content call backed by the reasoning-token mock emits exactly one
    LLMObs event whose token metrics include reasoning_output_tokens."""
    prompt = "Why is the sky blue? Explain in 2-3 sentences."
    genai_client.models.generate_content(
        model="gemini-2.5-pro",
        contents=prompt,
        config=FULL_GENERATE_CONTENT_CONFIG,
    )
    traces = mock_tracer.pop_traces()
    span = traces[0][0]
    assert len(llmobs_events) == 1
    assert llmobs_events[0] == expected_llmobs_span_event_with_reasoning(span)

def test_generate_content_error(self, genai_client, llmobs_events, mock_tracer, mock_generate_content):
with pytest.raises(TypeError):
genai_client.models.generate_content(
Expand Down Expand Up @@ -589,3 +601,27 @@ def expected_llmobs_embedding_error_span_event(span):
},
tags={"ml_app": "<ml-app-name>", "service": "tests.contrib.google_genai"},
)


def expected_llmobs_span_event_with_reasoning(span):
    """Build the expected LLMObs span event for the mocked reasoning response:
    two assistant output parts and token metrics carrying reasoning_output_tokens."""
    input_messages = [
        {"content": "You are a helpful assistant.", "role": "system"},
        {"content": "Why is the sky blue? Explain in 2-3 sentences.", "role": "user"},
    ]
    output_messages = [
        {"content": "Let me think about this...", "role": "assistant"},
        {"content": "The sky is blue due to rayleigh scattering", "role": "assistant"},
    ]
    # output_tokens (14) appears to be candidates (9) + thoughts (5) from the mock's
    # usage_metadata — TODO confirm against the integration's metric extraction.
    token_metrics = {
        "input_tokens": 8,
        "output_tokens": 14,
        "total_tokens": 22,
        "reasoning_output_tokens": 5,
    }
    return _expected_llmobs_llm_span_event(
        span,
        model_name="gemini-2.5-pro",
        model_provider="google",
        input_messages=input_messages,
        output_messages=output_messages,
        metadata=get_expected_metadata(),
        token_metrics=token_metrics,
        tags={"ml_app": "<ml-app-name>", "service": "tests.contrib.google_genai"},
    )
17 changes: 17 additions & 0 deletions tests/contrib/google_genai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,23 @@
),
)

# Canned GenAI response whose usage_metadata reports thought (reasoning) tokens:
# 8 prompt + 9 candidate + 5 thought = 22 total.
MOCK_GENERATE_CONTENT_RESPONSE_WITH_REASONING = types.GenerateContentResponse(
    candidates=[
        types.Candidate(
            content=types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text="Let me think about this..."),
                    types.Part.from_text(text="The sky is blue due to rayleigh scattering"),
                ],
            )
        )
    ],
    usage_metadata=types.GenerateContentResponseUsageMetadata(
        prompt_token_count=8,
        candidates_token_count=9,
        thoughts_token_count=5,
        total_token_count=22,
    ),
)

MOCK_GENERATE_CONTENT_RESPONSE_STREAM = [
types.GenerateContentResponse(
candidates=[types.Candidate(content=types.Content(role="model", parts=[types.Part.from_text(text="The sky")]))],
Expand Down
Loading