
Commit 113fac7

fix(llmobs): persist annotation context across batches (#15571)
## Description

[**For this support card**](https://datadoghq.atlassian.net/browse/MLOS-298)

Taking a look at the [with_structured_output method](https://github.com/langchain-ai/langchain/blob/85012ae601a8ce2bd1ecfcdc889fb9151423ddaf/libs/partners/openai/langchain_openai/chat_models/azure.py#L833): it calls `RunnableSequence.batch()` instead of `BaseChatModel.batch()` (which is what gets called otherwise), so we never reactivate the context after the first LLM batch call. Essentially, `RunnableSequence.batch()` creates a root-level span that encompasses the entire operation. When this root span finishes, the context provider's cleanup logic runs and, because `_reactivate=False`, the annotation context is lost. (An illustrative repro sketch is included under Additional Notes below.)

Here's a code flow Cursor made for me:

User calls: gpt_4.with_structured_output(Schema).batch([...])
↓
RunnableSequence.batch() - TRACED (creates chain span as ROOT)
↓
Internally calls ChatModel.generate() - creates child LLM spans
↓
Chain span finishes → triggers context provider's _update_active()
↓
Since _reactivate=False, annotation context is NOT restored
↓
Second batch() has no active context → annotations don't apply

What we can do is track the context we create, reactivate it across batches, and only deactivate it once we know we are actually leaving the annotation context block.

## Testing

Added tests to make sure the annotation context persists across multiple root spans.

## Risks

<!-- Note any risks associated with this change, or "None" if no risks -->

## Additional Notes

<!-- Any other information that would be helpful for reviewers -->
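An illustrative repro of the scenario described above, in case it helps reviewers. This is a sketch only: the deployment name, schema, and tag are placeholders (not from this PR), and it assumes LLM Observability and the Azure endpoint/credentials are already configured via environment variables.

```python
from pydantic import BaseModel
from langchain_openai import AzureChatOpenAI

from ddtrace.llmobs import LLMObs


class Answer(BaseModel):
    text: str


LLMObs.enable()  # assumes LLM Observability is configured for this app
llm = AzureChatOpenAI(azure_deployment="gpt-4")  # placeholder deployment name
structured = llm.with_structured_output(Answer)  # wraps the model in a RunnableSequence

with LLMObs.annotation_context(tags={"ticket": "MLOS-298"}):
    structured.batch(["first question"])   # annotations applied to these spans
    structured.batch(["second question"])  # previously lost the annotation context
```

Before this change, only the spans from the first `batch()` call picked up the annotation context's tags; the second call ran with no active context.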
1 parent e562d52 commit 113fac7

File tree

3 files changed (+57 / -1 lines)


ddtrace/llmobs/_llmobs.py

Lines changed: 12 additions & 1 deletion
@@ -1067,15 +1067,22 @@ def annotation_context(
         """
         # id to track an annotation for registering / de-registering
         annotation_id = rand64bits()
+        # Track context we create so we can clean up _reactivate on exit.
+        # Using a dict as a mutable container to share state between closures.
+        state = {"created_context": None}

         def get_annotations_context_id():
             current_ctx = cls._instance.tracer.current_trace_context()
             # default the context id to the annotation id
             ctx_id = annotation_id
             if current_ctx is None:
+                # No context exists - create one and enable reactivation so spans finishing
+                # within this annotation_context don't clear the context for subsequent operations
                 current_ctx = Context(is_remote=False)
                 current_ctx.set_baggage_item(ANNOTATIONS_CONTEXT_ID, ctx_id)
+                current_ctx._reactivate = True
                 cls._instance.tracer.context_provider.activate(current_ctx)
+                state["created_context"] = current_ctx
             elif not current_ctx.get_baggage_item(ANNOTATIONS_CONTEXT_ID):
                 current_ctx.set_baggage_item(ANNOTATIONS_CONTEXT_ID, ctx_id)
             else:
@@ -1098,9 +1105,13 @@ def deregister_annotation():
             for i, (key, _, _) in enumerate(cls._instance._annotations):
                 if key == annotation_id:
                     cls._instance._annotations.pop(i)
-                    return
+                    break
             else:
                 log.debug("Failed to pop annotation context")
+            # Disable reactivation on context we created to prevent it from being
+            # restored after exiting the annotation_context block
+            if state["created_context"] is not None:
+                state["created_context"]._reactivate = False

         return AnnotationContext(register_annotation, deregister_annotation)
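A side note on swapping `return` for `break` in the second hunk: Python's `for ... else` clause runs only when the loop finishes without hitting `break`, and the earlier `return` would have skipped the new `_reactivate` cleanup that now follows the loop. A small standalone illustration of those semantics (not code from the patch; function and names are made up):

```python
def pop_matching(items, target):
    for i, item in enumerate(items):
        if item == target:
            items.pop(i)
            break  # found: the for-else clause is skipped, but execution continues below
    else:
        print("no match found")  # runs only when the loop finished without `break`
    print("cleanup runs either way")  # an early `return` inside the loop would skip this


pop_matching(["a", "b"], "b")  # -> cleanup runs either way
pop_matching(["a", "b"], "z")  # -> no match found, then cleanup runs either way
```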

Release note (new file)

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    LLM Observability: This fix resolves an issue where ``LLMObs.annotation_context()`` properties (tags, prompt,
+    and name) were not applied to subsequent LLM operations within the same context block. This occurred when
+    multiple sequential operations (such as Langchain batch calls with structured outputs) were performed,
+    causing only the first operation to receive the annotations.
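For context, a minimal sketch of the behavior this note describes, using the public LLM Observability API. The tag and span names are illustrative, and it assumes `LLMObs.enable()` has already been called:

```python
from ddtrace.llmobs import LLMObs

with LLMObs.annotation_context(tags={"experiment": "v2"}):
    with LLMObs.workflow(name="first_batch"):
        pass  # receives the experiment:v2 tag
    with LLMObs.workflow(name="second_batch"):
        pass  # previously missed the tag; with this fix it is applied here as well
```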

tests/llmobs/test_llmobs_service.py

Lines changed: 38 additions & 0 deletions
@@ -1399,6 +1399,44 @@ def test_annotation_context_separate_traces_maintained(llmobs, llmobs_events):
     assert agent_span["parent_id"] == "undefined"


+def test_annotation_context_persists_across_multiple_root_span_operations(llmobs):
+    """
+    Regression test: verifies that annotation context tags persist across multiple
+    sequential root span operations. This simulates scenarios like multiple batch()
+    calls with structured outputs in Langchain, where each batch creates a root span
+    that finishes before the next batch starts.
+
+    The bug occurred because the trace context wasn't being reactivated after a root
+    span finished, causing subsequent operations to lose the annotation context's baggage.
+    """
+    with llmobs.annotation_context(tags={"test_tag": "should_persist"}):
+        # First operation - creates and finishes a root span
+        with llmobs.workflow(name="first_batch") as span1:
+            assert span1._get_ctx_item(TAGS) == {"test_tag": "should_persist"}
+
+        # Second operation - should still have annotation context applied
+        with llmobs.workflow(name="second_batch") as span2:
+            assert span2._get_ctx_item(TAGS) == {"test_tag": "should_persist"}
+
+        # Third operation - verify it continues to work
+        with llmobs.agent(name="third_operation") as span3:
+            assert span3._get_ctx_item(TAGS) == {"test_tag": "should_persist"}
+
+
+def test_annotation_context_not_reactivated_after_exit(llmobs):
+    """
+    Verifies that once an annotation context exits, the context we created is not
+    reactivated even after subsequent span operations within a new context.
+    """
+    with llmobs.annotation_context(tags={"inside": "context"}):
+        with llmobs.workflow(name="inside_span") as span1:
+            assert span1._get_ctx_item(TAGS) == {"inside": "context"}
+
+    # After exiting annotation_context, tags should not be applied
+    with llmobs.workflow(name="outside_span") as span2:
+        assert span2._get_ctx_item(TAGS) is None
+
+
 def test_annotation_context_only_applies_to_local_context(llmobs):
     """
     tests that annotation contexts only apply to spans belonging to the same
