Skip to content

Commit 6abccd1

Browse files
committed
feat(llmobs): add base datasets and experiments
1 parent d75a587 commit 6abccd1

File tree

7 files changed

+134
-1
lines changed

7 files changed

+134
-1
lines changed

ddtrace/llmobs/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66
LLMObs.enable()
77
"""
88

9+
from ._experiment import Dataset
10+
from ._experiment import DatasetRecord
11+
from ._experiment import Experiment
912
from ._llmobs import LLMObs
1013
from ._llmobs import LLMObsSpan
1114

1215

13-
__all__ = ["LLMObs", "LLMObsSpan"]
16+
# Public API of the llmobs package. DatasetRecord is imported above alongside
# Dataset and Experiment, so it is exported here too instead of being left as
# an unused import.
__all__ = ["LLMObs", "LLMObsSpan", "Experiment", "Dataset", "DatasetRecord"]

ddtrace/llmobs/_experiment.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from typing import Any
2+
from typing import Dict
3+
from typing import List
4+
from typing import NotRequired
5+
from typing import Optional
6+
from typing import TypedDict
7+
from typing import Union
8+
9+
10+
# Any JSON-compatible value. The alias is recursive: the quoted "JSONType"
# forward references let lists and dicts contain further JSON values.
JSONType = Union[str, int, float, bool, None, List["JSONType"], Dict[str, "JSONType"]]
11+
12+
13+
class DatasetRecord(TypedDict):
    """Typed shape of a single dataset row.

    ``input`` and ``expected_output`` are required JSON-compatible values.
    ``record_id`` and ``metadata`` may be left out entirely or set to ``None``.
    """

    # Required keys.
    input: JSONType
    expected_output: JSONType

    # Optional keys.
    record_id: NotRequired[Optional[str]]
    metadata: NotRequired[Optional[Dict[str, Any]]]
18+
19+
20+
class Dataset:
    """A named collection of dataset records.

    Holds the dataset's name, its identifier and the list of records that
    belong to it.
    """

    _name: str
    _id: str
    _data: List[DatasetRecord]

    def __init__(self, name: str, id: str, data: List[DatasetRecord]) -> None:
        """Store the dataset's name, id and records.

        :param name: Name of the dataset.
        :param id: Identifier of the dataset. The parameter name shadows the
            builtin but is kept because it is part of the constructor signature.
        :param data: Records contained in the dataset.
        """
        # The name was previously accepted but never stored, leaving the
        # declared ``_name`` attribute unset on every instance.
        self._name = name
        self._id = id
        self._data = data

    @property
    def name(self) -> str:
        """Return the dataset name passed to the constructor."""
        return self._name
28+
29+
30+
class Experiment:
    """An experiment bound to a dataset.

    The experiment and project ids start as ``None``; nothing in this class
    assigns them yet.
    """

    def __init__(self, name: str, dataset: Dataset, description: str = "") -> None:
        """Create an experiment.

        :param name: Name of the experiment.
        :param dataset: Dataset the experiment is attached to.
        :param description: Optional free-form description.
        """
        self.name = name
        self._dataset = dataset
        # Keep the description instead of silently discarding the argument.
        self._description = description
        self._experiment_id: Optional[str] = None
        self._project_id: Optional[str] = None

    def run(self) -> None:
        """Placeholder for running the experiment; currently does nothing."""
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# NOTE(review): per the diff hunk this file is added with this single line, so
# none of the names listed below are defined in this module. A star-import of
# this module would raise AttributeError until they are defined or imported
# here -- presumably they are placeholders for upcoming work; confirm.
__all__ = ["Dataset", "Experiment", "task", "evaluator", "init", "summary_metric"]

ddtrace/llmobs/_llmobs.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from typing import TypedDict
1515
from typing import Union
1616
from typing import cast
17+
from urllib.parse import quote
1718

1819
import ddtrace
1920
from ddtrace import config
@@ -73,6 +74,8 @@
7374
from ddtrace.llmobs._constants import TAGS
7475
from ddtrace.llmobs._context import LLMObsContextProvider
7576
from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
77+
from ddtrace.llmobs._experiment import Dataset
78+
from ddtrace.llmobs._experiment import Experiment
7679
from ddtrace.llmobs._utils import AnnotationContext
7780
from ddtrace.llmobs._utils import LinkTracker
7881
from ddtrace.llmobs._utils import ToolCallTracker
@@ -85,6 +88,7 @@
8588
from ddtrace.llmobs._utils import validate_prompt
8689
from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
8790
from ddtrace.llmobs._writer import LLMObsEvaluationMetricEvent
91+
from ddtrace.llmobs._writer import LLMObsExperimentsClient
8892
from ddtrace.llmobs._writer import LLMObsSpanEvent
8993
from ddtrace.llmobs._writer import LLMObsSpanWriter
9094
from ddtrace.llmobs._writer import should_use_agentless
@@ -166,6 +170,7 @@ def __init__(
166170
self._llmobs_context_provider = LLMObsContextProvider()
167171
self._user_span_processor = span_processor
168172
agentless_enabled = config._llmobs_agentless_enabled if config._llmobs_agentless_enabled is not None else True
173+
169174
self._llmobs_span_writer = LLMObsSpanWriter(
170175
interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)),
171176
timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)),
@@ -180,6 +185,11 @@ def __init__(
180185
interval=float(os.getenv("_DD_LLMOBS_EVALUATOR_INTERVAL", 1.0)),
181186
llmobs_service=self,
182187
)
188+
self._dne_client = LLMObsExperimentsClient(
189+
interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)),
190+
timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)),
191+
is_agentless=agentless_enabled,
192+
)
183193

184194
forksafe.register(self._child_after_fork)
185195

@@ -549,6 +559,13 @@ def enable(
549559
config._llmobs_ml_app,
550560
)
551561

562+
@classmethod
def pull_dataset(cls, name: str) -> Dataset:
    """Fetch the dataset called ``name`` via the instance's experiments client.

    :param name: Name of the dataset to pull.
    :returns: Whatever the client's ``dataset_pull`` returns for ``name``.
    """
    experiments_client = cls._instance._dne_client
    return experiments_client.dataset_pull(name)
565+
566+
@classmethod
def experiment(cls, name: str, dataset: Dataset, description: str = "") -> Experiment:
    """Create an :class:`Experiment` for ``dataset``.

    Declared as a classmethod so it can be called on the class like the
    neighbouring ``pull_dataset`` and ``register_processor``; calling it on an
    instance keeps working.

    :param name: Name of the experiment.
    :param dataset: Dataset the experiment is attached to.
    :param description: Optional description forwarded to the experiment.
    :returns: The newly constructed experiment.
    """
    return Experiment(name, dataset, description=description)
568+
552569
@classmethod
553570
def register_processor(cls, processor: Optional[Callable[[LLMObsSpan], LLMObsSpan]] = None) -> None:
554571
"""Register a processor to be called on each LLMObs span.

ddtrace/llmobs/_writer.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import List
55
from typing import Optional
66
from typing import Union
7+
from urllib.parse import quote
78

89

910
# TypedDict was added to typing in python 3.8
@@ -109,6 +110,7 @@ def __init__(
109110
is_agentless: bool,
110111
_site: str = "",
111112
_api_key: str = "",
113+
_app_key: str = "",
112114
_override_url: str = "",
113115
) -> None:
114116
super(BaseLLMObsWriter, self).__init__(interval=interval)
@@ -118,6 +120,7 @@ def __init__(
118120
self._timeout: float = timeout
119121
self._api_key: str = _api_key or config._dd_api_key
120122
self._site: str = _site or config._dd_site
123+
self._app_key: str = _app_key or config._dd_app_key
121124
self._override_url: str = _override_url
122125

123126
self._agentless: bool = is_agentless
@@ -263,6 +266,71 @@ def _data(self, events: List[LLMObsEvaluationMetricEvent]) -> Dict[str, Any]:
263266
return {"data": {"type": "evaluation_metric", "attributes": {"metrics": events}}}
264267

265268

269+
class LLMObsExperimentsClient(BaseLLMObsWriter):
    """Client for the datasets endpoints used by datasets and experiments.

    Requests are authenticated with both the API key and the application key
    held by the writer.
    """

    def request(self, method: str, path: str, body: bytes = b"") -> Response:
        """Send one HTTP request to the API host for the configured site.

        :param method: HTTP method, e.g. ``"GET"``.
        :param path: Request path including any query string.
        :param body: Raw request body; empty by default.
        :returns: The response wrapped by ``Response.from_http_response``.
        """
        headers = {
            "Content-Type": "application/json",
            "DD-API-KEY": self._api_key,
            "DD-APPLICATION-KEY": self._app_key,
        }
        site = self._site
        if site == "datad0g.com":
            base = "https://dd.datad0g.com"
        else:
            base = f"https://api.{site}"

        conn = get_connection(base + path)
        try:
            # The request target must be the path (with its query string).
            # Passing the bare host URL here meant the endpoint was never sent.
            conn.request(method, path, body, headers)
            resp = conn.getresponse()
            return Response.from_http_response(resp)
        finally:
            conn.close()

    def dataset_pull(self, name: str) -> "Dataset":
        """Look up a dataset by name and download its records.

        :param name: Name of the dataset to pull.
        :returns: A ``Dataset`` holding the id of the first match and its records.
        :raises ValueError: If no dataset matches ``name``.
        """
        # Imported locally, as in the original code; the return annotation is a
        # string because ``Dataset`` is therefore not a module-level name here.
        from ddtrace.llmobs._experiment import Dataset
        from ddtrace.llmobs._experiment import DatasetRecord

        path = f"/api/unstable/llm-obs/v1/datasets?filter[name]={quote(name)}"
        resp = self.request("GET", path)

        response_data = resp.get_json()
        datasets = response_data.get("data", [])

        if not datasets:
            raise ValueError(f"Dataset '{name}' not found")

        # Only the first match is used.
        dataset_id = datasets[0]["id"]

        url = f"/api/unstable/llm-obs/v1/datasets/{dataset_id}/records"
        try:
            resp = self.request("GET", url)
            records_data = resp.get_json()
        except ValueError as e:
            # NOTE(review): ``request`` itself does not raise on a 404 status, so
            # this branch only fires if a helper raises a ValueError whose message
            # contains "404" -- confirm that is what happens for missing datasets.
            if "404" in str(e):
                raise ValueError(f"Dataset '{name}' not found") from e
            raise

        # Transform records into the expected format
        class_records: List[DatasetRecord] = []
        for record in records_data.get("data", []):
            attrs = record.get("attributes", {})
            class_records.append(
                {
                    "record_id": record.get("id"),
                    "input": attrs.get("input"),
                    "expected_output": attrs.get("expected_output"),
                    # Keep metadata under its own key, as DatasetRecord declares,
                    # rather than splatting it over the record's own fields.
                    "metadata": attrs.get("metadata", {}),
                }
            )

        # Dataset takes (name, id, data); the id used to be left out.
        return Dataset(name, dataset_id, class_records)
332+
333+
266334
class LLMObsSpanWriter(BaseLLMObsWriter):
267335
"""Writes span events to the LLMObs Span Endpoint."""
268336

ddtrace/settings/_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ def __init__(self):
645645
self._telemetry_install_time = _get_config("DD_INSTRUMENTATION_INSTALL_TYPE")
646646

647647
self._dd_api_key = _get_config("DD_API_KEY")
648+
self._dd_app_key = _get_config("DD_APP_KEY")
648649
self._dd_site = _get_config("DD_SITE", "datadoghq.com")
649650

650651
self._llmobs_enabled = _get_config("DD_LLMOBS_ENABLED", False, asbool)

tests/llmobs/test_experiments.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
3+
def test_dataset_pull(llmobs):
    """Pulling a dataset by name yields a dataset that reports that name."""
    pulled = llmobs.pull_dataset(name="test_dataset")
    assert pulled.name == "test_dataset"

0 commit comments

Comments
 (0)