Skip to content

Commit 3a7c8a4

Browse files
committed
feat(llmobs): add base datasets and experiments
1 parent 8d3e778 commit 3a7c8a4

File tree

7 files changed

+144
-3
lines changed

7 files changed

+144
-3
lines changed

ddtrace/llmobs/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66
LLMObs.enable()
77
"""
88

9+
from ._experiment import Dataset
10+
from ._experiment import DatasetRecord
11+
from ._experiment import Experiment
912
from ._llmobs import LLMObs
1013
from ._llmobs import LLMObsSpan
1114

1215

13-
__all__ = ["LLMObs", "LLMObsSpan"]
16+
__all__ = ["LLMObs", "LLMObsSpan", "Experiment", "Dataset", "DatasetRecord"]

ddtrace/llmobs/_experiment.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from typing import Any
2+
from typing import Dict
3+
from typing import List
4+
from typing import NotRequired
5+
from typing import Optional
6+
from typing import TypedDict
7+
from typing import Union
8+
9+
10+
JSONType = Union[str, int, float, bool, None, List["JSONType"], Dict[str, "JSONType"]]
11+
12+
13+
class DatasetRecord(TypedDict):
14+
input: JSONType
15+
expected_output: JSONType
16+
record_id: NotRequired[Optional[str]]
17+
metadata: NotRequired[Optional[Dict[str, Any]]]
18+
19+
20+
class Dataset:
21+
_name: str
22+
_id: str
23+
_data: List[DatasetRecord]
24+
25+
def __init__(self, name: str, id: str, data: List[DatasetRecord]) -> None:
26+
self._name = name
27+
self._id = id
28+
self._data = data
29+
30+
def __str__(self) -> str:
31+
return f"Dataset(name={self._name}, id={self._id}, data={self._data})"
32+
33+
34+
class Experiment:
35+
def __init__(self, name: str, dataset: Dataset, description: str = "") -> None:
36+
self.name = name
37+
self._dataset = dataset
38+
self._experiment_id: Optional[str] = None
39+
self._project_id: Optional[str] = None
40+
41+
def run(self):
42+
pass

ddtrace/llmobs/_llmobs.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from typing import TypedDict
1515
from typing import Union
1616
from typing import cast
17+
from urllib.parse import quote
1718

1819
import ddtrace
1920
from ddtrace import config
@@ -73,6 +74,8 @@
7374
from ddtrace.llmobs._constants import TAGS
7475
from ddtrace.llmobs._context import LLMObsContextProvider
7576
from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
77+
from ddtrace.llmobs._experiment import Dataset
78+
from ddtrace.llmobs._experiment import Experiment
7679
from ddtrace.llmobs._utils import AnnotationContext
7780
from ddtrace.llmobs._utils import LinkTracker
7881
from ddtrace.llmobs._utils import ToolCallTracker
@@ -85,6 +88,7 @@
8588
from ddtrace.llmobs._utils import validate_prompt
8689
from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
8790
from ddtrace.llmobs._writer import LLMObsEvaluationMetricEvent
91+
from ddtrace.llmobs._writer import LLMObsExperimentsClient
8892
from ddtrace.llmobs._writer import LLMObsSpanEvent
8993
from ddtrace.llmobs._writer import LLMObsSpanWriter
9094
from ddtrace.llmobs._writer import should_use_agentless
@@ -180,6 +184,11 @@ def __init__(
180184
interval=float(os.getenv("_DD_LLMOBS_EVALUATOR_INTERVAL", 1.0)),
181185
llmobs_service=self,
182186
)
187+
self._dne_client = LLMObsExperimentsClient(
188+
interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)),
189+
timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)),
190+
is_agentless=agentless_enabled,
191+
)
183192

184193
forksafe.register(self._child_after_fork)
185194

@@ -549,6 +558,13 @@ def enable(
549558
config._llmobs_ml_app,
550559
)
551560

561+
@classmethod
562+
def pull_dataset(cls, name: str) -> Dataset:
563+
return cls._instance._dne_client.dataset_pull(name)
564+
565+
def experiment(self, name: str, dataset: Dataset) -> Experiment:
566+
return Experiment(name, dataset)
567+
552568
@classmethod
553569
def register_processor(cls, processor: Optional[Callable[[LLMObsSpan], LLMObsSpan]] = None) -> None:
554570
"""Register a processor to be called on each LLMObs span.

ddtrace/llmobs/_writer.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import List
55
from typing import Optional
66
from typing import Union
7+
from urllib.parse import quote
78

89

910
# TypedDict was added to typing in python 3.8
@@ -35,6 +36,8 @@
3536
from ddtrace.llmobs._constants import EVP_SUBDOMAIN_HEADER_NAME
3637
from ddtrace.llmobs._constants import SPAN_ENDPOINT
3738
from ddtrace.llmobs._constants import SPAN_SUBDOMAIN_NAME
39+
from ddtrace.llmobs._experiment import Dataset
40+
from ddtrace.llmobs._experiment import DatasetRecord
3841
from ddtrace.llmobs._utils import safe_json
3942
from ddtrace.settings._agent import config as agent_config
4043

@@ -109,6 +112,7 @@ def __init__(
109112
is_agentless: bool,
110113
_site: str = "",
111114
_api_key: str = "",
115+
_app_key: str = "",
112116
_override_url: str = "",
113117
) -> None:
114118
super(BaseLLMObsWriter, self).__init__(interval=interval)
@@ -118,6 +122,7 @@ def __init__(
118122
self._timeout: float = timeout
119123
self._api_key: str = _api_key or config._dd_api_key
120124
self._site: str = _site or config._dd_site
125+
self._app_key: str = _app_key or config._dd_app_key
121126
self._override_url: str = _override_url
122127

123128
self._agentless: bool = is_agentless
@@ -263,6 +268,71 @@ def _data(self, events: List[LLMObsEvaluationMetricEvent]) -> Dict[str, Any]:
263268
return {"data": {"type": "evaluation_metric", "attributes": {"metrics": events}}}
264269

265270

271+
class LLMObsExperimentsClient(BaseLLMObsWriter):
272+
273+
def request(self, method: str, path: str, body: bytes = b"") -> Response:
274+
headers = {
275+
"Content-Type": "application/json",
276+
"DD-API-KEY": self._api_key,
277+
"DD-APPLICATION-KEY": self._app_key,
278+
}
279+
site = self._site
280+
if site == "datad0g.com":
281+
base = "https://dd.datad0g.com"
282+
else:
283+
base = f"https://api.{site}"
284+
285+
conn = get_connection(base)
286+
try:
287+
url = base + path
288+
logger.debug("requesting %s", url)
289+
conn.request(method, url, body, headers)
290+
resp = conn.getresponse()
291+
if resp.status >= 300:
292+
raise ValueError(f"Failed to {method} {path}: {resp.status}")
293+
return Response.from_http_response(resp)
294+
finally:
295+
conn.close()
296+
297+
def dataset_pull(self, name: str) -> Dataset:
298+
299+
path = f"/api/unstable/llm-obs/v1/datasets?filter[name]={quote(name)}"
300+
resp = self.request("GET", path)
301+
302+
response_data = resp.get_json()
303+
datasets = response_data.get("data", [])
304+
305+
if not datasets:
306+
raise ValueError(f"Dataset '{name}' not found")
307+
308+
dataset_id = datasets[0]["id"]
309+
url = f"/api/unstable/llm-obs/v1/datasets/{dataset_id}/records"
310+
try:
311+
resp = self.request("GET", url)
312+
records_data = resp.get_json()
313+
except ValueError as e:
314+
if "404" in str(e):
315+
raise ValueError(f"Dataset '{name}' not found") from e
316+
raise
317+
318+
class_records: List[DatasetRecord] = []
319+
for record in records_data.get("data", []):
320+
attrs = record.get("attributes", {})
321+
input_data = attrs.get("input")
322+
expected_output = attrs.get("expected_output")
323+
324+
class_records.append(
325+
{
326+
"record_id": record.get("id"),
327+
"input": input_data,
328+
"expected_output": expected_output,
329+
**attrs.get("metadata", {}),
330+
}
331+
)
332+
333+
return Dataset(name, dataset_id, class_records)
334+
335+
266336
class LLMObsSpanWriter(BaseLLMObsWriter):
267337
"""Writes span events to the LLMObs Span Endpoint."""
268338

ddtrace/settings/_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ def __init__(self):
645645
self._telemetry_install_time = _get_config("DD_INSTRUMENTATION_INSTALL_TYPE")
646646

647647
self._dd_api_key = _get_config("DD_API_KEY")
648+
self._dd_app_key = _get_config("DD_APP_KEY")
648649
self._dd_site = _get_config("DD_SITE", "datadoghq.com")
649650

650651
self._llmobs_enabled = _get_config("DD_LLMOBS_ENABLED", False, asbool)

tests/llmobs/conftest.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,11 @@ def ddtrace_global_config():
9999

100100

101101
def default_global_config():
102-
return {"_dd_api_key": "<not-a-real-api_key>", "_llmobs_ml_app": "unnamed-ml-app", "service": "tests.llmobs"}
102+
return {
103+
"_dd_api_key": os.environ.get("DD_API_KEY", "<not-a-real-api_key>"),
104+
"_llmobs_ml_app": "unnamed-ml-app",
105+
"service": "tests.llmobs",
106+
}
103107

104108

105109
@pytest.fixture
@@ -172,7 +176,7 @@ def tracer():
172176
@pytest.fixture
173177
def llmobs_env():
174178
return {
175-
"DD_API_KEY": "<default-not-a-real-key>",
179+
"DD_API_KEY": os.environ.get("DD_API_KEY", "<default-not-a-real-key>"),
176180
"DD_LLMOBS_ML_APP": "unnamed-ml-app",
177181
}
178182

tests/llmobs/test_experiments.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
3+
def test_dataset_pull(llmobs):
4+
dataset = llmobs.pull_dataset(name="kyle-test")
5+
assert dataset._id == "929531d1-3cd2-473d-ab4e-2423b40c5db5"

0 commit comments

Comments
 (0)