diff --git a/datastore/api/responses.py b/datastore/api/responses.py index 543e2df..19f9aa1 100644 --- a/datastore/api/responses.py +++ b/datastore/api/responses.py @@ -27,11 +27,12 @@ def _orjson_default(obj: Any) -> Any: if hasattr(obj, "model_dump"): return obj.model_dump(exclude_none=True) # BigQuery `NUMERIC` / `BIGNUMERIC` columns come back as Decimal — - # JSON has no native form, and orjson refuses by default. Stringify - # to preserve full precision (NUMERIC = 38 digits, BIGNUMERIC = 76+, - # both beyond IEEE-754 double). + # JSON has no native form, and orjson refuses by default. Emit as a + # JSON number so clients can do arithmetic without parsing a string; + # values past ~15 significant digits round to the nearest IEEE-754 + # double (full-precision callers should CAST to STRING in SQL). if isinstance(obj, Decimal): - return str(obj) + return float(obj) # `BYTES` columns come back as raw `bytes`; base64-encode so the # response stays UTF-8 and round-trippable. if isinstance(obj, bytes): diff --git a/datastore/services/streaming.py b/datastore/services/streaming.py index 12df494..883cd1e 100644 --- a/datastore/services/streaming.py +++ b/datastore/services/streaming.py @@ -43,17 +43,17 @@ def _json_default(obj: Any) -> Any: """Serialise types `orjson` refuses out of the box. BigQuery `NUMERIC` / `BIGNUMERIC` columns come back as - `decimal.Decimal`, which has no native JSON representation. - Stringifying preserves full precision (NUMERIC is 38 digits, - BIGNUMERIC is 76+ — beyond what a JSON number / IEEE-754 double - can represent without loss) and matches CKAN's datastore - convention of returning high-precision numerics as strings. + `decimal.Decimal`. Emit them as JSON numbers so clients can do + arithmetic without parsing a string. The cost is that values past + ~15 significant digits round to the nearest IEEE-754 double — + full-precision callers should `CAST(... AS STRING)` in + `datastore_search_sql` instead. 
`bytes` (BigQuery `BYTES` columns) are base64-encoded so the response stays UTF-8 and round-trippable. """ if isinstance(obj, Decimal): - return str(obj) + return float(obj) if isinstance(obj, bytes): return base64.b64encode(obj).decode("ascii") raise TypeError( diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 4377f8b..4a0cc49 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -28,8 +28,7 @@ def _join(parts: list[bytes]) -> str: def test_records_object_array_serialises_decimal_and_bytes() -> None: """Rows with NUMERIC (Decimal) + BYTES values must stream without - blowing up; Decimal is stringified (preserves precision); bytes is - base64-encoded.""" + blowing up; Decimal lands as a JSON number; bytes is base64-encoded.""" rows = iter( [ ("DCL", Decimal("47.82"), b"\x00\xff"), @@ -44,15 +43,19 @@ def test_records_object_array_serialises_decimal_and_bytes() -> None: assert records == [ { "product_code": "DCL", - "clearing_price_gbp_per_mwh": "47.82", + "clearing_price_gbp_per_mwh": 47.82, "signature": "AP8=", # b64("\x00\xff") }, { "product_code": "DCH", - "clearing_price_gbp_per_mwh": "1E-38", + "clearing_price_gbp_per_mwh": 1e-38, "signature": "YWJj", # b64(b"abc") }, ] + # The equality above already fails if the field flips back to a + # string (`47.82 != "47.82"`); this extra check pins the JSON-number + # contract explicitly so the intent is obvious when it breaks. + assert isinstance(records[0]["clearing_price_gbp_per_mwh"], float) def test_records_array_array_serialises_decimal_and_bytes() -> None: @@ -62,7 +65,8 @@ def test_records_array_array_serialises_decimal_and_bytes() -> None: body = _join(list(_records_array_array(rows))) records = json.loads(body) - assert records == [["DCL", "47.82", "AP8="]] + assert records == [["DCL", 47.82, "AP8="]] + assert isinstance(records[0][1], float) def test_unsupported_type_still_raises() -> None: