Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions datastore/api/responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ def _orjson_default(obj: Any) -> Any:
if hasattr(obj, "model_dump"):
return obj.model_dump(exclude_none=True)
# BigQuery `NUMERIC` / `BIGNUMERIC` columns come back as Decimal —
# JSON has no native form, and orjson refuses by default. Stringify
# to preserve full precision (NUMERIC = 38 digits, BIGNUMERIC = 76+,
# both beyond IEEE-754 double).
# JSON has no native form, and orjson refuses by default. Emit as a
# JSON number so clients can do arithmetic without parsing a string;
# values past ~15 significant digits round to the nearest IEEE-754
# double (full-precision callers should CAST to STRING in SQL).
if isinstance(obj, Decimal):
return str(obj)
return float(obj)
# `BYTES` columns come back as raw `bytes`; base64-encode so the
# response stays UTF-8 and round-trippable.
if isinstance(obj, bytes):
Expand Down
12 changes: 6 additions & 6 deletions datastore/services/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,17 @@ def _json_default(obj: Any) -> Any:
"""Serialise types `orjson` refuses out of the box.

BigQuery `NUMERIC` / `BIGNUMERIC` columns come back as
`decimal.Decimal`, which has no native JSON representation.
Stringifying preserves full precision (NUMERIC is 38 digits,
BIGNUMERIC is 76+ — beyond what a JSON number / IEEE-754 double
can represent without loss) and matches CKAN's datastore
convention of returning high-precision numerics as strings.
`decimal.Decimal`. Emit them as JSON numbers so clients can do
arithmetic without parsing a string. The cost is that values past
~15 significant digits round to the nearest IEEE-754 double —
full-precision callers should `CAST(... AS STRING)` in
`datastore_search_sql` instead.

`bytes` (BigQuery `BYTES` columns) are base64-encoded so the
response stays UTF-8 and round-trippable.
"""
if isinstance(obj, Decimal):
return str(obj)
return float(obj)
if isinstance(obj, bytes):
return base64.b64encode(obj).decode("ascii")
raise TypeError(
Expand Down
14 changes: 9 additions & 5 deletions tests/test_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ def _join(parts: list[bytes]) -> str:

def test_records_object_array_serialises_decimal_and_bytes() -> None:
"""Rows with NUMERIC (Decimal) + BYTES values must stream without
blowing up; Decimal is stringified (preserves precision); bytes is
base64-encoded."""
blowing up; Decimal lands as a JSON number; bytes is base64-encoded."""
rows = iter(
[
("DCL", Decimal("47.82"), b"\x00\xff"),
Expand All @@ -44,15 +43,19 @@ def test_records_object_array_serialises_decimal_and_bytes() -> None:
assert records == [
{
"product_code": "DCL",
"clearing_price_gbp_per_mwh": "47.82",
"clearing_price_gbp_per_mwh": 47.82,
"signature": "AP8=", # b64("\x00\xff")
},
{
"product_code": "DCH",
"clearing_price_gbp_per_mwh": "1E-38",
"clearing_price_gbp_per_mwh": 1e-38,
"signature": "YWJj", # b64(b"abc")
},
]
# Confirm the type, not just the value. A string would already fail the
# equality above (`47.82 == "47.82"` is False), so this assertion exists
# to pin the JSON number contract explicitly.
assert isinstance(records[0]["clearing_price_gbp_per_mwh"], float)


def test_records_array_array_serialises_decimal_and_bytes() -> None:
Expand All @@ -62,7 +65,8 @@ def test_records_array_array_serialises_decimal_and_bytes() -> None:
body = _join(list(_records_array_array(rows)))
records = json.loads(body)

assert records == [["DCL", "47.82", "AP8="]]
assert records == [["DCL", 47.82, "AP8="]]
assert isinstance(records[0][1], float)


def test_unsupported_type_still_raises() -> None:
Expand Down
Loading