From 3ba4905d965291c714b1ae08e51bdb28a6308db3 Mon Sep 17 00:00:00 2001 From: Ricardo Henriques Date: Fri, 17 Apr 2026 18:11:05 +0100 Subject: [PATCH 1/2] fix: --json performance and output correctness (v0.11.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on PR #16: - Load the ID cache once per invocation instead of per-task, eliminating O(N²) scans and N redundant file reads for large task lists. - Switch to click.echo for JSON output so Click's test-capture layer and stdout redirection work correctly. - Emit a stderr hint when the ID cache is empty on a filtered view, so null id values aren't silent. - Drop a redundant conditional on the id field. - Add 8 unit tests covering task-to-dict serialization, cache loading (missing/valid/malformed), and a regression guard against O(N²) reads. Keeps the JSON schema stable (int | null id, uuid always present). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 20 +++++ src/taskrepo/__version__.py | 2 +- src/taskrepo/cli/commands/list.py | 46 ++++++++--- tests/unit/test_list_json.py | 127 ++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 11 deletions(-) create mode 100644 tests/unit/test_list_json.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f941b9..99fb1ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.11.1] - 2026-04-17 + +### Changed + +- **`tsk list --json` performance and correctness** (addresses PR #16 review): + - Load the ID cache once per invocation instead of re-reading it per task. + Previously `get_display_id_from_uuid` was called inside the serialization + loop, producing N file reads and O(N²) linear scans. + - Switched JSON output from direct `sys.stdout.write` to `click.echo` so + Click's output layer (and test capture) sees the output. + - Emit a one-time stderr hint when the ID cache is empty on a filtered view, + so users aren't surprised by `"id": null` values. + - Removed a redundant conditional on the `id` field. + +### Added + +- Unit tests for `--json` output: field coverage, missing-cache → `null id`, + None-date handling, JSON serializability, cache loading (missing/valid/ + malformed), and a regression guard that the cache is loaded only once. + ## [0.11.0] - 2026-04-17 ### Added diff --git a/src/taskrepo/__version__.py b/src/taskrepo/__version__.py index ae6db5f..fee46bd 100644 --- a/src/taskrepo/__version__.py +++ b/src/taskrepo/__version__.py @@ -1 +1 @@ -__version__ = "0.11.0" +__version__ = "0.11.1" diff --git a/src/taskrepo/cli/commands/list.py b/src/taskrepo/cli/commands/list.py index 78cb52b..25e46b9 100644 --- a/src/taskrepo/cli/commands/list.py +++ b/src/taskrepo/cli/commands/list.py @@ -1,7 +1,6 @@ """List command for displaying tasks.""" import json -import sys from pathlib import Path import click @@ -10,18 +9,36 @@ from taskrepo.core.repository import RepositoryManager from taskrepo.tui.display import display_tasks_table from taskrepo.utils.conflict_detection import display_conflict_warning, scan_all_repositories -from taskrepo.utils.id_mapping import get_display_id_from_uuid +from taskrepo.utils.id_mapping import get_cache_path -def _task_to_dict(task) -> dict: +def _load_uuid_to_display_id() -> dict[str, int]: + """Load the ID cache once and return a {uuid: display_id} map. + + Returns an empty dict if the cache file is missing or malformed. Callers + that care about missing cache state should check the returned dict length. + """ + cache_path = get_cache_path() + if not cache_path.exists(): + return {} + try: + with open(cache_path) as f: + cache = json.load(f) + return {entry["uuid"]: int(display_id) for display_id, entry in cache.items()} + except (json.JSONDecodeError, KeyError, ValueError): + return {} + + +def _task_to_dict(task, uuid_to_id: dict[str, int]) -> dict: """Serialize a Task to a JSON-compatible dict. - The ``id`` field is the short numeric display ID (same as the table view); - ``uuid`` is the stable underlying identifier. All dates are ISO 8601. + ``id`` is the short numeric display ID matching the table view, or ``null`` + if the task isn't in the ID cache (e.g. freshly added or on a fresh clone + before ``tsk list`` has populated the cache). ``uuid`` is always the + stable underlying identifier. All dates are ISO 8601. """ - display_id = get_display_id_from_uuid(task.id) return { - "id": display_id if display_id is not None else None, + "id": uuid_to_id.get(task.id), "uuid": task.id, "title": task.title, "status": task.status, @@ -131,9 +148,18 @@ def list_tasks(ctx, repo, project, status, priority, assignee, tag, archived, js save_id_cache(sorted_tasks, rebalance=True) if json_output: - payload = [_task_to_dict(t) for t in sorted_tasks] - json.dump(payload, sys.stdout, indent=2, default=str) - sys.stdout.write("\n") + # Load the UUID→display_id map once to avoid O(n²) disk reads. + uuid_to_id = _load_uuid_to_display_id() + # Emit a one-time hint if the cache is empty on a filtered view — + # every JSON id will be null until `tsk list` is run unfiltered. + if not uuid_to_id and has_filters: + click.echo( + "Note: ID cache is empty; `id` fields will be null. " + "Run `tsk list` without filters once to populate it.", + err=True, + ) + payload = [_task_to_dict(t, uuid_to_id) for t in sorted_tasks] + click.echo(json.dumps(payload, indent=2, default=str)) return # Display sorted tasks diff --git a/tests/unit/test_list_json.py b/tests/unit/test_list_json.py new file mode 100644 index 0000000..9e923ec --- /dev/null +++ b/tests/unit/test_list_json.py @@ -0,0 +1,127 @@ +"""Tests for the --json output mode of `tsk list`.""" + +import json +from datetime import datetime +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import patch + +from taskrepo.cli.commands.list import _load_uuid_to_display_id, _task_to_dict +from taskrepo.core.task import Task + + +def _make_task(uuid: str = "abc-123", **overrides) -> Task: + """Build a Task with sensible defaults for serialization tests.""" + defaults = { + "id": uuid, + "title": "A task", + "status": "pending", + "priority": "H", + "project": "proj", + "assignees": ["@alice"], + "tags": ["urgent"], + "links": ["https://example.com/1"], + "due": datetime(2026, 5, 1, 12, 0), + "created": datetime(2026, 1, 1, 9, 0), + "modified": datetime(2026, 4, 1, 15, 0), + "depends": ["dep-1"], + "parent": "parent-uuid", + "description": "body", + "repo": "work", + } + defaults.update(overrides) + return Task(**defaults) + + +def test_task_to_dict_contains_expected_fields(): + task = _make_task() + uuid_to_id = {"abc-123": 7} + + result = _task_to_dict(task, uuid_to_id) + + assert result["id"] == 7 + assert result["uuid"] == "abc-123" + assert result["title"] == "A task" + assert result["status"] == "pending" + assert result["priority"] == "H" + assert result["repo"] == "work" + assert result["project"] == "proj" + assert result["assignees"] == ["@alice"] + assert result["tags"] == ["urgent"] + assert result["links"] == ["https://example.com/1"] + assert result["depends"] == ["dep-1"] + assert result["parent"] == "parent-uuid" + assert result["description"] == "body" + # All dates ISO 8601 + assert result["due"] == "2026-05-01T12:00:00" + assert result["created"] == "2026-01-01T09:00:00" + assert result["modified"] == "2026-04-01T15:00:00" + + +def test_task_to_dict_missing_uuid_in_cache_yields_null_id(): + task = _make_task(uuid="not-in-cache") + result = _task_to_dict(task, uuid_to_id={"other": 1}) + assert result["id"] is None + assert result["uuid"] == "not-in-cache" + + +def test_task_to_dict_handles_none_dates(): + task = _make_task(due=None) + result = _task_to_dict(task, uuid_to_id={}) + assert result["due"] is None + # created/modified are set by Task.__post_init__ defaults, so they are + # always serializable strings — just assert they round-trip through JSON. + json.dumps(result) + + +def test_task_to_dict_output_is_json_serializable(): + task = _make_task() + result = _task_to_dict(task, uuid_to_id={"abc-123": 1}) + # Should not raise + roundtrip = json.loads(json.dumps(result)) + assert roundtrip["id"] == 1 + + +def test_load_uuid_to_display_id_returns_empty_when_cache_missing(): + with TemporaryDirectory() as tmpdir: + cache_path = Path(tmpdir) / "id_cache.json" + with patch("taskrepo.cli.commands.list.get_cache_path", return_value=cache_path): + assert _load_uuid_to_display_id() == {} + + +def test_load_uuid_to_display_id_parses_cache(): + with TemporaryDirectory() as tmpdir: + cache_path = Path(tmpdir) / "id_cache.json" + cache_path.write_text( + json.dumps( + { + "1": {"uuid": "uuid-a", "repo": "r", "title": "A"}, + "2": {"uuid": "uuid-b", "repo": "r", "title": "B"}, + } + ) + ) + with patch("taskrepo.cli.commands.list.get_cache_path", return_value=cache_path): + result = _load_uuid_to_display_id() + assert result == {"uuid-a": 1, "uuid-b": 2} + + +def test_load_uuid_to_display_id_handles_malformed_cache(): + with TemporaryDirectory() as tmpdir: + cache_path = Path(tmpdir) / "id_cache.json" + cache_path.write_text("not valid json") + with patch("taskrepo.cli.commands.list.get_cache_path", return_value=cache_path): + assert _load_uuid_to_display_id() == {} + + +def test_load_uuid_to_display_id_is_single_read_for_n_tasks(): + """Regression guard: caller must not re-open the cache per task. + + We verify that after `_load_uuid_to_display_id()` returns a map, looking + up N tasks is a plain dict lookup (O(1) per task, no further I/O). + """ + uuid_to_id = {f"uuid-{i}": i for i in range(100)} + # Simulate the serialization loop without any disk access + payload = [_task_to_dict(_make_task(uuid=f"uuid-{i}"), uuid_to_id) for i in range(100)] + assert len(payload) == 100 + assert payload[0]["id"] == 0 + assert payload[99]["id"] == 99 From 35fe8d9def8789be5c8db239ce0a38e1cc10b9b2 Mon Sep 17 00:00:00 2001 From: Ricardo Henriques Date: Fri, 17 Apr 2026 18:18:40 +0100 Subject: [PATCH 2/2] address review feedback on PR #17 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Harden _load_uuid_to_display_id: reject non-dict top-level, filter entries missing `uuid`, catch OSError/TypeError (Copilot comment). - Add one-line comment clarifying the stderr-hint ordering invariant (claude[bot] suggestion). - Fix inaccurate comment about Task.__post_init__ in the test helper (Copilot comment). - Add CLI integration tests via CliRunner covering the exact regression path this release fixes: --json output end-to-end, empty result → [], and stderr hint on filtered empty-cache runs (claude[bot] suggestion). - Add unit test for non-dict top-level cache files. Deferred as follow-up: the save-then-load round-trip on unfiltered runs (noted by claude[bot], non-blocking). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 16 ++++- src/taskrepo/cli/commands/list.py | 18 +++-- tests/unit/test_list_json.py | 116 +++++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99fb1ce..b5df343 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,8 +24,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Unit tests for `--json` output: field coverage, missing-cache → `null id`, - None-date handling, JSON serializability, cache loading (missing/valid/ - malformed), and a regression guard that the cache is loaded only once. + None-date handling, JSON serializability, cache loading (missing / valid / + malformed / non-dict top level), and a regression guard that the cache is + loaded only once per invocation. +- CLI integration tests via `CliRunner` that invoke `list_tasks --json` end + to end, guarding the exact regression path fixed in this release + (`json.dump` → stdout bypass) and the stderr-hint behavior on filtered + runs with an empty ID cache. + +### Fixed + +- `_load_uuid_to_display_id()` now tolerates cache files whose top-level + value is not a dict, entries missing a `uuid` key, and `OSError`/ + `TypeError` during parsing — in all these cases it returns an empty map + rather than propagating the exception. ## [0.11.0] - 2026-04-17 diff --git a/src/taskrepo/cli/commands/list.py b/src/taskrepo/cli/commands/list.py index 25e46b9..d64c749 100644 --- a/src/taskrepo/cli/commands/list.py +++ b/src/taskrepo/cli/commands/list.py @@ -15,8 +15,10 @@ def _load_uuid_to_display_id() -> dict[str, int]: """Load the ID cache once and return a {uuid: display_id} map. - Returns an empty dict if the cache file is missing or malformed. Callers - that care about missing cache state should check the returned dict length. + Returns an empty dict if the cache file is missing, unreadable, or + structurally unexpected (e.g. top-level non-object, entries missing + ``uuid``). Callers that care about missing cache state should check the + returned dict length. """ cache_path = get_cache_path() if not cache_path.exists(): @@ -24,8 +26,14 @@ def _load_uuid_to_display_id() -> dict[str, int]: try: with open(cache_path) as f: cache = json.load(f) - return {entry["uuid"]: int(display_id) for display_id, entry in cache.items()} - except (json.JSONDecodeError, KeyError, ValueError): + if not isinstance(cache, dict): + return {} + return { + entry["uuid"]: int(display_id) + for display_id, entry in cache.items() + if isinstance(entry, dict) and "uuid" in entry + } + except (OSError, json.JSONDecodeError, KeyError, TypeError, ValueError): return {} @@ -149,6 +157,8 @@ def list_tasks(ctx, repo, project, status, priority, assignee, tag, archived, js if json_output: # Load the UUID→display_id map once to avoid O(n²) disk reads. + # On unfiltered runs, save_id_cache() above has already populated + # the cache, so uuid_to_id will not be empty below. uuid_to_id = _load_uuid_to_display_id() # Emit a one-time hint if the cache is empty on a filtered view — # every JSON id will be null until `tsk list` is run unfiltered. diff --git a/tests/unit/test_list_json.py b/tests/unit/test_list_json.py index 9e923ec..652bd19 100644 --- a/tests/unit/test_list_json.py +++ b/tests/unit/test_list_json.py @@ -6,7 +6,11 @@ from tempfile import TemporaryDirectory from unittest.mock import patch -from taskrepo.cli.commands.list import _load_uuid_to_display_id, _task_to_dict +from click.testing import CliRunner + +from taskrepo.cli.commands.list import _load_uuid_to_display_id, _task_to_dict, list_tasks +from taskrepo.core.config import Config +from taskrepo.core.repository import Repository from taskrepo.core.task import Task @@ -69,8 +73,8 @@ def test_task_to_dict_handles_none_dates(): task = _make_task(due=None) result = _task_to_dict(task, uuid_to_id={}) assert result["due"] is None - # created/modified are set by Task.__post_init__ defaults, so they are - # always serializable strings — just assert they round-trip through JSON. + # `created`/`modified` are populated for these helper-built tasks, and + # `_task_to_dict()` should serialize them to JSON-safe string values. json.dumps(result) @@ -125,3 +129,109 @@ def test_load_uuid_to_display_id_is_single_read_for_n_tasks(): assert len(payload) == 100 assert payload[0]["id"] == 0 assert payload[99]["id"] == 99 + + +def test_load_uuid_to_display_id_rejects_non_dict_top_level(): + """Cache whose top level is not a dict (e.g. a list) should not raise.""" + with TemporaryDirectory() as tmpdir: + cache_path = Path(tmpdir) / "id_cache.json" + cache_path.write_text(json.dumps(["not", "a", "dict"])) + with patch("taskrepo.cli.commands.list.get_cache_path", return_value=cache_path): + assert _load_uuid_to_display_id() == {} + + +# --------------------------------------------------------------------------- +# CLI integration tests — these exercise the full `list_tasks` Click command +# via CliRunner, which is the exact path that was broken before (json.dump +# to sys.stdout bypassed Click's output capture). Keep these around as a +# regression guard. +# --------------------------------------------------------------------------- + + +def _build_cli_fixture(tmpdir: str) -> Config: + """Create a parent dir with one task repo and one task, plus a config.""" + parent = Path(tmpdir) / "parent" + parent.mkdir() + repo_path = parent / "tasks-test" + repo_path.mkdir() + repo = Repository(repo_path) + repo.save_task( + Task( + id="11111111-1111-1111-1111-111111111111", + title="CLI fixture task", + status="pending", + priority="H", + project="proj", + tags=["t1"], + ) + ) + + config_path = Path(tmpdir) / "config.yaml" + config = Config(config_path=config_path) + config._data["parent_dir"] = str(parent) + config.save() + return config + + +def test_json_output_is_valid_json_via_cli_runner(): + """Regression guard for the `json.dump(..., sys.stdout)` bypass bug. + + Before v0.11.1, JSON output was written directly to sys.stdout, + which bypassed Click's output capture and could break CliRunner tests. + This test ensures `CliRunner` sees the JSON on stdout. + """ + with TemporaryDirectory() as tmpdir: + config = _build_cli_fixture(tmpdir) + runner = CliRunner() + result = runner.invoke(list_tasks, ["--json"], obj={"config": config}) + + assert result.exit_code == 0, result.output + data = json.loads(result.output) + assert isinstance(data, list) + assert len(data) == 1 + assert data[0]["title"] == "CLI fixture task" + assert data[0]["uuid"] == "11111111-1111-1111-1111-111111111111" + + +def test_json_output_empty_result_is_empty_array(): + """No tasks match → stdout must be the literal JSON array `[]`.""" + with TemporaryDirectory() as tmpdir: + parent = Path(tmpdir) / "parent" + parent.mkdir() + (parent / "tasks-empty").mkdir() + Repository(parent / "tasks-empty") + config_path = Path(tmpdir) / "config.yaml" + config = Config(config_path=config_path) + config._data["parent_dir"] = str(parent) + config.save() + + runner = CliRunner() + result = runner.invoke(list_tasks, ["--json", "--priority", "L"], obj={"config": config}) + + assert result.exit_code == 0 + assert json.loads(result.output) == [] + + +def test_json_output_stderr_hint_on_filtered_empty_cache(): + """When the ID cache is empty and filters are applied, a stderr hint fires.""" + with TemporaryDirectory() as tmpdir: + config = _build_cli_fixture(tmpdir) + # Point the ID cache at a guaranteed-empty path for this test. + empty_cache = Path(tmpdir) / "nonexistent_cache.json" + runner = CliRunner() + with patch("taskrepo.cli.commands.list.get_cache_path", return_value=empty_cache): + result = runner.invoke( + list_tasks, + ["--json", "--priority", "H"], + obj={"config": config}, + ) + + assert result.exit_code == 0 + # CliRunner merges stderr into output by default. + assert "ID cache is empty" in result.output + # Strip the stderr hint line(s) from the captured output to isolate + # the JSON body, then verify it parses. + json_start = result.output.index("[") + data = json.loads(result.output[json_start:]) + assert isinstance(data, list) + assert data[0]["id"] is None # cache empty → id is null