diff --git a/web/README.md b/web/README.md
index e215bc4cc..72ff4cf61 100644
--- a/web/README.md
+++ b/web/README.md
@@ -1,36 +1,40 @@
-This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
+# Course Web App
-## Getting Started
+This Next.js app renders the root-level `s01_*` through `s20_*` course
+chapters. The app does not treat `web/src/data/generated/*.json` or
+`web/public/course-assets/` as the source of truth; those files are generated
+from the repository root.
-First, run the development server:
+## Local Development
```bash
+npm ci
npm run dev
-# or
-yarn dev
-# or
-pnpm dev
-# or
-bun dev
```
-Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
+`npm run dev` runs `npm run extract` first, so the current root chapter README
+files, `code.py` files, and chapter SVG assets are copied into the web app
+before the dev server starts.
-You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
+## Updating Generated Course Data
-This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
+After changing any root-level chapter content, run:
-## Learn More
-
-To learn more about Next.js, take a look at the following resources:
+```bash
+npm run extract
+```
-- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
-- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
+Commit the resulting updates under:
-You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
+- `web/src/data/generated/`
+- `web/public/course-assets/`
-## Deploy on Vercel
+This keeps the web course aligned with the canonical root chapters and prevents
+the site from showing stale code signatures, line counts, documentation text, or
+chapter diagrams.
-The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
+## Production Build
-Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
+```bash
+npm run build
+```
diff --git a/web/public/course-assets/s09_memory/memory-subsystems.en.svg b/web/public/course-assets/s09_memory/memory-subsystems.en.svg
index 914f1b0fe..3dbc3db7f 100644
--- a/web/public/course-assets/s09_memory/memory-subsystems.en.svg
+++ b/web/public/course-assets/s09_memory/memory-subsystems.en.svg
@@ -53,7 +53,7 @@
.memory/ — MEMORY.md index + *.md files (YAML frontmatter: name / description / type)
-
+
read/write
diff --git a/web/public/course-assets/s09_memory/memory-subsystems.ja.svg b/web/public/course-assets/s09_memory/memory-subsystems.ja.svg
index 6bbd6814a..21bc37585 100644
--- a/web/public/course-assets/s09_memory/memory-subsystems.ja.svg
+++ b/web/public/course-assets/s09_memory/memory-subsystems.ja.svg
@@ -53,7 +53,7 @@
.memory/ — MEMORY.md インデックス + *.md ファイル(YAML frontmatter: name / description / type)
-
+
読み/書き
diff --git a/web/public/course-assets/s09_memory/memory-subsystems.svg b/web/public/course-assets/s09_memory/memory-subsystems.svg
index f7673169a..069cb0f3d 100644
--- a/web/public/course-assets/s09_memory/memory-subsystems.svg
+++ b/web/public/course-assets/s09_memory/memory-subsystems.svg
@@ -53,7 +53,7 @@
.memory/ — MEMORY.md 索引 + *.md 文件(YAML frontmatter: name / description / type)
-
+
写入/读取
diff --git a/web/src/data/generated/docs.json b/web/src/data/generated/docs.json
index 4e50b7380..57886370c 100644
--- a/web/src/data/generated/docs.json
+++ b/web/src/data/generated/docs.json
@@ -39,19 +39,19 @@
"version": "s03",
"locale": "en",
"title": "s03: Permission — Check Permissions Before Execution",
- "content": "# s03: Permission — Check Permissions Before Execution\n\ns01 → s02 → `s03` → [s04](/en/s04) → s05 → ... → s20\n> *\"Check permissions before executing\"* — The permission pipeline decides which operations need approval.\n>\n> **Harness Layer**: Permission — a gate before tool execution.\n\n---\n\n## The Problem\n\ns02's Agent has 5 tools. File tools are protected by `safe_path`, but bash is unrestricted. Ask it to \"clean up the project,\" and it might run `rm -rf /`.\n\nSafety can't rely on trusting the model — it needs code: a check before every tool execution.\n\n---\n\n## The Solution\n\n\n\ns02's loop is fully preserved. The only change is inserting `check_permission()` before tool execution — each tool call passes through three gates in a fixed order: hard deny first, then soft ask, and if neither matches, allow.\n\nThe three gates correspond to three decisions:\n\n| Gate | Purpose | On Match |\n|------|---------|----------|\n| 1. Deny List | Permanently forbidden operations (`rm -rf /`, `sudo`) | Denied immediately, not executed |\n| 2. Rule Matching | Context-dependent operations (writing outside workspace, `rm` files) | Passed to Gate 3 |\n| 3. User Approval | After Gate 2 matches, pauses for user confirmation | User decides allow or deny |\n\nNone of the three gates match → execute directly. Most routine operations take this path.\n\n---\n\n## How It Works\n\n\n\n**Gate 1**: A hard deny list. Check first; if matched, return a block message. (Teaching demo: simple string matching is not a reliable security mechanism — command variants and shell expansion can bypass it. 
CC's approach is in the appendix.)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**Gate 2**: Rule matching — describes \"when to ask the user.\" Each rule specifies a tool and a check condition.\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return None\n```\n\n**Gate 3**: After a rule matches, pause for user input.\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**All three gates chained together**, inserted before tool execution:\n\n```python\ndef check_permission(block) -> bool:\n # Gate 1: Hard deny\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # Gate 2 + 3: Rule matching → User approval\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# In agent_loop — s02's loop with just one line added:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← NEW\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 original\n results.append(...)\n```\n\n---\n\n## Changes from s02\n\n| Component | Before (s02) | After (s03) |\n|-----------|-------------|-------------|\n| Security model | None (trust the model) | Three-gate permission pipeline |\n| New functions | — | check_deny_list, check_rules, ask_user, check_permission |\n| Loop | Executes all tools directly | Inserts check_permission() before execution |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\nTry these prompts:\n\n1. `Create a file called test.txt in the current directory` (should pass through)\n2. `Delete all temporary files in /tmp` (bash + rm triggers Gate 2)\n3. `What files are in the current directory?` (read-only, all pass)\n4. `Try to write a file to /etc/something` (writing outside workspace triggers Gate 2)\n\nWhat to watch for: Which operations pass through? Which need your confirmation? Which are denied outright?\n\n---\n\n## What's Next\n\nPermission checks are in place — but every check is hardcoded as `check_permission()` inside the loop. 
What if you want to add logging before and after each tool execution? What if you want to auto-trigger a git commit after certain operations? Scattering this extension logic throughout the loop makes it bloat.\n\n→ s04 Hooks: Add hooks to the loop. Extension logic hangs on hooks; the loop stays clean.\n\n\nDive into CC Source Code
\n\n> The following is based on a review of CC source code `types/permissions.ts`, `utils/permissions/permissions.ts`, `toolExecution.ts`, `utils/permissions/yoloClassifier.ts`, `tools/AgentTool/forkSubagent.ts`.\n\n### 1. PermissionResult: Not 3, but 4\n\nThe teaching version's three gates (deny → ask → allow) don't fully correspond to CC. CC's `PermissionResult` has 4 behaviors (`types/permissions.ts:241-266`):\n\n| behavior | Meaning | Teaching Version Equivalent |\n|----------|---------|---------------------------|\n| `allow` | Allow directly | Gate 3 passes |\n| `deny` | Deny directly | Gate 1 matches |\n| `ask` | Show dialog to user | Gate 2 matches |\n| `passthrough` | Tool doesn't express opinion, passes to generic pipeline | Not in teaching version |\n\n### 2. Production Verification Stages\n\nCC's tool calls don't go through three gates — they go through multiple stages distributed across `checkPermissionsAndCallTool()` (`toolExecution.ts:599-1745`), hooks, `hasPermissionsToUseToolInner()` (`utils/permissions/permissions.ts:1158-1310`), and classifier logic:\n\n1. **Zod schema validation** (`toolExecution.ts:614-680`) — parameter type checking\n2. **validateInput()** (`toolExecution.ts:682-733`) — tool-level semantic validation\n3. **backfillObservableInput()** (`toolExecution.ts:784`) — backfill legacy fields\n4. **PreToolUse hooks** (`toolExecution.ts:800-862`) — hooks can return allow/deny/ask\n5. **resolveHookPermissionDecision()** (`toolExecution.ts:921-931`) — coordinate hook + pipeline decisions\n6. 
**hasPermissionsToUseToolInner()** (`permissions.ts:1158-1310`) — multi-layer rule check:\n - Entire tool disabled by deny rule → `deny`\n - Entire tool flagged by ask rule → `ask`\n - `tool.checkPermissions()` tool's own judgment\n - Tool itself returns deny → `deny`\n - `requiresUserInteraction()` → `ask`\n - Content-related ask rules → `ask` (not bypassable)\n - Security check violation → `ask` (not bypassable)\n - bypassPermissions mode → `allow`\n - Entire tool allowed by allow rule → `allow`\n - passthrough → converted to `ask`\n\n### 3. Deny List: Not One File, but 8 Sources\n\nCC doesn't have a single deny list. Permission rules come from 8 sources (`types/permissions.ts:54-62`):\n\n| Source | Configuration Location |\n|--------|----------------------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | Feature flags |\n| `policySettings` | Enterprise management policy |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | Inline command |\n| `session` | In-session temporary authorization |\n\nEach rule format: `{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`. Rules from multiple sources are merged, with higher-priority sources overriding lower ones (low to high: user < project < local < flag < policy, plus cliArg, command, session).\n\n### 4. What is isDestructive()\n\nIn CC, `isDestructive` (`Tool.ts:405-406`) is **purely for UI display** — showing a `[destructive]` label in the tool list. It doesn't participate in permission decisions. All tools return `false` by default. Only ExitWorktree (on remove) and MCP tools (depending on `annotations.destructiveHint`) override it.\n\n### 5. YoloClassifier (Auto-Approval)\n\nIn CC's auto mode, it doesn't pop a dialog every time. 
`classifyYoloAction` (`utils/permissions/yoloClassifier.ts:1012`) sends the tool call + conversation context to a classifier LLM to judge safety. It first tries acceptEdits mode simulation (`permissions.ts:620-656`, if acceptEdits allows → auto-approve), then checks the safe tool whitelist (`permissions.ts:658-686`), and finally calls the classifier. If the classifier rejects too many times in a row → falls back to manual approval.\n\n### 6. Permission Bubbling\n\nA sub-Agent's (forked via AgentTool) `permissionMode` is set to `'bubble'` (`forkSubagent.ts:50`). This means permission dialogs **bubble up to the parent Agent's terminal**, rather than being silently denied in the sub-Agent. The Bash classifier continues running during this process — displaying the permission dialog while judging in the background whether auto-approval is possible.\n\n### The Teaching Version's Simplification Is Intentional\n\n- Multi-stage pipeline → 3 gates: dramatically lower barrier to understanding\n- 8 rule sources → 1 local DENY_LIST: manageable concept count\n- isDestructive → omitted (teaching version has no UI layer, and it doesn't participate in permission decisions in CC either)\n- YoloClassifier → omitted (depends on additional LLM calls and telemetry)\n- Permission bubbling → omitted (s15 covers multi-Agent)\n\n \n\n\n"
+ "content": "# s03: Permission — Check Permissions Before Execution\n\ns01 → s02 → `s03` → [s04](/en/s04) → s05 → ... → s20\n> *\"Check permissions before executing\"* — The permission pipeline decides which operations need approval.\n>\n> **Harness Layer**: Permission — a gate before tool execution.\n\n---\n\n## The Problem\n\ns02's Agent has 5 tools. File tools are protected by `safe_path`, but bash is unrestricted. Ask it to \"clean up the project,\" and it might run `rm -rf /`.\n\nSafety can't rely on trusting the model — it needs code: a check before every tool execution.\n\n---\n\n## The Solution\n\n\n\ns02's loop is fully preserved. The only change is inserting `check_permission()` before tool execution — each tool call passes through three gates in a fixed order: hard deny first, then soft ask, and if neither matches, allow.\n\nThe three gates correspond to three decisions:\n\n| Gate | Purpose | On Match |\n|------|---------|----------|\n| 1. Deny List | Permanently forbidden operations (`rm -rf /`, `sudo`) | Denied immediately, not executed |\n| 2. Rule Matching | Context-dependent operations (writing outside workspace, `rm` files) | Passed to Gate 3 |\n| 3. User Approval | After Gate 2 matches, pauses for user confirmation | User decides allow or deny |\n\nNone of the three gates match → execute directly. Most routine operations take this path.\n\n---\n\n## How It Works\n\n\n\n**Gate 1**: A hard deny list. Check first; if matched, return a block message. (Teaching demo: simple string matching is not a reliable security mechanism — command variants and shell expansion can bypass it. 
CC's approach is in the appendix.)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**Gate 2**: Rule matching — describes \"when to ask the user.\" Each rule specifies a tool and a check condition.\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return None\n```\n\n**Gate 3**: After a rule matches, pause for user input.\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**All three gates chained together**, inserted before tool execution:\n\n```python\ndef check_permission(block) -> bool:\n # Gate 1: Hard deny\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # Gate 2 + 3: Rule matching → User approval\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# In agent_loop — s02's loop with just one line added:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← NEW\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 original\n results.append(...)\n```\n\n---\n\n## Changes from s02\n\n| Component | Before (s02) | After (s03) |\n|-----------|-------------|-------------|\n| Security model | None (trust the model) | Three-gate permission pipeline |\n| New functions | — | check_deny_list, check_rules, ask_user, check_permission |\n| Loop | Executes all tools directly | Inserts check_permission() before execution |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\nTry these prompts:\n\n1. `Create a file called test.txt in the current directory` (should pass through)\n2. `Delete the file test.txt` (bash + rm triggers Gate 2)\n3. `What files are in the current directory?` (read-only, all pass)\n4. `Try to write a file to /etc/something` (writing outside workspace triggers Gate 2)\n\nWhat to watch for: Which operations pass through? Which need your confirmation? Which are denied outright?\n\n---\n\n## What's Next\n\nPermission checks are in place — but every check is hardcoded as `check_permission()` inside the loop. 
What if you want to add logging before and after each tool execution? What if you want to auto-trigger a git commit after certain operations? Scattering this extension logic throughout the loop makes it bloat.\n\n→ s04 Hooks: Add hooks to the loop. Extension logic hangs on hooks; the loop stays clean.\n\n\nDive into CC Source Code
\n\n> The following is based on a review of CC source code `types/permissions.ts`, `utils/permissions/permissions.ts`, `toolExecution.ts`, `utils/permissions/yoloClassifier.ts`, `tools/AgentTool/forkSubagent.ts`.\n\n### 1. PermissionResult: Not 3, but 4\n\nThe teaching version's three gates (deny → ask → allow) don't fully correspond to CC. CC's `PermissionResult` has 4 behaviors (`types/permissions.ts:241-266`):\n\n| behavior | Meaning | Teaching Version Equivalent |\n|----------|---------|---------------------------|\n| `allow` | Allow directly | Gate 3 passes |\n| `deny` | Deny directly | Gate 1 matches |\n| `ask` | Show dialog to user | Gate 2 matches |\n| `passthrough` | Tool doesn't express opinion, passes to generic pipeline | Not in teaching version |\n\n### 2. Production Verification Stages\n\nCC's tool calls don't go through three gates — they go through multiple stages distributed across `checkPermissionsAndCallTool()` (`toolExecution.ts:599-1745`), hooks, `hasPermissionsToUseToolInner()` (`utils/permissions/permissions.ts:1158-1310`), and classifier logic:\n\n1. **Zod schema validation** (`toolExecution.ts:614-680`) — parameter type checking\n2. **validateInput()** (`toolExecution.ts:682-733`) — tool-level semantic validation\n3. **backfillObservableInput()** (`toolExecution.ts:784`) — backfill legacy fields\n4. **PreToolUse hooks** (`toolExecution.ts:800-862`) — hooks can return allow/deny/ask\n5. **resolveHookPermissionDecision()** (`toolExecution.ts:921-931`) — coordinate hook + pipeline decisions\n6. 
**hasPermissionsToUseToolInner()** (`permissions.ts:1158-1310`) — multi-layer rule check:\n - Entire tool disabled by deny rule → `deny`\n - Entire tool flagged by ask rule → `ask`\n - `tool.checkPermissions()` tool's own judgment\n - Tool itself returns deny → `deny`\n - `requiresUserInteraction()` → `ask`\n - Content-related ask rules → `ask` (not bypassable)\n - Security check violation → `ask` (not bypassable)\n - bypassPermissions mode → `allow`\n - Entire tool allowed by allow rule → `allow`\n - passthrough → converted to `ask`\n\n### 3. Deny List: Not One File, but 8 Sources\n\nCC doesn't have a single deny list. Permission rules come from 8 sources (`types/permissions.ts:54-62`):\n\n| Source | Configuration Location |\n|--------|----------------------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | Feature flags |\n| `policySettings` | Enterprise management policy |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | Inline command |\n| `session` | In-session temporary authorization |\n\nEach rule format: `{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`. Rules from multiple sources are merged, with higher-priority sources overriding lower ones (low to high: user < project < local < flag < policy, plus cliArg, command, session).\n\n### 4. What is isDestructive()\n\nIn CC, `isDestructive` (`Tool.ts:405-406`) is **purely for UI display** — showing a `[destructive]` label in the tool list. It doesn't participate in permission decisions. All tools return `false` by default. Only ExitWorktree (on remove) and MCP tools (depending on `annotations.destructiveHint`) override it.\n\n### 5. YoloClassifier (Auto-Approval)\n\nIn CC's auto mode, it doesn't pop a dialog every time. 
`classifyYoloAction` (`utils/permissions/yoloClassifier.ts:1012`) sends the tool call + conversation context to a classifier LLM to judge safety. It first tries acceptEdits mode simulation (`permissions.ts:620-656`, if acceptEdits allows → auto-approve), then checks the safe tool whitelist (`permissions.ts:658-686`), and finally calls the classifier. If the classifier rejects too many times in a row → falls back to manual approval.\n\n### 6. Permission Bubbling\n\nA sub-Agent's (forked via AgentTool) `permissionMode` is set to `'bubble'` (`forkSubagent.ts:50`). This means permission dialogs **bubble up to the parent Agent's terminal**, rather than being silently denied in the sub-Agent. The Bash classifier continues running during this process — displaying the permission dialog while judging in the background whether auto-approval is possible.\n\n### The Teaching Version's Simplification Is Intentional\n\n- Multi-stage pipeline → 3 gates: dramatically lower barrier to understanding\n- 8 rule sources → 1 local DENY_LIST: manageable concept count\n- isDestructive → omitted (teaching version has no UI layer, and it doesn't participate in permission decisions in CC either)\n- YoloClassifier → omitted (depends on additional LLM calls and telemetry)\n- Permission bubbling → omitted (s15 covers multi-Agent)\n\n \n\n\n"
},
{
"version": "s03",
"locale": "zh",
"title": "s03: Permission — 执行前做权限判断",
- "content": "# s03: Permission — 执行前做权限判断\n\ns01 → s02 → `s03` → [s04](/zh/s04) → s05 → ... → s20\n> *\"工具执行前先做权限判断\"* — 权限管线决定哪些操作需要审批。\n>\n> **Harness 层**: 权限 — 在工具执行前加一道门。\n\n---\n\n## 问题\n\ns02 的 Agent 有 5 个工具。file tools 受 `safe_path` 保护,但 bash 不受限制。让它\"清理一下项目\",可能执行 `rm -rf /`。\n\n安全不能靠信任模型,要靠代码——在工具执行之前做判断。\n\n---\n\n## 解决方案\n\n\n\ns02 的循环完全保留。唯一的变动在工具执行前插入 `check_permission()`——每个工具调用经过三道闸门,顺序固定:硬拒绝优先,软询问次之,都没命中就放行。\n\n三道闸门对应三种决策:\n\n| 闸门 | 作用 | 命中后 |\n|------|------|--------|\n| 1. 拒绝列表 | 永远禁止的操作(`rm -rf /`、`sudo`) | 直接拒绝,不执行 |\n| 2. 规则匹配 | 取决于上下文的操作(写工作区外、`rm` 文件) | 交给闸门 3 |\n| 3. 用户审批 | 闸门 2 命中后,暂停等用户确认 | 用户决定允许或拒绝 |\n\n三道都没命中 → 直接执行。大部分日常操作走这条路。\n\n---\n\n## 工作原理\n\n\n\n**闸门 1**:一张硬拒绝表,先查,命中就返回阻止信息。(教学示意:简单字符串匹配不是可靠安全机制,命令变体和 shell 展开可能绕过。CC 的做法见附录。)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**闸门 2**:规则匹配——描述\"什么时候需要问用户\"。每条规则指定工具和检查条件。\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return None\n```\n\n**闸门 3**:规则命中后,暂停等用户输入。\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**三道闸门串在一起**,插在工具执行之前:\n\n```python\ndef check_permission(block) -> bool:\n # 闸门 1: 硬拒绝\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # 闸门 2 + 3: 规则匹配 → 用户审批\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# 在 agent_loop 中——s02 的循环只加了一行:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← 新增\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 原有\n results.append(...)\n```\n\n---\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|------|-----------|-----------|\n| 安全模型 | 无(信任模型) | 三道闸门权限管线 |\n| 新函数 | — | check_deny_list, check_rules, ask_user, check_permission |\n| 循环 | 直接执行所有工具 | 执行前插入 check_permission() |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\n试试这些 prompt:\n\n1. `Create a file called test.txt in the current directory`(应该直接通过)\n2. `Delete all temporary files in /tmp`(bash + rm 会触发闸门 2)\n3. `What files are in the current directory?`(只读,全部通过)\n4. `Try to write a file to /etc/something`(写工作区外,触发闸门 2)\n\n观察重点:哪些操作直接通过?哪些需要你确认?哪些被直接拒绝?\n\n---\n\n## 接下来\n\n权限检查做了——但每次都在循环里硬编码 `check_permission()`。如果我想在每次工具执行前后加日志?如果想在某些操作后自动触发 git commit?这些扩展逻辑散落在 loop 里,循环很快就会膨胀。\n\ns04 Hooks → 给循环加钩子,扩展逻辑挂在钩子上,循环保持干净。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `types/permissions.ts`、`utils/permissions/permissions.ts`、`toolExecution.ts`、`utils/permissions/yoloClassifier.ts`、`tools/AgentTool/forkSubagent.ts` 的核查。\n\n### 一、PermissionResult:不是 3 种,是 4 种\n\n教学版的三道闸门(deny → ask → allow)和 CC 不完全对应。CC 的 `PermissionResult` 有 4 个 behavior(`types/permissions.ts:241-266`):\n\n| behavior | 含义 | 教学版对应 |\n|----------|------|-----------|\n| `allow` | 直接允许 | 闸门 3 通过 |\n| `deny` | 直接拒绝 | 闸门 1 命中 |\n| `ask` | 弹出对话框问用户 | 闸门 2 命中 |\n| `passthrough` | 工具不表态,交给通用管线决定 | 教学版无 |\n\n### 二、生产版的验证阶段\n\nCC 的工具调用不是经过三道闸门,而是经过多个阶段,分布在 `checkPermissionsAndCallTool()`(`toolExecution.ts:599-1745`)、hooks、`hasPermissionsToUseToolInner()`(`utils/permissions/permissions.ts:1158-1310`)和 classifier 逻辑里:\n\n1. **Zod schema 验证**(`toolExecution.ts:614-680`)— 参数类型检查\n2. **validateInput()**(`toolExecution.ts:682-733`)— 工具级语义验证\n3. **backfillObservableInput()**(`toolExecution.ts:784`)— 补全遗留字段\n4. **PreToolUse hooks**(`toolExecution.ts:800-862`)— 钩子可以返回 allow/deny/ask\n5. **resolveHookPermissionDecision()**(`toolExecution.ts:921-931`)— 协调钩子+管线决策\n6. 
**hasPermissionsToUseToolInner()**(`permissions.ts:1158-1310`)— 多层规则检查:\n - 整个工具被 deny rule 禁用 → `deny`\n - 整个工具被 ask rule 标记 → `ask`\n - `tool.checkPermissions()` 工具自己的判断\n - 工具自己返回 deny → `deny`\n - `requiresUserInteraction()` → `ask`\n - 内容相关的 ask 规则 → `ask`(不可绕过)\n - 安全检查违规 → `ask`(不可绕过)\n - bypassPermissions 模式 → `allow`\n - 整个工具被 allow rule 放行 → `allow`\n - passthrough → 转为 `ask`\n\n### 三、拒绝列表:不是一个文件,是 8 个来源\n\nCC 没有单一的 deny list。权限规则来自 8 个来源(`types/permissions.ts:54-62`):\n\n| 来源 | 配置位置 |\n|------|---------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | Feature flags |\n| `policySettings` | 企业管理策略 |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | 内联命令 |\n| `session` | 会话内临时授权 |\n\n每条规则格式:`{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`。多个来源的规则合并,高优先级来源覆盖低优先级(从低到高:user < project < local < flag < policy,加上 cliArg、command、session)。\n\n### 四、isDestructive() 是什么\n\nCC 中 `isDestructive`(`Tool.ts:405-406`)**纯粹是 UI 展示用的**——在工具列表里显示 `[destructive]` 标签。它不参与权限决策。默认所有工具都返回 `false`。只有 ExitWorktree(remove 时)和 MCP 工具(依赖 `annotations.destructiveHint`)覆写了它。\n\n### 五、YoloClassifier(自动审批)\n\nCC 的 auto 模式下,不会每次都弹对话框。`classifyYoloAction`(`utils/permissions/yoloClassifier.ts:1012`)把工具调用 + 对话上下文发给一个分类器 LLM 判断是否安全。先尝试 acceptEdits 模式模拟(`permissions.ts:620-656`,如果 acceptEdits 允许 → 直接批准),再查安全工具白名单(`permissions.ts:658-686`),最后才调分类器。分类器连续拒绝太多次 → 回退到人工审批。\n\n### 六、权限冒泡\n\n子 Agent(通过 AgentTool fork 出来的)的 `permissionMode` 设为 `'bubble'`(`forkSubagent.ts:50`)。意思是权限弹窗**冒泡到父 Agent 的终端**,而不是在子 Agent 里静默拒绝。Bash 分类器在这个过程中继续跑——给权限对话框显示的同时在后台判断是否可以自动批准。\n\n### 教学版的简化是刻意的\n\n- 多阶段管线 → 3 道闸门:理解门槛大幅降低\n- 8 个规则来源 → 1 个本地 DENY_LIST:概念量可控\n- isDestructive → 忽略(教学版没有 UI 层,CC 里它也不参与权限决策)\n- YoloClassifier → 省略(依赖于额外的 LLM 调用和遥测系统)\n- 权限冒泡 → 省略(s15 才涉及多 Agent)\n\n \n\n\n"
+ "content": "# s03: Permission — 执行前做权限判断\n\ns01 → s02 → `s03` → [s04](/zh/s04) → s05 → ... → s20\n> *\"工具执行前先做权限判断\"* — 权限管线决定哪些操作需要审批。\n>\n> **Harness 层**: 权限 — 在工具执行前加一道门。\n\n---\n\n## 问题\n\ns02 的 Agent 有 5 个工具。file tools 受 `safe_path` 保护,但 bash 不受限制。让它\"清理一下项目\",可能执行 `rm -rf /`。\n\n安全不能靠信任模型,要靠代码——在工具执行之前做判断。\n\n---\n\n## 解决方案\n\n\n\ns02 的循环完全保留。唯一的变动在工具执行前插入 `check_permission()`——每个工具调用经过三道闸门,顺序固定:硬拒绝优先,软询问次之,都没命中就放行。\n\n三道闸门对应三种决策:\n\n| 闸门 | 作用 | 命中后 |\n|------|------|--------|\n| 1. 拒绝列表 | 永远禁止的操作(`rm -rf /`、`sudo`) | 直接拒绝,不执行 |\n| 2. 规则匹配 | 取决于上下文的操作(写工作区外、`rm` 文件) | 交给闸门 3 |\n| 3. 用户审批 | 闸门 2 命中后,暂停等用户确认 | 用户决定允许或拒绝 |\n\n三道都没命中 → 直接执行。大部分日常操作走这条路。\n\n---\n\n## 工作原理\n\n\n\n**闸门 1**:一张硬拒绝表,先查,命中就返回阻止信息。(教学示意:简单字符串匹配不是可靠安全机制,命令变体和 shell 展开可能绕过。CC 的做法见附录。)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**闸门 2**:规则匹配——描述\"什么时候需要问用户\"。每条规则指定工具和检查条件。\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return None\n```\n\n**闸门 3**:规则命中后,暂停等用户输入。\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**三道闸门串在一起**,插在工具执行之前:\n\n```python\ndef check_permission(block) -> bool:\n # 闸门 1: 硬拒绝\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # 闸门 2 + 3: 规则匹配 → 用户审批\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# 在 agent_loop 中——s02 的循环只加了一行:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← 新增\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 原有\n results.append(...)\n```\n\n---\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|------|-----------|-----------|\n| 安全模型 | 无(信任模型) | 三道闸门权限管线 |\n| 新函数 | — | check_deny_list, check_rules, ask_user, check_permission |\n| 循环 | 直接执行所有工具 | 执行前插入 check_permission() |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\n试试这些 prompt:\n\n1. `Create a file called test.txt in the current directory`(应该直接通过)\n2. `Delete the file test.txt`(bash + rm 会触发闸门 2)\n3. `What files are in the current directory?`(只读,全部通过)\n4. `Try to write a file to /etc/something`(写工作区外,触发闸门 2)\n\n观察重点:哪些操作直接通过?哪些需要你确认?哪些被直接拒绝?\n\n---\n\n## 接下来\n\n权限检查做了——但每次都在循环里硬编码 `check_permission()`。如果我想在每次工具执行前后加日志?如果想在某些操作后自动触发 git commit?这些扩展逻辑散落在 loop 里,循环很快就会膨胀。\n\ns04 Hooks → 给循环加钩子,扩展逻辑挂在钩子上,循环保持干净。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `types/permissions.ts`、`utils/permissions/permissions.ts`、`toolExecution.ts`、`utils/permissions/yoloClassifier.ts`、`tools/AgentTool/forkSubagent.ts` 的核查。\n\n### 一、PermissionResult:不是 3 种,是 4 种\n\n教学版的三道闸门(deny → ask → allow)和 CC 不完全对应。CC 的 `PermissionResult` 有 4 个 behavior(`types/permissions.ts:241-266`):\n\n| behavior | 含义 | 教学版对应 |\n|----------|------|-----------|\n| `allow` | 直接允许 | 闸门 3 通过 |\n| `deny` | 直接拒绝 | 闸门 1 命中 |\n| `ask` | 弹出对话框问用户 | 闸门 2 命中 |\n| `passthrough` | 工具不表态,交给通用管线决定 | 教学版无 |\n\n### 二、生产版的验证阶段\n\nCC 的工具调用不是经过三道闸门,而是经过多个阶段,分布在 `checkPermissionsAndCallTool()`(`toolExecution.ts:599-1745`)、hooks、`hasPermissionsToUseToolInner()`(`utils/permissions/permissions.ts:1158-1310`)和 classifier 逻辑里:\n\n1. **Zod schema 验证**(`toolExecution.ts:614-680`)— 参数类型检查\n2. **validateInput()**(`toolExecution.ts:682-733`)— 工具级语义验证\n3. **backfillObservableInput()**(`toolExecution.ts:784`)— 补全遗留字段\n4. **PreToolUse hooks**(`toolExecution.ts:800-862`)— 钩子可以返回 allow/deny/ask\n5. **resolveHookPermissionDecision()**(`toolExecution.ts:921-931`)— 协调钩子+管线决策\n6. 
**hasPermissionsToUseToolInner()**(`permissions.ts:1158-1310`)— 多层规则检查:\n - 整个工具被 deny rule 禁用 → `deny`\n - 整个工具被 ask rule 标记 → `ask`\n - `tool.checkPermissions()` 工具自己的判断\n - 工具自己返回 deny → `deny`\n - `requiresUserInteraction()` → `ask`\n - 内容相关的 ask 规则 → `ask`(不可绕过)\n - 安全检查违规 → `ask`(不可绕过)\n - bypassPermissions 模式 → `allow`\n - 整个工具被 allow rule 放行 → `allow`\n - passthrough → 转为 `ask`\n\n### 三、拒绝列表:不是一个文件,是 8 个来源\n\nCC 没有单一的 deny list。权限规则来自 8 个来源(`types/permissions.ts:54-62`):\n\n| 来源 | 配置位置 |\n|------|---------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | Feature flags |\n| `policySettings` | 企业管理策略 |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | 内联命令 |\n| `session` | 会话内临时授权 |\n\n每条规则格式:`{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`。多个来源的规则合并,高优先级来源覆盖低优先级(从低到高:user < project < local < flag < policy,加上 cliArg、command、session)。\n\n### 四、isDestructive() 是什么\n\nCC 中 `isDestructive`(`Tool.ts:405-406`)**纯粹是 UI 展示用的**——在工具列表里显示 `[destructive]` 标签。它不参与权限决策。默认所有工具都返回 `false`。只有 ExitWorktree(remove 时)和 MCP 工具(依赖 `annotations.destructiveHint`)覆写了它。\n\n### 五、YoloClassifier(自动审批)\n\nCC 的 auto 模式下,不会每次都弹对话框。`classifyYoloAction`(`utils/permissions/yoloClassifier.ts:1012`)把工具调用 + 对话上下文发给一个分类器 LLM 判断是否安全。先尝试 acceptEdits 模式模拟(`permissions.ts:620-656`,如果 acceptEdits 允许 → 直接批准),再查安全工具白名单(`permissions.ts:658-686`),最后才调分类器。分类器连续拒绝太多次 → 回退到人工审批。\n\n### 六、权限冒泡\n\n子 Agent(通过 AgentTool fork 出来的)的 `permissionMode` 设为 `'bubble'`(`forkSubagent.ts:50`)。意思是权限弹窗**冒泡到父 Agent 的终端**,而不是在子 Agent 里静默拒绝。Bash 分类器在这个过程中继续跑——给权限对话框显示的同时在后台判断是否可以自动批准。\n\n### 教学版的简化是刻意的\n\n- 多阶段管线 → 3 道闸门:理解门槛大幅降低\n- 8 个规则来源 → 1 个本地 DENY_LIST:概念量可控\n- isDestructive → 忽略(教学版没有 UI 层,CC 里它也不参与权限决策)\n- YoloClassifier → 省略(依赖于额外的 LLM 调用和遥测系统)\n- 权限冒泡 → 省略(s15 才涉及多 Agent)\n\n \n\n\n"
},
{
"version": "s03",
"locale": "ja",
"title": "s03: Permission — 実行前に権限を判断する",
- "content": "# s03: Permission — 実行前に権限を判断する\n\ns01 → s02 → `s03` → [s04](/ja/s04) → s05 → ... → s20\n> *\"ツール実行前に権限を判断\"* — 権限パイプラインは、どの操作に承認が必要かを決める。\n>\n> **Harness レイヤー**: 権限 — ツール実行前に一つのゲートを追加。\n\n---\n\n## 課題\n\ns02 の Agent は 5 つのツールを持つ。file tools は `safe_path` で保護されるが、bash は制限なし。「プロジェクトを掃除して」と頼むと、`rm -rf /` を実行しかねない。\n\n安全性はモデルを信頼することではなく、コードに頼る — ツール実行前に判断を挟む。\n\n---\n\n## ソリューション\n\n\n\ns02 のループは完全に維持される。唯一の変更は、ツール実行前に `check_permission()` を挿入すること — 各ツール呼び出しは 3 つのゲートを固定順序で通過する:ハード拒否が最優先、次にソフト確認、どちらも一致しなければ許可。\n\n3 つのゲートは 3 つの決定に対応する:\n\n| ゲート | 役割 | 一致時 |\n|--------|------|--------|\n| 1. 拒否リスト | 常に禁止される操作(`rm -rf /`、`sudo`) | 即座に拒否、実行しない |\n| 2. ルールマッチング | コンテキスト依存の操作(作業ディレクトリ外への書き込み、`rm` ファイル) | ゲート 3 へ |\n| 3. ユーザー承認 | ゲート 2 が一致した場合、ユーザー確認を待機 | ユーザーが許可または拒否を決定 |\n\n3 つのゲートのどれにも一致しない → 直接実行。日常の操作の大部分はこの経路を通る。\n\n---\n\n## 仕組み\n\n\n\n**ゲート 1**:ハード拒否リスト。最初に確認し、一致すればブロックメッセージを返す。(教育デモ:単純な文字列マッチングは信頼できるセキュリティ機構ではない — コマンドの変種やシェル展開で回避される可能性がある。CC のアプローチは付録を参照。)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**ゲート 2**:ルールマッチング — 「いつユーザーに聞くべきか」を記述する。各ルールはツールとチェック条件を指定する。\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return 
None\n```\n\n**ゲート 3**:ルールが一致した後、ユーザー入力を待機。\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**3 つのゲートを直列に接続**、ツール実行前に挿入する:\n\n```python\ndef check_permission(block) -> bool:\n # ゲート 1: ハード拒否\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # ゲート 2 + 3: ルールマッチング → ユーザー承認\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# agent_loop で — s02 のループに 1 行追加するだけ:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← 新規\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 既存\n results.append(...)\n```\n\n---\n\n## s02 からの変更点\n\n| コンポーネント | 変更前 (s02) | 変更後 (s03) |\n|---------------|-------------|-------------|\n| セキュリティモデル | なし(モデルを信頼) | 3 ゲート権限パイプライン |\n| 新規関数 | — | check_deny_list, check_rules, ask_user, check_permission |\n| ループ | すべてのツールを直接実行 | 実行前に check_permission() を挿入 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Create a file called test.txt in the current directory`(そのまま通過するはず)\n2. `Delete all temporary files in /tmp`(bash + rm でゲート 2 が発動)\n3. `What files are in the current directory?`(読み取り専用、すべて通過)\n4. `Try to write a file to /etc/something`(作業ディレクトリ外への書き込みでゲート 2 が発動)\n\n観察のポイント:どの操作がそのまま通過するか? どれに確認が必要か? どれが即座に拒否されるか?\n\n---\n\n## 次へ\n\n権限チェックは実装された — しかし、毎回ループ内に `check_permission()` をハードコードしている。ツール実行の前後にログを追加したい場合は? 特定の操作後に自動的に git commit をトリガーしたい場合は? 
このような拡張ロジックがループ内に散らばると、ループはすぐに膨張する。\n\n→ s04 Hooks:ループにフックを追加する。拡張ロジックはフックにぶら下げ、ループはクリーンに保つ。\n\n\nCC ソースコードを深掘り
\n\n> 以下は CC ソースコード `types/permissions.ts`、`utils/permissions/permissions.ts`、`toolExecution.ts`、`utils/permissions/yoloClassifier.ts`、`tools/AgentTool/forkSubagent.ts` の検証に基づく。\n\n### 一、PermissionResult:3 種ではなく、4 種\n\n教育版の 3 つのゲート(deny → ask → allow)は CC と完全には対応しない。CC の `PermissionResult` には 4 つの behavior がある(`types/permissions.ts:241-266`):\n\n| behavior | 意味 | 教育版の対応 |\n|----------|------|-------------|\n| `allow` | 直接許可 | ゲート 3 通過 |\n| `deny` | 直接拒否 | ゲート 1 一致 |\n| `ask` | ユーザーにダイアログを表示 | ゲート 2 一致 |\n| `passthrough` | ツールが意見を表明せず、汎用パイプラインに委ねる | 教育版にはなし |\n\n### 二、本番環境の検証段階\n\nCC のツール呼び出しは 3 つのゲートを通るのではなく、`checkPermissionsAndCallTool()`(`toolExecution.ts:599-1745`)、hooks、`hasPermissionsToUseToolInner()`(`utils/permissions/permissions.ts:1158-1310`)、classifier ロジックに分散する複数の段階を経る:\n\n1. **Zod schema 検証**(`toolExecution.ts:614-680`)— パラメータの型チェック\n2. **validateInput()**(`toolExecution.ts:682-733`)— ツールレベルの意味的検証\n3. **backfillObservableInput()**(`toolExecution.ts:784`)— レガシーフィールドの補完\n4. **PreToolUse hooks**(`toolExecution.ts:800-862`)— フックが allow/deny/ask を返す\n5. **resolveHookPermissionDecision()**(`toolExecution.ts:921-931`)— フック + パイプラインの決定を調整\n6. 
**hasPermissionsToUseToolInner()**(`permissions.ts:1158-1310`)— 多層ルールチェック:\n - ツール全体が deny rule で無効 → `deny`\n - ツール全体が ask rule でマーク → `ask`\n - `tool.checkPermissions()` ツール自身の判断\n - ツール自身が deny を返す → `deny`\n - `requiresUserInteraction()` → `ask`\n - コンテンツ関連の ask ルール → `ask`(バイパス不可)\n - セキュリティチェック違反 → `ask`(バイパス不可)\n - bypassPermissions モード → `allow`\n - ツール全体が allow rule で許可 → `allow`\n - passthrough → `ask` に変換\n\n### 三、拒否リスト:1 つのファイルではなく、8 つのソース\n\nCC には単一の deny list はない。権限ルールは 8 つのソースから来る(`types/permissions.ts:54-62`):\n\n| ソース | 設定場所 |\n|--------|---------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | フィーチャーフラグ |\n| `policySettings` | 企業管理ポリシー |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | インラインコマンド |\n| `session` | セッション内一時承認 |\n\n各ルールの形式:`{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`。複数ソースのルールは統合され、高優先度ソースが低優先度を上書きする(低→高:user < project < local < flag < policy、さらに cliArg、command、session)。\n\n### 四、isDestructive() とは\n\nCC では `isDestructive`(`Tool.ts:405-406`)は**純粋に UI 表示用** — ツール一覧に `[destructive]` ラベルを表示するだけ。権限決定には参加しない。デフォルトではすべてのツールが `false` を返す。ExitWorktree(remove 時)と MCP ツール(`annotations.destructiveHint` に依存)のみがオーバーライドする。\n\n### 五、YoloClassifier(自動承認)\n\nCC の auto モードでは、毎回ダイアログを表示するわけではない。`classifyYoloAction`(`utils/permissions/yoloClassifier.ts:1012`)はツール呼び出し + 会話コンテキストを分類器 LLM に送って安全性を判断する。まず acceptEdits モードのシミュレーションを試み(`permissions.ts:620-656`、acceptEdits が許可すれば → 自動承認)、次にセーフツールホワイトリストを確認し(`permissions.ts:658-686`)、最後に分類器を呼び出す。分類器が連続して拒否しすぎた場合 → 手動承認にフォールバック。\n\n### 六、権限バブリング\n\nサブ Agent(AgentTool 経由でフォークされたもの)の `permissionMode` は `'bubble'` に設定される(`forkSubagent.ts:50`)。これは権限ダイアログが**親 Agent のターミナルにバブルアップ**することを意味する。サブ Agent で黙って拒否されるのではない。Bash 分類器はこの過程で引き続き実行され — 権限ダイアログを表示しつつ、バックグラウンドで自動承認可能か判断する。\n\n### 教育版の単純化は意図的\n\n- 多段階パイプライン → 3 ゲート:理解のハードルが大幅に下がる\n- 8 ルールソース → 1 つのローカル 
DENY_LIST:概念量を制御可能\n- isDestructive → 省略(教育版には UI レイヤーがなく、CC でも権限決定には参加しない)\n- YoloClassifier → 省略(追加の LLM 呼び出しとテレメトリに依存)\n- 権限バブリング → 省略(s15 でマルチ Agent を扱う)\n\n \n\n\n"
+ "content": "# s03: Permission — 実行前に権限を判断する\n\ns01 → s02 → `s03` → [s04](/ja/s04) → s05 → ... → s20\n> *\"ツール実行前に権限を判断\"* — 権限パイプラインは、どの操作に承認が必要かを決める。\n>\n> **Harness レイヤー**: 権限 — ツール実行前に一つのゲートを追加。\n\n---\n\n## 課題\n\ns02 の Agent は 5 つのツールを持つ。file tools は `safe_path` で保護されるが、bash は制限なし。「プロジェクトを掃除して」と頼むと、`rm -rf /` を実行しかねない。\n\n安全性はモデルを信頼することではなく、コードに頼る — ツール実行前に判断を挟む。\n\n---\n\n## ソリューション\n\n\n\ns02 のループは完全に維持される。唯一の変更は、ツール実行前に `check_permission()` を挿入すること — 各ツール呼び出しは 3 つのゲートを固定順序で通過する:ハード拒否が最優先、次にソフト確認、どちらも一致しなければ許可。\n\n3 つのゲートは 3 つの決定に対応する:\n\n| ゲート | 役割 | 一致時 |\n|--------|------|--------|\n| 1. 拒否リスト | 常に禁止される操作(`rm -rf /`、`sudo`) | 即座に拒否、実行しない |\n| 2. ルールマッチング | コンテキスト依存の操作(作業ディレクトリ外への書き込み、`rm` ファイル) | ゲート 3 へ |\n| 3. ユーザー承認 | ゲート 2 が一致した場合、ユーザー確認を待機 | ユーザーが許可または拒否を決定 |\n\n3 つのゲートのどれにも一致しない → 直接実行。日常の操作の大部分はこの経路を通る。\n\n---\n\n## 仕組み\n\n\n\n**ゲート 1**:ハード拒否リスト。最初に確認し、一致すればブロックメッセージを返す。(教育デモ:単純な文字列マッチングは信頼できるセキュリティ機構ではない — コマンドの変種やシェル展開で回避される可能性がある。CC のアプローチは付録を参照。)\n\n```python\nDENY_LIST = [\n \"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\",\n \"mkfs\", \"dd if=\", \"> /dev/sda\",\n]\n\ndef check_deny_list(command: str) -> str | None:\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Blocked: '{pattern}' is on the deny list\"\n return None\n```\n\n**ゲート 2**:ルールマッチング — 「いつユーザーに聞くべきか」を記述する。各ルールはツールとチェック条件を指定する。\n\n```python\nPERMISSION_RULES = [\n {\n \"tools\": [\"write_file\", \"edit_file\"],\n \"check\": lambda args: not (WORKDIR / args.get(\"path\", \"\")).resolve().is_relative_to(WORKDIR),\n \"message\": \"Writing outside workspace\",\n },\n {\n \"tools\": [\"bash\"],\n \"check\": lambda args: any(kw in args.get(\"command\", \"\") for kw in [\"rm \", \"> /etc/\", \"chmod 777\"]),\n \"message\": \"Potentially destructive command\",\n },\n]\n\ndef check_rules(tool_name: str, args: dict) -> str | None:\n for rule in PERMISSION_RULES:\n if tool_name in rule[\"tools\"] and rule[\"check\"](args):\n return rule[\"message\"]\n return 
None\n```\n\n**ゲート 3**:ルールが一致した後、ユーザー入力を待機。\n\n```python\ndef ask_user(tool_name: str, args: dict, reason: str) -> str:\n print(f\"\\n⚠ {reason}\")\n print(f\" Tool: {tool_name}({args})\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n return \"allow\" if choice in (\"y\", \"yes\") else \"deny\"\n```\n\n**3 つのゲートを直列に接続**、ツール実行前に挿入する:\n\n```python\ndef check_permission(block) -> bool:\n # ゲート 1: ハード拒否\n if block.name == \"bash\":\n reason = check_deny_list(block.input.get(\"command\", \"\"))\n if reason:\n print(f\"\\n⛔ {reason}\")\n return False\n\n # ゲート 2 + 3: ルールマッチング → ユーザー承認\n reason = check_rules(block.name, block.input)\n if reason:\n decision = ask_user(block.name, block.input, reason)\n if decision == \"deny\":\n return False\n\n return True\n\n# agent_loop で — s02 のループに 1 行追加するだけ:\nfor block in response.content:\n if block.type == \"tool_use\":\n if not check_permission(block): # ← 新規\n results.append({... \"content\": \"Permission denied.\"})\n continue\n output = TOOL_HANDLERS[block.name](**block.input) # s02 既存\n results.append(...)\n```\n\n---\n\n## s02 からの変更点\n\n| コンポーネント | 変更前 (s02) | 変更後 (s03) |\n|---------------|-------------|-------------|\n| セキュリティモデル | なし(モデルを信頼) | 3 ゲート権限パイプライン |\n| 新規関数 | — | check_deny_list, check_rules, ask_user, check_permission |\n| ループ | すべてのツールを直接実行 | 実行前に check_permission() を挿入 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s03_permission/code.py\n```\n\n以下のプロンプトを試してみよう:\n\n1. `Create a file called test.txt in the current directory`(そのまま通過するはず)\n2. `Delete the file test.txt`(bash + rm でゲート 2 が発動)\n3. `What files are in the current directory?`(読み取り専用、すべて通過)\n4. `Try to write a file to /etc/something`(作業ディレクトリ外への書き込みでゲート 2 が発動)\n\n観察のポイント:どの操作がそのまま通過するか? どれに確認が必要か? どれが即座に拒否されるか?\n\n---\n\n## 次へ\n\n権限チェックは実装された — しかし、毎回ループ内に `check_permission()` をハードコードしている。ツール実行の前後にログを追加したい場合は? 特定の操作後に自動的に git commit をトリガーしたい場合は? 
このような拡張ロジックがループ内に散らばると、ループはすぐに膨張する。\n\n→ s04 Hooks:ループにフックを追加する。拡張ロジックはフックにぶら下げ、ループはクリーンに保つ。\n\n\nCC ソースコードを深掘り
\n\n> 以下は CC ソースコード `types/permissions.ts`、`utils/permissions/permissions.ts`、`toolExecution.ts`、`utils/permissions/yoloClassifier.ts`、`tools/AgentTool/forkSubagent.ts` の検証に基づく。\n\n### 一、PermissionResult:3 種ではなく、4 種\n\n教育版の 3 つのゲート(deny → ask → allow)は CC と完全には対応しない。CC の `PermissionResult` には 4 つの behavior がある(`types/permissions.ts:241-266`):\n\n| behavior | 意味 | 教育版の対応 |\n|----------|------|-------------|\n| `allow` | 直接許可 | ゲート 3 通過 |\n| `deny` | 直接拒否 | ゲート 1 一致 |\n| `ask` | ユーザーにダイアログを表示 | ゲート 2 一致 |\n| `passthrough` | ツールが意見を表明せず、汎用パイプラインに委ねる | 教育版にはなし |\n\n### 二、本番環境の検証段階\n\nCC のツール呼び出しは 3 つのゲートを通るのではなく、`checkPermissionsAndCallTool()`(`toolExecution.ts:599-1745`)、hooks、`hasPermissionsToUseToolInner()`(`utils/permissions/permissions.ts:1158-1310`)、classifier ロジックに分散する複数の段階を経る:\n\n1. **Zod schema 検証**(`toolExecution.ts:614-680`)— パラメータの型チェック\n2. **validateInput()**(`toolExecution.ts:682-733`)— ツールレベルの意味的検証\n3. **backfillObservableInput()**(`toolExecution.ts:784`)— レガシーフィールドの補完\n4. **PreToolUse hooks**(`toolExecution.ts:800-862`)— フックが allow/deny/ask を返す\n5. **resolveHookPermissionDecision()**(`toolExecution.ts:921-931`)— フック + パイプラインの決定を調整\n6. 
**hasPermissionsToUseToolInner()**(`permissions.ts:1158-1310`)— 多層ルールチェック:\n - ツール全体が deny rule で無効 → `deny`\n - ツール全体が ask rule でマーク → `ask`\n - `tool.checkPermissions()` ツール自身の判断\n - ツール自身が deny を返す → `deny`\n - `requiresUserInteraction()` → `ask`\n - コンテンツ関連の ask ルール → `ask`(バイパス不可)\n - セキュリティチェック違反 → `ask`(バイパス不可)\n - bypassPermissions モード → `allow`\n - ツール全体が allow rule で許可 → `allow`\n - passthrough → `ask` に変換\n\n### 三、拒否リスト:1 つのファイルではなく、8 つのソース\n\nCC には単一の deny list はない。権限ルールは 8 つのソースから来る(`types/permissions.ts:54-62`):\n\n| ソース | 設定場所 |\n|--------|---------|\n| `userSettings` | `~/.claude/settings.json` |\n| `projectSettings` | `.claude/settings.json` |\n| `localSettings` | `settings.local.json` |\n| `flagSettings` | フィーチャーフラグ |\n| `policySettings` | 企業管理ポリシー |\n| `cliArg` | `--allowedTools` / `--deniedTools` |\n| `command` | インラインコマンド |\n| `session` | セッション内一時承認 |\n\n各ルールの形式:`{ toolName: \"Bash\", ruleBehavior: \"deny\", ruleContent: \"npm publish:*\" }`。複数ソースのルールは統合され、高優先度ソースが低優先度を上書きする(低→高:user < project < local < flag < policy、さらに cliArg、command、session)。\n\n### 四、isDestructive() とは\n\nCC では `isDestructive`(`Tool.ts:405-406`)は**純粋に UI 表示用** — ツール一覧に `[destructive]` ラベルを表示するだけ。権限決定には参加しない。デフォルトではすべてのツールが `false` を返す。ExitWorktree(remove 時)と MCP ツール(`annotations.destructiveHint` に依存)のみがオーバーライドする。\n\n### 五、YoloClassifier(自動承認)\n\nCC の auto モードでは、毎回ダイアログを表示するわけではない。`classifyYoloAction`(`utils/permissions/yoloClassifier.ts:1012`)はツール呼び出し + 会話コンテキストを分類器 LLM に送って安全性を判断する。まず acceptEdits モードのシミュレーションを試み(`permissions.ts:620-656`、acceptEdits が許可すれば → 自動承認)、次にセーフツールホワイトリストを確認し(`permissions.ts:658-686`)、最後に分類器を呼び出す。分類器が連続して拒否しすぎた場合 → 手動承認にフォールバック。\n\n### 六、権限バブリング\n\nサブ Agent(AgentTool 経由でフォークされたもの)の `permissionMode` は `'bubble'` に設定される(`forkSubagent.ts:50`)。これは権限ダイアログが**親 Agent のターミナルにバブルアップ**することを意味する。サブ Agent で黙って拒否されるのではない。Bash 分類器はこの過程で引き続き実行され — 権限ダイアログを表示しつつ、バックグラウンドで自動承認可能か判断する。\n\n### 教育版の単純化は意図的\n\n- 多段階パイプライン → 3 ゲート:理解のハードルが大幅に下がる\n- 8 ルールソース → 1 つのローカル 
DENY_LIST:概念量を制御可能\n- isDestructive → 省略(教育版には UI レイヤーがなく、CC でも権限決定には参加しない)\n- YoloClassifier → 省略(追加の LLM 呼び出しとテレメトリに依存)\n- 権限バブリング → 省略(s15 でマルチ Agent を扱う)\n\n \n\n\n"
},
{
"version": "s04",
@@ -129,37 +129,37 @@
"version": "s08",
"locale": "en",
"title": "s08: Context Compact — Context Will Fill Up, Have a Way to Make Room",
- "content": "# s08: Context Compact — Context Will Fill Up, Have a Way to Make Room\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/en/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — Four-layer compression pipeline: cheap first, expensive last.\n>\n> **Harness Layer**: Compression — clean memory, unlimited sessions.\n\n---\n\n## The Problem\n\nThe agent is running along, then freezes.\n\nIt has bash, read, write — all the capabilities it needs. But it read a 1000-line file (~4000 tokens), then read 30 more files, ran 20 commands. Every command's output, every file's contents, all pile up in the `messages` list.\n\nThe context window is finite. Once full, the API outright rejects the call: `prompt_too_long`.\n\nWithout compression, an agent simply cannot work on large projects.\n\n---\n\n## The Solution\n\n\n\nThe hook structure, skill loading, and sub-Agent from s07 are preserved, with some tools omitted to focus on compaction. The core change: insert three pre-processors (0 API calls) before each LLM call, trigger an LLM summary (1 API call) when tokens still exceed the threshold, and emergency-trim if the API throws an error.\n\nCore design: cheap first, expensive last.\n\n---\n\n## How It Works\n\n\n\n### L1: snip_compact — Trim Irrelevant Old Conversation\n\nThe agent ran 80 turns of conversation, accumulating 160 `messages`. 
The very first \"help me create hello.py\" is barely relevant to current work, yet it still occupies space.\n\nMessage count exceeds 50 → keep the first 3 (initial context) and the last 47 (current work), trim the middle; the only extra boundary rule is that `assistant(tool_use)` must not be separated from the following `user(tool_result)`:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\nMessages are still trimmed directly; this just adds one boundary guard. `tool_result` content within remaining messages still keeps accumulating — message #34 may still hold 30KB of old file contents. → L2.\n\n### L2: micro_compact — Placeholder for Old Tool Results\n\n\n\nThe agent read 10 files consecutively. The full contents of reads 1–7 are still sitting in context, no longer needed, but hogging large amounts of space.\n\nKeep only the 3 most recent `tool_result` entries intact; replace older ones with a one-line placeholder:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n```\n\nOld results are cleared, but a single new result can be 500KB — one `cat` of a large file can max out the context. → L3.\n\n### L3: tool_result_budget — Persist Large Results to Disk\n\n\n\nThe model read 5 large files in one go; all `tool_result` blocks in the last user message total 500KB.\n\nSum the size of all `tool_result` blocks in the last user message. If over 200KB → sort by size, starting from the largest, persist to `.task_outputs/tool-results/`, keeping only a `` marker + a 2000-character preview in context. The model sees the marker and knows the full content is on disk, re-reading it when needed.\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\nThe first three layers are all plain-text / structural operations — 0 API calls — but they cannot \"understand\" conversation content. Context may still be too large. → L4.\n\n### L4: compact_history — Full LLM Summary\n\n\n\nAll three previous layers have run, but after 30 minutes of continuous work on a huge project, tokens still exceed the threshold.\n\nThree-step process:\n\n1. **Save transcript**: Write the full conversation to `.transcripts/` in JSONL format. The transcript preserves a recoverable record, but the model's active context only contains the summary. For the model's current reasoning, the details are no longer in context. The teaching code does not provide a transcript retrieval tool.\n2. 
**LLM generates summary**: Send conversation history to the LLM, asking it to preserve key information: current goals, important findings, modified files, remaining work, user constraints, etc.\n3. **Replace message list**: All old messages are replaced with a single summary. The teaching version only keeps the summary; the real Claude Code re-attaches some recent files, plans, agent/skill/tool context after compaction.\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # Save full conversation first\n summary = summarize_history(messages) # LLM generates summary\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**Circuit breaker**: After 3 consecutive failures, stop retrying to prevent an infinite loop wasting API calls.\n\n### Reactive: reactive_compact\n\nSometimes the API still returns `prompt_too_long` (413) — when context grows faster than compression triggers.\n\nThis triggers **reactive_compact**: more aggressive than compact_history, it retreats from the tail, but still avoids leaving an orphaned `tool_result`.\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nReactive compact has a retry limit (default 1). If it still fails, an exception is raised instead of looping forever. 
Full error recovery is deferred to s11.\n\n### Putting It All Together\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # Three pre-processors (0 API calls)\n # Order: budget first, so large content is persisted before placeholders\n messages[:] = tool_result_budget(messages) # L3: persist large results\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # Still too much? LLM summary (1 API call)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # Emergency\n reactive_retries += 1\n continue\n raise # retry limit exceeded, raise exception\n # ... tool execution ...\n\n # compact tool: when the model actively calls it, triggers compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n```\n\n**The order must not be swapped.** L3 (budget) runs before L2 (micro) because micro replaces old large tool_results with one-line placeholders — budget must persist the full content before that happens. 
This is why CC source puts `applyToolResultBudget` first.\n\n---\n\n## Changes From s07\n\n| Component | Before (s07) | After (s08) |\n|-----------|-------------|-------------|\n| Context management | None (context grows unbounded) | Four-layer compression pipeline + emergency |\n| New functions | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| Tools | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| Loop | LLM call → tool execution | Three pre-processors before each turn + threshold-triggered compact_history |\n| Design principle | — | Cheap first, expensive last |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md` (read multiple files consecutively, observe L2 compressing old results)\n2. `Read every file in s08_context_compact/` (read a large amount of content at once, observe L3 persisting to disk)\n3. Chat for 20+ turns, observe whether `[auto compact]` or `[reactive compact]` appears\n\nWhat to watch for: After each tool execution, are old `tool_result` entries compressed? When tokens exceed the threshold after extended conversation, is summarization triggered automatically?\n\n---\n\n## What's Next\n\nContext compression lets an agent run for a long time without crashing. But after each compression, the preferences and constraints the user told it are also lost. Can we let the agent selectively remember important things?\n\ns09 Memory → three subsystems: choosing what to remember, extracting key information, consolidating and organizing. Across compressions, across sessions.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code `compact.ts`, `autoCompact.ts`, `microCompact.ts`, and `query.ts`.\n\n### Execution Order Comparison\n\nThe teaching version labels layers L1/L2/L3/L4 for pedagogical clarity, but actual execution order does not match the numbering:\n\n| Dimension | Teaching Version | Claude Code |\n|-----------|-----------------|-------------|\n| Execution order | budget → snip → micro → auto | budget → snip → micro → collapse → auto (`query.ts:379-468`) |\n| snip_compact | Keep head 3 + tail 47 | CC only enables on main thread; implementation not in open-source repo (`HISTORY_SNIP` feature gate), but interface is visible: `snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`, also exposes `SnipTool` for model-initiated snipping. Teaching version's 3/47 are simplified parameters |\n| micro_compact | Text placeholder replacement | Two paths: time-based clears content directly, cached uses API `cache_edits` (legacy path removed) |\n| micro_compact whitelist | By position (most recent 3) | time-based triggers by time threshold; cached triggers by count (`microCompact.ts`) |\n| tool_result_budget | 200KB characters | 200,000 characters (`toolLimits.ts:49`) |\n| compact_history threshold | Character count estimate | Precise tokens: `contextWindow - maxOutputTokens - 13_000` |\n| Summary requirements | 5 categories of info | 9 sections + ``/`` dual tags |\n| Compression prompt | Simple prompt | Double-ended hard guardrails forbidding tool calls |\n| PTL retry | Yes (simplified) | `truncateHeadForPTLRetry()` retreats by message groups (`compact.ts:243-290`) |\n| Post-compaction recovery | None (teaching version only keeps summary) | Auto re-read recent files, plans, agent/skill/tool context |\n| Circuit breaker | 3 times | 3 times (`autoCompact.ts:70`) |\n| Reactive retry | 1 time | CC has more granular tiered retries |\n\n### Execution Order Details\n\nThe real order in CC source `query.ts`:\n\n1. 
`applyToolResultBudget` (L379): persist large results first, ensuring full content is saved\n2. `snipCompact` (L403): trim middle messages\n3. `microcompact` (L414): old result placeholders\n4. `contextCollapse` (L441): independent context management system (not in teaching version)\n5. `autoCompact` (L454): LLM full summary\n\nThe teaching version's budget → snip → micro order matches this. The teaching version does not have the contextCollapse mechanism.\n\n### Full Constant Reference\n\n| Constant | Value | Source File |\n|----------|-------|-------------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| Time micro_compact interval | 60 minutes | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse and sessionMemoryCompact\n\nCC source code has two additional mechanisms not covered in this teaching version:\n\n- **contextCollapse**: An independent context management system that, when enabled, suppresses proactive autocompact (`autoCompact.ts:215-222`), with collapse's commit/blocking flow taking over context management. Manual `/compact` and reactive fallback remain independent paths, unaffected by contextCollapse.\n- **sessionMemoryCompact**: Before compact_history, CC first attempts a lightweight summary using existing session memory (covered in s09) without calling the LLM. This mechanism becomes clearer after learning s09.\n\n### What Does the Compression Prompt Look Like?\n\nCC's compression prompt has two hard requirements:\n\n1. **Absolutely no tool calls**: It begins with `CRITICAL: Respond with TEXT ONLY. 
Do NOT call any tools.`, and appends another REMINDER at the end\n2. **Analyze first, then summarize**: The model must first reason in an `` tag, then output the formal summary in a `` tag. The analysis is stripped during formatting\n\n### Teaching Version Simplifications Are Intentional\n\n- micro_compact uses text placeholders → we don't have API-level `cache_edits` access\n- Tokens estimated via character count → precise tokenizers are out of scope\n- Post-compaction recovery omitted → teaching version only keeps summary, does not auto re-attach files\n- Two auxiliary mechanisms not covered → they fall in the 10% detail category\n\nThe core design principle, cheap first, expensive last, is fully preserved.\n\n
\n\n\n"
+ "content": "# s08: Context Compact — Context Will Fill Up, Have a Way to Make Room\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/en/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — Four-layer compression pipeline: cheap first, expensive last.\n>\n> **Harness Layer**: Compression — clean memory, unlimited sessions.\n\n---\n\n## The Problem\n\nThe agent is running along, then freezes.\n\nIt has bash, read, write — all the capabilities it needs. But it read a 1000-line file (~4000 tokens), then read 30 more files, ran 20 commands. Every command's output, every file's contents, all pile up in the `messages` list.\n\nThe context window is finite. Once full, the API outright rejects the call: `prompt_too_long`.\n\nWithout compression, an agent simply cannot work on large projects.\n\n---\n\n## The Solution\n\n\n\nThe hook structure, skill loading, and sub-Agent from s07 are preserved, with some tools omitted to focus on compaction. The core change: insert three pre-processors (0 API calls) before each LLM call, trigger an LLM summary (1 API call) when tokens still exceed the threshold, and emergency-trim if the API throws an error.\n\nCore design: cheap first, expensive last.\n\n---\n\n## How It Works\n\n\n\n### L1: snip_compact — Trim Irrelevant Old Conversation\n\nThe agent ran 80 turns of conversation, accumulating 160 `messages`. 
The very first \"help me create hello.py\" is barely relevant to current work, yet it still occupies space.\n\nMessage count exceeds 50 → keep the first 3 (initial context) and the last 47 (current work), trim the middle; the only extra boundary rule is that `assistant(tool_use)` must not be separated from the following `user(tool_result)`:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if head_end > 0 and _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\nMessages are still trimmed directly; this just adds one boundary guard. `tool_result` content within remaining messages still keeps accumulating — message #34 may still hold 30KB of old file contents. → L2.\n\n### L2: micro_compact — Placeholder for Old Tool Results\n\n\n\nThe agent read 10 files consecutively. The full contents of reads 1–7 are still sitting in context, no longer needed, but hogging large amounts of space.\n\nKeep only the 3 most recent `tool_result` entries intact; replace older ones with a one-line placeholder:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n```\n\nOld results are cleared, but a single new result can be 500KB — one `cat` of a large file can max out the context. → L3.\n\n### L3: tool_result_budget — Persist Large Results to Disk\n\n\n\nThe model read 5 large files in one go; all `tool_result` blocks in the last user message total 500KB.\n\nSum the size of all `tool_result` blocks in the last user message. If over 200KB → sort by size, starting from the largest, persist to `.task_outputs/tool-results/`, keeping only a `<persisted-output>` marker + a 2000-character preview in context. The model sees the marker and knows the full content is on disk, re-reading it when needed.\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\nThe first three layers are all plain-text / structural operations — 0 API calls — but they cannot \"understand\" conversation content. Context may still be too large. → L4.\n\n### L4: compact_history — Full LLM Summary\n\n\n\nAll three previous layers have run, but after 30 minutes of continuous work on a huge project, tokens still exceed the threshold.\n\nThree-step process:\n\n1. **Save transcript**: Write the full conversation to `.transcripts/` in JSONL format. The transcript preserves a recoverable record, but the model's active context only contains the summary. For the model's current reasoning, the details are no longer in context. The teaching code does not provide a transcript retrieval tool.\n2. 
**LLM generates summary**: Send conversation history to the LLM, asking it to preserve key information: current goals, important findings, modified files, remaining work, user constraints, etc.\n3. **Replace message list**: All old messages are replaced with a single summary. The teaching version only keeps the summary; the real Claude Code re-attaches some recent files, plans, agent/skill/tool context after compaction.\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # Save full conversation first\n summary = summarize_history(messages) # LLM generates summary\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**Circuit breaker**: After 3 consecutive failures, stop retrying to prevent an infinite loop wasting API calls.\n\n### Reactive: reactive_compact\n\nSometimes the API still returns `prompt_too_long` (413) — when context grows faster than compression triggers.\n\nThis triggers **reactive_compact**: more aggressive than compact_history, it retreats from the tail, but still avoids leaving an orphaned `tool_result`.\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n tail_start = max(0, len(messages) - 5)\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n summary = summarize_history(messages[:tail_start])\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nReactive compact has a retry limit (default 1). If it still fails, an exception is raised instead of looping forever. 
Full error recovery is deferred to s11.\n\n### Putting It All Together\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # Three pre-processors (0 API calls)\n # Order: budget first, so large content is persisted before placeholders\n messages[:] = tool_result_budget(messages) # L3: persist large results\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # Still too much? LLM summary (1 API call)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # Emergency\n reactive_retries += 1\n continue\n raise # retry limit exceeded, raise exception\n # ... tool execution ...\n\n # compact tool: when the model actively calls it, triggers compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n```\n\n**The order must not be swapped.** L3 (budget) runs before L2 (micro) because micro replaces old large tool_results with one-line placeholders — budget must persist the full content before that happens. 
This is why CC source puts `applyToolResultBudget` first.\n\n---\n\n## Changes From s07\n\n| Component | Before (s07) | After (s08) |\n|-----------|-------------|-------------|\n| Context management | None (context grows unbounded) | Four-layer compression pipeline + emergency |\n| New functions | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| Tools | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| Loop | LLM call → tool execution | Three pre-processors before each turn + threshold-triggered compact_history |\n| Design principle | — | Cheap first, expensive last |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\nTry these prompts:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md` (read multiple files consecutively, observe L2 compressing old results)\n2. `Read every file in s08_context_compact/` (read a large amount of content at once, observe L3 persisting to disk)\n3. Chat for 20+ turns, observe whether `[auto compact]` or `[reactive compact]` appears\n\nWhat to watch for: After each tool execution, are old `tool_result` entries compressed? When tokens exceed the threshold after extended conversation, is summarization triggered automatically?\n\n---\n\n## What's Next\n\nContext compression lets an agent run for a long time without crashing. But after each compression, the preferences and constraints the user told it are also lost. Can we let the agent selectively remember important things?\n\ns09 Memory → three subsystems: choosing what to remember, extracting key information, consolidating and organizing. Across compressions, across sessions.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code `compact.ts`, `autoCompact.ts`, `microCompact.ts`, and `query.ts`.\n\n### Execution Order Comparison\n\nThe teaching version labels layers L1/L2/L3/L4 for pedagogical clarity, but actual execution order does not match the numbering:\n\n| Dimension | Teaching Version | Claude Code |\n|-----------|-----------------|-------------|\n| Execution order | budget → snip → micro → auto | budget → snip → micro → collapse → auto (`query.ts:379-468`) |\n| snip_compact | Keep head 3 + tail 47 | CC only enables on main thread; implementation not in open-source repo (`HISTORY_SNIP` feature gate), but interface is visible: `snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`, also exposes `SnipTool` for model-initiated snipping. Teaching version's 3/47 are simplified parameters |\n| micro_compact | Text placeholder replacement | Two paths: time-based clears content directly, cached uses API `cache_edits` (legacy path removed) |\n| micro_compact whitelist | By position (most recent 3) | time-based triggers by time threshold; cached triggers by count (`microCompact.ts`) |\n| tool_result_budget | 200KB characters | 200,000 characters (`toolLimits.ts:49`) |\n| compact_history threshold | Character count estimate | Precise tokens: `contextWindow - maxOutputTokens - 13_000` |\n| Summary requirements | 5 categories of info | 9 sections + `<analysis>`/`<summary>` dual tags |\n| Compression prompt | Simple prompt | Double-ended hard guardrails forbidding tool calls |\n| PTL retry | Yes (simplified) | `truncateHeadForPTLRetry()` retreats by message groups (`compact.ts:243-290`) |\n| Post-compaction recovery | None (teaching version only keeps summary) | Auto re-read recent files, plans, agent/skill/tool context |\n| Circuit breaker | 3 times | 3 times (`autoCompact.ts:70`) |\n| Reactive retry | 1 time | CC has more granular tiered retries |\n\n### Execution Order Details\n\nThe real order in CC source `query.ts`:\n\n1. 
`applyToolResultBudget` (L379): persist large results first, ensuring full content is saved\n2. `snipCompact` (L403): trim middle messages\n3. `microcompact` (L414): old result placeholders\n4. `contextCollapse` (L441): independent context management system (not in teaching version)\n5. `autoCompact` (L454): LLM full summary\n\nThe teaching version's budget → snip → micro order matches this. The teaching version does not have the contextCollapse mechanism.\n\n### read_file Trade-off\n\nThe teaching version's `micro_compact` replaces old `tool_result` blocks with placeholders uniformly, including `read_file`. This usually does not affect functional correctness: if the model needs the file contents later, it can read the file again. The cost is an extra tool call and potentially lower prompt cache hit rates.\n\nClaude Code does not solve this with the teaching version's simple rule. It also puts `Read` in the microcompactable tool set, but maintains a separate `readFileState`: repeated reads of unchanged files return `FILE_UNCHANGED_STUB`, and after compaction it restores recently read file contents within a budget (for example, up to 5 files, 5K tokens per file, 50K tokens total). That is a production-level cache and recovery mechanism. 
The teaching version does not expand into that machinery; it keeps the simpler trade-off of compacting old results and re-reading when needed.\n\n### Full Constant Reference\n\n| Constant | Value | Source File |\n|----------|-------|-------------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| Time micro_compact interval | 60 minutes | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse and sessionMemoryCompact\n\nCC source code has two additional mechanisms not covered in this teaching version:\n\n- **contextCollapse**: An independent context management system that, when enabled, suppresses proactive autocompact (`autoCompact.ts:215-222`), with collapse's commit/blocking flow taking over context management. Manual `/compact` and reactive fallback remain independent paths, unaffected by contextCollapse.\n- **sessionMemoryCompact**: Before compact_history, CC first attempts a lightweight summary using existing session memory (covered in s09) without calling the LLM. This mechanism becomes clearer after learning s09.\n\n### What Does the Compression Prompt Look Like?\n\nCC's compression prompt has two hard requirements:\n\n1. **Absolutely no tool calls**: It begins with `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.`, and appends another REMINDER at the end\n2. **Analyze first, then summarize**: The model must first reason in an `<analysis>` tag, then output the formal summary in a `<summary>` tag. 
The analysis is stripped during formatting\n\n### Teaching Version Simplifications Are Intentional\n\n- micro_compact uses text placeholders → we don't have API-level `cache_edits` access\n- read_file is not special-cased → the teaching version accepts re-reading when needed instead of introducing readFileState and post-compaction recovery\n- Tokens estimated via character count → precise tokenizers are out of scope\n- Post-compaction recovery omitted → teaching version only keeps summary, does not auto re-attach files\n- Two auxiliary mechanisms not covered → they fall in the 10% detail category\n\nThe core design principle, cheap first, expensive last, is fully preserved.\n\n
\n\n\n"
},
{
"version": "s08",
"locale": "zh",
"title": "s08: Context Compact — 上下文总会满,要有办法腾地方",
- "content": "# s08: Context Compact — 上下文总会满,要有办法腾地方\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/zh/s09) → s10 → ... → s20\n> *\"上下文总会满, 要有办法腾地方\"* — 四层压缩策略, 便宜的先跑贵的后跑。\n>\n> **Harness 层**: 压缩 — 干净的记忆, 无限的会话。\n\n---\n\n## 问题\n\nAgent 跑着跑着,不动了。\n\n手里有 bash、有 read、有 write,能力是够的。但它读了一个 1000 行的文件(~4000 token),又读了 30 个文件,跑了 20 条命令。每条命令的输出、每个文件的内容,全都堆在 `messages` 列表里。\n\n上下文窗口是有限的。满了之后,API 直接拒绝:`prompt_too_long`。\n\n不压缩,Agent 根本没法在大项目里干活。\n\n---\n\n## 解决方案\n\n\n\n保留 s07 的 hook 结构、技能加载、子 Agent 等骨架,省略部分工具细节以聚焦压缩。核心变动:每轮 LLM 调用前插入三层预处理器(0 API),token 仍超阈值时触发 LLM 摘要(1 API),API 报错时应急裁剪。\n\n核心设计:便宜的先跑,贵的后跑。\n\n---\n\n## 工作原理\n\n\n\n### L1: snip_compact — 裁掉无关的旧对话\n\nAgent 跑了 80 轮对话,`messages` 攒了 160 条。最前面的\"帮我创建 hello.py\"和当前工作几乎无关了,但全占着位置。\n\n消息数超过 50 条 → 保留头部 3 条(初始上下文)和尾部 47 条(当前工作),中间裁掉;唯一额外边界条件是,不能把 `assistant(tool_use)` 和后面的 `user(tool_result)` 拆开:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n裁掉的是消息本身,只是在切口处多做一步保护;剩下的消息里 `tool_result` 内容仍在累积——第 34 条消息里可能躺着 30KB 的旧文件内容。→ L2。\n\n### L2: micro_compact — 旧工具结果占位\n\n\n\nAgent 连续读了 10 个文件。第 1-7 次的完整内容还躺在上下文里,早就不需要了,但占着大量空间。\n\n只保留最近 3 条 `tool_result` 的完整内容,更旧的替换为一行占位符:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in 
tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n旧结果清掉了,但单条新结果可能就有 500KB——一个 `cat` 大文件的输出就能打满上下文。→ L3。\n\n### L3: tool_result_budget — 大结果落盘\n\n\n\n模型一次读了 5 个大文件,单条 user 消息里所有 `tool_result` 加起来 500KB。\n\n统计最后一条 user 消息里所有 `tool_result` 的总大小。超过 200KB → 按大小排序,从最大的开始落盘到 `.task_outputs/tool-results/`,上下文里只留 `` 标记 + 前 2000 字符预览。模型看到标记后知道完整内容在磁盘上,需要时可以重新读。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n前三层都是纯文本/结构操作,0 API 调用,但也无法\"理解\"对话内容。上下文可能仍然太大。→ L4。\n\n### L4: compact_history — LLM 全量摘要\n\n\n\n前三层全跑完了,但在超大项目中连续工作 30 分钟后,token 仍然超过阈值。\n\n三步流程:\n\n1. **保存 transcript**:完整对话写入 `.transcripts/`,JSONL 格式。transcript 保留了可恢复记录,但模型的活跃上下文里只剩摘要。对模型当下推理来说,细节已经不在上下文中了。教学代码没有提供 transcript 检索工具。\n2. **LLM 生成摘要**:把对话历史发给 LLM,要求保留当前目标、重要发现、已改文件、剩余工作、用户约束等关键信息。\n3. 
**替换消息列表**:所有旧消息被替换为一条摘要。教学版只保留摘要;真实 Claude Code 会在 compact 后重新附加部分最近文件、计划、agent/skill/tool 等上下文。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先保存完整对话\n summary = summarize_history(messages) # LLM 生成摘要\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**熔断器**:连续失败 3 次后停止重试,防止死循环浪费 API 调用。\n\n### 应急: reactive_compact\n\n有时候 API 还是返回 `prompt_too_long`(413),上下文增长速度快于压缩触发速度时。\n\n这时触发 **reactive_compact**:比 compact_history 更激进,从尾部回退,但仍要避免留下孤立 `tool_result`。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact 有重试上限(默认 1 次)。再失败就抛出异常,不无限循环。完整的错误恢复逻辑留给 s11。\n\n### 合起来跑\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 三个预处理器(0 API 调用)\n # 顺序:budget 先跑,确保大内容落盘后再做占位和裁剪\n messages[:] = tool_result_budget(messages) # L3: 大结果落盘\n messages[:] = snip_compact(messages) # L1: 裁中间\n messages[:] = micro_compact(messages) # L2: 旧结果占位\n\n # 还不够?LLM 摘要(1 API 调用)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 应急\n reactive_retries += 1\n continue\n raise # 超过重试上限,抛出异常\n # ... 工具执行 ...\n\n # compact 工具:模型主动调用时触发 compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. 
History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 结束当前 turn,用压缩后的上下文开始新一轮\n```\n\n**顺序不能换。** L3(budget)在 L2(micro)前面,因为 micro 会把旧的大 tool_result 替换成一行占位符,budget 必须在那之前把完整内容落盘。这也是为什么 CC 源码把 `applyToolResultBudget` 放在最前面。\n\n---\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|------|-----------|-----------|\n| 上下文管理 | 无(上下文无限膨胀) | 四层压缩管线 + 应急 |\n| 新函数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| 循环 | LLM 调用 → 工具执行 | 每轮前跑三层预处理器 + 阈值触发 compact_history |\n| 设计原则 | — | 便宜的先跑,贵的后跑 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(连续读多个文件,观察 L2 压缩旧结果)\n2. `Read every file in s08_context_compact/`(一次性读大量内容,观察 L3 落盘)\n3. 反复对话 20+ 轮,观察是否出现 `[auto compact]` 或 `[reactive compact]`\n\n观察重点:每次工具执行后,旧 tool_result 是否被压缩?连续对话后 token 超阈值时,是否自动触发了摘要?\n\n---\n\n## 接下来\n\n上下文压缩让 Agent 能跑很久不会崩。但每次压缩后,用户之前告诉它的偏好、约束也跟着丢了。能不能让 Agent 有选择地记住重要的事?\n\ns09 Memory → 三个子系统:选择记什么、提取关键信息、整理巩固。跨压缩、跨会话。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` 的分析。\n\n### 执行顺序对照\n\n教学版为了讲解方便按 L1/L2/L3/L4 编号,但实际执行顺序和编号不完全对应:\n\n| 维度 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 执行顺序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 保留头 3 + 尾 47 | CC 仅主线程启用;实现不在开源仓库中(`HISTORY_SNIP` feature gate),但接口可见:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`,还暴露了 `SnipTool` 工具让模型主动调用。教学版的 3/47 是简化参数 |\n| micro_compact | 文本占位符替换 | 两条路径:time-based 直接清内容,cached 走 API `cache_edits`(legacy path 已移除) |\n| micro_compact 白名单 | 按位置(最近 3 条) | time-based 按时间阈值触发;cached 按计数触发(`microCompact.ts`) |\n| tool_result_budget | 200KB 字符 | 200,000 字符(`toolLimits.ts:49`) |\n| compact_history 阈值 | 字符数估算 | 精确 token:`contextWindow - maxOutputTokens - 13_000` |\n| 摘要要求 | 5 类信息 | 9 个部分 + ``/`` 双标签 |\n| 压缩 prompt | 简单 prompt | 首尾双重防呆禁止调工具 |\n| PTL retry | 有(简化) | `truncateHeadForPTLRetry()` 按消息组回退(`compact.ts:243-290`) |\n| 后压缩恢复 | 无(教学版只保留摘要) | 自动重新读取最近文件、计划、agent/skill/tool 等 |\n| 熔断器 | 3 次 | 3 次(`autoCompact.ts:70`) |\n| reactive 重试 | 1 次 | CC 有更精细的分级重试 |\n\n### 执行顺序详解\n\nCC 源码 `query.ts` 中的真实顺序:\n\n1. `applyToolResultBudget`(L379):先处理大结果,确保完整内容落盘\n2. `snipCompact`(L403):裁中间消息\n3. `microcompact`(L414):旧结果占位\n4. `contextCollapse`(L441):独立的上下文管理系统(教学版无)\n5. 
`autoCompact`(L454):LLM 全量摘要\n\n教学版的 budget → snip → micro 顺序与此一致。教学版没有 contextCollapse 机制。\n\n### 完整常量参考\n\n| 常量 | 值 | 源文件 |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 时间 micro_compact 间隔 | 60 分钟 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse 和 sessionMemoryCompact\n\nCC 源码中还有两个机制本教学版没有展开:\n\n- **contextCollapse**:独立的上下文管理系统,启用时抑制 proactive autocompact(`autoCompact.ts:215-222`),由 collapse 的 commit/blocking 流程接管上下文管理。但 manual `/compact` 和 reactive fallback 仍是独立路径,不受 contextCollapse 影响。\n- **sessionMemoryCompact**:compact_history 之前,CC 会先尝试用已有的 session memory(s09 会讲到)做轻量摘要,不调 LLM。这个机制等学完 s09 之后回头看会更清楚。\n\n### 压缩 prompt 长什么样?\n\nCC 的压缩 prompt 有两个硬性要求:\n\n1. **绝对禁止调用工具**:开头就是 `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.`,末尾还会再 REMINDER 一次\n2. **先分析再总结**:模型需要先在 `` 标签里理清思路,然后在 `` 标签里输出正式摘要。analysis 在格式化时被剥离\n\n### 教学版的简化是刻意的\n\n- micro_compact 用文本占位 → 我们没有 API 层的 `cache_edits` 权限\n- token 用字符数估算 → 精确 tokenizer 不在教学范围内\n- 后压缩恢复省略 → 教学版只保留摘要,不自动重新附加文件\n- 两个辅助机制不展开 → 属于 10% 的细节\n\n核心设计思想,便宜的先跑贵的后跑,完整保留。\n\n
\n\n\n"
+ "content": "# s08: Context Compact — 上下文总会满,要有办法腾地方\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/zh/s09) → s10 → ... → s20\n> *\"上下文总会满, 要有办法腾地方\"* — 四层压缩策略, 便宜的先跑贵的后跑。\n>\n> **Harness 层**: 压缩 — 干净的记忆, 无限的会话。\n\n---\n\n## 问题\n\nAgent 跑着跑着,不动了。\n\n手里有 bash、有 read、有 write,能力是够的。但它读了一个 1000 行的文件(~4000 token),又读了 30 个文件,跑了 20 条命令。每条命令的输出、每个文件的内容,全都堆在 `messages` 列表里。\n\n上下文窗口是有限的。满了之后,API 直接拒绝:`prompt_too_long`。\n\n不压缩,Agent 根本没法在大项目里干活。\n\n---\n\n## 解决方案\n\n\n\n保留 s07 的 hook 结构、技能加载、子 Agent 等骨架,省略部分工具细节以聚焦压缩。核心变动:每轮 LLM 调用前插入三层预处理器(0 API),token 仍超阈值时触发 LLM 摘要(1 API),API 报错时应急裁剪。\n\n核心设计:便宜的先跑,贵的后跑。\n\n---\n\n## 工作原理\n\n\n\n### L1: snip_compact — 裁掉无关的旧对话\n\nAgent 跑了 80 轮对话,`messages` 攒了 160 条。最前面的\"帮我创建 hello.py\"和当前工作几乎无关了,但全占着位置。\n\n消息数超过 50 条 → 保留头部 3 条(初始上下文)和尾部 47 条(当前工作),中间裁掉;唯一额外边界条件是,不能把 `assistant(tool_use)` 和后面的 `user(tool_result)` 拆开:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if head_end > 0 and _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n裁掉的是消息本身,只是在切口处多做一步保护;剩下的消息里 `tool_result` 内容仍在累积——第 34 条消息里可能躺着 30KB 的旧文件内容。→ L2。\n\n### L2: micro_compact — 旧工具结果占位\n\n\n\nAgent 连续读了 10 个文件。第 1-7 次的完整内容还躺在上下文里,早就不需要了,但占着大量空间。\n\n只保留最近 3 条 `tool_result` 的完整内容,更旧的替换为一行占位符:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= 
KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n旧结果清掉了,但单条新结果可能就有 500KB——一个 `cat` 大文件的输出就能打满上下文。→ L3。\n\n### L3: tool_result_budget — 大结果落盘\n\n\n\n模型一次读了 5 个大文件,单条 user 消息里所有 `tool_result` 加起来 500KB。\n\n统计最后一条 user 消息里所有 `tool_result` 的总大小。超过 200KB → 按大小排序,从最大的开始落盘到 `.task_outputs/tool-results/`,上下文里只留 `` 标记 + 前 2000 字符预览。模型看到标记后知道完整内容在磁盘上,需要时可以重新读。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n前三层都是纯文本/结构操作,0 API 调用,但也无法\"理解\"对话内容。上下文可能仍然太大。→ L4。\n\n### L4: compact_history — LLM 全量摘要\n\n\n\n前三层全跑完了,但在超大项目中连续工作 30 分钟后,token 仍然超过阈值。\n\n三步流程:\n\n1. **保存 transcript**:完整对话写入 `.transcripts/`,JSONL 格式。transcript 保留了可恢复记录,但模型的活跃上下文里只剩摘要。对模型当下推理来说,细节已经不在上下文中了。教学代码没有提供 transcript 检索工具。\n2. **LLM 生成摘要**:把对话历史发给 LLM,要求保留当前目标、重要发现、已改文件、剩余工作、用户约束等关键信息。\n3. 
**替换消息列表**:所有旧消息被替换为一条摘要。教学版只保留摘要;真实 Claude Code 会在 compact 后重新附加部分最近文件、计划、agent/skill/tool 等上下文。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先保存完整对话\n summary = summarize_history(messages) # LLM 生成摘要\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**熔断器**:连续失败 3 次后停止重试,防止死循环浪费 API 调用。\n\n### 应急: reactive_compact\n\n有时候 API 还是返回 `prompt_too_long`(413),上下文增长速度快于压缩触发速度时。\n\n这时触发 **reactive_compact**:比 compact_history 更激进,从尾部回退,但仍要避免留下孤立 `tool_result`。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n tail_start = max(0, len(messages) - 5)\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n summary = summarize_history(messages[:tail_start])\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact 有重试上限(默认 1 次)。再失败就抛出异常,不无限循环。完整的错误恢复逻辑留给 s11。\n\n### 合起来跑\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 三个预处理器(0 API 调用)\n # 顺序:budget 先跑,确保大内容落盘后再做占位和裁剪\n messages[:] = tool_result_budget(messages) # L3: 大结果落盘\n messages[:] = snip_compact(messages) # L1: 裁中间\n messages[:] = micro_compact(messages) # L2: 旧结果占位\n\n # 还不够?LLM 摘要(1 API 调用)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 应急\n reactive_retries += 1\n continue\n raise # 超过重试上限,抛出异常\n # ... 工具执行 ...\n\n # compact 工具:模型主动调用时触发 compact_history\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. 
History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 结束当前 turn,用压缩后的上下文开始新一轮\n```\n\n**顺序不能换。** L3(budget)在 L2(micro)前面,因为 micro 会把旧的大 tool_result 替换成一行占位符,budget 必须在那之前把完整内容落盘。这也是为什么 CC 源码把 `applyToolResultBudget` 放在最前面。\n\n---\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|------|-----------|-----------|\n| 上下文管理 | 无(上下文无限膨胀) | 四层压缩管线 + 应急 |\n| 新函数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| 循环 | LLM 调用 → 工具执行 | 每轮前跑三层预处理器 + 阈值触发 compact_history |\n| 设计原则 | — | 便宜的先跑,贵的后跑 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n试试这些 prompt:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(连续读多个文件,观察 L2 压缩旧结果)\n2. `Read every file in s08_context_compact/`(一次性读大量内容,观察 L3 落盘)\n3. 反复对话 20+ 轮,观察是否出现 `[auto compact]` 或 `[reactive compact]`\n\n观察重点:每次工具执行后,旧 tool_result 是否被压缩?连续对话后 token 超阈值时,是否自动触发了摘要?\n\n---\n\n## 接下来\n\n上下文压缩让 Agent 能跑很久不会崩。但每次压缩后,用户之前告诉它的偏好、约束也跟着丢了。能不能让 Agent 有选择地记住重要的事?\n\ns09 Memory → 三个子系统:选择记什么、提取关键信息、整理巩固。跨压缩、跨会话。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` 的分析。\n\n### 执行顺序对照\n\n教学版为了讲解方便按 L1/L2/L3/L4 编号,但实际执行顺序和编号不完全对应:\n\n| 维度 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 执行顺序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 保留头 3 + 尾 47 | CC 仅主线程启用;实现不在开源仓库中(`HISTORY_SNIP` feature gate),但接口可见:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`,还暴露了 `SnipTool` 工具让模型主动调用。教学版的 3/47 是简化参数 |\n| micro_compact | 文本占位符替换 | 两条路径:time-based 直接清内容,cached 走 API `cache_edits`(legacy path 已移除) |\n| micro_compact 白名单 | 按位置(最近 3 条) | time-based 按时间阈值触发;cached 按计数触发(`microCompact.ts`) |\n| tool_result_budget | 200KB 字符 | 200,000 字符(`toolLimits.ts:49`) |\n| compact_history 阈值 | 字符数估算 | 精确 token:`contextWindow - maxOutputTokens - 13_000` |\n| 摘要要求 | 5 类信息 | 9 个部分 + ``/`` 双标签 |\n| 压缩 prompt | 简单 prompt | 首尾双重防呆禁止调工具 |\n| PTL retry | 有(简化) | `truncateHeadForPTLRetry()` 按消息组回退(`compact.ts:243-290`) |\n| 后压缩恢复 | 无(教学版只保留摘要) | 自动重新读取最近文件、计划、agent/skill/tool 等 |\n| 熔断器 | 3 次 | 3 次(`autoCompact.ts:70`) |\n| reactive 重试 | 1 次 | CC 有更精细的分级重试 |\n\n### 执行顺序详解\n\nCC 源码 `query.ts` 中的真实顺序:\n\n1. `applyToolResultBudget`(L379):先处理大结果,确保完整内容落盘\n2. `snipCompact`(L403):裁中间消息\n3. `microcompact`(L414):旧结果占位\n4. `contextCollapse`(L441):独立的上下文管理系统(教学版无)\n5. 
`autoCompact`(L454):LLM 全量摘要\n\n教学版的 budget → snip → micro 顺序与此一致。教学版没有 contextCollapse 机制。\n\n### read_file 的取舍\n\n教学版的 `micro_compact` 会把旧 `tool_result` 统一替换成占位符,包括 `read_file`。这通常不影响功能正确性:如果后续还需要文件内容,模型可以重新读一次。代价是可能多一次工具调用,也可能降低 prompt cache 命中率。\n\nClaude Code 没有用教学版这种简单规则解决这个问题。它把 `Read` 也放进可 microcompact 的工具集合,但同时维护 `readFileState`:重复读取未变化文件时返回 `FILE_UNCHANGED_STUB`,compact 后再按预算恢复最近读过的文件内容(例如最多 5 个文件、每个 5K token、总预算 50K token)。这是生产级实现里的缓存和恢复机制,教学版不展开,保留“压缩旧结果,必要时重新读取”的简单 trade-off。\n\n### 完整常量参考\n\n| 常量 | 值 | 源文件 |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 时间 micro_compact 间隔 | 60 分钟 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse 和 sessionMemoryCompact\n\nCC 源码中还有两个机制本教学版没有展开:\n\n- **contextCollapse**:独立的上下文管理系统,启用时抑制 proactive autocompact(`autoCompact.ts:215-222`),由 collapse 的 commit/blocking 流程接管上下文管理。但 manual `/compact` 和 reactive fallback 仍是独立路径,不受 contextCollapse 影响。\n- **sessionMemoryCompact**:compact_history 之前,CC 会先尝试用已有的 session memory(s09 会讲到)做轻量摘要,不调 LLM。这个机制等学完 s09 之后回头看会更清楚。\n\n### 压缩 prompt 长什么样?\n\nCC 的压缩 prompt 有两个硬性要求:\n\n1. **绝对禁止调用工具**:开头就是 `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.`,末尾还会再 REMINDER 一次\n2. **先分析再总结**:模型需要先在 `` 标签里理清思路,然后在 `` 标签里输出正式摘要。analysis 在格式化时被剥离\n\n### 教学版的简化是刻意的\n\n- micro_compact 用文本占位 → 我们没有 API 层的 `cache_edits` 权限\n- read_file 不特殊处理 → 教学版接受必要时重新读取,避免引入 readFileState 和后压缩恢复机制\n- token 用字符数估算 → 精确 tokenizer 不在教学范围内\n- 后压缩恢复省略 → 教学版只保留摘要,不自动重新附加文件\n- 两个辅助机制不展开 → 属于 10% 的细节\n\n核心设计思想,便宜的先跑贵的后跑,完整保留。\n\n
\n\n\n"
},
{
"version": "s08",
"locale": "ja",
"title": "s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要",
- "content": "# s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/ja/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — 4層圧縮戦略、安価なものを先に、高価なものを後に実行。\n>\n> **Harness レイヤー**: 圧縮 — クリーンな記憶、無限のセッション。\n\n---\n\n## 課題\n\nAgent が動いている途中で、止まってしまう。\n\nbash、read、write は揃っており、能力は十分。しかし 1000 行のファイル(~4000 token)を読み、さらに 30 のファイルを読み、20 のコマンドを実行したとします。各コマンドの出力、各ファイルの内容がすべて `messages` リストに蓄積されます。\n\nコンテキストウィンドウには上限があります。満杯になると、API は即座に拒否します:`prompt_too_long`。\n\n圧縮しなければ、Agent は大規模プロジェクトではまともに動けません。\n\n---\n\n## ソリューション\n\n\n\ns07 のフック構造、スキルロード、サブ Agent の骨格を維持し、圧縮に焦点を当てるため一部のツールは省略。コアの変更点:各 LLM 呼び出し前に 3 層のプリプロセッサ(0 API)を挿入し、token が閾値を超えた場合は LLM 要約(1 API)をトリガー、API エラー時には緊急トリムを実行。\n\nコア設計:安価なものを先に、高価なものを後に。\n\n---\n\n## 仕組み\n\n\n\n### L1: snip_compact — 無関係な古い会話を切り捨て\n\nAgent が 80 ラウンドの会話を実行し、`messages` が 160 件まで溜まった。先頭の「hello.py を作って」は現在の作業とほぼ無関係だが、スペースを占有し続けている。\n\nメッセージ数が 50 を超えた場合 → 先頭 3 件(初期コンテキスト)と末尾 47 件(現在の作業)を保持して中間を切り詰める。ただし切れ目だけは調整し、`assistant(tool_use)` と後続の `user(tool_result)` を分断しない:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n切り捨て自体は単純なままで、境界だけを保護する。残ったメッセージ内の `tool_result` 内容はまだ蓄積され続けている。34 番目のメッセージに 30KB の古いファイル内容が残っているかもしれない。→ L2。\n\n### L2: micro_compact — 古いツール結果をプレースホルダに置換\n\n\n\nAgent が連続して 10 個のファイルを読んだ。1〜7 回目の完全な内容はまだコンテキストに残っており、もう不要だが、大量のスペースを占有している。\n\n直近 3 
件の `tool_result` の完全な内容のみを保持し、それより古いものは 1 行のプレースホルダに置換:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n古い結果はクリーンアップされたが、1 件の新しい結果だけで 500KB の可能性がある。大きなファイルを `cat` するだけでコンテキストがいっぱいになる。→ L3。\n\n### L3: tool_result_budget — 大きな結果をディスクに退避\n\n\n\nモデルが一度に 5 つの大きなファイルを読み、1 つの user メッセージ内の全 `tool_result` の合計が 500KB に達した。\n\n最後の user メッセージ内のすべての `tool_result` の合計サイズを集計。200KB を超えた場合 → サイズ順にソートし、最大のものから順に `.task_outputs/tool-results/` に退避。コンテキストには `` マーカー + 先頭 2000 文字のプレビューのみを残す。モデルはマーカーを見て完全な内容がディスク上にあることを認識し、必要に応じて再読み込みできる。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n最初の 3 層はすべて純粋なテキスト/構造操作(0 API 呼び出し)だが、会話内容を「理解」することはできない。コンテキストがまだ大きすぎる可能性がある。→ L4。\n\n### L4: compact_history — LLM 全量要約\n\n\n\n最初の 3 層がすべて実行されたが、超大規模プロジェクトで 30 分間連続作業すると、token がまだ閾値を超えている。\n\n3 ステップのフロー:\n\n1. **transcript を保存**:完全な会話を `.transcripts/` に JSONL 形式で書き出す。transcript は回復可能な記録として保存されるが、モデルのアクティブなコンテキストには要約しか残らない。モデルの現在の推論にとって、詳細はすでにコンテキストにない。教学コードは transcript 検索ツールを提供しない。\n2. **LLM で要約を生成**:会話履歴を LLM に送り、現在の目標、重要な発見、変更済みファイル、残りの作業、ユーザーの制約などの重要な情報を保持するよう指示。\n3. 
**メッセージリストを置換**:すべての古いメッセージが 1 件の要約に置き換えられる。教学版は要約のみを保持する。実際の Claude Code は compact 後に直近のファイル、計画、agent/skill/tool などのコンテキストを再付加する。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先に完全な会話を保存\n summary = summarize_history(messages) # LLM で要約を生成\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**サーキットブレーカー**:連続 3 回失敗したらリトライを停止し、無限ループによる API 呼び出しの浪費を防止。\n\n### 緊急: reactive_compact\n\nAPI がまだ `prompt_too_long`(413)を返すことがある。コンテキストの増加速度が圧縮のトリガー速度を上回る場合。\n\nこの時 **reactive_compact** がトリガーされる:compact_history よりもさらに積極的だが、末尾を残す際も孤立した `tool_result` を残さないようにする。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n tail_start = max(0, len(messages) - 5)\n if _is_tool_result_message(messages[tail_start]) and _message_has_tool_use(messages[tail_start - 1]):\n tail_start -= 1\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact にはリトライ上限がある(デフォルト 1 回)。さらに失敗した場合は例外をスローし、無限ループしない。完全なエラー回復ロジックは s11 に委ねる。\n\n### 合わせて実行\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 3 つのプリプロセッサ(0 API 呼び出し)\n # 順序:budget を先に実行し、大きな内容をプレースホルダ化する前に退避\n messages[:] = tool_result_budget(messages) # L3: 大きな結果を退避\n messages[:] = snip_compact(messages) # L1: 中間を切り捨て\n messages[:] = micro_compact(messages) # L2: 古い結果をプレースホルダに\n\n # まだ足りない?LLM 要約(1 API 呼び出し)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 緊急対応\n reactive_retries += 1\n continue\n raise # リトライ上限超過、例外をスロー\n # ... 
ツール実行 ...\n\n # compact ツール:モデルが能動的に呼び出した場合、compact_history をトリガー\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 現在のターンを終了し、圧縮後のコンテキストで新しく開始\n```\n\n**順序は変えられない。** L3(budget)が L2(micro)の前に実行される理由:micro は古い大きな tool_result を 1 行のプレースホルダに置換するため、budget はその前に完全な内容を退避させる必要がある。CC ソースが `applyToolResultBudget` を最初に配置する理由も同じ。\n\n---\n\n## s07 からの変更点\n\n| コンポーネント | 変更前 (s07) | 変更後 (s08) |\n|------|-----------|-----------|\n| コンテキスト管理 | なし(コンテキストが無限に膨張) | 4 層圧縮パイプライン + 緊急対応 |\n| 新規関数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| ツール | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| ループ | LLM 呼び出し → ツール実行 | 各ラウンド前に 3 層プリプロセッサを実行 + 閾値で compact_history をトリガー |\n| 設計原則 | — | 安価なものを先に、高価なものを後に |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n以下のプロンプトを試してみてください:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(連続して複数のファイルを読み、L2 の古い結果圧縮を観察)\n2. `Read every file in s08_context_compact/`(一度に大量の内容を読み込み、L3 のディスク退避を観察)\n3. 20+ ラウンドの対話を繰り返し、`[auto compact]` または `[reactive compact]` が表示されるか観察\n\n観察のポイント:ツール実行のたびに、古い tool_result は圧縮されているか?連続対話で token が閾値を超えたとき、要約が自動的にトリガーされたか?\n\n---\n\n## 次へ\n\nコンテキスト圧縮により、Agent は長時間クラッシュせずに動けるようになった。しかし、圧縮のたびにユーザーが以前に伝えた偏好や制約も一緒に失われてしまう。Agent が重要なことを選択的に記憶できるようにできないか?\n\ns09 Memory → 3 つのサブシステム:何を記憶するかの選択、重要情報の抽出、整理と統合。圧縮を越え、セッションを越えて。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` の分析に基づく。\n\n### 実行順序の対応\n\n教学版は説明の便宜上 L1/L2/L3/L4 と番号を振っているが、実際の実行順序は番号と完全には一致しない:\n\n| 項目 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 実行順序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 先頭 3 + 末尾 47 を保持 | CC はメインスレッドのみ有効;実装はオープンソースリポジトリにない(`HISTORY_SNIP` feature gate)、インターフェースは確認可能:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`、`SnipTool` もモデルが能動的に呼び出し可能。教学版の 3/47 は簡略パラメータ |\n| micro_compact | テキストプレースホルダで置換 | 2 つのパス:time-based は直接内容をクリア、cached は API の `cache_edits` を使用(legacy パスは削除済み) |\n| micro_compact ホワイトリスト | 位置による(直近 3 件) | time-based は時間閾値でトリガー、cached はカウントでトリガー(`microCompact.ts`) |\n| tool_result_budget | 200KB 文字 | 200,000 文字(`toolLimits.ts:49`) |\n| compact_history 閾値 | 文字数で推定 | 精密な token 数:`contextWindow - maxOutputTokens - 13_000` |\n| 要約の要求 | 5 種類の情報 | 9 つのセクション + ``/`` デュアルタグ |\n| 圧縮プロンプト | シンプルなプロンプト | 先頭と末尾に二重の安全ガードでツール呼び出しを禁止 |\n| PTL retry | あり(簡略版) | `truncateHeadForPTLRetry()` がメッセージグループ単位でロールバック(`compact.ts:243-290`) |\n| 圧縮後のリカバリ | なし(教学版は要約のみ保持) | 直近のファイル、計画、agent/skill/tool などの自動再付加 |\n| サーキットブレーカー | 3 回 | 3 回(`autoCompact.ts:70`) |\n| reactive リトライ | 1 回 | CC にはより精緻な段階別リトライがある |\n\n### 実行順序の詳細\n\nCC ソース `query.ts` での実際の順序:\n\n1. `applyToolResultBudget`(L379):まず大きな結果を処理し、完全な内容を退避\n2. `snipCompact`(L403):中間メッセージを切り捨て\n3. `microcompact`(L414):古い結果のプレースホルダ化\n4. `contextCollapse`(L441):独立したコンテキスト管理システム(教学版にはなし)\n5. 
`autoCompact`(L454):LLM 全量要約\n\n教学版の budget → snip → micro の順序はこれと一致する。教学版には contextCollapse メカニズムがない。\n\n### 完全な定数リファレンス\n\n| 定数 | 値 | ソースファイル |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 時間ベース micro_compact 間隔 | 60 分 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse と sessionMemoryCompact\n\nCC ソースコードには、この教学版では展開していない 2 つのメカニズムが存在する:\n\n- **contextCollapse**:独立したコンテキスト管理システム。有効時には proactive autocompact を抑制し(`autoCompact.ts:215-222`)、collapse の commit/blocking フローがコンテキスト管理を引き継ぐ。ただし manual `/compact` と reactive fallback は独立パスのままで、contextCollapse の影響を受けない。\n- **sessionMemoryCompact**:compact_history の前に、CC は既存の session memory(s09 で解説)を使った軽量要約を先に試みる。LLM を呼び出さない。このメカニズムは s09 を学んだ後に振り返るとより理解しやすい。\n\n### 圧縮プロンプトの中身\n\nCC の圧縮プロンプトには 2 つの厳格な要件がある:\n\n1. **ツール呼び出しの絶対禁止**:冒頭が `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.` で、末尾にも再度 REMINDER がある\n2. **先に分析してから要約**:モデルはまず `` タグで思考を整理し、その後 `` タグで正式な要約を出力する。analysis はフォーマット時に除去される\n\n### 教学版の簡略化は意図的\n\n- micro_compact でテキストプレースホルダを使用 → API 層の `cache_edits` 権限がないため\n- token を文字数で推定 → 精密な tokenizer は教学の対象外\n- 圧縮後のリカバリを省略 → 教学版は要約のみを保持し、ファイルの自動再付加を行わない\n- 2 つの補助メカニズムを展開しない → 10% の細部に属する\n\nコア設計思想、安価なものを先に高価なものを後に、は完全に保持されている。\n\n
\n\n\n"
+ "content": "# s08: Context Compact — コンテキストはいつか満杯になる、場所を空ける方法が必要\n\ns01 → s02 → s03 → s04 → s05 → s06 → s07 → `s08` → [s09](/ja/s09) → s10 → ... → s20\n> *\"Context will fill up — have a way to make room\"* — 4層圧縮戦略、安価なものを先に、高価なものを後に実行。\n>\n> **Harness レイヤー**: 圧縮 — クリーンな記憶、無限のセッション。\n\n---\n\n## 課題\n\nAgent が動いている途中で、止まってしまう。\n\nbash、read、write は揃っており、能力は十分。しかし 1000 行のファイル(~4000 token)を読み、さらに 30 のファイルを読み、20 のコマンドを実行したとします。各コマンドの出力、各ファイルの内容がすべて `messages` リストに蓄積されます。\n\nコンテキストウィンドウには上限があります。満杯になると、API は即座に拒否します:`prompt_too_long`。\n\n圧縮しなければ、Agent は大規模プロジェクトではまともに動けません。\n\n---\n\n## ソリューション\n\n\n\ns07 のフック構造、スキルロード、サブ Agent の骨格を維持し、圧縮に焦点を当てるため一部のツールは省略。コアの変更点:各 LLM 呼び出し前に 3 層のプリプロセッサ(0 API)を挿入し、token が閾値を超えた場合は LLM 要約(1 API)をトリガー、API エラー時には緊急トリムを実行。\n\nコア設計:安価なものを先に、高価なものを後に。\n\n---\n\n## 仕組み\n\n\n\n### L1: snip_compact — 無関係な古い会話を切り捨て\n\nAgent が 80 ラウンドの会話を実行し、`messages` が 160 件まで溜まった。先頭の「hello.py を作って」は現在の作業とほぼ無関係だが、スペースを占有し続けている。\n\nメッセージ数が 50 を超えた場合 → 先頭 3 件(初期コンテキスト)と末尾 47 件(現在の作業)を保持して中間を切り詰める。ただし切れ目だけは調整し、`assistant(tool_use)` と後続の `user(tool_result)` を分断しない:\n\n```python\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if head_end > 0 and _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n snipped = tail_start - head_end\n placeholder = {\"role\": \"user\", \"content\": f\"[snipped {snipped} messages from conversation middle]\"}\n return messages[:head_end] + [placeholder] + messages[tail_start:]\n```\n\n切り捨て自体は単純なままで、境界だけを保護する。残ったメッセージ内の `tool_result` 内容はまだ蓄積され続けている。34 番目のメッセージに 30KB の古いファイル内容が残っているかもしれない。→ L2。\n\n### L2: micro_compact — 古いツール結果をプレースホルダに置換\n\n\n\nAgent が連続して 
10 個のファイルを読んだ。1〜7 回目の完全な内容はまだコンテキストに残っており、もう不要だが、大量のスペースを占有している。\n\n直近 3 件の `tool_result` の完全な内容のみを保持し、それより古いものは 1 行のプレースホルダに置換:\n\n```python\nKEEP_RECENT_TOOL_RESULTS = 3\n\ndef micro_compact(messages):\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n```\n\n古い結果はクリーンアップされたが、1 件の新しい結果だけで 500KB の可能性がある。大きなファイルを `cat` するだけでコンテキストがいっぱいになる。→ L3。\n\n### L3: tool_result_budget — 大きな結果をディスクに退避\n\n\n\nモデルが一度に 5 つの大きなファイルを読み、1 つの user メッセージ内の全 `tool_result` の合計が 500KB に達した。\n\n最後の user メッセージ内のすべての `tool_result` の合計サイズを集計。200KB を超えた場合 → サイズ順にソートし、最大のものから順に `.task_outputs/tool-results/` に退避。コンテキストには `` マーカー + 先頭 2000 文字のプレビューのみを残す。モデルはマーカーを見て完全な内容がディスク上にあることを認識し、必要に応じて再読み込みできる。\n\n```python\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1]\n blocks = [(i, b) for i, b in enumerate(last[\"content\"])\n if b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for idx, block in ranked:\n if total <= max_bytes:\n break\n block[\"content\"] = persist_large_output(block[\"tool_use_id\"], str(block[\"content\"]))\n total = recalculate_total(blocks)\n return messages\n```\n\n最初の 3 層はすべて純粋なテキスト/構造操作(0 API 呼び出し)だが、会話内容を「理解」することはできない。コンテキストがまだ大きすぎる可能性がある。→ L4。\n\n### L4: compact_history — LLM 全量要約\n\n\n\n最初の 3 層がすべて実行されたが、超大規模プロジェクトで 30 分間連続作業すると、token がまだ閾値を超えている。\n\n3 ステップのフロー:\n\n1. **transcript を保存**:完全な会話を `.transcripts/` に JSONL 形式で書き出す。transcript は回復可能な記録として保存されるが、モデルのアクティブなコンテキストには要約しか残らない。モデルの現在の推論にとって、詳細はすでにコンテキストにない。教学コードは transcript 検索ツールを提供しない。\n2. 
**LLM で要約を生成**:会話履歴を LLM に送り、現在の目標、重要な発見、変更済みファイル、残りの作業、ユーザーの制約などの重要な情報を保持するよう指示。\n3. **メッセージリストを置換**:すべての古いメッセージが 1 件の要約に置き換えられる。教学版は要約のみを保持する。実際の Claude Code は compact 後に直近のファイル、計画、agent/skill/tool などのコンテキストを再付加する。\n\n```python\ndef compact_history(messages):\n transcript_path = write_transcript(messages) # 先に完全な会話を保存\n summary = summarize_history(messages) # LLM で要約を生成\n return [{\"role\": \"user\",\n \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n```\n\n**サーキットブレーカー**:連続 3 回失敗したらリトライを停止し、無限ループによる API 呼び出しの浪費を防止。\n\n### 緊急: reactive_compact\n\nAPI がまだ `prompt_too_long`(413)を返すことがある。コンテキストの増加速度が圧縮のトリガー速度を上回る場合。\n\nこの時 **reactive_compact** がトリガーされる:compact_history よりもさらに積極的だが、末尾を残す際も孤立した `tool_result` を残さないようにする。\n\n```python\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n tail_start = max(0, len(messages) - 5)\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n summary = summarize_history(messages[:tail_start])\n return [{\"role\": \"user\",\n \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n```\n\nreactive compact にはリトライ上限がある(デフォルト 1 回)。さらに失敗した場合は例外をスローし、無限ループしない。完全なエラー回復ロジックは s11 に委ねる。\n\n### 合わせて実行\n\n```python\ndef agent_loop(messages):\n reactive_retries = 0\n while True:\n # 3 つのプリプロセッサ(0 API 呼び出し)\n # 順序:budget を先に実行し、大きな内容をプレースホルダ化する前に退避\n messages[:] = tool_result_budget(messages) # L3: 大きな結果を退避\n messages[:] = snip_compact(messages) # L1: 中間を切り捨て\n messages[:] = micro_compact(messages) # L2: 古い結果をプレースホルダに\n\n # まだ足りない?LLM 要約(1 API 呼び出し)\n if estimate_token_count(messages) > THRESHOLD:\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(...)\n except PromptTooLongError:\n if reactive_retries < MAX_REACTIVE_RETRIES:\n messages[:] = reactive_compact(messages) # 緊急対応\n reactive_retries += 1\n continue\n raise # リトライ上限超過、例外をスロー\n # ... 
ツール実行 ...\n\n # compact ツール:モデルが能動的に呼び出した場合、compact_history をトリガー\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({..., \"content\": \"[Compacted. History summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # 現在のターンを終了し、圧縮後のコンテキストで新しく開始\n```\n\n**順序は変えられない。** L3(budget)が L2(micro)の前に実行される理由:micro は古い大きな tool_result を 1 行のプレースホルダに置換するため、budget はその前に完全な内容を退避させる必要がある。CC ソースが `applyToolResultBudget` を最初に配置する理由も同じ。\n\n---\n\n## s07 からの変更点\n\n| コンポーネント | 変更前 (s07) | 変更後 (s08) |\n|------|-----------|-----------|\n| コンテキスト管理 | なし(コンテキストが無限に膨張) | 4 層圧縮パイプライン + 緊急対応 |\n| 新規関数 | — | snip_compact, micro_compact, tool_result_budget, compact_history, reactive_compact |\n| ツール | bash, read_file, write_file, edit_file, glob, todo_write, task, load_skill (8) | 8 + compact (9) |\n| ループ | LLM 呼び出し → ツール実行 | 各ラウンド前に 3 層プリプロセッサを実行 + 閾値で compact_history をトリガー |\n| 設計原則 | — | 安価なものを先に、高価なものを後に |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s08_context_compact/code.py\n```\n\n以下のプロンプトを試してみてください:\n\n1. `Read the file README.md, then read code.py, then read s01_agent_loop/README.md`(連続して複数のファイルを読み、L2 の古い結果圧縮を観察)\n2. `Read every file in s08_context_compact/`(一度に大量の内容を読み込み、L3 のディスク退避を観察)\n3. 20+ ラウンドの対話を繰り返し、`[auto compact]` または `[reactive compact]` が表示されるか観察\n\n観察のポイント:ツール実行のたびに、古い tool_result は圧縮されているか?連続対話で token が閾値を超えたとき、要約が自動的にトリガーされたか?\n\n---\n\n## 次へ\n\nコンテキスト圧縮により、Agent は長時間クラッシュせずに動けるようになった。しかし、圧縮のたびにユーザーが以前に伝えた偏好や制約も一緒に失われてしまう。Agent が重要なことを選択的に記憶できるようにできないか?\n\ns09 Memory → 3 つのサブシステム:何を記憶するかの選択、重要情報の抽出、整理と統合。圧縮を越え、セッションを越えて。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `compact.ts`、`autoCompact.ts`、`microCompact.ts`、`query.ts` の分析に基づく。\n\n### 実行順序の対応\n\n教学版は説明の便宜上 L1/L2/L3/L4 と番号を振っているが、実際の実行順序は番号と完全には一致しない:\n\n| 項目 | 教学版 | Claude Code |\n|------|--------|-------------|\n| 実行順序 | budget → snip → micro → auto | budget → snip → micro → collapse → auto(`query.ts:379-468`) |\n| snip_compact | 先頭 3 + 末尾 47 を保持 | CC はメインスレッドのみ有効;実装はオープンソースリポジトリにない(`HISTORY_SNIP` feature gate)、インターフェースは確認可能:`snipCompactIfNeeded(messages)` → `{ messages, tokensFreed, boundaryMessage? }`、`SnipTool` もモデルが能動的に呼び出し可能。教学版の 3/47 は簡略パラメータ |\n| micro_compact | テキストプレースホルダで置換 | 2 つのパス:time-based は直接内容をクリア、cached は API の `cache_edits` を使用(legacy パスは削除済み) |\n| micro_compact ホワイトリスト | 位置による(直近 3 件) | time-based は時間閾値でトリガー、cached はカウントでトリガー(`microCompact.ts`) |\n| tool_result_budget | 200KB 文字 | 200,000 文字(`toolLimits.ts:49`) |\n| compact_history 閾値 | 文字数で推定 | 精密な token 数:`contextWindow - maxOutputTokens - 13_000` |\n| 要約の要求 | 5 種類の情報 | 9 つのセクション + ``/`` デュアルタグ |\n| 圧縮プロンプト | シンプルなプロンプト | 先頭と末尾に二重の安全ガードでツール呼び出しを禁止 |\n| PTL retry | あり(簡略版) | `truncateHeadForPTLRetry()` がメッセージグループ単位でロールバック(`compact.ts:243-290`) |\n| 圧縮後のリカバリ | なし(教学版は要約のみ保持) | 直近のファイル、計画、agent/skill/tool などの自動再付加 |\n| サーキットブレーカー | 3 回 | 3 回(`autoCompact.ts:70`) |\n| reactive リトライ | 1 回 | CC にはより精緻な段階別リトライがある |\n\n### 実行順序の詳細\n\nCC ソース `query.ts` での実際の順序:\n\n1. `applyToolResultBudget`(L379):まず大きな結果を処理し、完全な内容を退避\n2. `snipCompact`(L403):中間メッセージを切り捨て\n3. `microcompact`(L414):古い結果のプレースホルダ化\n4. `contextCollapse`(L441):独立したコンテキスト管理システム(教学版にはなし)\n5. 
`autoCompact`(L454):LLM 全量要約\n\n教学版の budget → snip → micro の順序はこれと一致する。教学版には contextCollapse メカニズムがない。\n\n### read_file のトレードオフ\n\n教学版の `micro_compact` は、古い `tool_result` を一律にプレースホルダへ置き換える。`read_file` も例外ではない。これは通常、機能的な正しさには影響しない。後でファイル内容が必要になれば、モデルはもう一度そのファイルを読めばよい。代償は、追加のツール呼び出しが発生し得ることと、prompt cache のヒット率が下がり得ること。\n\nClaude Code は、この問題を教学版のような単純なルールでは処理していない。`Read` も microcompact 可能なツール集合に入れる一方で、別途 `readFileState` を維持している。変更されていないファイルの再読込では `FILE_UNCHANGED_STUB` を返し、compact 後には予算内で直近に読んだファイル内容を復元する(例:最大 5 ファイル、1 ファイル 5K token、合計 50K token)。これは本番実装向けのキャッシュと復元メカニズムである。教学版ではそこまで展開せず、「古い結果を圧縮し、必要なら再読込する」という単純な trade-off を残している。\n\n### 完全な定数リファレンス\n\n| 定数 | 値 | ソースファイル |\n|------|-----|--------|\n| `AUTOCOMPACT_BUFFER_TOKENS` | 13,000 | `autoCompact.ts:62` |\n| `MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES` | 3 | `autoCompact.ts:70` |\n| `MAX_OUTPUT_TOKENS_FOR_SUMMARY` | 20,000 | `autoCompact.ts:30` |\n| `POST_COMPACT_TOKEN_BUDGET` | 50,000 | `compact.ts:123` |\n| `POST_COMPACT_MAX_FILES_TO_RESTORE` | 5 | `compact.ts:122` |\n| `POST_COMPACT_MAX_TOKENS_PER_FILE` | 5,000 | `compact.ts:124` |\n| 時間ベース micro_compact 間隔 | 60 分 | `timeBasedMCConfig.ts` |\n| `MAX_COMPACT_STREAMING_RETRIES` | 2 | `compact.ts:131` |\n\n### contextCollapse と sessionMemoryCompact\n\nCC ソースコードには、この教学版では展開していない 2 つのメカニズムが存在する:\n\n- **contextCollapse**:独立したコンテキスト管理システム。有効時には proactive autocompact を抑制し(`autoCompact.ts:215-222`)、collapse の commit/blocking フローがコンテキスト管理を引き継ぐ。ただし manual `/compact` と reactive fallback は独立パスのままで、contextCollapse の影響を受けない。\n- **sessionMemoryCompact**:compact_history の前に、CC は既存の session memory(s09 で解説)を使った軽量要約を先に試みる。LLM を呼び出さない。このメカニズムは s09 を学んだ後に振り返るとより理解しやすい。\n\n### 圧縮プロンプトの中身\n\nCC の圧縮プロンプトには 2 つの厳格な要件がある:\n\n1. **ツール呼び出しの絶対禁止**:冒頭が `CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.` で、末尾にも再度 REMINDER がある\n2. 
**先に分析してから要約**:モデルはまず `<analysis>` タグで思考を整理し、その後 `<summary>` タグで正式な要約を出力する。analysis はフォーマット時に除去される\n\n### 教学版の簡略化は意図的\n\n- micro_compact でテキストプレースホルダを使用 → API 層の `cache_edits` 権限がないため\n- read_file は特別扱いしない → 教学版では必要時の再読込を受け入れ、readFileState と圧縮後復元の仕組みを導入しない\n- token を文字数で推定 → 精密な tokenizer は教学の対象外\n- 圧縮後のリカバリを省略 → 教学版は要約のみを保持し、ファイルの自動再付加を行わない\n- 2 つの補助メカニズムを展開しない → 10% の細部に属する\n\nコア設計思想、安価なものを先に高価なものを後に、は完全に保持されている。\n\n
\n\n\n"
},
{
"version": "s09",
"locale": "en",
"title": "s09: Memory — Compression Loses Details, Keep a Layer That Doesn't",
- "content": "# s09: Memory — Compression Loses Details, Keep a Layer That Doesn't\n\ns01 → ... → s07 → s08 → `s09` → [s10](/en/s10) → s11 → ... → s20\n> *\"Compression loses details, keep a layer that doesn't\"* — File store + index + on-demand loading, across compactions, across sessions.\n>\n> **Harness Layer**: Memory — knowledge that survives compaction and sessions.\n\n---\n\n## The Problem\n\ns08's autoCompact preserves current goals, remaining work, and user constraints in the summary, but details get lost: \"use tabs not spaces\" might get simplified to \"user has code style preferences\". And when you start a new session, even the summary is gone.\n\nLLMs have no persistent state; all information lives in the context window. When context fills up, it gets compressed, and compression is lossy. What's needed is a storage layer that doesn't participate in compression and persists across sessions.\n\n---\n\n## The Solution\n\n\n\nThe s08 compression pipeline is preserved, focusing on memory. Storage uses the filesystem: a `.memory/` directory where each memory is a `.md` file with YAML frontmatter (`name` / `description` / `type`). When files accumulate, an index is needed: `MEMORY.md` holds one link per line and gets injected into the SYSTEM.\n\nKey design: the index stays in SYSTEM prompt (cacheable by prompt cache), file content is injected on demand (matched by filename/description to the current conversation, without breaking the cache). Writing has two paths: the user explicitly says \"remember\", or extraction runs in the background after each turn. 
When files accumulate, periodic consolidation deduplicates.\n\nFour memory types, each answering a different question:\n\n| Type | Answers | Example |\n|------|---------|---------|\n| user | Who you are | \"Use tabs not spaces\" |\n| feedback | How to work | \"Don't mock the database\" |\n| project | What's happening | \"Auth rewrite is compliance-driven\" |\n| reference | Where to find things | \"Pipeline bugs are in Linear INGEST\" |\n\n---\n\n## How It Works\n\n\n\n### Storage: Markdown Files + Index\n\nEach memory is a `.md` file with YAML frontmatter for metadata:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` is the index, one link per line:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\nWriting a new memory automatically rebuilds the index:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### Loading: Two Paths\n\n**Path 1: Index in SYSTEM.** `build_system()` reads `MEMORY.md` every turn and injects the memory catalog into the SYSTEM prompt. The index in SYSTEM can be cached by prompt cache, avoiding resending it every turn.\n\n**Path 2: Relevant memories on demand.** Before each LLM call, `load_memories()` sends the recent conversation and the memory catalog (name + description) to the LLM as a lightweight side-query, selects relevant filenames, then reads and injects their contents. 
Capped at 5 to control cost.\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nIf the side-query fails (API error, JSON parse failure), it falls back to keyword matching on name + description.\n\n### Writing: Extraction After Each Turn\n\nUsers don't always say \"remember this\". Preferences are usually scattered across normal dialogue: \"tabs are better than spaces\", \"let's use single quotes from now on\".\n\n`extract_memories()` runs when each turn ends, triggered when the model stops without a tool_use (indicating the conversation has reached a natural break):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # Extract new memories from recent dialogue\n consolidate_memories() # Check if consolidation is needed\n return\n```\n\nBefore extraction, existing memories are checked to avoid duplicates. 
The extraction prompt asks the LLM to return a JSON array of `{name, type, description, body}`, writing files only when genuinely new information is found.\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### Consolidation: Low-Frequency Deduplication\n\nMemory files accumulate. `consolidate_memories()` triggers when the file count reaches a threshold (default 10), asking the LLM to deduplicate, merge contradictions, and prune stale memories:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # Too few, not worth consolidating\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC calls this process **Dream**, with four gates in practice: time interval, scan throttle, session count, file lock. The teaching version simplifies to a file-count threshold.\n\n### What Memory Stores\n\nMemory stores information that remains useful across sessions: user preferences, recurring feedback, project background, common entry points, and investigation clues. It focuses on \"what will be useful later\" and brings that information back through an index plus on-demand loading.\n\nSession memory focuses on continuity inside one session: what context should survive after compaction. 
The two work together: Memory handles long-term knowledge; session memory handles the current session across compaction.\n\n---\n\n## Changes From s08\n\n| Component | Before (s08) | After (s09) |\n|-----------|-------------|-------------|\n| Memory capability | None (preferences degrade with compaction) | Storage + loading + extraction + consolidation |\n| New functions | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| Storage | — | .memory/MEMORY.md index + .memory/*.md files |\n| Tools | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| Loop | Only compression each turn | Memory injection + compression + post-turn extraction + periodic consolidation |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\nTry these prompts (enter across multiple turns, observe memory accumulation and loading):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py` (observe whether the Agent uses tabs)\n3. `What did I tell you about my preferences?` (observe whether the Agent remembers)\n4. `I also prefer single quotes over double quotes for strings.`\n\nWhat to watch for: Does `[Memory: extracted N new memories]` appear after each turn? Are `.md` files generated in `.memory/`? Is `MEMORY.md` index updated? Does the Agent automatically load previous memories in new conversations?\n\n---\n\n## What's Next\n\nMemory, compression, and tools are all in place. But the system prompt is still a hardcoded string. Adding a new tool means manually adding a description; switching projects means rewriting the whole prompt. Prompts should be assembled at runtime.\n\ns10 System Prompt → segments + runtime assembly. Different projects, different tools, different prompts.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code under `src/` in `memdir/`, `services/`, `utils/`, `query/`. Line numbers verified against source.\n\n### Source Code Paths\n\n| File | Lines | Responsibility |\n|------|-------|---------------|\n| `memdir/memdir.ts` | 507 | Core: MEMORY.md definition (`34-38`), memory behavior instructions distinguishing memory/plan/tasks (`199-266`), `loadMemoryPrompt()` three paths (`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query memory selection (`18-24` system prompt, `97-122` call logic) |\n| `memdir/memoryTypes.ts` | 271 | Type definitions, frontmatter fields |\n| `memdir/memoryScan.ts` | — | Scan .md files, exclude MEMORY.md, read frontmatter, max 200 files, sorted by mtime desc (`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | Forked agent extraction, restricted permissions, `skipTranscript: true`, `maxTurns: 5` (`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream consolidation, four-layer gating (`63-66` defaults, `130-190` gating, `224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | Session-level memory management |\n| `services/compact/sessionMemoryCompact.ts` | — | Session memory lightweight summary, thresholds 10K/5/40K (`56-61`) |\n| `utils/attachments.ts` | — | Injection budget: 200 lines / 4096 bytes per file, 60KB per session (`269-288`); find relevant memory by query (`2196-2241`) |\n| `query.ts` | — | Memory prefetch at start of each user turn (`301-304`), non-blocking collection (`1592-1614`) |\n| `query/stopHooks.ts` | — | Stop hook fire-and-forget triggers extraction and Dream (`141-155`) |\n\n### Memory Selection: LLM, Not Embedding\n\nCC uses **Sonnet itself to select** (`findRelevantMemories.ts`), not embedding vector similarity:\n\n1. `memoryScan.ts` scans all `.md` files in `.memory/` (excluding MEMORY.md), max 200 files, sorted by mtime descending\n2. Lists all memory files' `name` + `description` as a catalog\n3. 
Sends to Sonnet side-query: \"Select truly useful memories by name and description (max 5). Skip if unsure.\"\n4. Sonnet returns `{ selected_memories: [\"file1.md\", ...] }`\n5. Selected files' full contents are read (≤ 200 lines / 4096 bytes per file) and injected. Total session budget: 60KB\n\nAt the start of each user turn, `query.ts:301-304` starts memory prefetch (async); after tool execution, `1592-1614` collects completed results non-blocking.\n\n### Extraction Timing: Stop Hook, Not After autoCompact\n\nTrigger location (`stopHooks.ts:141-155`): inside `handleStopHooks()`, fire-and-forget triggers extraction and Dream. The teaching version places extraction in the `stop_reason != \"tool_use\"` branch, matching the direction.\n\nCC's extraction runs via forked agent (`extractMemories.ts:371-427`): restricted permissions, `skipTranscript: true`, `maxTurns: 5`. Also has overlap protection: if the main Agent already wrote memory files, extraction is skipped.\n\n### Memory File Format\n\nCC uses Markdown + YAML frontmatter, consistent with the teaching version. Four types: `user`, `feedback`, `project`, `reference`.\n\n`memdir.ts:34-38` defines index constraints: `MEMORY.md` max 200 lines / 25KB. `memdir.ts:199-266` builds memory behavior instructions, explicitly distinguishing memory from plan and tasks. Storage location: `~/.claude/projects//memory/`.\n\n### Dream: Four-Layer Gating\n\nNot \"triggered when idle\" or \"consolidate when count is enough\", but four gates (`autoDream.ts`, defaults `63-66`, gating logic `130-190`):\n\n1. **Time gate**: ≥ 24 hours since last consolidation\n2. **Scan throttle**: Avoid frequent filesystem scans\n3. **Session gate**: ≥ 5 session transcripts modified since last consolidation\n4. **Lock gate**: No other process currently consolidating (`.consolidate-lock` file)\n\nThe merge itself runs via forked agent (`224-233`): locate → collect recent signals → merge and write files → prune and update index. 
Lock file mtime serves as lastConsolidatedAt. Crash recovery: lock auto-expires after 1 hour.\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| Persistence | Cross-session | Single session |\n| Storage | Multiple .md files in `memory/` | `session-memory//memory.md` |\n| Loaded into | system prompt | compact summary |\n| Purpose | Cross-session knowledge accumulation | Cross-compact context continuity |\n\nsessionMemoryCompact (mentioned in s08) uses Session Memory: before autoCompact, it reads the session memory file and, if sufficient (≥ 10K tokens, ≥ 5 text messages, ≤ 40K tokens, `sessionMemoryCompact.ts:56-61`), uses it as a summary without calling the LLM.\n\n### Where the Real Implementation Is More Complex\n\n- **Feature flags**: Memory features have multiple feature gate layers\n- **Team memory**: Shared team memories, `loadMemoryPrompt()` has a dedicated path (not covered in teaching version)\n- **KAIROS**: Timing-aware memory extraction strategy, daily-log mode in `loadMemoryPrompt()`\n- **Prompt cache**: Memory injection must account for prompt cache TTL, avoiding full system prompt rewrites each turn\n- **File locks**: Concurrency control for multi-process scenarios\n- **Memory prefetch**: Async prefetch, non-blocking main flow\n\n### Teaching Version Simplifications Are Intentional\n\n- LLM side-query → LLM side-query + keyword fallback: teaching version keeps LLM selection, adds fallback path\n- Memory JSON → Markdown + frontmatter: teaching version matches CC\n- Stop hook trigger → `stop_reason != \"tool_use\"` branch: same direction\n- Four-layer gating → file-count threshold: teaching version lacks transcript system and multi-session concepts\n- Forked agent + restricted permissions → direct call: teaching version has no subprocess isolation\n\n \n\n\n"
+ "content": "# s09: Memory — Compression Loses Details, Keep a Layer That Doesn't\n\ns01 → ... → s07 → s08 → `s09` → [s10](/en/s10) → s11 → ... → s20\n> *\"Compression loses details, keep a layer that doesn't\"* — File store + index + on-demand loading, across compactions, across sessions.\n>\n> **Harness Layer**: Memory — knowledge that survives compaction and sessions.\n\n---\n\n## The Problem\n\ns08's autoCompact preserves current goals, remaining work, and user constraints in the summary, but details get lost: \"use tabs not spaces\" might get simplified to \"user has code style preferences\". And when you start a new session, even the summary is gone.\n\nLLMs have no persistent state; all information lives in the context window. When context fills up, it gets compressed, and compression is lossy. What's needed is a storage layer that doesn't participate in compression and persists across sessions.\n\n---\n\n## The Solution\n\n\n\nThe s08 compression pipeline is preserved, focusing on memory. Storage uses the filesystem: a `.memory/` directory where each memory is a `.md` file with YAML frontmatter (`name` / `description` / `type`). When files accumulate, an index is needed: `MEMORY.md` holds one link per line and gets injected into the SYSTEM.\n\nKey design: the index stays in SYSTEM prompt (cacheable by prompt cache), file content is injected on demand (matched by filename/description to the current conversation, without breaking the cache). Writing has two paths: the user explicitly says \"remember\", or extraction runs in the background after each turn. 
When files accumulate, periodic consolidation deduplicates.\n\nFour memory types, each answering a different question:\n\n| Type | Answers | Example |\n|------|---------|---------|\n| user | Who you are | \"Use tabs not spaces\" |\n| feedback | How to work | \"Don't mock the database\" |\n| project | What's happening | \"Auth rewrite is compliance-driven\" |\n| reference | Where to find things | \"Pipeline bugs are in Linear INGEST\" |\n\n---\n\n## How It Works\n\n\n\n### Storage: Markdown Files + Index\n\nEach memory is a `.md` file with YAML frontmatter for metadata:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` is the index, one link per line:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\nWriting a new memory automatically rebuilds the index:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### Loading: Two Paths\n\n**Path 1: Index in SYSTEM.** `build_system()` reads `MEMORY.md` once at the start of each user request and injects the memory catalog into the SYSTEM prompt. Memory extraction and consolidation run only when the turn ends, so SYSTEM does not need to be rebuilt repeatedly within the same user request.\n\n**Path 2: Relevant memories on demand.** At the start of each user request, `load_memories()` sends the recent conversation and the memory catalog (name + description) to the LLM as a lightweight side-query, selects relevant filenames, then reads and injects their contents. 
Capped at 5 to control cost.\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nIf the side-query fails (API error, JSON parse failure), it falls back to keyword matching on name + description.\n\n### Writing: Extraction After Each Turn\n\nUsers don't always say \"remember this\". Preferences are usually scattered across normal dialogue: \"tabs are better than spaces\", \"let's use single quotes from now on\".\n\n`extract_memories()` runs when each turn ends, triggered when the model stops without a tool_use (indicating the conversation has reached a natural break):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # Extract new memories from recent dialogue\n consolidate_memories() # Check if consolidation is needed\n return\n```\n\nBefore extraction, existing memories are checked to avoid duplicates. 
The extraction prompt asks the LLM to return a JSON array of `{name, type, description, body}`, writing files only when genuinely new information is found.\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### Consolidation: Low-Frequency Deduplication\n\nMemory files accumulate. `consolidate_memories()` triggers when the file count reaches a threshold (default 10), asking the LLM to deduplicate, merge contradictions, and prune stale memories:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # Too few, not worth consolidating\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC calls this process **Dream**, with four gates in practice: time interval, scan throttle, session count, file lock. The teaching version simplifies to a file-count threshold.\n\n### What Memory Stores\n\nMemory stores information that remains useful across sessions: user preferences, recurring feedback, project background, common entry points, and investigation clues. It focuses on \"what will be useful later\" and brings that information back through an index plus on-demand loading.\n\nSession memory focuses on continuity inside one session: what context should survive after compaction. 
The two work together: Memory handles long-term knowledge; session memory handles the current session across compaction.\n\n---\n\n## Changes From s08\n\n| Component | Before (s08) | After (s09) |\n|-----------|-------------|-------------|\n| Memory capability | None (preferences degrade with compaction) | Storage + loading + extraction + consolidation |\n| New functions | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| Storage | — | .memory/MEMORY.md index + .memory/*.md files |\n| Tools | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| Loop | Only compression each turn | Memory injection + compression + post-turn extraction + periodic consolidation |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\nTry these prompts (enter across multiple turns, observe memory accumulation and loading):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py` (observe whether the Agent uses tabs)\n3. `What did I tell you about my preferences?` (observe whether the Agent remembers)\n4. `I also prefer single quotes over double quotes for strings.`\n\nWhat to watch for: Does `[Memory: extracted N new memories]` appear after each turn? Are `.md` files generated in `.memory/`? Is `MEMORY.md` index updated? Does the Agent automatically load previous memories in new conversations?\n\n---\n\n## What's Next\n\nMemory, compression, and tools are all in place. But the system prompt is still a hardcoded string. Adding a new tool means manually adding a description; switching projects means rewriting the whole prompt. Prompts should be assembled at runtime.\n\ns10 System Prompt → segments + runtime assembly. Different projects, different tools, different prompts.\n\n\nDeep Dive Into CC Source Code
\n\n> The following is based on analysis of CC source code under `src/` in `memdir/`, `services/`, `utils/`, `query/`. Line numbers verified against source.\n\n### Source Code Paths\n\n| File | Lines | Responsibility |\n|------|-------|---------------|\n| `memdir/memdir.ts` | 507 | Core: MEMORY.md definition (`34-38`), memory behavior instructions distinguishing memory/plan/tasks (`199-266`), `loadMemoryPrompt()` three paths (`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query memory selection (`18-24` system prompt, `97-122` call logic) |\n| `memdir/memoryTypes.ts` | 271 | Type definitions, frontmatter fields |\n| `memdir/memoryScan.ts` | — | Scan .md files, exclude MEMORY.md, read frontmatter, max 200 files, sorted by mtime desc (`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | Forked agent extraction, restricted permissions, `skipTranscript: true`, `maxTurns: 5` (`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream consolidation, four-layer gating (`63-66` defaults, `130-190` gating, `224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | Session-level memory management |\n| `services/compact/sessionMemoryCompact.ts` | — | Session memory lightweight summary, thresholds 10K/5/40K (`56-61`) |\n| `utils/attachments.ts` | — | Injection budget: 200 lines / 4096 bytes per file, 60KB per session (`269-288`); find relevant memory by query (`2196-2241`) |\n| `query.ts` | — | Memory prefetch at start of each user turn (`301-304`), non-blocking collection (`1592-1614`) |\n| `query/stopHooks.ts` | — | Stop hook fire-and-forget triggers extraction and Dream (`141-155`) |\n\n### Memory Selection: LLM, Not Embedding\n\nCC uses **Sonnet itself to select** (`findRelevantMemories.ts`), not embedding vector similarity:\n\n1. `memoryScan.ts` scans all `.md` files in `.memory/` (excluding MEMORY.md), max 200 files, sorted by mtime descending\n2. Lists all memory files' `name` + `description` as a catalog\n3. 
Sends to Sonnet side-query: \"Select truly useful memories by name and description (max 5). Skip if unsure.\"\n4. Sonnet returns `{ selected_memories: [\"file1.md\", ...] }`\n5. Selected files' full contents are read (≤ 200 lines / 4096 bytes per file) and injected. Total session budget: 60KB\n\nAt the start of each user turn, `query.ts:301-304` starts memory prefetch (async); after tool execution, `1592-1614` collects completed results non-blocking.\n\n### Extraction Timing: Stop Hook, Not After autoCompact\n\nTrigger location (`stopHooks.ts:141-155`): inside `handleStopHooks()`, fire-and-forget triggers extraction and Dream. The teaching version places extraction in the `stop_reason != \"tool_use\"` branch, matching the direction.\n\nCC's extraction runs via forked agent (`extractMemories.ts:371-427`): restricted permissions, `skipTranscript: true`, `maxTurns: 5`. Also has overlap protection: if the main Agent already wrote memory files, extraction is skipped.\n\n### Memory File Format\n\nCC uses Markdown + YAML frontmatter, consistent with the teaching version. Four types: `user`, `feedback`, `project`, `reference`.\n\n`memdir.ts:34-38` defines index constraints: `MEMORY.md` max 200 lines / 25KB. `memdir.ts:199-266` builds memory behavior instructions, explicitly distinguishing memory from plan and tasks. Storage location: `~/.claude/projects//memory/`.\n\n### Dream: Four-Layer Gating\n\nNot \"triggered when idle\" or \"consolidate when count is enough\", but four gates (`autoDream.ts`, defaults `63-66`, gating logic `130-190`):\n\n1. **Time gate**: ≥ 24 hours since last consolidation\n2. **Scan throttle**: Avoid frequent filesystem scans\n3. **Session gate**: ≥ 5 session transcripts modified since last consolidation\n4. **Lock gate**: No other process currently consolidating (`.consolidate-lock` file)\n\nThe merge itself runs via forked agent (`224-233`): locate → collect recent signals → merge and write files → prune and update index. 
Lock file mtime serves as lastConsolidatedAt. Crash recovery: lock auto-expires after 1 hour.\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| Persistence | Cross-session | Single session |\n| Storage | Multiple .md files in `memory/` | `session-memory//memory.md` |\n| Loaded into | system prompt | compact summary |\n| Purpose | Cross-session knowledge accumulation | Cross-compact context continuity |\n\nsessionMemoryCompact (mentioned in s08) uses Session Memory: before autoCompact, it reads the session memory file and, if sufficient (≥ 10K tokens, ≥ 5 text messages, ≤ 40K tokens, `sessionMemoryCompact.ts:56-61`), uses it as a summary without calling the LLM.\n\n### Where the Real Implementation Is More Complex\n\n- **Feature flags**: Memory features have multiple feature gate layers\n- **Team memory**: Shared team memories, `loadMemoryPrompt()` has a dedicated path (not covered in teaching version)\n- **KAIROS**: Timing-aware memory extraction strategy, daily-log mode in `loadMemoryPrompt()`\n- **Prompt cache**: Memory injection must account for prompt cache TTL, avoiding full system prompt rewrites each turn\n- **File locks**: Concurrency control for multi-process scenarios\n- **Memory prefetch**: Async prefetch, non-blocking main flow\n\n### Teaching Version Simplifications Are Intentional\n\n- LLM side-query → LLM side-query + keyword fallback: teaching version keeps LLM selection, adds fallback path\n- Memory JSON → Markdown + frontmatter: teaching version matches CC\n- Stop hook trigger → `stop_reason != \"tool_use\"` branch: same direction\n- Four-layer gating → file-count threshold: teaching version lacks transcript system and multi-session concepts\n- Forked agent + restricted permissions → direct call: teaching version has no subprocess isolation\n\n \n\n\n"
},
{
"version": "s09",
"locale": "zh",
"title": "s09: Memory — 压缩会丢细节,要有一层不丢的",
- "content": "# s09: Memory — 压缩会丢细节,要有一层不丢的\n\ns01 → ... → s07 → s08 → `s09` → [s10](/zh/s10) → s11 → ... → s20\n> *\"压缩会丢细节, 要有一层不丢的\"* — 文件仓库 + 索引 + 按需加载,跨压缩、跨会话。\n>\n> **Harness 层**: 记忆 — 跨压缩、跨会话的知识积累。\n\n---\n\n## 问题\n\ns08 的 autoCompact 会把当前目标、剩余工作、用户约束写进摘要,但细节会丢失:\"用 tab 缩进不要用空格\"可能被简化成\"用户有代码风格偏好\"。而且新开一个会话,连摘要也没了。\n\nLLM 没有持久状态,所有信息都在上下文窗口里。上下文满了要压缩,压缩就有损。需要一层不参与压缩、跨会话保留的存储。\n\n---\n\n## 解决方案\n\n\n\ns08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。\n\n关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说\"记住\"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。\n\n四类记忆,各有用途:\n\n| 类型 | 回答什么 | 示例 |\n|------|---------|------|\n| user | 你是谁 | \"用 tab 不用空格\" |\n| feedback | 怎么做事 | \"别 mock 数据库\" |\n| project | 正在发生什么 | \"auth 重写是合规驱动\" |\n| reference | 东西在哪找 | \"pipeline bug 在 Linear INGEST\" |\n\n---\n\n## 工作原理\n\n\n\n### 存储:Markdown 文件 + 索引\n\n每个记忆是一个 `.md` 文件,YAML frontmatter 记录元数据:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` 是索引,一行一个链接:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n写入新记忆时自动重建索引:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 加载:两条路径\n\n**路径一:索引常驻 SYSTEM。** `build_system()` 每轮重建 SYSTEM 时读取 `MEMORY.md`,把记忆清单注入。SYSTEM prompt 中的索引可以被 prompt cache 缓存,不需要每轮重新发送。\n\n**路径二:相关记忆按需注入。** 每轮调用前,`load_memories()` 
把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n text = extract_text(response.content).strip()\n indices = json.loads(re.search(r'\\[.*?\\]', text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\n如果 side-query 失败(API 错误、JSON 解析失败),降级到关键词匹配 name + description。\n\n### 写入:每轮结束后提取\n\n用户不会每次都说\"记住这个\"。偏好通常散落在正常对话中:\"用 tab 比空格好\"、\"以后都用单引号\"。\n\n`extract_memories()` 在每轮结束时运行,条件是模型停止且没有 tool_use(说明对话告一段落):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(pre_compress) # 从压缩前快照提取新记忆\n consolidate_memories() # 检查是否需要整理\n return\n```\n\n提取前先检查已有记忆,避免重复。提取 prompt 要求 LLM 返回 `{name, type, description, body}` 的 JSON 数组,只有确实有新信息时才写文件。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低频合并去重\n\n记忆文件会积累。`consolidate_memories()` 在文件数达到阈值(默认 10)时触发,让 LLM 去重、合并矛盾、淘汰过时记忆:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 太少,不值得整理\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC 把这个过程叫 Dream,实际有四层门控:时间间隔、扫描节流、会话数、文件锁。教学版简化为文件数阈值。\n\n### Memory 适合保存什么\n\nMemory 保存跨会话仍然有用的信息:用户偏好、反复出现的反馈、项目背景、常用入口和排查线索。它关注“以后还会用到什么”,并通过索引 + 按需加载把这些信息带回当前对话。\n\nsession memory 关注同一会话内的连续性:compact 之后,当前会话还需要保留哪些上下文。两者配合使用:Memory 管长期知识,session memory 管当前会话的压缩续接。\n\n---\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|------|-----------|-----------|\n| 记忆能力 | 无(压缩后偏好随摘要退化) | 存储 + 加载 + 提取 + 整理 |\n| 新函数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| 存储 | — | .memory/MEMORY.md 索引 + .memory/*.md 文件 |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| 循环 | 每轮只做压缩 | 每轮注入记忆 + 压缩 + 每轮结束后提取 + 定期整理 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n试试这些 prompt(分多轮输入,观察记忆的累积和加载):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(观察 Agent 是否用了 tab)\n3. `What did I tell you about my preferences?`(观察 Agent 是否记得)\n4. `I also prefer single quotes over double quotes for strings.`\n\n观察重点:每轮结束后是否出现 `[Memory: extracted N new memories]`?`.memory/` 目录下是否生成了 `.md` 文件?`MEMORY.md` 索引是否更新?新一轮对话时 Agent 是否自动加载了之前的记忆?\n\n---\n\n## 接下来\n\n记忆、压缩、工具都已就绪。但 system prompt 还是硬编码的一大段字符串。加了新工具要手动加描述,换了项目要重写整个 prompt。prompt 应该运行时组装。\n\ns10 System Prompt → 分段 + 运行时组装。不同项目、不同工具,拼出不同的 prompt。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `src/` 下 `memdir/`、`services/`、`utils/`、`query/` 的分析,行号已对照核实。\n\n### 源码路径\n\n| 文件 | 行数 | 职责 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定义(`34-38`)、记忆行为指令区分 memory/plan/tasks(`199-266`)、`loadMemoryPrompt()` 三条路径(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query 选记忆(`18-24` 系统提示、`97-122` 调用逻辑) |\n| `memdir/memoryTypes.ts` | 271 | 类型定义,frontmatter 字段 |\n| `memdir/memoryScan.ts` | — | 扫描 .md 文件,排除 MEMORY.md,读 frontmatter,最多 200 个,按 mtime 降序(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent 提取记忆,受限权限,`skipTranscript: true`,`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理,四层门控(`63-66` 默认值、`130-190` 门控、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | 会话级记忆管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 轻量摘要,阈值 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入预算:200 行 / 4096 字节每文件,60KB 每 session(`269-288`);按 query 找相关 memory(`2196-2241`) |\n| `query.ts` | — | memory prefetch 每轮启动(`301-304`),非阻塞收集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget 触发提取和 Dream(`141-155`) |\n\n### 记忆选择:LLM 选,不是 embedding\n\nCC 用 **Sonnet 本身来选**(`findRelevantMemories.ts`),不是 embedding 向量相似度:\n\n1. `memoryScan.ts` 扫描 `.memory/` 下所有 `.md` 文件(排除 MEMORY.md),最多 200 个,按 mtime 降序\n2. 把 `name` + `description` 列成清单\n3. 发给 Sonnet side-query:\"根据名称和描述选出真正有用的记忆(最多 5 个)。不确定就不要选。\"\n4. Sonnet 返回 `{ selected_memories: [\"file1.md\", ...] }`\n5. 
选中文件读取完整内容(每文件 ≤ 200 行 / 4096 字节),注入上下文。单 session 总预算 60KB\n\n每轮用户 turn 开始时,`query.ts:301-304` 启动 memory prefetch(异步);工具执行后 `1592-1614` 非阻塞收集结果,不卡主流程。\n\n### 提取时机:stop hook,不是 autoCompact 后\n\n触发位置(`stopHooks.ts:141-155`):在 `handleStopHooks()` 中,fire-and-forget 触发提取和 Dream。教学版把提取放在 `stop_reason != \"tool_use\"` 分支里,方向一致。\n\nCC 的提取通过 forked agent 执行(`extractMemories.ts:371-427`):受限权限、`skipTranscript: true`、`maxTurns: 5`。还有重叠保护:如果主 Agent 已经写入了记忆文件,跳过提取。\n\n### 记忆文件格式\n\nCC 用 Markdown + YAML frontmatter,和教学版一致。四种类型:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` 定义索引约束:`MEMORY.md` 最多 200 行 / 25KB。`memdir.ts:199-266` 构建记忆行为指令,明确区分 memory、plan、tasks。存储位置:`~/.claude/projects//memory/`。\n\n### Dream:四层门控\n\n不是\"空闲时触发\"或\"数量够了就合并\",而是四层门控(`autoDream.ts`,默认值 `63-66`,门控逻辑 `130-190`):\n\n1. **时间门控**:距上次合并 ≥ 24 小时\n2. **扫描节流**:避免频繁扫描文件系统\n3. **会话门控**:自上次合并以来修改了 ≥ 5 个会话 transcript\n4. **锁门控**:没有其他进程正在合并(`.consolidate-lock` 文件)\n\n合并本身通过 forked agent 执行(`224-233`):定位 → 收集近期信号 → 合并写文件 → 剪枝更新索引。锁文件 mtime 就是 lastConsolidatedAt。崩溃恢复:1 小时后锁自动过期。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 持久性 | 跨会话 | 单会话 |\n| 存储 | `memory/` 下多个 .md 文件 | `session-memory//memory.md` |\n| 加载到 | system prompt | compact 摘要 |\n| 用途 | 跨会话的知识积累 | 跨 compact 的上下文连续性 |\n\nsessionMemoryCompact(s08 中提到的机制)正是使用了 Session Memory:autoCompact 前先读 session memory 文件,如果内容足够(≥ 10K token、≥ 5 条文本消息、≤ 40K token,`sessionMemoryCompact.ts:56-61`),就用它做摘要,不调 LLM。\n\n### 真实实现比教学版复杂的地方\n\n- **Feature flags**:记忆相关功能有多层 feature gate 控制\n- **Team memory**:团队共享记忆,`loadMemoryPrompt()` 有专门路径(教学版未涉及)\n- **KAIROS**:时机感知的记忆提取策略,`loadMemoryPrompt()` 中 daily-log 模式\n- **Prompt cache**:记忆注入需要考虑 prompt cache 的 TTL,避免每次都重写 system prompt 的大段内容\n- **文件锁**:多进程并发时的锁机制\n- **Memory prefetch**:异步预取,不阻塞主流程\n\n### 教学版的简化是刻意的\n\n- LLM side-query → LLM side-query + 关键词降级:教学版保留了 LLM 选择,加了降级路径\n- 记忆 JSON → Markdown + frontmatter:教学版与 CC 一致\n- stop hook 触发 → `stop_reason != \"tool_use\"` 
分支:方向一致\n- 四层门控 → 文件数阈值:教学版没有 transcript 系统和多会话概念\n- forked agent + 受限权限 → 直接调用:教学版没有子进程隔离\n\n \n\n\n"
+ "content": "# s09: Memory — 压缩会丢细节,要有一层不丢的\n\ns01 → ... → s07 → s08 → `s09` → [s10](/zh/s10) → s11 → ... → s20\n> *\"压缩会丢细节, 要有一层不丢的\"* — 文件仓库 + 索引 + 按需加载,跨压缩、跨会话。\n>\n> **Harness 层**: 记忆 — 跨压缩、跨会话的知识积累。\n\n---\n\n## 问题\n\ns08 的 autoCompact 会把当前目标、剩余工作、用户约束写进摘要,但细节会丢失:\"用 tab 缩进不要用空格\"可能被简化成\"用户有代码风格偏好\"。而且新开一个会话,连摘要也没了。\n\nLLM 没有持久状态,所有信息都在上下文窗口里。上下文满了要压缩,压缩就有损。需要一层不参与压缩、跨会话保留的存储。\n\n---\n\n## 解决方案\n\n\n\ns08 的压缩管线保留,聚焦记忆。存储选文件系统:`.memory/` 目录下,每个记忆一个 `.md` 文件,带 YAML frontmatter(`name` / `description` / `type`)。文件多了需要索引:`MEMORY.md` 一行一个链接,注入 SYSTEM。\n\n关键设计:索引常驻 SYSTEM prompt(可被 prompt cache 缓存),文件内容按需注入到当前 user turn(按 filename/description 匹配当前对话,不破坏 cache)。写入由每轮结束后的提取器完成:用户显式说\"记住\"或表达稳定偏好时,提取器会保存为记忆。文件积累多了,定期整理去重。\n\n四类记忆,各有用途:\n\n| 类型 | 回答什么 | 示例 |\n|------|---------|------|\n| user | 你是谁 | \"用 tab 不用空格\" |\n| feedback | 怎么做事 | \"别 mock 数据库\" |\n| project | 正在发生什么 | \"auth 重写是合规驱动\" |\n| reference | 东西在哪找 | \"pipeline bug 在 Linear INGEST\" |\n\n---\n\n## 工作原理\n\n\n\n### 存储:Markdown 文件 + 索引\n\n每个记忆是一个 `.md` 文件,YAML frontmatter 记录元数据:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` 是索引,一行一个链接:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n写入新记忆时自动重建索引:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 加载:两条路径\n\n**路径一:索引常驻 SYSTEM。** `build_system()` 在每次用户请求开始时读取 `MEMORY.md`,把记忆清单注入。记忆提取和整理只在本轮结束时触发,因此同一轮用户请求中不需要重复重建 SYSTEM。\n\n**路径二:相关记忆按需注入。** 每次用户请求开始时,`load_memories()` 
把最近对话和记忆目录(name + description)一起发给 LLM 做一次轻量 side-query,选出相关的文件名,再读文件内容临时注入到当前 user turn。最多 5 条,控制开销。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n text = extract_text(response.content).strip()\n indices = json.loads(re.search(r'\\[.*?\\]', text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\n如果 side-query 失败(API 错误、JSON 解析失败),降级到关键词匹配 name + description。\n\n### 写入:每轮结束后提取\n\n用户不会每次都说\"记住这个\"。偏好通常散落在正常对话中:\"用 tab 比空格好\"、\"以后都用单引号\"。\n\n`extract_memories()` 在每轮结束时运行,条件是模型停止且没有 tool_use(说明对话告一段落):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(pre_compress) # 从压缩前快照提取新记忆\n consolidate_memories() # 检查是否需要整理\n return\n```\n\n提取前先检查已有记忆,避免重复。提取 prompt 要求 LLM 返回 `{name, type, description, body}` 的 JSON 数组,只有确实有新信息时才写文件。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... 
parse response, write files ...\n```\n\n### 整理:低频合并去重\n\n记忆文件会积累。`consolidate_memories()` 在文件数达到阈值(默认 10)时触发,让 LLM 去重、合并矛盾、淘汰过时记忆:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 太少,不值得整理\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC 把这个过程叫 Dream,实际有四层门控:时间间隔、扫描节流、会话数、文件锁。教学版简化为文件数阈值。\n\n### Memory 适合保存什么\n\nMemory 保存跨会话仍然有用的信息:用户偏好、反复出现的反馈、项目背景、常用入口和排查线索。它关注“以后还会用到什么”,并通过索引 + 按需加载把这些信息带回当前对话。\n\nsession memory 关注同一会话内的连续性:compact 之后,当前会话还需要保留哪些上下文。两者配合使用:Memory 管长期知识,session memory 管当前会话的压缩续接。\n\n---\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|------|-----------|-----------|\n| 记忆能力 | 无(压缩后偏好随摘要退化) | 存储 + 加载 + 提取 + 整理 |\n| 新函数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| 存储 | — | .memory/MEMORY.md 索引 + .memory/*.md 文件 |\n| 工具 | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| 循环 | 每轮只做压缩 | 每轮注入记忆 + 压缩 + 每轮结束后提取 + 定期整理 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n试试这些 prompt(分多轮输入,观察记忆的累积和加载):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(观察 Agent 是否用了 tab)\n3. `What did I tell you about my preferences?`(观察 Agent 是否记得)\n4. `I also prefer single quotes over double quotes for strings.`\n\n观察重点:每轮结束后是否出现 `[Memory: extracted N new memories]`?`.memory/` 目录下是否生成了 `.md` 文件?`MEMORY.md` 索引是否更新?新一轮对话时 Agent 是否自动加载了之前的记忆?\n\n---\n\n## 接下来\n\n记忆、压缩、工具都已就绪。但 system prompt 还是硬编码的一大段字符串。加了新工具要手动加描述,换了项目要重写整个 prompt。prompt 应该运行时组装。\n\ns10 System Prompt → 分段 + 运行时组装。不同项目、不同工具,拼出不同的 prompt。\n\n\n深入 CC 源码
\n\n> 以下基于 CC 源码 `src/` 下 `memdir/`、`services/`、`utils/`、`query/` 的分析,行号已对照核实。\n\n### 源码路径\n\n| 文件 | 行数 | 职责 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定义(`34-38`)、记忆行为指令区分 memory/plan/tasks(`199-266`)、`loadMemoryPrompt()` 三条路径(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query 选记忆(`18-24` 系统提示、`97-122` 调用逻辑) |\n| `memdir/memoryTypes.ts` | 271 | 类型定义,frontmatter 字段 |\n| `memdir/memoryScan.ts` | — | 扫描 .md 文件,排除 MEMORY.md,读 frontmatter,最多 200 个,按 mtime 降序(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent 提取记忆,受限权限,`skipTranscript: true`,`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理,四层门控(`63-66` 默认值、`130-190` 门控、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | 会话级记忆管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 轻量摘要,阈值 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入预算:200 行 / 4096 字节每文件,60KB 每 session(`269-288`);按 query 找相关 memory(`2196-2241`) |\n| `query.ts` | — | memory prefetch 每轮启动(`301-304`),非阻塞收集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget 触发提取和 Dream(`141-155`) |\n\n### 记忆选择:LLM 选,不是 embedding\n\nCC 用 **Sonnet 本身来选**(`findRelevantMemories.ts`),不是 embedding 向量相似度:\n\n1. `memoryScan.ts` 扫描 `.memory/` 下所有 `.md` 文件(排除 MEMORY.md),最多 200 个,按 mtime 降序\n2. 把 `name` + `description` 列成清单\n3. 发给 Sonnet side-query:\"根据名称和描述选出真正有用的记忆(最多 5 个)。不确定就不要选。\"\n4. Sonnet 返回 `{ selected_memories: [\"file1.md\", ...] }`\n5. 
选中文件读取完整内容(每文件 ≤ 200 行 / 4096 字节),注入上下文。单 session 总预算 60KB\n\n每轮用户 turn 开始时,`query.ts:301-304` 启动 memory prefetch(异步);工具执行后 `1592-1614` 非阻塞收集结果,不卡主流程。\n\n### 提取时机:stop hook,不是 autoCompact 后\n\n触发位置(`stopHooks.ts:141-155`):在 `handleStopHooks()` 中,fire-and-forget 触发提取和 Dream。教学版把提取放在 `stop_reason != \"tool_use\"` 分支里,方向一致。\n\nCC 的提取通过 forked agent 执行(`extractMemories.ts:371-427`):受限权限、`skipTranscript: true`、`maxTurns: 5`。还有重叠保护:如果主 Agent 已经写入了记忆文件,跳过提取。\n\n### 记忆文件格式\n\nCC 用 Markdown + YAML frontmatter,和教学版一致。四种类型:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` 定义索引约束:`MEMORY.md` 最多 200 行 / 25KB。`memdir.ts:199-266` 构建记忆行为指令,明确区分 memory、plan、tasks。存储位置:`~/.claude/projects//memory/`。\n\n### Dream:四层门控\n\n不是\"空闲时触发\"或\"数量够了就合并\",而是四层门控(`autoDream.ts`,默认值 `63-66`,门控逻辑 `130-190`):\n\n1. **时间门控**:距上次合并 ≥ 24 小时\n2. **扫描节流**:避免频繁扫描文件系统\n3. **会话门控**:自上次合并以来修改了 ≥ 5 个会话 transcript\n4. **锁门控**:没有其他进程正在合并(`.consolidate-lock` 文件)\n\n合并本身通过 forked agent 执行(`224-233`):定位 → 收集近期信号 → 合并写文件 → 剪枝更新索引。锁文件 mtime 就是 lastConsolidatedAt。崩溃恢复:1 小时后锁自动过期。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 持久性 | 跨会话 | 单会话 |\n| 存储 | `memory/` 下多个 .md 文件 | `session-memory//memory.md` |\n| 加载到 | system prompt | compact 摘要 |\n| 用途 | 跨会话的知识积累 | 跨 compact 的上下文连续性 |\n\nsessionMemoryCompact(s08 中提到的机制)正是使用了 Session Memory:autoCompact 前先读 session memory 文件,如果内容足够(≥ 10K token、≥ 5 条文本消息、≤ 40K token,`sessionMemoryCompact.ts:56-61`),就用它做摘要,不调 LLM。\n\n### 真实实现比教学版复杂的地方\n\n- **Feature flags**:记忆相关功能有多层 feature gate 控制\n- **Team memory**:团队共享记忆,`loadMemoryPrompt()` 有专门路径(教学版未涉及)\n- **KAIROS**:时机感知的记忆提取策略,`loadMemoryPrompt()` 中 daily-log 模式\n- **Prompt cache**:记忆注入需要考虑 prompt cache 的 TTL,避免每次都重写 system prompt 的大段内容\n- **文件锁**:多进程并发时的锁机制\n- **Memory prefetch**:异步预取,不阻塞主流程\n\n### 教学版的简化是刻意的\n\n- LLM side-query → LLM side-query + 关键词降级:教学版保留了 LLM 选择,加了降级路径\n- 记忆 JSON → Markdown + frontmatter:教学版与 CC 一致\n- stop hook 触发 → `stop_reason != \"tool_use\"` 
分支:方向一致\n- 四层门控 → 文件数阈值:教学版没有 transcript 系统和多会话概念\n- forked agent + 受限权限 → 直接调用:教学版没有子进程隔离\n\n \n\n\n"
},
{
"version": "s09",
"locale": "ja",
"title": "s09: Memory — 圧縮は詳細を失う、失わない層が必要",
- "content": "# s09: Memory — 圧縮は詳細を失う、失わない層が必要\n\ns01 → ... → s07 → s08 → `s09` → [s10](/ja/s10) → s11 → ... → s20\n> *\"圧縮は詳細を失う、失わない層が必要\"* — ファイルストア + インデックス + オンデマンド読み込み。圧縮を越え、セッションを越えて。\n>\n> **Harness レイヤー**: 記憶 — 圧縮とセッションを越える知識の蓄積。\n\n---\n\n## 課題\n\ns08 の autoCompact は現在の目標、残りの作業、ユーザーの制約をサマリに保持するが、詳細は失われる:「タブでインデント、スペース不可」が「ユーザーにコードスタイルの好みあり」と簡略化される。そして新しいセッションを開始すると、サマリすらない。\n\nLLM には永続状態がなく、すべての情報はコンテキストウィンドウ内にある。コンテキストが満杯になれば圧縮され、圧縮は非可逆。圧縮に参加せず、セッションを越えて保持されるストレージ層が必要。\n\n---\n\n## ソリューション\n\n\n\ns08 の圧縮パイプラインを維持し、記憶に焦点を当てる。ストレージにはファイルシステムを採用:`.memory/` ディレクトリに各記憶を `.md` ファイルとして保存、YAML frontmatter(`name` / `description` / `type`)付き。ファイルが増えたらインデックスが必要:`MEMORY.md` に 1 行 1 リンクを記録し、SYSTEM に注入。\n\n重要な設計:インデックスは SYSTEM prompt に常駐(prompt cache でキャッシュ可能)、ファイル内容はオンデマンド注入(filename/description で現在の会話にマッチ、cache を破壊しない)。書き込みは 2 つのパス:ユーザーが明示的に「覚えて」と言うか、毎ターン終了後にバックグラウンドで抽出。ファイルが蓄積されたら、定期的に整理して重複排除。\n\n4 種類の記憶、それぞれ異なる質問に答える:\n\n| タイプ | 何に答えるか | 例 |\n|--------|-------------|-----|\n| user | あなたは誰か | \"タブでスペース不可\" |\n| feedback | どう作業するか | \"DB をモックしない\" |\n| project | 何が起きているか | \"auth 書き直しはコンプライアンス主導\" |\n| reference | どこで探すか | \"パイプラインのバグは Linear INGEST\" |\n\n---\n\n## 仕組み\n\n\n\n### ストレージ:Markdown ファイル + インデックス\n\n各記憶は `.md` ファイル、YAML frontmatter でメタデータを記録:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` はインデックス、1 行に 1 リンク:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n新しい記憶を書き込むとインデックスを自動再構築:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: 
{mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 読み込み:2 つのパス\n\n**パス 1:インデックスを SYSTEM に常駐。** `build_system()` は毎ターン SYSTEM を再構築する際に `MEMORY.md` を読み込み、記憶カタログを注入。SYSTEM prompt 内のインデックスは prompt cache でキャッシュ可能で、毎ターン再送不要。\n\n**パス 2:関連記憶をオンデマンド注入。** 各 LLM 呼び出し前、`load_memories()` は最近の会話と記憶カタログ(name + description)を LLM に軽量 side-query として送信し、関連するファイル名を選択、ファイル内容を読み込んで注入。上限 5 件でコストを制御。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nside-query が失敗した場合(API エラー、JSON パース失敗)、name + description のキーワードマッチにフォールバック。\n\n### 書き込み:毎ターン終了後の抽出\n\nユーザーが毎回「これを覚えて」と言うわけではない。好みは通常、通常の会話の中に散らばっている:「タブの方がスペースより良い」「これからはシングルクォートにしよう」。\n\n`extract_memories()` は各ターン終了時に実行、モデルが tool_use なしで停止した場合にトリガー(会話が自然な区切りに達したことを示す):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # 最近の会話から新しい記憶を抽出\n consolidate_memories() # 整理が必要かチェック\n return\n```\n\n抽出前に既存の記憶を確認し、重複を回避。抽出プロンプトは LLM に `{name, type, description, body}` の JSON 配列を要求、本当に新しい情報がある場合のみファイルに書き込む。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, type, description, 
body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### 整理:低頻度の重複排除\n\n記憶ファイルは蓄積される。`consolidate_memories()` はファイル数が閾値(デフォルト 10)に達した時にトリガー、LLM に重複排除、矛盾の統合、古い記憶の剪定を依頼:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 少なすぎる、整理する価値なし\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC はこのプロセスを **Dream** と呼び、実際には 4 層のゲートがある:時間間隔、スキャンスロットル、セッション数、ファイルロック。教学版はファイル数閾値に簡略化。\n\n### Memory に保存するもの\n\nMemory はセッションを越えて有用な情報を保存する:ユーザーの好み、繰り返し出るフィードバック、プロジェクト背景、よく使う入口、調査の手がかりなど。「あとでまた使うもの」を対象にし、インデックス + オンデマンド読み込みで現在の会話に戻す。\n\nsession memory は 1 つのセッション内の連続性を扱う:compact 後も現在の会話に残すべき文脈を保持する。両者は役割が分かれている。Memory は長期知識を扱い、session memory は現在のセッションを compact 越しにつなぐ。\n\n---\n\n## s08 からの変更点\n\n| コンポーネント | 変更前 (s08) | 変更後 (s09) |\n|-----------|-------------|-------------|\n| 記憶能力 | なし(圧縮後、好みはサマリと共に劣化) | ストレージ + 読み込み + 抽出 + 整理 |\n| 新規関数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| ストレージ | — | .memory/MEMORY.md インデックス + .memory/*.md ファイル |\n| ツール | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| ループ | 毎ターン圧縮のみ | 記憶注入 + 圧縮 + ターン終了後の抽出 + 定期整理 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n以下のプロンプトを試してみてください(複数ターンに分けて入力し、記憶の蓄積と読み込みを観察):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(Agent がタブを使用したか観察)\n3. `What did I tell you about my preferences?`(Agent が覚えているか観察)\n4. 
`I also prefer single quotes over double quotes for strings.`\n\n観察のポイント:各ターン終了後に `[Memory: extracted N new memories]` が表示されるか?`.memory/` ディレクトリに `.md` ファイルが生成されたか?`MEMORY.md` インデックスが更新されたか?新しい会話で Agent が以前の記憶を自動的に読み込んだか?\n\n---\n\n## 次へ\n\n記憶、圧縮、ツールはすべて揃った。しかし system prompt はまだハードコードされた文字列。新しいツールを追加するには手動で説明を書き、プロジェクトを変えるにはプロンプト全体を書き直す。プロンプトは実行時に組み立てられるべき。\n\ns10 System Prompt → セグメント + 実行時組み立て。異なるプロジェクト、異なるツール、異なるプロンプト。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `src/` 下の `memdir/`、`services/`、`utils/`、`query/` の分析に基づく。行番号はソースコードと照合済み。\n\n### ソースコードパス\n\n| ファイル | 行数 | 職責 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定義(`34-38`)、記憶動作指示で memory/plan/tasks を区別(`199-266`)、`loadMemoryPrompt()` 3 パス(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query で記憶選択(`18-24` システムプロンプト、`97-122` 呼び出しロジック) |\n| `memdir/memoryTypes.ts` | 271 | 型定義、frontmatter フィールド |\n| `memdir/memoryScan.ts` | — | .md ファイルをスキャン、MEMORY.md を除外、frontmatter を読み取り、最大 200 ファイル、mtime 降順(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent で記憶を抽出、制限付き権限、`skipTranscript: true`、`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理、4 層ゲート(`63-66` デフォルト値、`130-190` ゲート、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | セッションレベルの記憶管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 軽量サマリ、閾値 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入予算:200 行 / 4096 バイト/ファイル、60KB/セッション(`269-288`);query で関連記憶を検索(`2196-2241`) |\n| `query.ts` | — | memory prefetch を毎ターン開始時に起動(`301-304`)、非ブロッキング収集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget で抽出と Dream をトリガー(`141-155`) |\n\n### 記憶選択:embedding ではなく LLM\n\nCC は **Sonnet 自身で選択**(`findRelevantMemories.ts`)、embedding ベクトル類似度ではない:\n\n1. `memoryScan.ts` が `.memory/` 下のすべての `.md` ファイルをスキャン(MEMORY.md を除外)、最大 200 ファイル、mtime 降順\n2. `name` + `description` をカタログとしてリスト化\n3. Sonnet side-query に送信:「名前と説明から本当に有用な記憶を選択(最大 5 件)。不明ならスキップ。」\n4. Sonnet が `{ selected_memories: [\"file1.md\", ...] }` を返却\n5. 
選択されたファイルの完全な内容を読み込み(≤ 200 行 / 4096 バイト/ファイル)、注入。セッション総予算:60KB\n\n毎ターンのユーザー turn 開始時、`query.ts:301-304` が memory prefetch を起動(非同期);ツール実行後、`1592-1614` が非ブロッキングで結果を収集。\n\n### 抽出タイミング:stop hook、autoCompact 後ではない\n\nトリガー位置(`stopHooks.ts:141-155`):`handleStopHooks()` 内で、fire-and-forget で抽出と Dream をトリガー。教学版は `stop_reason != \"tool_use\"` 分岐に抽出を配置、方向は一致。\n\nCC の抽出は forked agent で実行(`extractMemories.ts:371-427`):制限付き権限、`skipTranscript: true`、`maxTurns: 5`。重複保護もある:メイン Agent が既に記憶ファイルを書き込んだ場合、抽出をスキップ。\n\n### 記憶ファイル形式\n\nCC は Markdown + YAML frontmatter を使用、教学版と一致。4 種類:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` がインデックス制約を定義:`MEMORY.md` 最大 200 行 / 25KB。`memdir.ts:199-266` が記憶動作指示を構築、memory と plan と tasks を明確に区別。保存場所:`~/.claude/projects//memory/`。\n\n### Dream:4 層ゲート\n\n「アイドル時にトリガー」や「数が足りたら統合」ではなく、4 層のゲート(`autoDream.ts`、デフォルト値 `63-66`、ゲートロジック `130-190`):\n\n1. **時間ゲート**:前回の統合から ≥ 24 時間\n2. **スキャンスロットル**:頻繁なファイルシステムスキャンを回避\n3. **セッションゲート**:前回の統合以降 ≥ 5 セッションの transcript が変更された\n4. **ロックゲート**:他のプロセスが統合中でない(`.consolidate-lock` ファイル)\n\n統合自体は forked agent で実行(`224-233`):定位 → 直近のシグナル収集 → 統合してファイル書き込み → 剪定してインデックス更新。ロックファイルの mtime が lastConsolidatedAt。クラッシュリカバリ:1 時間後にロックが自動期限切れ。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 永続性 | セッション間 | 単一セッション |\n| ストレージ | `memory/` 下の複数 .md ファイル | `session-memory//memory.md` |\n| 注入先 | system prompt | compact サマリ |\n| 目的 | セッション間の知識蓄積 | compact を越えたコンテキストの連続性 |\n\nsessionMemoryCompact(s08 で触れた仕組み)は Session Memory を活用:autoCompact の前に session memory ファイルを読み込み、内容が十分であれば(≥ 10K token、≥ 5 テキストメッセージ、≤ 40K token、`sessionMemoryCompact.ts:56-61`)、LLM を呼び出さずにサマリとして使用。\n\n### 実際の実装が教学版より複雑な点\n\n- **Feature flags**:記憶関連機能には複数の feature gate 層がある\n- **Team memory**:チーム共有記憶、`loadMemoryPrompt()` に専用パスあり(教学版では未カバー)\n- **KAIROS**:タイミング認識型の記憶抽出戦略、`loadMemoryPrompt()` の daily-log モード\n- **Prompt cache**:記憶注入は prompt cache の TTL を考慮する必要があり、毎ターン system prompt の大部分を書き直すことを避ける\n- 
**ファイルロック**:マルチプロセス時の並行制御\n- **Memory prefetch**:非同期プレフェッチ、メインフローをブロックしない\n\n### 教学版の簡略化は意図的\n\n- LLM side-query → LLM side-query + キーワードフォールバック:教学版は LLM 選択を維持し、フォールバックパスを追加\n- 記憶 JSON → Markdown + frontmatter:教学版は CC と一致\n- stop hook トリガー → `stop_reason != \"tool_use\"` 分岐:方向は一致\n- 4 層ゲート → ファイル数閾値:教学版には transcript システムやマルチセッションの概念がない\n- forked agent + 制限付き権限 → 直接呼び出し:教学版にはサブプロセス分離がない\n\n \n\n\n"
+ "content": "# s09: Memory — 圧縮は詳細を失う、失わない層が必要\n\ns01 → ... → s07 → s08 → `s09` → [s10](/ja/s10) → s11 → ... → s20\n> *\"圧縮は詳細を失う、失わない層が必要\"* — ファイルストア + インデックス + オンデマンド読み込み。圧縮を越え、セッションを越えて。\n>\n> **Harness レイヤー**: 記憶 — 圧縮とセッションを越える知識の蓄積。\n\n---\n\n## 課題\n\ns08 の autoCompact は現在の目標、残りの作業、ユーザーの制約をサマリに保持するが、詳細は失われる:「タブでインデント、スペース不可」が「ユーザーにコードスタイルの好みあり」と簡略化される。そして新しいセッションを開始すると、サマリすらない。\n\nLLM には永続状態がなく、すべての情報はコンテキストウィンドウ内にある。コンテキストが満杯になれば圧縮され、圧縮は非可逆。圧縮に参加せず、セッションを越えて保持されるストレージ層が必要。\n\n---\n\n## ソリューション\n\n\n\ns08 の圧縮パイプラインを維持し、記憶に焦点を当てる。ストレージにはファイルシステムを採用:`.memory/` ディレクトリに各記憶を `.md` ファイルとして保存、YAML frontmatter(`name` / `description` / `type`)付き。ファイルが増えたらインデックスが必要:`MEMORY.md` に 1 行 1 リンクを記録し、SYSTEM に注入。\n\n重要な設計:インデックスは SYSTEM prompt に常駐(prompt cache でキャッシュ可能)、ファイル内容はオンデマンド注入(filename/description で現在の会話にマッチ、cache を破壊しない)。書き込みは 2 つのパス:ユーザーが明示的に「覚えて」と言うか、毎ターン終了後にバックグラウンドで抽出。ファイルが蓄積されたら、定期的に整理して重複排除。\n\n4 種類の記憶、それぞれ異なる質問に答える:\n\n| タイプ | 何に答えるか | 例 |\n|--------|-------------|-----|\n| user | あなたは誰か | \"タブでスペース不可\" |\n| feedback | どう作業するか | \"DB をモックしない\" |\n| project | 何が起きているか | \"auth 書き直しはコンプライアンス主導\" |\n| reference | どこで探すか | \"パイプラインのバグは Linear INGEST\" |\n\n---\n\n## 仕組み\n\n\n\n### ストレージ:Markdown ファイル + インデックス\n\n各記憶は `.md` ファイル、YAML frontmatter でメタデータを記録:\n\n```markdown\n---\nname: user-preference-tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\n\nUser prefers using tabs, not spaces, for indentation.\n**Why:** Consistency with existing codebase conventions.\n**How to apply:** Always use tabs when writing or editing files.\n```\n\n`MEMORY.md` はインデックス、1 行に 1 リンク:\n\n```markdown\n- [user-preference-tabs](user-preference-tabs.md) — User prefers tabs for indentation\n```\n\n新しい記憶を書き込むとインデックスを自動再構築:\n\n```python\ndef write_memory_file(name, mem_type, description, body):\n slug = name.lower().replace(\" \", \"-\")\n filepath = MEMORY_DIR / f\"{slug}.md\"\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: 
{mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n```\n\n### 読み込み:2 つのパス\n\n**パス 1:インデックスを SYSTEM に常駐。** `build_system()` は各ユーザーリクエストの開始時に 1 回だけ `MEMORY.md` を読み込み、記憶カタログを SYSTEM prompt に注入。記憶の抽出と整理はターン終了時にだけ実行されるため、同じユーザーリクエスト内で SYSTEM を繰り返し再構築する必要はない。\n\n**パス 2:関連記憶をオンデマンド注入。** 各ユーザーリクエストの開始時に、`load_memories()` は最近の会話と記憶カタログ(name + description)を LLM に軽量 side-query として送信し、関連するファイル名を選択、ファイル内容を読み込んで注入。上限 5 件でコストを制御。\n\n```python\ndef select_relevant_memories(messages, max_items=5):\n files = list_memory_files()\n if not files:\n return []\n\n # Build catalog: \"0: user-preference-tabs — User prefers tabs...\"\n catalog = \"\\n\".join(f\"{i}: {f['name']} — {f['description']}\" for i, f in enumerate(files))\n\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\",\n \"content\": f\"Select relevant memory indices. Return JSON array.\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\nMemory catalog:\\n{catalog}\"}],\n max_tokens=200)\n indices = json.loads(re.search(r'\\[.*?\\]', response.content[0].text).group())\n return [files[i][\"filename\"] for i in indices if 0 <= i < len(files)]\n```\n\nside-query が失敗した場合(API エラー、JSON パース失敗)、name + description のキーワードマッチにフォールバック。\n\n### 書き込み:毎ターン終了後の抽出\n\nユーザーが毎回「これを覚えて」と言うわけではない。好みは通常、通常の会話の中に散らばっている:「タブの方がスペースより良い」「これからはシングルクォートにしよう」。\n\n`extract_memories()` は各ターン終了時に実行、モデルが tool_use なしで停止した場合にトリガー(会話が自然な区切りに達したことを示す):\n\n```python\n# In agent_loop:\nif response.stop_reason != \"tool_use\":\n extract_memories(messages) # 最近の会話から新しい記憶を抽出\n consolidate_memories() # 整理が必要かチェック\n return\n```\n\n抽出前に既存の記憶を確認し、重複を回避。抽出プロンプトは LLM に `{name, type, description, body}` の JSON 配列を要求、本当に新しい情報がある場合のみファイルに書き込む。\n\n```python\ndef extract_memories(messages):\n dialogue = format_recent_messages(messages[-10:])\n existing = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in list_memory_files())\n\n prompt = (\n \"Extract user preferences, constraints, or project facts.\\n\"\n \"Return JSON array: [{name, 
type, description, body}].\\n\"\n \"If nothing new or already covered, return [].\\n\\n\"\n f\"Existing memories:\\n{existing}\\n\\nDialogue:\\n{dialogue[:4000]}\"\n )\n # ... parse response, write files ...\n```\n\n### 整理:低頻度の重複排除\n\n記憶ファイルは蓄積される。`consolidate_memories()` はファイル数が閾値(デフォルト 10)に達した時にトリガー、LLM に重複排除、矛盾の統合、古い記憶の剪定を依頼:\n\n```python\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return # 少なすぎる、整理する価値なし\n # Send all memories to LLM, get back deduplicated list\n # Replace all files with consolidated results\n```\n\nCC はこのプロセスを **Dream** と呼び、実際には 4 層のゲートがある:時間間隔、スキャンスロットル、セッション数、ファイルロック。教学版はファイル数閾値に簡略化。\n\n### Memory に保存するもの\n\nMemory はセッションを越えて有用な情報を保存する:ユーザーの好み、繰り返し出るフィードバック、プロジェクト背景、よく使う入口、調査の手がかりなど。「あとでまた使うもの」を対象にし、インデックス + オンデマンド読み込みで現在の会話に戻す。\n\nsession memory は 1 つのセッション内の連続性を扱う:compact 後も現在の会話に残すべき文脈を保持する。両者は役割が分かれている。Memory は長期知識を扱い、session memory は現在のセッションを compact 越しにつなぐ。\n\n---\n\n## s08 からの変更点\n\n| コンポーネント | 変更前 (s08) | 変更後 (s09) |\n|-----------|-------------|-------------|\n| 記憶能力 | なし(圧縮後、好みはサマリと共に劣化) | ストレージ + 読み込み + 抽出 + 整理 |\n| 新規関数 | — | write_memory_file, select_relevant_memories, load_memories, extract_memories, consolidate_memories |\n| ストレージ | — | .memory/MEMORY.md インデックス + .memory/*.md ファイル |\n| ツール | bash, read, write, edit, glob, todo_write, task, load_skill, compact (9) | bash, read_file, write_file, edit_file, glob, task (6) |\n| ループ | 毎ターン圧縮のみ | 記憶注入 + 圧縮 + ターン終了後の抽出 + 定期整理 |\n\n---\n\n## 試してみよう\n\n```sh\ncd learn-claude-code\npython s09_memory/code.py\n```\n\n以下のプロンプトを試してみてください(複数ターンに分けて入力し、記憶の蓄積と読み込みを観察):\n\n1. `I prefer using tabs for indentation, not spaces. Remember that.`\n2. `Create a Python file called test.py`(Agent がタブを使用したか観察)\n3. `What did I tell you about my preferences?`(Agent が覚えているか観察)\n4. 
`I also prefer single quotes over double quotes for strings.`\n\n観察のポイント:各ターン終了後に `[Memory: extracted N new memories]` が表示されるか?`.memory/` ディレクトリに `.md` ファイルが生成されたか?`MEMORY.md` インデックスが更新されたか?新しい会話で Agent が以前の記憶を自動的に読み込んだか?\n\n---\n\n## 次へ\n\n記憶、圧縮、ツールはすべて揃った。しかし system prompt はまだハードコードされた文字列。新しいツールを追加するには手動で説明を書き、プロジェクトを変えるにはプロンプト全体を書き直す。プロンプトは実行時に組み立てられるべき。\n\ns10 System Prompt → セグメント + 実行時組み立て。異なるプロジェクト、異なるツール、異なるプロンプト。\n\n\nCC ソースコードの詳細
\n\n> 以下は CC ソースコード `src/` 下の `memdir/`、`services/`、`utils/`、`query/` の分析に基づく。行番号はソースコードと照合済み。\n\n### ソースコードパス\n\n| ファイル | 行数 | 職責 |\n|------|------|------|\n| `memdir/memdir.ts` | 507 | 核心:MEMORY.md 定義(`34-38`)、記憶動作指示で memory/plan/tasks を区別(`199-266`)、`loadMemoryPrompt()` 3 パス(`419-490`) |\n| `memdir/findRelevantMemories.ts` | 141 | Sonnet side-query で記憶選択(`18-24` システムプロンプト、`97-122` 呼び出しロジック) |\n| `memdir/memoryTypes.ts` | 271 | 型定義、frontmatter フィールド |\n| `memdir/memoryScan.ts` | — | .md ファイルをスキャン、MEMORY.md を除外、frontmatter を読み取り、最大 200 ファイル、mtime 降順(`35-94`) |\n| `services/extractMemories/extractMemories.ts` | 615 | forked agent で記憶を抽出、制限付き権限、`skipTranscript: true`、`maxTurns: 5`(`371-427`) |\n| `services/autoDream/autoDream.ts` | 324 | Dream 整理、4 層ゲート(`63-66` デフォルト値、`130-190` ゲート、`224-233` forked agent) |\n| `services/SessionMemory/sessionMemory.ts` | 495 | セッションレベルの記憶管理 |\n| `services/compact/sessionMemoryCompact.ts` | — | session memory 軽量サマリ、閾値 10K/5/40K(`56-61`) |\n| `utils/attachments.ts` | — | 注入予算:200 行 / 4096 バイト/ファイル、60KB/セッション(`269-288`);query で関連記憶を検索(`2196-2241`) |\n| `query.ts` | — | memory prefetch を毎ターン開始時に起動(`301-304`)、非ブロッキング収集(`1592-1614`) |\n| `query/stopHooks.ts` | — | stop hook fire-and-forget で抽出と Dream をトリガー(`141-155`) |\n\n### 記憶選択:embedding ではなく LLM\n\nCC は **Sonnet 自身で選択**(`findRelevantMemories.ts`)、embedding ベクトル類似度ではない:\n\n1. `memoryScan.ts` が `.memory/` 下のすべての `.md` ファイルをスキャン(MEMORY.md を除外)、最大 200 ファイル、mtime 降順\n2. `name` + `description` をカタログとしてリスト化\n3. Sonnet side-query に送信:「名前と説明から本当に有用な記憶を選択(最大 5 件)。不明ならスキップ。」\n4. Sonnet が `{ selected_memories: [\"file1.md\", ...] }` を返却\n5. 
選択されたファイルの完全な内容を読み込み(≤ 200 行 / 4096 バイト/ファイル)、注入。セッション総予算:60KB\n\n毎ターンのユーザー turn 開始時、`query.ts:301-304` が memory prefetch を起動(非同期);ツール実行後、`1592-1614` が非ブロッキングで結果を収集。\n\n### 抽出タイミング:stop hook、autoCompact 後ではない\n\nトリガー位置(`stopHooks.ts:141-155`):`handleStopHooks()` 内で、fire-and-forget で抽出と Dream をトリガー。教学版は `stop_reason != \"tool_use\"` 分岐に抽出を配置、方向は一致。\n\nCC の抽出は forked agent で実行(`extractMemories.ts:371-427`):制限付き権限、`skipTranscript: true`、`maxTurns: 5`。重複保護もある:メイン Agent が既に記憶ファイルを書き込んだ場合、抽出をスキップ。\n\n### 記憶ファイル形式\n\nCC は Markdown + YAML frontmatter を使用、教学版と一致。4 種類:`user`、`feedback`、`project`、`reference`。\n\n`memdir.ts:34-38` がインデックス制約を定義:`MEMORY.md` 最大 200 行 / 25KB。`memdir.ts:199-266` が記憶動作指示を構築、memory と plan と tasks を明確に区別。保存場所:`~/.claude/projects/<project>/memory/`。\n\n### Dream:4 層ゲート\n\n「アイドル時にトリガー」や「数が足りたら統合」ではなく、4 層のゲート(`autoDream.ts`、デフォルト値 `63-66`、ゲートロジック `130-190`):\n\n1. **時間ゲート**:前回の統合から ≥ 24 時間\n2. **スキャンスロットル**:頻繁なファイルシステムスキャンを回避\n3. **セッションゲート**:前回の統合以降 ≥ 5 セッションの transcript が変更された\n4. **ロックゲート**:他のプロセスが統合中でない(`.consolidate-lock` ファイル)\n\n統合自体は forked agent で実行(`224-233`):定位 → 直近のシグナル収集 → 統合してファイル書き込み → 剪定してインデックス更新。ロックファイルの mtime が lastConsolidatedAt。クラッシュリカバリ:1 時間後にロックが自動期限切れ。\n\n### User Memory vs Session Memory\n\n| | User Memory | Session Memory |\n|---|---|---|\n| 永続性 | セッション間 | 単一セッション |\n| ストレージ | `memory/` 下の複数 .md ファイル | `session-memory/<session-id>/memory.md` |\n| 注入先 | system prompt | compact サマリ |\n| 目的 | セッション間の知識蓄積 | compact を越えたコンテキストの連続性 |\n\nsessionMemoryCompact(s08 で触れた仕組み)は Session Memory を活用:autoCompact の前に session memory ファイルを読み込み、内容が十分であれば(≥ 10K token、≥ 5 テキストメッセージ、≤ 40K token、`sessionMemoryCompact.ts:56-61`)、LLM を呼び出さずにサマリとして使用。\n\n### 実際の実装が教学版より複雑な点\n\n- **Feature flags**:記憶関連機能には複数の feature gate 層がある\n- **Team memory**:チーム共有記憶、`loadMemoryPrompt()` に専用パスあり(教学版では未カバー)\n- **KAIROS**:タイミング認識型の記憶抽出戦略、`loadMemoryPrompt()` の daily-log モード\n- **Prompt cache**:記憶注入は prompt cache の TTL を考慮する必要があり、毎ターン system prompt の大部分を書き直すことを避ける\n- 
**ファイルロック**:マルチプロセス時の並行制御\n- **Memory prefetch**:非同期プレフェッチ、メインフローをブロックしない\n\n### 教学版の簡略化は意図的\n\n- LLM side-query → LLM side-query + キーワードフォールバック:教学版は LLM 選択を維持し、フォールバックパスを追加\n- 記憶 JSON → Markdown + frontmatter:教学版は CC と一致\n- stop hook トリガー → `stop_reason != \"tool_use\"` 分岐:方向は一致\n- 4 層ゲート → ファイル数閾値:教学版には transcript システムやマルチセッションの概念がない\n- forked agent + 制限付き権限 → 直接呼び出し:教学版にはサブプロセス分離がない\n\n \n\n\n"
},
{
"version": "s10",
@@ -291,19 +291,19 @@
"version": "s17",
"locale": "en",
"title": "s17: Autonomous Agents — Check the Board, Claim the Task",
- "content": "# s17: Autonomous Agents — Check the Board, Claim the Task\n\ns01 → ... → s15 → s16 → `s17` → [s18](/en/s18) → s19 → s20\n\n> *\"Check the board, claim the task\"* — poll when idle, work when found.\n>\n> **Harness Layer**: Autonomy — Self-organizing teammates, no leader assignment needed.\n\n---\n\n## The Problem\n\ns16's teammates can communicate and handshake shutdown. But each teammate waits for Lead to assign tasks — with 10 unclaimed tasks on the board, Lead has to manually assign 10 times. This doesn't scale. Teammates should check the task board themselves, claim unowned tasks, and look for the next one when done.\n\n---\n\n## The Solution\n\n\n\nCarries forward S16's teaching-version MessageBus and protocol tools. This chapter adds: **idle_poll** (poll every 5 seconds when idle), **scan_unclaimed_tasks** (scan the board for claimable tasks), **auto-claim** (claim on sight, no Lead needed).\n\nTeammate lifecycle expands from two phases to three:\n\n| Phase | Behavior | Exit condition |\n|-------|----------|----------------|\n| WORK | inbox → LLM → tool loop | `stop_reason != tool_use` |\n| IDLE | 5s poll inbox + task board | 60s timeout |\n| SHUTDOWN | Send summary, exit | — |\n\n---\n\n## How It Works\n\n### idle_poll: Idle Polling\n\nAfter completing a task, the teammate doesn't exit. It enters the IDLE phase — checking every 5 seconds for new work:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(agent_name, messages, name, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① Check inbox (priority)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # shutdown_request handled immediately\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
reply shutdown_response\n return \"shutdown\"\n # Regular messages: inject into context, return to WORK\n messages.append(...)\n return \"work\"\n\n # ② Scan task board\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\nInbox takes priority (may contain protocol messages like shutdown_request), task board second. A shutdown_request received during IDLE is dispatched immediately — no need to wait for the next WORK phase.\n\n### scan_unclaimed_tasks: Scan the Task Board\n\nFind tasks that are pending, unowned, with all dependencies completed (`can_start`):\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\nThree conditions: must be pending, no owner, all blockedBy dependencies completed. `can_start` checks dependency task status — having dependencies doesn't mean the task can't start, only unresolved dependencies block it. 
Teaching version picks the first by filename; CC uses file locks to prevent multiple teammates from claiming the same task.\n\n### claim_task: Owner Check\n\nAuto-claim checks the claim result, not treating failure as success:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\nTeaching version has no file locks, so concurrent claims may still race. But the `task.owner` check avoids the most obvious \"last writer wins\" problem. CC uses `proper-lockfile` to protect task files, with `claimTask` doing read-modify-write inside a file lock (`utils/tasks.ts:541-612`).\n\n### Teammate Lifecycle: WORK → IDLE → SHUTDOWN\n\ns16's teammates exit after finishing. s17 adds the IDLE phase — teammates cycle through WORK → IDLE in an outer loop:\n\n```python\n# Outer loop: WORK → IDLE cycle\nwhile True:\n # WORK phase: inner loop (max 10 LLM rounds)\n for _ in range(10):\n # Check inbox, dispatch protocol, call LLM, execute tools\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK phase ends\n\n # IDLE phase\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s timeout → SHUTDOWN\n\n# SHUTDOWN: send summary to Lead\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\nKey design:\n- **Outer while True**: WORK and IDLE alternate until timeout or shutdown request\n- **Inner for 10**: WORK phase caps at 10 LLM rounds (prevents infinite loops)\n- **IDLE timeout 60s**: 12 polls × 5s = 60s. 
Timeout sends summary and exits\n- **shutdown_request works in both phases**: WORK phase dispatches via `handle_inbox_message`; IDLE phase's `idle_poll` checks and replies directly\n\n### Identity Re-injection\n\nAfter autoCompact (s08), a teammate's messages list may be compressed into a summary. On each new WORK phase entry, check:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\nShort messages suggest compression happened — re-inject identity. In real CC, context compaction preserves the system prompt; the teaching version's simplified implementation needs manual handling.\n\n### consume_lead_inbox: Unified Inbox Consumer\n\nBoth the `check_inbox` tool and the main loop call the same `consume_lead_inbox()` function: route protocol responses to update state first, then inject all messages into Lead's conversation history. Teammates' summaries and results don't just print to terminal — Lead's LLM can see them and coordinate next steps.\n\n### Putting It Together\n\n```\n1. Lead: \"Build the backend — too many tasks, let teammates self-claim\"\n2. Lead → create_task(\"Create database schema\")\n3. Lead → create_task(\"Write API routes\")\n4. Lead → create_task(\"Write unit tests\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"You are a backend developer\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"You are a backend developer\")\n\n7. alice thread starts → WORK: no initial inbox → spins → IDLE\n8. bob thread starts → WORK: no initial inbox → spins → IDLE\n\n9. alice IDLE poll 1 → scan_unclaimed → finds \"Create database schema\"\n10. alice → claim_task → \"Create database schema\" → back to WORK\n11. bob IDLE poll 1 → scan_unclaimed → finds \"Write API routes\"\n12. bob → claim_task → \"Write API routes\" → back to WORK\n\n13. alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK ends\n14. 
alice IDLE → scan → \"Write unit tests\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK ends\n16. alice IDLE → 60s no new tasks → SHUTDOWN\n\n17. bob similar flow → done → SHUTDOWN\n18. Lead consume_lead_inbox → sees alice and bob's summaries\n```\n\nTwo teammates claim and work in parallel. Lead only creates tasks and spawns teammates — no manual assignment needed.\n\n---\n\n## Changes from s16\n\n| Component | Before (s16) | After (s17) |\n|-----------|-------------|-------------|\n| Task assignment | Lead manually assigns | Teammates auto-claim (can_start checks deps) |\n| Teammate state | WORK or exit | WORK → IDLE (60s poll) → SHUTDOWN |\n| claim_task | No owner check | Rejects tasks that already have an owner |\n| IDLE phase shutdown | Doesn't handle shutdown_request | Dispatches shutdown immediately and exits |\n| Lead inbox | Prints only, not in context | consume_lead_inbox injects into history |\n| New functions | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| Identity persistence | System prompt only | Auto re-inject after compression |\n| Lead tools | 14 (s16) | 14 (unchanged) |\n| Teammate tools | 5 | 8 (+ list_tasks, claim_task, complete_task) |\n| Teammate exit | Exit after task done | Exit only after 60s idle timeout |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\nTry this prompt:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\nWhat to observe: Do teammates auto-claim unassigned tasks? Are tasks with blockedBy dependencies claimed only after their dependencies complete? Does idle timeout trigger shutdown? Does a shutdown_request in IDLE phase get an immediate response? How do task states change in `.tasks/`?\n\n---\n\n## What's Next\n\nTeammates self-organize now. 
But Alice and Bob both work in the same directory — Alice edits `config.py`, Bob also edits `config.py`, overwriting each other.\n\ns18 Worktree Isolation → Each task gets its own working directory, no conflicts.\n\n\nDeep Dive into CC Source
\n\n> Teaching note: This chapter's idle_poll + auto-claim mechanism is a teaching design, using a unified polling function to demonstrate \"find work when idle.\" CC's actual implementation combines multiple mechanisms, but shares the same goal — reducing Lead's manual assignment burden.\n\n### 1. CC's Idle Mechanism: Combined Approach, Not Single Polling\n\nTeaching version uses a single `idle_poll()` to handle both inbox checking and task claiming during idle. CC's actual implementation combines four mechanisms:\n\n**idle_notification**: After completing a round of work, `sendIdleNotification()` (`inProcessRunner.ts:569-589`) sends an idle notification to Lead. Lead knows the teammate is available and can assign new tasks or request shutdown.\n\n**mailbox polling**: `waitForNextPromptOrShutdown()` (`inProcessRunner.ts:689-868`) is a **500ms polling loop** that continuously checks three sources: pending user messages, mailbox file messages, and task list. Shutdown requests are prioritized (`inProcessRunner.ts:768-804`), preventing starvation by regular messages.\n\n**task watcher**: `useTaskListWatcher` (`hooks/useTaskListWatcher.ts:34-189`) uses `fs.watch()` to monitor the `.claude/tasks/` directory with 1-second debounce, triggering checks when new tasks are created or dependencies unblock. The dependency check (`L197-207`) verifies \"no incomplete tasks in blockedBy\", not \"blockedBy is empty\".\n\n**active claiming**: The polling loop also calls `tryClaimNextTask()` (`inProcessRunner.ts:853-860`) — actively claiming tasks from the task list while waiting. So \"teammates don't actively poll for tasks\" is inaccurate; CC has both passive notification and active claiming.\n\n### 2. Task Claiming: File Locks + Atomic Operations\n\n`claimTask()` (`utils/tasks.ts:541-612`) uses `proper-lockfile` task-level locks, performing read-check-modify-write within the lock. 
Checks: owner already exists (`L575-576`), already completed (`L580-581`), unresolved blockers in blockedBy (`L585-594`). `claimTaskWithBusyCheck()` (`utils/tasks.ts:614-692`) uses task-list level locks, making busy check and claim atomic to avoid TOCTOU.\n\n`findAvailableTask()` (`inProcessRunner.ts:595-604`) checks \"all blockedBy completed\" using `task.blockedBy.every(id => !unresolvedTaskIds.has(id))`. `tryClaimNextTask()` (`inProcessRunner.ts:624-657`) updates status to `in_progress` after claiming, so the UI immediately reflects the change.\n\n### 3. Teaching Version vs CC Comparison\n\n| Dimension | Teaching (s17) | CC |\n|-----------|----------------|-----|\n| Idle mechanism | idle_poll unified polling (5s) | idle_notification + 500ms mailbox polling + task watcher |\n| Task discovery | scan_unclaimed_tasks (polling) | useTaskListWatcher (file watching) + tryClaimNextTask (active polling) |\n| Dependency check | can_start (all blockedBy completed) | findAvailableTask (same semantics) |\n| Concurrency safety | Owner check (no file lock) | proper-lockfile task lock + task-list lock |\n| Shutdown handling | IDLE dispatches directly, WORK via handle_inbox_message | 500ms polling loop prioritizes shutdown_request |\n| Timeout exit | 60s with no new tasks | No fixed timeout, Lead manual shutdown |\n| Identity persistence | Messages length detection | Context compaction preserves system prompt |\n| Claim failure handling | Check return value, skip on failure | File locks guarantee atomicity |\n\nTeaching version's `idle_poll()` merges CC's four mechanisms into one polling function — a reasonable simplification since the core semantics (find work when idle, claim after deps resolve, prioritize shutdown) are consistent.\n\n \n\n\n"
+ "content": "# s17: Autonomous Agents — Check the Board, Claim the Task\n\ns01 → ... → s15 → s16 → `s17` → [s18](/en/s18) → s19 → s20\n\n> *\"Check the board, claim the task\"* — poll when idle, work when found.\n>\n> **Harness Layer**: Autonomy — Self-organizing teammates, no leader assignment needed.\n\n---\n\n## The Problem\n\ns16's teammates can communicate and handshake shutdown. But each teammate waits for Lead to assign tasks — with 10 unclaimed tasks on the board, Lead has to manually assign 10 times. This doesn't scale. Teammates should check the task board themselves, claim unowned tasks, and look for the next one when done.\n\n---\n\n## The Solution\n\n\n\nCarries forward S16's teaching-version MessageBus and protocol tools. This chapter adds: **idle_poll** (poll every 5 seconds when idle), **scan_unclaimed_tasks** (scan the board for claimable tasks), **auto-claim** (claim on sight, no Lead needed).\n\nTeammate lifecycle expands from two phases to three:\n\n| Phase | Behavior | Exit condition |\n|-------|----------|----------------|\n| WORK | inbox → LLM → tool loop | `stop_reason != tool_use` |\n| IDLE | 5s poll inbox + task board | 60s timeout |\n| SHUTDOWN | Send summary, exit | — |\n\n---\n\n## How It Works\n\n### idle_poll: Idle Polling\n\nAfter completing a task, the teammate doesn't exit. It enters the IDLE phase — checking every 5 seconds for new work:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(name, messages, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① Check inbox (priority)\n inbox = BUS.read_inbox(name)\n if inbox:\n # shutdown_request handled immediately\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
reply shutdown_response\n return \"shutdown\"\n # Regular messages: inject into context, return to WORK\n messages.append(...)\n return \"work\"\n\n # ② Scan task board\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\nInbox takes priority (may contain protocol messages like shutdown_request), task board second. A shutdown_request received during IDLE is dispatched immediately — no need to wait for the next WORK phase.\n\n### scan_unclaimed_tasks: Scan the Task Board\n\nFind tasks that are pending, unowned, with all dependencies completed (`can_start`):\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\nThree conditions: must be pending, no owner, all blockedBy dependencies completed. `can_start` checks dependency task status — having dependencies doesn't mean the task can't start, only unresolved dependencies block it. 
Teaching version picks the first by filename; CC uses file locks to prevent multiple teammates from claiming the same task.\n\n### claim_task: Owner Check\n\nAuto-claim checks the claim result, not treating failure as success:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\nTeaching version has no file locks, so concurrent claims may still race. But the `task.owner` check avoids the most obvious \"last writer wins\" problem. CC uses `proper-lockfile` to protect task files, with `claimTask` doing read-modify-write inside a file lock (`utils/tasks.ts:541-612`).\n\n### Teammate Lifecycle: WORK → IDLE → SHUTDOWN\n\ns16's teammates exit after finishing. s17 adds the IDLE phase — teammates cycle through WORK → IDLE in an outer loop:\n\n```python\n# Outer loop: WORK → IDLE cycle\nwhile True:\n # WORK phase: inner loop (max 10 LLM rounds)\n for _ in range(10):\n # Check inbox, dispatch protocol, call LLM, execute tools\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK phase ends\n\n # IDLE phase\n idle_result = idle_poll(name, messages, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s timeout → SHUTDOWN\n\n# SHUTDOWN: send summary to Lead\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\nKey design:\n- **Outer while True**: WORK and IDLE alternate until timeout or shutdown request\n- **Inner for 10**: WORK phase caps at 10 LLM rounds (prevents infinite loops)\n- **IDLE timeout 60s**: 12 polls × 5s = 60s. 
Timeout sends summary and exits\n- **shutdown_request works in both phases**: WORK phase dispatches via `handle_inbox_message`; IDLE phase's `idle_poll` checks and replies directly\n\n### Identity Re-injection\n\nAfter autoCompact (s08), a teammate's messages list may be compressed into a summary. On each new WORK phase entry, check:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\nShort messages suggest compression happened — re-inject identity. In real CC, context compaction preserves the system prompt; the teaching version's simplified implementation needs manual handling.\n\n### consume_lead_inbox: Unified Inbox Consumer\n\nBoth the `check_inbox` tool and the main loop call the same `consume_lead_inbox()` function: route protocol responses to update state first, then inject all messages into Lead's conversation history. Teammates' summaries and results don't just print to terminal — Lead's LLM can see them and coordinate next steps.\n\n### Putting It Together\n\n```\n1. Lead: \"Build the backend — too many tasks, let teammates self-claim\"\n2. Lead → create_task(\"Create database schema\")\n3. Lead → create_task(\"Write API routes\")\n4. Lead → create_task(\"Write unit tests\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"You are a backend developer\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"You are a backend developer\")\n\n7. alice thread starts → WORK: no initial inbox → spins → IDLE\n8. bob thread starts → WORK: no initial inbox → spins → IDLE\n\n9. alice IDLE poll 1 → scan_unclaimed → finds \"Create database schema\"\n10. alice → claim_task → \"Create database schema\" → back to WORK\n11. bob IDLE poll 1 → scan_unclaimed → finds \"Write API routes\"\n12. bob → claim_task → \"Write API routes\" → back to WORK\n\n13. alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK ends\n14. 
alice IDLE → scan → \"Write unit tests\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK ends\n16. alice IDLE → 60s no new tasks → SHUTDOWN\n\n17. bob similar flow → done → SHUTDOWN\n18. Lead consume_lead_inbox → sees alice and bob's summaries\n```\n\nTwo teammates claim and work in parallel. Lead only creates tasks and spawns teammates — no manual assignment needed.\n\n---\n\n## Changes from s16\n\n| Component | Before (s16) | After (s17) |\n|-----------|-------------|-------------|\n| Task assignment | Lead manually assigns | Teammates auto-claim (can_start checks deps) |\n| Teammate state | WORK or exit | WORK → IDLE (60s poll) → SHUTDOWN |\n| claim_task | No owner check | Rejects tasks that already have an owner |\n| IDLE phase shutdown | Doesn't handle shutdown_request | Dispatches shutdown immediately and exits |\n| Lead inbox | Prints only, not in context | consume_lead_inbox injects into history |\n| New functions | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| Identity persistence | System prompt only | Auto re-inject after compression |\n| Lead tools | 14 (s16) | 14 (unchanged) |\n| Teammate tools | 5 | 8 (+ list_tasks, claim_task, complete_task) |\n| Teammate exit | Exit after task done | Exit only after 60s idle timeout |\n\n---\n\n## Try It\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\nTry this prompt:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\nWhat to observe: Do teammates auto-claim unassigned tasks? Are tasks with blockedBy dependencies claimed only after their dependencies complete? Does idle timeout trigger shutdown? Does a shutdown_request in IDLE phase get an immediate response? How do task states change in `.tasks/`?\n\n---\n\n## What's Next\n\nTeammates self-organize now. 
But Alice and Bob both work in the same directory — Alice edits `config.py`, Bob also edits `config.py`, overwriting each other.\n\ns18 Worktree Isolation → Each task gets its own working directory, no conflicts.\n\n\nDeep Dive into CC Source
\n\n> Teaching note: This chapter's idle_poll + auto-claim mechanism is a teaching design, using a unified polling function to demonstrate \"find work when idle.\" CC's actual implementation combines multiple mechanisms, but shares the same goal — reducing Lead's manual assignment burden.\n\n### 1. CC's Idle Mechanism: Combined Approach, Not Single Polling\n\nTeaching version uses a single `idle_poll()` to handle both inbox checking and task claiming during idle. CC's actual implementation combines four mechanisms:\n\n**idle_notification**: After completing a round of work, `sendIdleNotification()` (`inProcessRunner.ts:569-589`) sends an idle notification to Lead. Lead knows the teammate is available and can assign new tasks or request shutdown.\n\n**mailbox polling**: `waitForNextPromptOrShutdown()` (`inProcessRunner.ts:689-868`) is a **500ms polling loop** that continuously checks three sources: pending user messages, mailbox file messages, and task list. Shutdown requests are prioritized (`inProcessRunner.ts:768-804`), preventing starvation by regular messages.\n\n**task watcher**: `useTaskListWatcher` (`hooks/useTaskListWatcher.ts:34-189`) uses `fs.watch()` to monitor the `.claude/tasks/` directory with 1-second debounce, triggering checks when new tasks are created or dependencies unblock. The dependency check (`L197-207`) verifies \"no incomplete tasks in blockedBy\", not \"blockedBy is empty\".\n\n**active claiming**: The polling loop also calls `tryClaimNextTask()` (`inProcessRunner.ts:853-860`) — actively claiming tasks from the task list while waiting. So \"teammates don't actively poll for tasks\" is inaccurate; CC has both passive notification and active claiming.\n\n### 2. Task Claiming: File Locks + Atomic Operations\n\n`claimTask()` (`utils/tasks.ts:541-612`) uses `proper-lockfile` task-level locks, performing read-check-modify-write within the lock. 
Checks: owner already exists (`L575-576`), already completed (`L580-581`), unresolved blockers in blockedBy (`L585-594`). `claimTaskWithBusyCheck()` (`utils/tasks.ts:614-692`) uses task-list level locks, making busy check and claim atomic to avoid TOCTOU.\n\n`findAvailableTask()` (`inProcessRunner.ts:595-604`) checks \"all blockedBy completed\" using `task.blockedBy.every(id => !unresolvedTaskIds.has(id))`. `tryClaimNextTask()` (`inProcessRunner.ts:624-657`) updates status to `in_progress` after claiming, so the UI immediately reflects the change.\n\n### 3. Teaching Version vs CC Comparison\n\n| Dimension | Teaching (s17) | CC |\n|-----------|----------------|-----|\n| Idle mechanism | idle_poll unified polling (5s) | idle_notification + 500ms mailbox polling + task watcher |\n| Task discovery | scan_unclaimed_tasks (polling) | useTaskListWatcher (file watching) + tryClaimNextTask (active polling) |\n| Dependency check | can_start (all blockedBy completed) | findAvailableTask (same semantics) |\n| Concurrency safety | Owner check (no file lock) | proper-lockfile task lock + task-list lock |\n| Shutdown handling | IDLE dispatches directly, WORK via handle_inbox_message | 500ms polling loop prioritizes shutdown_request |\n| Timeout exit | 60s with no new tasks | No fixed timeout, Lead manual shutdown |\n| Identity persistence | Messages length detection | Context compaction preserves system prompt |\n| Claim failure handling | Check return value, skip on failure | File locks guarantee atomicity |\n\nTeaching version's `idle_poll()` merges CC's four mechanisms into one polling function — a reasonable simplification since the core semantics (find work when idle, claim after deps resolve, prioritize shutdown) are consistent.\n\n \n\n\n"
},
{
"version": "s17",
"locale": "zh",
"title": "s17: Autonomous Agents — 自己看板,自己认领",
- "content": "# s17: Autonomous Agents — 自己看板,自己认领\n\ns01 → ... → s15 → s16 → `s17` → [s18](/zh/s18) → s19 → s20\n\n> *\"自己看板,自己认领\"* — 空闲时轮询,有活就干。\n>\n> **Harness 层**: 自治 — 队友自组织,不依赖 Lead 分配。\n\n---\n\n## 问题\n\ns16 的队友能通信、能握手关机。但每个队友等 Lead 分配任务——如果任务看板上有 10 个未认领任务,Lead 得手动 assign 10 次。这不能扩展。队友应该自己看任务看板,发现没人做的任务就认领,做完再找下一个。\n\n---\n\n## 解决方案\n\n\n\n沿用 S16 的教学版 MessageBus 和协议工具。本章新增:**idle_poll**(空闲时每 5 秒轮询一次)、**scan_unclaimed_tasks**(扫描看板上可认领的任务)、**自动认领**(找到任务就 claim,不用 Lead 操心)。\n\n队友生命周期从两阶段变成三阶段:\n\n| 阶段 | 行为 | 退出条件 |\n|------|------|---------|\n| WORK | inbox → LLM → 工具循环 | `stop_reason != tool_use` |\n| IDLE | 每 5s 轮询 inbox + 任务板 | 60s 超时 |\n| SHUTDOWN | 发 summary,退出 | — |\n\n---\n\n## 工作原理\n\n### idle_poll: 空闲轮询\n\n队友完成当前任务后不退出,进入 IDLE 阶段——每 5 秒检查一次有没有新工作:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(agent_name, messages, name, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① 检查收件箱(优先)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # shutdown_request 立即处理\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
回复 shutdown_response\n return \"shutdown\"\n # 普通消息注入上下文,回到 WORK\n messages.append(...)\n return \"work\"\n\n # ② 扫描任务看板\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\ninbox 优先(可能包含 shutdown_request 等协议消息),任务板其次。IDLE 阶段收到 shutdown_request 会直接回复并退出,不等到下一轮 WORK。\n\n### scan_unclaimed_tasks: 扫描任务看板\n\n找 pending 状态、无 owner、所有依赖已完成(`can_start`)的任务:\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\n三个条件:必须是 pending、没有 owner、所有 blockedBy 依赖已完成。`can_start` 检查依赖任务的状态——有依赖不代表不能做,只有被未完成的任务阻塞才不能做。教学版按文件名排序取第一个;CC 用文件锁防止多个队友同时认领同一个任务。\n\n### claim_task: owner 检查\n\n自动认领时检查 claim 结果,不把失败当成功:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\n教学版没有文件锁,并发认领可能出现竞争。但至少 `task.owner` 检查避免了最明显的\"后写覆盖\"问题。CC 用 `proper-lockfile` 保护任务文件,`claimTask` 在文件锁内完成读-改-写(`utils/tasks.ts:541-612`)。\n\n### 队友生命周期: WORK → IDLE → SHUTDOWN\n\ns16 的队友做完任务就退出。s17 加了 IDLE 阶段,队友在外层循环中反复 WORK → IDLE:\n\n```python\n# Outer loop: WORK → IDLE cycle\nwhile True:\n # WORK phase: 内层循环(最多 10 轮 LLM 调用)\n for _ in range(10):\n # 检查 inbox、处理协议消息、调 LLM、执行工具\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK 阶段结束\n\n # IDLE phase\n idle_result = idle_poll(name, messages, 
name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s 超时 → SHUTDOWN\n\n# SHUTDOWN: 发 summary 给 Lead\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\n关键设计:\n- **外层 while True**:WORK 和 IDLE 交替进行,直到超时或收到关机请求\n- **内层 for 10**:WORK 阶段最多 10 轮 LLM 调用(防止无限循环)\n- **IDLE 超时 60 秒**:12 次轮询 × 5 秒 = 60 秒。超时后发送 summary 并退出\n- **shutdown_request 两阶段都能响应**:WORK 阶段通过 `handle_inbox_message` 分发;IDLE 阶段 `idle_poll` 直接检查并回复\n\n### 身份重注入\n\nautoCompact(s08)之后,队友的 messages 列表可能被压缩成一段摘要。每次进入新的 WORK 阶段时检查:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\n消息过短说明发生了压缩,此时重新注入身份信息。真实 CC 中 context compaction 会保留 system prompt,教学版的简化实现需要手动处理。\n\n### consume_lead_inbox: 统一 inbox 消费\n\n`check_inbox` 工具和主循环末尾都调用同一个 `consume_lead_inbox()` 函数:先路由协议 response 更新状态,再把所有消息注入 Lead 的对话历史。队友发来的 summary/result 不会只打印在终端,Lead 的 LLM 能看到并协调下一步。\n\n### 合起来跑\n\n```\n1. Lead: \"搭建后端——任务太多,让队友自己认领\"\n2. Lead → create_task(\"创建数据库 schema\")\n3. Lead → create_task(\"写 API 路由\")\n4. Lead → create_task(\"写单元测试\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"你是后端开发者\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"你是后端开发者\")\n\n7. alice 线程启动 → WORK: 没有初始 inbox → 空转 → IDLE\n8. bob 线程启动 → WORK: 没有初始 inbox → 空转 → IDLE\n\n9. alice IDLE 第 1 次轮询 → scan_unclaimed → 发现\"创建数据库 schema\"\n10. alice → claim_task → \"创建数据库 schema\" → 回到 WORK\n11. bob IDLE 第 1 次轮询 → scan_unclaimed → 发现\"写 API 路由\"\n12. bob → claim_task → \"写 API 路由\" → 回到 WORK\n\n13. alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK 结束\n14. alice IDLE → scan → \"写单元测试\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK 结束\n16. alice IDLE → 60s 无新任务 → SHUTDOWN\n\n17. bob 类似流程 → 做完 → SHUTDOWN\n18. 
Lead consume_lead_inbox → 看到 alice 和 bob 的 summary\n```\n\n两个队友并行认领、并行工作。Lead 只需要创建任务和启动队友,不需要手动分配。\n\n---\n\n## 相对 s16 的变更\n\n| 组件 | 之前 (s16) | 之后 (s17) |\n|------|-----------|-----------|\n| 任务分配 | Lead 手动 assign | 队友自动认领(can_start 检查依赖) |\n| 队友状态 | WORK 或退出 | WORK → IDLE(轮询 60s) → SHUTDOWN |\n| claim_task | 无 owner 检查 | 拒绝已有 owner 的任务 |\n| IDLE 阶段关机 | 不处理 shutdown_request | 直接 dispatch shutdown 并退出 |\n| Lead inbox | 只打印,不进上下文 | consume_lead_inbox 统一注入 history |\n| 新函数 | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| 身份保持 | 仅 system prompt | 压缩后自动重注入 |\n| Lead 工具 | 14 (s16) | 14(不变) |\n| 队友工具 | 5 | 8(+ list_tasks, claim_task, complete_task) |\n| 队友退出条件 | 完成任务即退出 | 60s 无新任务才退出 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\n试试这个 prompt:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\n观察重点:队友是否自动认领了未分配的任务?有 blockedBy 依赖的任务是否在前置完成后被正确认领?空闲超时后是否自动关机?IDLE 阶段收到 shutdown_request 是否立即响应?`.tasks/` 目录下的任务状态如何变化?\n\n---\n\n## 接下来\n\n队友自组织了。但 Alice 和 Bob 都在同一个目录下工作——Alice 改 `config.py`,Bob 也改 `config.py`,互相覆盖。\n\ns18 Worktree Isolation → 每个任务有自己的工作目录,互不干扰。\n\n\n深入 CC 源码
\n\n> 教学说明:本章的 idle_poll + auto-claim 机制是教学设计,用统一的轮询函数演示\"空闲后找活干\"。CC 的实际实现是多个机制的组合,但目标一致——减少 Lead 的手动分配负担。\n\n### 一、CC 的空闲机制:组合路径,不是单一轮询\n\n教学版用一个 `idle_poll()` 统一处理空闲时的 inbox 检查和任务认领。CC 的实际实现是四个机制的组合:\n\n**idle_notification**:队友完成一轮工作后,`sendIdleNotification()`(`inProcessRunner.ts:569-589`)向 Lead 发送空闲通知。Lead 知道队友可用了,可以分配新任务或请求关机。\n\n**mailbox 轮询**:`waitForNextPromptOrShutdown()`(`inProcessRunner.ts:689-868`)是一个 **500ms 轮询循环**,持续检查三类来源:pending user messages、mailbox 文件消息、task list。shutdown_request 被优先处理(`inProcessRunner.ts:768-804`),不会被普通消息饿死。\n\n**task watcher**:`useTaskListWatcher`(`hooks/useTaskListWatcher.ts:34-189`)用 `fs.watch()` 监听 `.claude/tasks/` 目录变化,1 秒 debounce,当新任务创建或依赖解锁时触发检查。依赖判断(`L197-207`)是\"blockedBy 中没有未完成的任务\",不是\"blockedBy 为空\"。\n\n**主动 claim**:轮询循环内部也会调用 `tryClaimNextTask()`(`inProcessRunner.ts:853-860`)——在等待期间主动从 task list 领取任务。所以\"队友不主动轮询任务\"不准确,CC 同时有被动通知和主动认领。\n\n### 二、任务认领:文件锁 + 原子操作\n\n`claimTask()`(`utils/tasks.ts:541-612`)用 `proper-lockfile` 的任务文件锁,在锁内完成读-检查-改-写。检查项:owner 是否已存在(`L575-576`)、是否已完成(`L580-581`)、blockedBy 中是否有未完成任务(`L585-594`)。`claimTaskWithBusyCheck()`(`utils/tasks.ts:614-692`)用 task-list 级别锁,把 busy check 和 claim 做成原子操作,避免 TOCTOU。\n\n`findAvailableTask()`(`inProcessRunner.ts:595-604`)的依赖判断也是\"所有 blockedBy 已完成\",用 `task.blockedBy.every(id => !unresolvedTaskIds.has(id))` 实现。`tryClaimNextTask()`(`inProcessRunner.ts:624-657`)在认领后把状态更新为 `in_progress`,让 UI 立即反映变化。\n\n### 三、教学版 vs CC 对比\n\n| 维度 | 教学版 (s17) | CC |\n|------|-------------|-----|\n| 空闲机制 | idle_poll 统一轮询(5s) | idle_notification + 500ms mailbox 轮询 + task watcher |\n| 任务发现 | scan_unclaimed_tasks(轮询) | useTaskListWatcher(文件监听)+ tryClaimNextTask(主动轮询) |\n| 依赖判断 | can_start(所有 blockedBy 已完成) | findAvailableTask(同样语义) |\n| 并发安全 | owner 检查(无文件锁) | proper-lockfile 任务锁 + task-list 锁 |\n| shutdown 处理 | IDLE 直接分发,WORK 通过 handle_inbox_message | 500ms 轮询中优先处理 shutdown_request |\n| 超时退出 | 60s 无新任务 | 无固定超时,Lead 手动 shutdown |\n| 身份保持 | messages 长度检测 | context compaction 保留 system 
prompt |\n| claim 失败处理 | 检查返回值,失败不注入 | 文件锁保证原子性 |\n\n教学版的 `idle_poll()` 把 CC 的四个机制合并成一个轮询函数——简化合理,因为核心语义(空闲时找活干、依赖解锁后可认领、shutdown 优先)是一致的。\n\n \n\n\n"
+ "content": "# s17: Autonomous Agents — 自己看板,自己认领\n\ns01 → ... → s15 → s16 → `s17` → [s18](/zh/s18) → s19 → s20\n\n> *\"自己看板,自己认领\"* — 空闲时轮询,有活就干。\n>\n> **Harness 层**: 自治 — 队友自组织,不依赖 Lead 分配。\n\n---\n\n## 问题\n\ns16 的队友能通信、能握手关机。但每个队友等 Lead 分配任务——如果任务看板上有 10 个未认领任务,Lead 得手动 assign 10 次。这不能扩展。队友应该自己看任务看板,发现没人做的任务就认领,做完再找下一个。\n\n---\n\n## 解决方案\n\n\n\n沿用 S16 的教学版 MessageBus 和协议工具。本章新增:**idle_poll**(空闲时每 5 秒轮询一次)、**scan_unclaimed_tasks**(扫描看板上可认领的任务)、**自动认领**(找到任务就 claim,不用 Lead 操心)。\n\n队友生命周期从两阶段变成三阶段:\n\n| 阶段 | 行为 | 退出条件 |\n|------|------|---------|\n| WORK | inbox → LLM → 工具循环 | `stop_reason != tool_use` |\n| IDLE | 每 5s 轮询 inbox + 任务板 | 60s 超时 |\n| SHUTDOWN | 发 summary,退出 | — |\n\n---\n\n## 工作原理\n\n### idle_poll: 空闲轮询\n\n队友完成当前任务后不退出,进入 IDLE 阶段——每 5 秒检查一次有没有新工作:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(name, messages, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① 检查收件箱(优先)\n inbox = BUS.read_inbox(name)\n if inbox:\n # shutdown_request 立即处理\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
回复 shutdown_response\n return \"shutdown\"\n # 普通消息注入上下文,回到 WORK\n messages.append(...)\n return \"work\"\n\n # ② 扫描任务看板\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\ninbox 优先(可能包含 shutdown_request 等协议消息),任务板其次。IDLE 阶段收到 shutdown_request 会直接回复并退出,不等到下一轮 WORK。\n\n### scan_unclaimed_tasks: 扫描任务看板\n\n找 pending 状态、无 owner、所有依赖已完成(`can_start`)的任务:\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\n三个条件:必须是 pending、没有 owner、所有 blockedBy 依赖已完成。`can_start` 检查依赖任务的状态——有依赖不代表不能做,只有被未完成的任务阻塞才不能做。教学版按文件名排序取第一个;CC 用文件锁防止多个队友同时认领同一个任务。\n\n### claim_task: owner 检查\n\n自动认领时检查 claim 结果,不把失败当成功:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\n教学版没有文件锁,并发认领可能出现竞争。但至少 `task.owner` 检查避免了最明显的\"后写覆盖\"问题。CC 用 `proper-lockfile` 保护任务文件,`claimTask` 在文件锁内完成读-改-写(`utils/tasks.ts:541-612`)。\n\n### 队友生命周期: WORK → IDLE → SHUTDOWN\n\ns16 的队友做完任务就退出。s17 加了 IDLE 阶段,队友在外层循环中反复 WORK → IDLE:\n\n```python\n# Outer loop: WORK → IDLE cycle\nwhile True:\n # WORK phase: 内层循环(最多 10 轮 LLM 调用)\n for _ in range(10):\n # 检查 inbox、处理协议消息、调 LLM、执行工具\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK 阶段结束\n\n # IDLE phase\n idle_result = idle_poll(name, messages, role)\n 
if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s 超时 → SHUTDOWN\n\n# SHUTDOWN: 发 summary 给 Lead\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\n关键设计:\n- **外层 while True**:WORK 和 IDLE 交替进行,直到超时或收到关机请求\n- **内层 for 10**:WORK 阶段最多 10 轮 LLM 调用(防止无限循环)\n- **IDLE 超时 60 秒**:12 次轮询 × 5 秒 = 60 秒。超时后发送 summary 并退出\n- **shutdown_request 两阶段都能响应**:WORK 阶段通过 `handle_inbox_message` 分发;IDLE 阶段 `idle_poll` 直接检查并回复\n\n### 身份重注入\n\nautoCompact(s08)之后,队友的 messages 列表可能被压缩成一段摘要。每次进入新的 WORK 阶段时检查:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\n消息过短说明发生了压缩,此时重新注入身份信息。真实 CC 中 context compaction 会保留 system prompt,教学版的简化实现需要手动处理。\n\n### consume_lead_inbox: 统一 inbox 消费\n\n`check_inbox` 工具和主循环末尾都调用同一个 `consume_lead_inbox()` 函数:先路由协议 response 更新状态,再把所有消息注入 Lead 的对话历史。队友发来的 summary/result 不会只打印在终端,Lead 的 LLM 能看到并协调下一步。\n\n### 合起来跑\n\n```\n1. Lead: \"搭建后端——任务太多,让队友自己认领\"\n2. Lead → create_task(\"创建数据库 schema\")\n3. Lead → create_task(\"写 API 路由\")\n4. Lead → create_task(\"写单元测试\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"你是后端开发者\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"你是后端开发者\")\n\n7. alice 线程启动 → WORK: 没有初始 inbox → 空转 → IDLE\n8. bob 线程启动 → WORK: 没有初始 inbox → 空转 → IDLE\n\n9. alice IDLE 第 1 次轮询 → scan_unclaimed → 发现\"创建数据库 schema\"\n10. alice → claim_task → \"创建数据库 schema\" → 回到 WORK\n11. bob IDLE 第 1 次轮询 → scan_unclaimed → 发现\"写 API 路由\"\n12. bob → claim_task → \"写 API 路由\" → 回到 WORK\n\n13. alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK 结束\n14. alice IDLE → scan → \"写单元测试\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK 结束\n16. alice IDLE → 60s 无新任务 → SHUTDOWN\n\n17. bob 类似流程 → 做完 → SHUTDOWN\n18. 
Lead consume_lead_inbox → 看到 alice 和 bob 的 summary\n```\n\n两个队友并行认领、并行工作。Lead 只需要创建任务和启动队友,不需要手动分配。\n\n---\n\n## 相对 s16 的变更\n\n| 组件 | 之前 (s16) | 之后 (s17) |\n|------|-----------|-----------|\n| 任务分配 | Lead 手动 assign | 队友自动认领(can_start 检查依赖) |\n| 队友状态 | WORK 或退出 | WORK → IDLE(轮询 60s) → SHUTDOWN |\n| claim_task | 无 owner 检查 | 拒绝已有 owner 的任务 |\n| IDLE 阶段关机 | 不处理 shutdown_request | 直接 dispatch shutdown 并退出 |\n| Lead inbox | 只打印,不进上下文 | consume_lead_inbox 统一注入 history |\n| 新函数 | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| 身份保持 | 仅 system prompt | 压缩后自动重注入 |\n| Lead 工具 | 14 (s16) | 14(不变) |\n| 队友工具 | 5 | 8(+ list_tasks, claim_task, complete_task) |\n| 队友退出条件 | 完成任务即退出 | 60s 无新任务才退出 |\n\n---\n\n## 试一下\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\n试试这个 prompt:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\n观察重点:队友是否自动认领了未分配的任务?有 blockedBy 依赖的任务是否在前置完成后被正确认领?空闲超时后是否自动关机?IDLE 阶段收到 shutdown_request 是否立即响应?`.tasks/` 目录下的任务状态如何变化?\n\n---\n\n## 接下来\n\n队友自组织了。但 Alice 和 Bob 都在同一个目录下工作——Alice 改 `config.py`,Bob 也改 `config.py`,互相覆盖。\n\ns18 Worktree Isolation → 每个任务有自己的工作目录,互不干扰。\n\n\n深入 CC 源码
\n\n> 教学说明:本章的 idle_poll + auto-claim 机制是教学设计,用统一的轮询函数演示\"空闲后找活干\"。CC 的实际实现是多个机制的组合,但目标一致——减少 Lead 的手动分配负担。\n\n### 一、CC 的空闲机制:组合路径,不是单一轮询\n\n教学版用一个 `idle_poll()` 统一处理空闲时的 inbox 检查和任务认领。CC 的实际实现是四个机制的组合:\n\n**idle_notification**:队友完成一轮工作后,`sendIdleNotification()`(`inProcessRunner.ts:569-589`)向 Lead 发送空闲通知。Lead 知道队友可用了,可以分配新任务或请求关机。\n\n**mailbox 轮询**:`waitForNextPromptOrShutdown()`(`inProcessRunner.ts:689-868`)是一个 **500ms 轮询循环**,持续检查三类来源:pending user messages、mailbox 文件消息、task list。shutdown_request 被优先处理(`inProcessRunner.ts:768-804`),不会被普通消息饿死。\n\n**task watcher**:`useTaskListWatcher`(`hooks/useTaskListWatcher.ts:34-189`)用 `fs.watch()` 监听 `.claude/tasks/` 目录变化,1 秒 debounce,当新任务创建或依赖解锁时触发检查。依赖判断(`L197-207`)是\"blockedBy 中没有未完成的任务\",不是\"blockedBy 为空\"。\n\n**主动 claim**:轮询循环内部也会调用 `tryClaimNextTask()`(`inProcessRunner.ts:853-860`)——在等待期间主动从 task list 领取任务。所以\"队友不主动轮询任务\"不准确,CC 同时有被动通知和主动认领。\n\n### 二、任务认领:文件锁 + 原子操作\n\n`claimTask()`(`utils/tasks.ts:541-612`)用 `proper-lockfile` 的任务文件锁,在锁内完成读-检查-改-写。检查项:owner 是否已存在(`L575-576`)、是否已完成(`L580-581`)、blockedBy 中是否有未完成任务(`L585-594`)。`claimTaskWithBusyCheck()`(`utils/tasks.ts:614-692`)用 task-list 级别锁,把 busy check 和 claim 做成原子操作,避免 TOCTOU。\n\n`findAvailableTask()`(`inProcessRunner.ts:595-604`)的依赖判断也是\"所有 blockedBy 已完成\",用 `task.blockedBy.every(id => !unresolvedTaskIds.has(id))` 实现。`tryClaimNextTask()`(`inProcessRunner.ts:624-657`)在认领后把状态更新为 `in_progress`,让 UI 立即反映变化。\n\n### 三、教学版 vs CC 对比\n\n| 维度 | 教学版 (s17) | CC |\n|------|-------------|-----|\n| 空闲机制 | idle_poll 统一轮询(5s) | idle_notification + 500ms mailbox 轮询 + task watcher |\n| 任务发现 | scan_unclaimed_tasks(轮询) | useTaskListWatcher(文件监听)+ tryClaimNextTask(主动轮询) |\n| 依赖判断 | can_start(所有 blockedBy 已完成) | findAvailableTask(同样语义) |\n| 并发安全 | owner 检查(无文件锁) | proper-lockfile 任务锁 + task-list 锁 |\n| shutdown 处理 | IDLE 直接分发,WORK 通过 handle_inbox_message | 500ms 轮询中优先处理 shutdown_request |\n| 超时退出 | 60s 无新任务 | 无固定超时,Lead 手动 shutdown |\n| 身份保持 | messages 长度检测 | context compaction 保留 system 
prompt |\n| claim 失败处理 | 检查返回值,失败不注入 | 文件锁保证原子性 |\n\n教学版的 `idle_poll()` 把 CC 的四个机制合并成一个轮询函数——简化合理,因为核心语义(空闲时找活干、依赖解锁后可认领、shutdown 优先)是一致的。\n\n \n\n\n"
},
{
"version": "s17",
"locale": "ja",
"title": "s17: Autonomous Agents — ボードを見て、自分で認領",
- "content": "# s17: Autonomous Agents — ボードを見て、自分で認領\n\ns01 → ... → s15 → s16 → `s17` → [s18](/ja/s18) → s19 → s20\n\n> *\"ボードを見て、自分で認領\"* — 空き時にポーリング、仕事があれば開始。\n>\n> **Harness 層**: 自治 — チームメイトが自己組織化、リーダーの割り当て不要。\n\n---\n\n## 課題\n\ns16 のチームメイトは通信でき、シャットダウンハンドシェイクもできる。しかし各チームメイトは Lead がタスクを割り当てるのを待つ——ボードに 10 個の未認領タスクがあれば、Lead は 10 回手動で assign しなければならない。これはスケールしない。チームメイトは自分でタスクボードを見て、未認領のタスクを見つけて認領し、終わったら次を探すべき。\n\n---\n\n## ソリューション\n\n\n\nS16 の教学版 MessageBus とプロトコルツールを踏襲。本章の追加:**idle_poll**(空き時に 5 秒ごとにポーリング)、**scan_unclaimed_tasks**(ボード上の認領可能なタスクをスキャン)、**自動認領**(見つけたら即座に claim、Lead 不要)。\n\nチームメイトのライフサイクルは 2 フェーズから 3 フェーズに:\n\n| フェーズ | 動作 | 終了条件 |\n|----------|------|---------|\n| WORK | inbox → LLM → ツールループ | `stop_reason != tool_use` |\n| IDLE | 5s ポーリング inbox + タスクボード | 60s タイムアウト |\n| SHUTDOWN | summary を送信、終了 | — |\n\n---\n\n## 仕組み\n\n### idle_poll: 空き時ポーリング\n\nチームメイトはタスク完了後も終了せず、IDLE フェーズに入る——5 秒ごとに新しい仕事がないか確認:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(agent_name, messages, name, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① 受信箱確認(優先)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # shutdown_request は即座に処理\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
shutdown_response 返信\n return \"shutdown\"\n # 通常メッセージ:コンテキストに注入、WORK に戻る\n messages.append(...)\n return \"work\"\n\n # ② タスクボードスキャン\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\ninbox を優先(shutdown_request 等のプロトコルメッセージの可能性)、タスクボードが次。IDLE フェーズで shutdown_request を受信すると即座に返信して終了し、次の WORK を待つ必要がない。\n\n### scan_unclaimed_tasks: タスクボードスキャン\n\npending 状態、owner なし、全依存関係完了(`can_start`)のタスクを検索:\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\n3 つの条件:pending であること、owner がないこと、全 blockedBy 依存が完了していること。`can_start` は依存タスクの状態を確認——依存があるからといってタスクを開始できないわけではなく、未解決の依存のみがブロックする。教学版はファイル名順で最初のものを選択、CC はファイルロックで複数チームメイトの同時認領を防止。\n\n### claim_task: owner チェック\n\n自動認領時に claim 結果を確認し、失敗を成功として扱わない:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\n教学版にはファイルロックがないため、並行認領で競合する可能性がある。しかし `task.owner` チェックで最も明白な「後書き上書き」問題を回避。CC は `proper-lockfile` でタスクファイルを保護、`claimTask` はファイルロック内で read-modify-write を実行(`utils/tasks.ts:541-612`)。\n\n### チームメイトライフサイクル: WORK → IDLE → SHUTDOWN\n\ns16 のチームメイトはタスク完了後に終了。s17 は IDLE フェーズを追加——外側ループで WORK → IDLE を繰り返す:\n\n```python\n# 外側ループ: WORK → IDLE サイクル\nwhile True:\n # WORK フェーズ: 内側ループ(最大 10 ラウンド LLM 呼び出し)\n for _ in 
range(10):\n # inbox 確認、プロトコルメッセージ処理、LLM 呼び出し、ツール実行\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK フェーズ終了\n\n # IDLE フェーズ\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s タイムアウト → SHUTDOWN\n\n# SHUTDOWN: summary を Lead に送信\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\n主要設計:\n- **外側 while True**:WORK と IDLE がタイムアウトまたはシャットダウン要求まで交互に続く\n- **内側 for 10**:WORK フェーズは最大 10 ラウンドの LLM 呼び出し(無限ループ防止)\n- **IDLE タイムアウト 60 秒**:12 回ポーリング × 5 秒 = 60 秒。タイムアウト後 summary を送信して終了\n- **shutdown_request は両フェーズで応答**:WORK フェーズは `handle_inbox_message` でディスパッチ、IDLE フェーズは `idle_poll` が直接確認して返信\n\n### 身份再注入\n\nautoCompact(s08)後、チームメイトの messages リストが要約に圧縮される可能性がある。新しい WORK フェーズに入るたびに確認:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\nメッセージが短い場合、圧縮が発生したことを示す——身份情報を再注入。真实 CC では context compaction が system prompt を保持、教学版の簡略実装は手動処理が必要。\n\n### consume_lead_inbox: 統一 inbox コンシューマ\n\n`check_inbox` ツールとメインループ末尾の両方が同じ `consume_lead_inbox()` 関数を呼び出す:プロトコル response を先にルーティングして状態を更新し、全メッセージを Lead の会話履歴に注入。チームメイトからの summary/result は端末に表示されるだけでなく、Lead の LLM も確認して次のステップを調整可能。\n\n### 組み合わせて実行\n\n```\n1. Lead: \"バックエンド構築——タスクが多すぎる、チームメイトに自己認領させる\"\n2. Lead → create_task(\"データベーススキーマを作成\")\n3. Lead → create_task(\"API ルートを書く\")\n4. Lead → create_task(\"ユニットテストを書く\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"あなたはバックエンド開発者\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"あなたはバックエンド開発者\")\n\n7. alice スレッド起動 → WORK: 初期 inbox なし → 空転 → IDLE\n8. bob スレッド起動 → WORK: 初期 inbox なし → 空転 → IDLE\n\n9. alice IDLE ポーリング 1 回目 → scan_unclaimed → \"データベーススキーマを作成\" を発見\n10. alice → claim_task → \"データベーススキーマを作成\" → WORK に戻る\n11. bob IDLE ポーリング 1 回目 → scan_unclaimed → \"API ルートを書く\" を発見\n12. bob → claim_task → \"API ルートを書く\" → WORK に戻る\n\n13. 
alice WORK: write_file(\"schema.sql\", ...) → complete_task → WORK 終了\n14. alice IDLE → scan → \"ユニットテストを書く\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK 終了\n16. alice IDLE → 60s 新しいタスクなし → SHUTDOWN\n\n17. bob も同様のフロー → 完了 → SHUTDOWN\n18. Lead consume_lead_inbox → alice と bob の summary を確認\n```\n\n2 人のチームメイトが並行して認領・作業。Lead はタスクを作成してチームメイトを起動するだけで、手動割り当て不要。\n\n---\n\n## s16 からの変更\n\n| コンポーネント | 変更前 (s16) | 変更後 (s17) |\n|--------------|------------|------------|\n| タスク割り当て | Lead が手動 assign | チームメイトが自動認領(can_start で依存確認) |\n| チームメイト状態 | WORK または終了 | WORK → IDLE(60s ポーリング) → SHUTDOWN |\n| claim_task | owner チェックなし | 既に owner があるタスクを拒否 |\n| IDLE フェーズシャットダウン | shutdown_request を処理しない | 即座にシャットダウンをディスパッチして終了 |\n| Lead inbox | 印刷のみ、コンテキストに入らない | consume_lead_inbox で history に注入 |\n| 新規関数 | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| 身份保持 | system prompt のみ | 圧縮後に自動再注入 |\n| Lead ツール | 14 (s16) | 14(変更なし) |\n| チームメイトツール | 5 | 8(+ list_tasks, claim_task, complete_task) |\n| チームメイト終了条件 | タスク完了後即終了 | 60s アイドルタイムアウト後のみ終了 |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\n以下のプロンプトを試してください:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\n観察ポイント:チームメイトは未割り当てのタスクを自動認領したか?blockedBy 依存のあるタスクは依存完了後に正しく認領されたか?アイドルタイムアウトでシャットダウンしたか?IDLE フェーズで shutdown_request に即座に応答したか?`.tasks/` ディレクトリのタスク状態はどう変化したか?\n\n---\n\n## 次の章\n\nチームメイトが自己組織化した。しかし Alice も Bob も同じディレクトリで作業——Alice が `config.py` を編集し、Bob も `config.py` を編集して互いに上書きしてしまう。\n\ns18 Worktree Isolation → 各タスクに専用の作業ディレクトリ、競合なし。\n\n\nCC ソースコード深掘り
\n\n> 教学注記:本章の idle_poll + auto-claim 機構は教学設計であり、統一ポーリング関数で「空き時に仕事を探す」をデモ。CC の実際の実装は複数機構の組み合わせだが、目標は同じ——Lead の手動割り当て負担を軽減。\n\n### 一、CC の空き機構:組み合わせ路径、単一ポーリングではない\n\n教学版は 1 つの `idle_poll()` で空き時の inbox 確認とタスク認領を統一処理。CC の実際の実装は 4 つの機構の組み合わせ:\n\n**idle_notification**:チームメイトが 1 ラウンドの作業を完了後、`sendIdleNotification()`(`inProcessRunner.ts:569-589`)が Lead に空き通知を送信。Lead はチームメイトが利用可能であることを知り、新しいタスクを割り当てたりシャットダウンを要求可能。\n\n**mailbox ポーリング**:`waitForNextPromptOrShutdown()`(`inProcessRunner.ts:689-868`)は **500ms ポーリングループ**で、3 つのソースを継続チェック:pending user messages、mailbox ファイルメッセージ、task list。shutdown_request は優先処理(`inProcessRunner.ts:768-804`)、通常メッセージによる飢餓を防止。\n\n**task watcher**:`useTaskListWatcher`(`hooks/useTaskListWatcher.ts:34-189`)が `fs.watch()` で `.claude/tasks/` ディレクトリの変化を監視、1 秒 debounce で新タスク作成や依存アンロック時にチェックをトリガー。依存判断(`L197-207`)は「blockedBy に未完了タスクがない」で、「blockedBy が空」ではない。\n\n**能動 claim**:ポーリングループ内でも `tryClaimNextTask()`(`inProcessRunner.ts:853-860`)を呼び出し——待機中に task list から能動的にタスクを認領。したがって「チームメイトは能動的にタスクをポーリングしない」は不正確、CC は受動通知と能動認領の両方を持つ。\n\n### 二、タスク認領:ファイルロック + 原子操作\n\n`claimTask()`(`utils/tasks.ts:541-612`)は `proper-lockfile` のタスクファイルロックを使用、ロック内で read-check-modify-write を実行。チェック項目:owner が既に存在(`L575-576`)、完了済み(`L580-581`)、blockedBy に未完了タスクがあるか(`L585-594`)。`claimTaskWithBusyCheck()`(`utils/tasks.ts:614-692`)はタスクリストレベルロックを使用、busy check と claim を原子操作にして TOCTOU を回避。\n\n`findAvailableTask()`(`inProcessRunner.ts:595-604`)の依存判断も「全 blockedBy 完了」で、`task.blockedBy.every(id => !unresolvedTaskIds.has(id))` で実装。`tryClaimNextTask()`(`inProcessRunner.ts:624-657`)は認領後 status を `in_progress` に更新、UI に即座に反映。\n\n### 三、教学版 vs CC 対比\n\n| 次元 | 教学版 (s17) | CC |\n|------|-------------|-----|\n| 空き機構 | idle_poll 統一ポーリング(5s) | idle_notification + 500ms mailbox ポーリング + task watcher |\n| タスク発見 | scan_unclaimed_tasks(ポーリング) | useTaskListWatcher(ファイル監視)+ tryClaimNextTask(能動ポーリング) |\n| 依存チェック | can_start(全 blockedBy 完了) | findAvailableTask(同じセマンティクス) |\n| 並行安全性 | owner チェック(ファイルロックなし) | proper-lockfile 
タスクロック + タスクリストロック |\n| shutdown 処理 | IDLE 直接ディスパッチ、WORK は handle_inbox_message | 500ms ポーリングループで shutdown_request を優先 |\n| タイムアウト終了 | 60s 新しいタスクなし | 固定タイムアウトなし、Lead 手動 shutdown |\n| 身份保持 | messages 長さ検出 | context compaction が system prompt を保持 |\n| claim 失敗処理 | 戻り値を確認、失敗時はスキップ | ファイルロックで原子性を保証 |\n\n教学版の `idle_poll()` は CC の 4 つの機構を 1 つのポーリング関数に統合——核心セマンティクス(空き時に仕事を探す、依存アンロック後に認領、shutdown 優先)が一致するため、合理的な簡略化。\n\n \n\n\n"
+ "content": "# s17: Autonomous Agents — ボードを見て、自分で認領\n\ns01 → ... → s15 → s16 → `s17` → [s18](/ja/s18) → s19 → s20\n\n> *\"ボードを見て、自分で認領\"* — 空き時にポーリング、仕事があれば開始。\n>\n> **Harness 層**: 自治 — チームメイトが自己組織化、リーダーの割り当て不要。\n\n---\n\n## 課題\n\ns16 のチームメイトは通信でき、シャットダウンハンドシェイクもできる。しかし各チームメイトは Lead がタスクを割り当てるのを待つ——ボードに 10 個の未認領タスクがあれば、Lead は 10 回手動で assign しなければならない。これはスケールしない。チームメイトは自分でタスクボードを見て、未認領のタスクを見つけて認領し、終わったら次を探すべき。\n\n---\n\n## ソリューション\n\n\n\nS16 の教学版 MessageBus とプロトコルツールを踏襲。本章の追加:**idle_poll**(空き時に 5 秒ごとにポーリング)、**scan_unclaimed_tasks**(ボード上の認領可能なタスクをスキャン)、**自動認領**(見つけたら即座に claim、Lead 不要)。\n\nチームメイトのライフサイクルは 2 フェーズから 3 フェーズに:\n\n| フェーズ | 動作 | 終了条件 |\n|----------|------|---------|\n| WORK | inbox → LLM → ツールループ | `stop_reason != tool_use` |\n| IDLE | 5s ポーリング inbox + タスクボード | 60s タイムアウト |\n| SHUTDOWN | summary を送信、終了 | — |\n\n---\n\n## 仕組み\n\n### idle_poll: 空き時ポーリング\n\nチームメイトはタスク完了後も終了せず、IDLE フェーズに入る——5 秒ごとに新しい仕事がないか確認:\n\n```python\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\ndef idle_poll(name, messages, role) -> str:\n \"\"\"Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # ① 受信箱確認(優先)\n inbox = BUS.read_inbox(name)\n if inbox:\n # shutdown_request は即座に処理\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n # ... 
shutdown_response 返信\n return \"shutdown\"\n # 通常メッセージ:コンテキストに注入、WORK に戻る\n messages.append(...)\n return \"work\"\n\n # ② タスクボードスキャン\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], name)\n if \"Claimed\" in result:\n messages.append(...)\n return \"work\"\n return \"timeout\"\n```\n\ninbox を優先(shutdown_request 等のプロトコルメッセージの可能性)、タスクボードが次。IDLE フェーズで shutdown_request を受信すると即座に返信して終了し、次の WORK を待つ必要がない。\n\n### scan_unclaimed_tasks: タスクボードスキャン\n\npending 状態、owner なし、全依存関係完了(`can_start`)のタスクを検索:\n\n```python\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n```\n\n3 つの条件:pending であること、owner がないこと、全 blockedBy 依存が完了していること。`can_start` は依存タスクの状態を確認——依存があるからといってタスクを開始できないわけではなく、未解決の依存のみがブロックする。教学版はファイル名順で最初のものを選択、CC はファイルロックで複数チームメイトの同時認領を防止。\n\n### claim_task: owner チェック\n\n自動認領時に claim 結果を確認し、失敗を成功として扱わない:\n\n```python\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n return f\"Claimed {task.id} ({task.subject})\"\n```\n\n教学版にはファイルロックがないため、並行認領で競合する可能性がある。しかし `task.owner` チェックで最も明白な「後書き上書き」問題を回避。CC は `proper-lockfile` でタスクファイルを保護、`claimTask` はファイルロック内で read-modify-write を実行(`utils/tasks.ts:541-612`)。\n\n### チームメイトライフサイクル: WORK → IDLE → SHUTDOWN\n\ns16 のチームメイトはタスク完了後に終了。s17 は IDLE フェーズを追加——外側ループで WORK → IDLE を繰り返す:\n\n```python\n# 外側ループ: WORK → IDLE サイクル\nwhile True:\n # WORK フェーズ: 内側ループ(最大 10 ラウンド LLM 呼び出し)\n for _ in range(10):\n # 
inbox 確認、プロトコルメッセージ処理、LLM 呼び出し、ツール実行\n ...\n if response.stop_reason != \"tool_use\":\n break # WORK フェーズ終了\n\n # IDLE フェーズ\n idle_result = idle_poll(name, messages, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break # 60s タイムアウト → SHUTDOWN\n\n# SHUTDOWN: summary を Lead に送信\nBUS.send(name, \"lead\", summary, \"result\")\n```\n\n主要設計:\n- **外側 while True**:WORK と IDLE がタイムアウトまたはシャットダウン要求まで交互に続く\n- **内側 for 10**:WORK フェーズは最大 10 ラウンドの LLM 呼び出し(無限ループ防止)\n- **IDLE タイムアウト 60 秒**:12 回ポーリング × 5 秒 = 60 秒。タイムアウト後 summary を送信して終了\n- **shutdown_request は両フェーズで応答**:WORK フェーズは `handle_inbox_message` でディスパッチ、IDLE フェーズは `idle_poll` が直接確認して返信\n\n### 身份再注入\n\nautoCompact(s08)後、チームメイトの messages リストが要約に圧縮される可能性がある。新しい WORK フェーズに入るたびに確認:\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n```\n\nメッセージが短い場合、圧縮が発生したことを示す——身份情報を再注入。真实 CC では context compaction が system prompt を保持、教学版の簡略実装は手動処理が必要。\n\n### consume_lead_inbox: 統一 inbox コンシューマ\n\n`check_inbox` ツールとメインループ末尾の両方が同じ `consume_lead_inbox()` 関数を呼び出す:プロトコル response を先にルーティングして状態を更新し、全メッセージを Lead の会話履歴に注入。チームメイトからの summary/result は端末に表示されるだけでなく、Lead の LLM も確認して次のステップを調整可能。\n\n### 組み合わせて実行\n\n```\n1. Lead: \"バックエンド構築——タスクが多すぎる、チームメイトに自己認領させる\"\n2. Lead → create_task(\"データベーススキーマを作成\")\n3. Lead → create_task(\"API ルートを書く\")\n4. Lead → create_task(\"ユニットテストを書く\")\n5. Lead → spawn_teammate(\"alice\", \"backend\", \"あなたはバックエンド開発者\")\n6. Lead → spawn_teammate(\"bob\", \"backend\", \"あなたはバックエンド開発者\")\n\n7. alice スレッド起動 → WORK: 初期 inbox なし → 空転 → IDLE\n8. bob スレッド起動 → WORK: 初期 inbox なし → 空転 → IDLE\n\n9. alice IDLE ポーリング 1 回目 → scan_unclaimed → \"データベーススキーマを作成\" を発見\n10. alice → claim_task → \"データベーススキーマを作成\" → WORK に戻る\n11. bob IDLE ポーリング 1 回目 → scan_unclaimed → \"API ルートを書く\" を発見\n12. bob → claim_task → \"API ルートを書く\" → WORK に戻る\n\n13. alice WORK: write_file(\"schema.sql\", ...) 
→ complete_task → WORK 終了\n14. alice IDLE → scan → \"ユニットテストを書く\" → claim → WORK\n15. alice WORK: write_file(\"test_api.py\", ...) → complete_task → WORK 終了\n16. alice IDLE → 60s 新しいタスクなし → SHUTDOWN\n\n17. bob も同様のフロー → 完了 → SHUTDOWN\n18. Lead consume_lead_inbox → alice と bob の summary を確認\n```\n\n2 人のチームメイトが並行して認領・作業。Lead はタスクを作成してチームメイトを起動するだけで、手動割り当て不要。\n\n---\n\n## s16 からの変更\n\n| コンポーネント | 変更前 (s16) | 変更後 (s17) |\n|--------------|------------|------------|\n| タスク割り当て | Lead が手動 assign | チームメイトが自動認領(can_start で依存確認) |\n| チームメイト状態 | WORK または終了 | WORK → IDLE(60s ポーリング) → SHUTDOWN |\n| claim_task | owner チェックなし | 既に owner があるタスクを拒否 |\n| IDLE フェーズシャットダウン | shutdown_request を処理しない | 即座にシャットダウンをディスパッチして終了 |\n| Lead inbox | 印刷のみ、コンテキストに入らない | consume_lead_inbox で history に注入 |\n| 新規関数 | — | idle_poll, scan_unclaimed_tasks, consume_lead_inbox |\n| 身份保持 | system prompt のみ | 圧縮後に自動再注入 |\n| Lead ツール | 14 (s16) | 14(変更なし) |\n| チームメイトツール | 5 | 8(+ list_tasks, claim_task, complete_task) |\n| チームメイト終了条件 | タスク完了後即終了 | 60s アイドルタイムアウト後のみ終了 |\n\n---\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython s17_autonomous_agents/code.py\n```\n\n以下のプロンプトを試してください:\n\n`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim and work.`\n\n観察ポイント:チームメイトは未割り当てのタスクを自動認領したか?blockedBy 依存のあるタスクは依存完了後に正しく認領されたか?アイドルタイムアウトでシャットダウンしたか?IDLE フェーズで shutdown_request に即座に応答したか?`.tasks/` ディレクトリのタスク状態はどう変化したか?\n\n---\n\n## 次の章\n\nチームメイトが自己組織化した。しかし Alice も Bob も同じディレクトリで作業——Alice が `config.py` を編集し、Bob も `config.py` を編集して互いに上書きしてしまう。\n\ns18 Worktree Isolation → 各タスクに専用の作業ディレクトリ、競合なし。\n\n\nCC ソースコード深掘り
\n\n> 教学注記:本章の idle_poll + auto-claim 機構は教学設計であり、統一ポーリング関数で「空き時に仕事を探す」をデモ。CC の実際の実装は複数機構の組み合わせだが、目標は同じ——Lead の手動割り当て負担を軽減。\n\n### 一、CC の空き機構:組み合わせ路径、単一ポーリングではない\n\n教学版は 1 つの `idle_poll()` で空き時の inbox 確認とタスク認領を統一処理。CC の実際の実装は 4 つの機構の組み合わせ:\n\n**idle_notification**:チームメイトが 1 ラウンドの作業を完了後、`sendIdleNotification()`(`inProcessRunner.ts:569-589`)が Lead に空き通知を送信。Lead はチームメイトが利用可能であることを知り、新しいタスクを割り当てたりシャットダウンを要求可能。\n\n**mailbox ポーリング**:`waitForNextPromptOrShutdown()`(`inProcessRunner.ts:689-868`)は **500ms ポーリングループ**で、3 つのソースを継続チェック:pending user messages、mailbox ファイルメッセージ、task list。shutdown_request は優先処理(`inProcessRunner.ts:768-804`)、通常メッセージによる飢餓を防止。\n\n**task watcher**:`useTaskListWatcher`(`hooks/useTaskListWatcher.ts:34-189`)が `fs.watch()` で `.claude/tasks/` ディレクトリの変化を監視、1 秒 debounce で新タスク作成や依存アンロック時にチェックをトリガー。依存判断(`L197-207`)は「blockedBy に未完了タスクがない」で、「blockedBy が空」ではない。\n\n**能動 claim**:ポーリングループ内でも `tryClaimNextTask()`(`inProcessRunner.ts:853-860`)を呼び出し——待機中に task list から能動的にタスクを認領。したがって「チームメイトは能動的にタスクをポーリングしない」は不正確、CC は受動通知と能動認領の両方を持つ。\n\n### 二、タスク認領:ファイルロック + 原子操作\n\n`claimTask()`(`utils/tasks.ts:541-612`)は `proper-lockfile` のタスクファイルロックを使用、ロック内で read-check-modify-write を実行。チェック項目:owner が既に存在(`L575-576`)、完了済み(`L580-581`)、blockedBy に未完了タスクがあるか(`L585-594`)。`claimTaskWithBusyCheck()`(`utils/tasks.ts:614-692`)はタスクリストレベルロックを使用、busy check と claim を原子操作にして TOCTOU を回避。\n\n`findAvailableTask()`(`inProcessRunner.ts:595-604`)の依存判断も「全 blockedBy 完了」で、`task.blockedBy.every(id => !unresolvedTaskIds.has(id))` で実装。`tryClaimNextTask()`(`inProcessRunner.ts:624-657`)は認領後 status を `in_progress` に更新、UI に即座に反映。\n\n### 三、教学版 vs CC 対比\n\n| 次元 | 教学版 (s17) | CC |\n|------|-------------|-----|\n| 空き機構 | idle_poll 統一ポーリング(5s) | idle_notification + 500ms mailbox ポーリング + task watcher |\n| タスク発見 | scan_unclaimed_tasks(ポーリング) | useTaskListWatcher(ファイル監視)+ tryClaimNextTask(能動ポーリング) |\n| 依存チェック | can_start(全 blockedBy 完了) | findAvailableTask(同じセマンティクス) |\n| 並行安全性 | owner チェック(ファイルロックなし) | proper-lockfile 
タスクロック + タスクリストロック |\n| shutdown 処理 | IDLE 直接ディスパッチ、WORK は handle_inbox_message | 500ms ポーリングループで shutdown_request を優先 |\n| タイムアウト終了 | 60s 新しいタスクなし | 固定タイムアウトなし、Lead 手動 shutdown |\n| 身份保持 | messages 長さ検出 | context compaction が system prompt を保持 |\n| claim 失敗処理 | 戻り値を確認、失敗時はスキップ | ファイルロックで原子性を保証 |\n\n教学版の `idle_poll()` は CC の 4 つの機構を 1 つのポーリング関数に統合——核心セマンティクス(空き時に仕事を探す、依存アンロック後に認領、shutdown 優先)が一致するため、合理的な簡略化。\n\n \n\n\n"
},
{
"version": "s18",
diff --git a/web/src/data/generated/versions.json b/web/src/data/generated/versions.json
index 676a20aa5..7551f10c9 100644
--- a/web/src/data/generated/versions.json
+++ b/web/src/data/generated/versions.json
@@ -664,7 +664,7 @@
"filename": "s08_context_compact/code.py",
"title": "Context Compact",
"subtitle": "Context Will Fill Up",
- "loc": 382,
+ "loc": 414,
"tools": [
"bash",
"read_file",
@@ -755,7 +755,7 @@
},
{
"name": "spawn_subagent",
- "signature": "def spawn_subagent(task: str)",
+ "signature": "def spawn_subagent(description: str)",
"startLine": 225
},
{
@@ -763,74 +763,89 @@
"signature": "def estimate_size(msgs)",
"startLine": 269
},
+ {
+ "name": "_block_type",
+ "signature": "def _block_type(block)",
+ "startLine": 271
+ },
+ {
+ "name": "_message_has_tool_use",
+ "signature": "def _message_has_tool_use(msg)",
+ "startLine": 275
+ },
+ {
+ "name": "_is_tool_result_message",
+ "signature": "def _is_tool_result_message(msg)",
+ "startLine": 284
+ },
{
"name": "snip_compact",
"signature": "def snip_compact(messages, max_messages=50)",
- "startLine": 273
+ "startLine": 295
},
{
"name": "collect_tool_results",
"signature": "def collect_tool_results(messages)",
- "startLine": 281
+ "startLine": 313
},
{
"name": "micro_compact",
"signature": "def micro_compact(messages)",
- "startLine": 290
+ "startLine": 322
},
{
"name": "persist_large_output",
"signature": "def persist_large_output(tool_use_id, output)",
- "startLine": 300
+ "startLine": 332
},
{
"name": "tool_result_budget",
"signature": "def tool_result_budget(messages, max_bytes=200_000)",
- "startLine": 307
+ "startLine": 339
},
{
"name": "write_transcript",
"signature": "def write_transcript(messages)",
- "startLine": 325
+ "startLine": 357
},
{
"name": "summarize_history",
"signature": "def summarize_history(messages)",
- "startLine": 332
+ "startLine": 364
},
{
"name": "compact_history",
"signature": "def compact_history(messages)",
- "startLine": 343
+ "startLine": 375
},
{
"name": "reactive_compact",
"signature": "def reactive_compact(messages)",
- "startLine": 351
+ "startLine": 383
},
{
"name": "trigger_hooks",
"signature": "def trigger_hooks(event, *args)",
- "startLine": 391
+ "startLine": 428
},
{
"name": "permission_hook",
"signature": "def permission_hook(block)",
- "startLine": 398
+ "startLine": 435
},
{
"name": "log_hook",
"signature": "def log_hook(block)",
- "startLine": 403
+ "startLine": 440
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 417
+ "startLine": 454
}
],
"layer": "memory",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns08_context_compact.py - Context Compact\n\nFour-layer compaction pipeline inserted before LLM calls:\n\n L1: snip_compact — trim middle messages when count > 50\n L2: micro_compact — replace old tool_results with placeholders\n L3: tool_result_budget — persist large results to disk\n L4: compact_history — LLM full summary (1 API call)\n\n Emergency: reactive_compact — when API still returns prompt_too_long\n\n ┌─────────────────────────────────────────────────────────────┐\n │ messages[] │\n │ ↓ │\n │ L3 budget ─→ L1 snip ─→ L2 micro ─→ [token > threshold?] │\n │ ├─ No → LLM │\n │ └─ Yes → L4 summary │\n │ ↓ │\n │ LLM call │\n │ [prompt_too_long?] │\n │ └─ Yes → reactive │\n └─────────────────────────────────────────────────────────────┘\n\nCore principle: cheap first, expensive last.\nExecution order matches CC source: budget → snip → micro → auto.\n\nBuilds on s07 (skill loading). Usage:\n\n python s08_context_compact/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport ast, json, os, subprocess, time\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\n# s07: Skill catalog scan (inherited from s07)\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills()\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n\n# s08: SYSTEM includes skill catalog (inherited from s07 build_system)\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n\n# s08: subagent gets its own system prompt — no compact, no skill loading\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. 
\"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s07 (unchanged): Basic Tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = 
json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, t in enumerate(todos):\n if not isinstance(t, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in t or \"status\" not in t:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return None, f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s06-s07 (unchanged): Subagent\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write 
content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n 
for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s08: Four-Layer Compaction Pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT = 3\nPERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\n\n# L1: snipCompact — trim middle messages\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages: return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return messages[:keep_head] + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}] + messages[-keep_tail:]\n\n\n# L2: microCompact — old result placeholders\ndef collect_tool_results(messages):\n blocks = []\n for mi, msg in enumerate(messages):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(messages):\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT: return messages\n for _, _, block in tool_results[:-KEEP_RECENT]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n\n\n# L3: toolResultBudget — persist large results to disk\ndef persist_large_output(tool_use_id, output):\n if len(output) <= PERSIST_THRESHOLD: return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists(): path.write_text(output)\n return f\"\\nFull output: {path}\\nPreview:\\n{output[:2000]}\\n\"\n\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1] if messages else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return messages\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes: return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for _, block in ranked:\n if total <= max_bytes: break\n content = str(block.get(\"content\", \"\"))\n if len(content) <= PERSIST_THRESHOLD: continue\n tid = block.get(\"tool_use_id\", \"unknown\")\n block[\"content\"] = persist_large_output(tid, content)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\n# L4: autoCompact — LLM full summary\ndef write_transcript(messages):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages: f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\ndef summarize_history(messages):\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings/decisions, 3. files read/changed, \"\n \"4. remaining work, 5. 
user constraints.\\nBe compact but concrete.\\n\\n\" + conversation)\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=2000)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in response.content\n if getattr(block, \"type\", None) == \"text\").strip() or \"(empty summary)\"\n\ndef compact_history(messages):\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\n# Emergency: reactiveCompact — on API error\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s07: Tool Definitions\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", 
\"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a complex subtask. Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n # s08 change: new compact tool — triggers compact_history, not a no-op\n {\"name\": \"compact\", \"description\": \"Summarize earlier conversation to free context space.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\"}}}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n \"task\": spawn_subagent, \"load_skill\": load_skill,\n}\n\n# FROM s04 (unchanged): Hooks\nHOOKS = {\"PreToolUse\": [], \"PostToolUse\": []}\ndef trigger_hooks(event, *args):\n for cb in HOOKS[event]:\n r = cb(*args)\n if r is not None: return r\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\"]\ndef permission_hook(block):\n if block.name == 
\"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"): return \"Permission denied\"\n return None\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\nHOOKS[\"PreToolUse\"].append(permission_hook)\nHOOKS[\"PreToolUse\"].append(log_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s08 core: run compaction pipeline before LLM\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1 # retry limit for reactive compact\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n while True:\n # s08 change: three preprocessors (0 API calls, cheap first)\n # Order matches CC source: budget → snip → micro\n messages[:] = tool_result_budget(messages) # L3: persist large results first\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # s08 change: tokens still over threshold → LLM summary (1 API call)\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000)\n reactive_retries = 0 # reset on successful API call\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n # s08: compact tool triggers compact_history, not a no-op string\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n 
results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": \"[Compacted. Conversation history has been summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(blocked)})\n continue\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n else:\n # normal path: no compact was called\n messages.append({\"role\": \"user\", \"content\": results})\n continue\n # compact was called: results already appended above\n continue\n\n\nif __name__ == \"__main__\":\n print(\"s08: Context Compact — four-layer compaction pipeline\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns08_context_compact.py - Context Compact\n\nFour-layer compaction pipeline inserted before LLM calls:\n\n L1: snip_compact — trim middle messages when count > 50\n L2: micro_compact — replace old tool_results with placeholders\n L3: tool_result_budget — persist large results to disk\n L4: compact_history — LLM full summary (1 API call)\n\n Emergency: reactive_compact — when API still returns prompt_too_long\n\n ┌─────────────────────────────────────────────────────────────┐\n │ messages[] │\n │ ↓ │\n │ L3 budget ─→ L1 snip ─→ L2 micro ─→ [token > threshold?] │\n │ ├─ No → LLM │\n │ └─ Yes → L4 summary │\n │ ↓ │\n │ LLM call │\n │ [prompt_too_long?] │\n │ └─ Yes → reactive │\n └─────────────────────────────────────────────────────────────┘\n\nCore principle: cheap first, expensive last.\nExecution order matches CC source: budget → snip → micro → auto.\n\nBuilds on s07 (skill loading). Usage:\n\n python s08_context_compact/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport ast, json, os, subprocess, time\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nCURRENT_TODOS: list[dict] = []\n\n# s07: Skill catalog scan (inherited from s07)\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\ndef _scan_skills():\n if not SKILLS_DIR.exists():\n return\n for d in sorted(SKILLS_DIR.iterdir()):\n if not d.is_dir():\n continue\n manifest = d / \"SKILL.md\"\n if manifest.exists():\n raw = manifest.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", d.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\"name\": name, \"description\": desc, \"content\": raw}\n\n_scan_skills()\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(f\"- **{s['name']}**: {s['description']}\" for s in SKILL_REGISTRY.values())\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n return f\"Skill not found: {name}\"\n return skill[\"content\"]\n\n# s08: SYSTEM includes skill catalog (inherited from s07 build_system)\ndef build_system() -> str:\n catalog = list_skills()\n return (\n f\"You are a coding agent at {WORKDIR}. \"\n f\"Skills available:\\n{catalog}\\n\"\n \"Use load_skill to get full details when needed.\"\n )\n\nSYSTEM = build_system()\n\n# s08: subagent gets its own system prompt — no compact, no skill loading\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. 
\"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s07 (unchanged): Basic Tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = 
json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, t in enumerate(todos):\n if not isinstance(t, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in t or \"status\" not in t:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if t[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return None, f\"Error: todos[{i}] has invalid status '{t['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n lines = [\"\\n\\033[33m## Current Tasks\\033[0m\"]\n for t in CURRENT_TODOS:\n icon = {\"pending\": \" \", \"in_progress\": \"\\033[36m▸\\033[0m\", \"completed\": \"\\033[32m✓\\033[0m\"}[t[\"status\"]]\n lines.append(f\" [{icon}] {t['content']}\")\n print(\"\\n\".join(lines))\n return f\"Updated {len(CURRENT_TODOS)} tasks\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s06-s07 (unchanged): Subagent\n# ═══════════════════════════════════════════════════════════\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write 
content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob}\n\ndef spawn_subagent(description: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if 
not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result:\n break\n if not result:\n result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s08: Four-Layer Compaction Pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT = 3\nPERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef _block_type(block):\n return block.get(\"type\") if isinstance(block, dict) else getattr(block, \"type\", None)\n\n\ndef _message_has_tool_use(msg):\n if msg.get(\"role\") != \"assistant\":\n return False\n content = msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(_block_type(block) == \"tool_use\" for block in content)\n\n\ndef _is_tool_result_message(msg):\n if msg.get(\"role\") != \"user\":\n return False\n content = msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(isinstance(block, dict) and block.get(\"type\") == \"tool_result\"\n for block in content)\n\n\n# L1: snipCompact — trim middle messages\ndef snip_compact(messages, max_messages=50):\n if len(messages) <= max_messages: return messages\n keep_head, keep_tail = 3, max_messages - 3\n head_end, tail_start = keep_head, len(messages) - keep_tail\n if head_end > 0 and _message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and _is_tool_result_message(messages[head_end]):\n head_end += 1\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n if head_end >= tail_start:\n return messages\n snipped = tail_start - head_end\n return messages[:head_end] + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} 
messages]\"}] + messages[tail_start:]\n\n\n# L2: microCompact — old result placeholders\ndef collect_tool_results(messages):\n blocks = []\n for mi, msg in enumerate(messages):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(messages):\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT: return messages\n for _, _, block in tool_results[:-KEEP_RECENT]:\n if len(block.get(\"content\", \"\")) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n\n\n# L3: toolResultBudget — persist large results to disk\ndef persist_large_output(tool_use_id, output):\n if len(output) <= PERSIST_THRESHOLD: return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists(): path.write_text(output)\n return f\"\\nFull output: {path}\\nPreview:\\n{output[:2000]}\\n\"\n\ndef tool_result_budget(messages, max_bytes=200_000):\n last = messages[-1] if messages else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return messages\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes: return messages\n ranked = sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True)\n for _, block in ranked:\n if total <= max_bytes: break\n content = str(block.get(\"content\", \"\"))\n if len(content) <= PERSIST_THRESHOLD: continue\n tid = block.get(\"tool_use_id\", \"unknown\")\n block[\"content\"] = persist_large_output(tid, content)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, 
b in blocks)\n return messages\n\n\n# L4: autoCompact — LLM full summary\ndef write_transcript(messages):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages: f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\ndef summarize_history(messages):\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings/decisions, 3. files read/changed, \"\n \"4. remaining work, 5. user constraints.\\nBe compact but concrete.\\n\\n\" + conversation)\n response = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=2000)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in response.content\n if getattr(block, \"type\", None) == \"text\").strip() or \"(empty summary)\"\n\ndef compact_history(messages):\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\n# Emergency: reactiveCompact — on API error\ndef reactive_compact(messages):\n transcript = write_transcript(messages)\n tail_start = max(0, len(messages) - 5)\n if (tail_start > 0 and tail_start < len(messages)\n and _is_tool_result_message(messages[tail_start])\n and _message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n summary = summarize_history(messages[:tail_start])\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *messages[tail_start:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s07: Tool Definitions\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": 
{\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\", \"description\": \"Create and manage a task list for your current coding session.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"todos\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"content\", \"status\"]}}}, \"required\": [\"todos\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a complex subtask. 
Returns only the final conclusion.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n # s08 change: new compact tool — triggers compact_history, not a no-op\n {\"name\": \"compact\", \"description\": \"Summarize earlier conversation to free context space.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\"}}}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"todo_write\": run_todo_write,\n \"task\": spawn_subagent, \"load_skill\": load_skill,\n}\n\n# FROM s04 (unchanged): Hooks\nHOOKS = {\"PreToolUse\": [], \"PostToolUse\": []}\ndef trigger_hooks(event, *args):\n for cb in HOOKS[event]:\n r = cb(*args)\n if r is not None: return r\n return None\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\"]\ndef permission_hook(block):\n if block.name == \"bash\":\n for p in DENY_LIST:\n if p in block.input.get(\"command\", \"\"): return \"Permission denied\"\n return None\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\nHOOKS[\"PreToolUse\"].append(permission_hook)\nHOOKS[\"PreToolUse\"].append(log_hook)\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s08 core: run compaction pipeline before LLM\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1 # retry limit for reactive compact\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n while True:\n # s08 change: three preprocessors (0 API calls, cheap first)\n # Order matches CC source: budget → snip → micro\n messages[:] = tool_result_budget(messages) # L3: 
persist large results first\n messages[:] = snip_compact(messages) # L1: trim middle\n messages[:] = micro_compact(messages) # L2: old result placeholders\n\n # s08 change: tokens still over threshold → LLM summary (1 API call)\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n response = client.messages.create(model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000)\n reactive_retries = 0 # reset on successful API call\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n # s08: compact tool triggers compact_history, not a no-op string\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": \"[Compacted. 
Conversation history has been summarized.]\"})\n messages.append({\"role\": \"user\", \"content\": results})\n break # end current turn, start fresh with compacted context\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(blocked)})\n continue\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n else:\n # normal path: no compact was called\n messages.append({\"role\": \"user\", \"content\": results})\n continue\n # compact was called: results already appended above\n continue\n\n\nif __name__ == \"__main__\":\n print(\"s08: Context Compact — four-layer compaction pipeline\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
"images": [
{
"src": "/course-assets/s08_context_compact/auto-compact.svg",
@@ -859,7 +874,7 @@
"filename": "s09_memory/code.py",
"title": "Memory",
"subtitle": "Keep a Layer That Doesn't Lose Details",
- "loc": 498,
+ "loc": 528,
"tools": [
"bash",
"read_file",
@@ -931,101 +946,116 @@
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 360
+ "startLine": 358
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 365
+ "startLine": 363
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int | None = None)",
- "startLine": 372
+ "startLine": 370
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 379
+ "startLine": 377
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 385
+ "startLine": 383
},
{
"name": "run_glob",
"signature": "def run_glob(pattern: str)",
- "startLine": 394
+ "startLine": 392
},
{
"name": "extract_text",
"signature": "def extract_text(content)",
- "startLine": 404
+ "startLine": 402
},
{
"name": "spawn_subagent",
- "signature": "def spawn_subagent(task: str)",
- "startLine": 419
+ "signature": "def spawn_subagent(description: str)",
+ "startLine": 417
},
{
"name": "estimate_size",
"signature": "def estimate_size(msgs)",
+ "startLine": 450
+ },
+ {
+ "name": "_block_type",
+ "signature": "def _block_type(block)",
"startLine": 452
},
+ {
+ "name": "_message_has_tool_use",
+ "signature": "def _message_has_tool_use(msg)",
+ "startLine": 455
+ },
+ {
+ "name": "_is_tool_result_message",
+ "signature": "def _is_tool_result_message(msg)",
+ "startLine": 463
+ },
{
"name": "snip_compact",
"signature": "def snip_compact(msgs, mx=50)",
- "startLine": 454
+ "startLine": 471
},
{
"name": "collect_tool_results",
"signature": "def collect_tool_results(msgs)",
- "startLine": 458
+ "startLine": 485
},
{
"name": "micro_compact",
"signature": "def micro_compact(msgs)",
- "startLine": 466
+ "startLine": 493
},
{
"name": "persist_large",
"signature": "def persist_large(tid, out)",
- "startLine": 473
+ "startLine": 500
},
{
"name": "tool_result_budget",
"signature": "def tool_result_budget(msgs, mx=200_000)",
- "startLine": 480
+ "startLine": 507
},
{
"name": "write_transcript",
"signature": "def write_transcript(msgs)",
- "startLine": 494
+ "startLine": 521
},
{
"name": "summarize_history",
"signature": "def summarize_history(msgs)",
- "startLine": 501
+ "startLine": 528
},
{
"name": "compact_history",
"signature": "def compact_history(msgs)",
- "startLine": 509
+ "startLine": 536
},
{
"name": "reactive_compact",
"signature": "def reactive_compact(msgs)",
- "startLine": 514
+ "startLine": 541
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 551
+ "startLine": 583
}
],
"layer": "memory",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns09_memory.py - Memory System\n\nPersistent, cross-session knowledge for the coding agent.\n\nStorage:\n .memory/\n MEMORY.md ← index (one line per memory, ≤200 lines)\n feedback_tabs.md ← individual memory files (Markdown + YAML frontmatter)\n user_profile.md\n project_facts.md\n\nFlow in agent_loop:\n 1. Load MEMORY.md index into SYSTEM prompt (cheap, always present)\n 2. Select relevant memories by filename/description → inject content\n 3. Run compression pipeline from s08\n 4. After each turn ends → extract new memories from original messages\n 5. Periodically consolidate (Dream)\n\nBuilds on s08 (context compact). Usage:\n\n python s09_memory/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time, re\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"; MEMORY_DIR.mkdir(exist_ok=True)\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s09: Memory System\n# ═══════════════════════════════════════════════════════════\n\nMEMORY_TYPES = [\"user\", \"feedback\", \"project\", \"reference\"]\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef write_memory_file(name: str, mem_type: str, description: str, body: str):\n \"\"\"Write a single memory file with YAML frontmatter.\"\"\"\n slug = name.lower().replace(\" \", \"-\").replace(\"/\", \"-\")\n filename = f\"{slug}.md\"\n filepath = MEMORY_DIR / filename\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n return filepath\n\n\ndef _rebuild_index():\n \"\"\"Rebuild MEMORY.md index from all memory files.\"\"\"\n lines = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", f.stem)\n desc = meta.get(\"description\", body.split(\"\\n\")[0][:80])\n lines.append(f\"- [{name}]({f.name}) — {desc}\")\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\" if lines else \"\")\n\n\ndef read_memory_index() -> str:\n \"\"\"Read MEMORY.md index (injected into SYSTEM every turn).\"\"\"\n if not MEMORY_INDEX.exists():\n return \"\"\n text = MEMORY_INDEX.read_text().strip()\n return text if text else \"\"\n\n\ndef read_memory_file(filename: str) -> str | None:\n \"\"\"Read a single memory file's full content.\"\"\"\n path = MEMORY_DIR / filename\n if not path.exists():\n return None\n return path.read_text()\n\n\ndef list_memory_files() -> list[dict]:\n \"\"\"List all memory files with metadata.\"\"\"\n result = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n result.append({\n \"filename\": f.name,\n \"name\": meta.get(\"name\", f.stem),\n \"description\": meta.get(\"description\", \"\"),\n \"type\": meta.get(\"type\", \"user\"),\n \"body\": body,\n })\n return result\n\n\ndef 
select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:\n \"\"\"Select relevant memory filenames by matching recent conversation against\n memory names/descriptions. Uses a simple LLM call (or falls back to keyword\n matching on name+description).\"\"\"\n files = list_memory_files()\n if not files:\n return []\n\n # Collect recent user text for context\n recent_texts = []\n for msg in reversed(messages):\n if msg.get(\"role\") == \"user\":\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str):\n recent_texts.append(content)\n if len(recent_texts) >= 3:\n break\n recent = \" \".join(reversed(recent_texts))[:2000]\n\n if not recent.strip():\n return []\n\n # Build catalog of name + description for LLM to choose from\n catalog_lines = []\n for i, f in enumerate(files):\n catalog_lines.append(f\"{i}: {f['name']} — {f['description']}\")\n catalog = \"\\n\".join(catalog_lines)\n\n prompt = (\n \"Given the recent conversation and the memory catalog below, \"\n \"select the indices of memories that are clearly relevant. \"\n \"Return ONLY a JSON array of integers, e.g. [0, 3]. 
\"\n \"If none are relevant, return [].\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\n\"\n f\"Memory catalog:\\n{catalog}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=200,\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*?\\]', text, re.DOTALL)\n if match:\n indices = json.loads(match.group())\n selected = []\n for idx in indices:\n if isinstance(idx, int) and 0 <= idx < len(files):\n selected.append(files[idx][\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n except Exception:\n pass\n\n # Fallback: keyword matching on name + description\n keywords = [w.lower() for w in recent.split() if len(w) > 3]\n selected = []\n for f in files:\n text = (f[\"name\"] + \" \" + f[\"description\"]).lower()\n if any(kw in text for kw in keywords):\n selected.append(f[\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n\n\ndef load_memories(messages: list) -> str:\n \"\"\"Load relevant memory content for injection into context.\"\"\"\n selected_files = select_relevant_memories(messages)\n if not selected_files:\n return \"\"\n\n parts = [\"\"]\n for filename in selected_files:\n content = read_memory_file(filename)\n if content:\n parts.append(content)\n parts.append(\"\")\n return \"\\n\\n\".join(parts)\n\n\ndef extract_memories(messages: list):\n \"\"\"Extract new memories from recent dialogue. 
Runs after each turn.\"\"\"\n # Collect recent conversation text\n dialogue_parts = []\n for msg in messages[-10:]:\n role = msg.get(\"role\", \"?\")\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str) and content.strip():\n dialogue_parts.append(f\"{role}: {content}\")\n dialogue = \"\\n\".join(dialogue_parts)\n\n if not dialogue.strip():\n return\n\n # Check existing memories to avoid duplicates\n existing = list_memory_files()\n existing_desc = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in existing) if existing else \"(none)\"\n\n prompt = (\n \"Extract user preferences, constraints, or project facts from this dialogue.\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\"\n \"- name: short kebab-case identifier (e.g. 'user-preference-tabs')\\n\"\n \"- type: one of 'user' (user preference), 'feedback' (guidance), \"\n \"'project' (project fact), 'reference' (external pointer)\\n\"\n \"- description: one-line summary for index lookup\\n\"\n \"- body: full detail in markdown\\n\"\n \"If nothing new or already covered by existing memories, return [].\\n\\n\"\n f\"Existing memories:\\n{existing_desc}\\n\\n\"\n f\"Dialogue:\\n{dialogue[:4000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=800\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n if not items:\n return\n count = 0\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, 
body)\n count += 1\n if count:\n print(f\"\\n\\033[33m[Memory: extracted {count} new memories]\\033[0m\")\n except Exception:\n pass\n\n\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n \"\"\"Merge duplicate/stale memories. Triggered when file count ≥ threshold.\"\"\"\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return\n\n catalog = \"\\n\\n\".join(\n f\"## {f['filename']}\\nname: {f['name']}\\ndescription: {f['description']}\\n{f['body']}\"\n for f in files\n )\n\n prompt = (\n \"Consolidate the following memory files. Rules:\\n\"\n \"1. Merge duplicates into one\\n\"\n \"2. Remove outdated/contradicted memories\\n\"\n \"3. Keep the total under 30 memories\\n\"\n \"4. Preserve important user preferences above all\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\\n\"\n f\"{catalog[:16000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=3000\n )\n text = extract_text(response.content).strip()\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n\n # Remove old memory files (keep MEMORY.md)\n for f in MEMORY_DIR.glob(\"*.md\"):\n if f.name != \"MEMORY.md\":\n f.unlink()\n\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, body)\n\n print(f\"\\n\\033[33m[Memory: consolidated {len(files)} → {len(items)} memories]\\033[0m\")\n except Exception:\n pass\n\n\n# Build SYSTEM with memory index\ndef build_system() -> str:\n index = read_memory_index()\n memories_section = f\"\\n\\nMemories available:\\n{index}\" if index else \"\"\n return (\n f\"You are a coding agent at {WORKDIR}.\"\n f\"{memories_section}\\n\"\n \"Relevant memories are injected below. 
Respect user preferences from memory.\\n\"\n \"When the user says 'remember' or expresses a clear preference, extract it as a memory.\"\n )\n\nSYSTEM = build_system()\n\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s08 (skeleton): Basic tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n# Subagent (simplified from s06-s07)\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": 
run_read, \"write_file\": run_write}\n\ndef spawn_subagent(task: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": task}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result: break\n if not result: result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s08 (skeleton): Compaction pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef snip_compact(msgs, mx=50):\n if len(msgs) <= mx: return msgs\n return msgs[:3] + [{\"role\": \"user\", \"content\": f\"[snipped {len(msgs)-mx} msgs]\"}] + msgs[-(mx-3):]\n\ndef collect_tool_results(msgs):\n blocks = []\n for mi, msg in enumerate(msgs):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\": 
blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(msgs):\n tr = collect_tool_results(msgs)\n if len(tr) <= KEEP_RECENT: return msgs\n for _, _, b in tr[:-KEEP_RECENT]:\n if len(b.get(\"content\", \"\")) > 120: b[\"content\"] = \"[Earlier tool result compacted.]\"\n return msgs\n\ndef persist_large(tid, out):\n if len(out) <= PERSIST_THRESHOLD: return out\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n p = TOOL_RESULTS_DIR / f\"{tid}.txt\"\n if not p.exists(): p.write_text(out)\n return f\"\\nFull: {p}\\nPreview:\\n{out[:2000]}\\n\"\n\ndef tool_result_budget(msgs, mx=200_000):\n last = msgs[-1] if msgs else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return msgs\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= mx: return msgs\n for _, block in sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True):\n if total <= mx: break\n c = str(block.get(\"content\", \"\"))\n if len(c) <= PERSIST_THRESHOLD: continue\n block[\"content\"] = persist_large(block.get(\"tool_use_id\", \"?\"), c)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return msgs\n\ndef write_transcript(msgs):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n p = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with p.open(\"w\") as f:\n for m in msgs: f.write(json.dumps(m, default=str) + \"\\n\")\n return p\n\ndef summarize_history(msgs):\n conv = json.dumps(msgs, default=str)[:80000]\n r = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\":\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. 
user constraints.\\n\\n\" + conv}],\n max_tokens=2000)\n return extract_text(r.content).strip()\n\ndef compact_history(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\ndef reactive_compact(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *msgs[-5:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Definitions (skeleton — fewer tools to focus on memory)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a subtask.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": 
\"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"task\": spawn_subagent,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s09: inject memories + extract after each turn\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n # s09: inject relevant memory content into the current user turn\n memories_content = load_memories(messages)\n memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get(\"content\"), str) else None\n while True:\n # s09: rebuild system with current memory index\n system = build_system()\n\n # s09: save pre-compression snapshot for accurate memory extraction\n pre_compress = [m if isinstance(m, dict) else {\"role\": m.get(\"role\",\"\"),\n \"content\": str(m.get(\"content\",\"\"))} for m in messages]\n\n # s08: compression pipeline (budget → snip → micro)\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n request_messages = messages\n if memories_content and memory_turn is not None and memory_turn < len(messages):\n request_messages = messages.copy()\n request_messages[memory_turn] = {\n **messages[memory_turn],\n \"content\": memories_content + \"\\n\\n\" + messages[memory_turn][\"content\"],\n }\n response = client.messages.create(\n model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000\n )\n reactive_retries = 0\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = 
reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # s09: extract from pre-compression snapshot for full fidelity\n extract_memories(pre_compress)\n consolidate_memories()\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s09: Memory — persistent cross-session knowledge\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns09_memory.py - Memory System\n\nPersistent, cross-session knowledge for the coding agent.\n\nStorage:\n .memory/\n MEMORY.md ← index (one line per memory, ≤200 lines)\n feedback_tabs.md ← individual memory files (Markdown + YAML frontmatter)\n user_profile.md\n project_facts.md\n\nFlow in agent_loop:\n 1. Load MEMORY.md index into SYSTEM prompt (cheap, always present)\n 2. Select relevant memories by filename/description → inject content\n 3. Run compression pipeline from s08\n 4. After each turn ends → extract new memories from original messages\n 5. Periodically consolidate (Dream)\n\nBuilds on s08 (context compact). Usage:\n\n python s09_memory/code.py\n Needs: pip install anthropic python-dotenv + ANTHROPIC_API_KEY in .env\n\"\"\"\n\nimport os, subprocess, json, time, re\nfrom pathlib import Path\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"): os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"; MEMORY_DIR.mkdir(exist_ok=True)\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\n# ═══════════════════════════════════════════════════════════\n# NEW in s09: Memory System\n# ═══════════════════════════════════════════════════════════\n\nMEMORY_TYPES = [\"user\", \"feedback\", \"project\", \"reference\"]\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n meta = {}\n for line in 
parts[1].strip().splitlines():\n if \":\" in line:\n k, v = line.split(\":\", 1)\n meta[k.strip()] = v.strip().strip('\"').strip(\"'\")\n return meta, parts[2].strip()\n\n\ndef write_memory_file(name: str, mem_type: str, description: str, body: str):\n \"\"\"Write a single memory file with YAML frontmatter.\"\"\"\n slug = name.lower().replace(\" \", \"-\").replace(\"/\", \"-\")\n filename = f\"{slug}.md\"\n filepath = MEMORY_DIR / filename\n filepath.write_text(\n f\"---\\nname: {name}\\ndescription: {description}\\ntype: {mem_type}\\n---\\n\\n{body}\\n\"\n )\n _rebuild_index()\n return filepath\n\n\ndef _rebuild_index():\n \"\"\"Rebuild MEMORY.md index from all memory files.\"\"\"\n lines = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n name = meta.get(\"name\", f.stem)\n desc = meta.get(\"description\", body.split(\"\\n\")[0][:80])\n lines.append(f\"- [{name}]({f.name}) — {desc}\")\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\" if lines else \"\")\n\n\ndef read_memory_index() -> str:\n \"\"\"Read MEMORY.md index (injected into SYSTEM every turn).\"\"\"\n if not MEMORY_INDEX.exists():\n return \"\"\n text = MEMORY_INDEX.read_text().strip()\n return text if text else \"\"\n\n\ndef read_memory_file(filename: str) -> str | None:\n \"\"\"Read a single memory file's full content.\"\"\"\n path = MEMORY_DIR / filename\n if not path.exists():\n return None\n return path.read_text()\n\n\ndef list_memory_files() -> list[dict]:\n \"\"\"List all memory files with metadata.\"\"\"\n result = []\n for f in sorted(MEMORY_DIR.glob(\"*.md\")):\n if f.name == \"MEMORY.md\":\n continue\n raw = f.read_text()\n meta, body = _parse_frontmatter(raw)\n result.append({\n \"filename\": f.name,\n \"name\": meta.get(\"name\", f.stem),\n \"description\": meta.get(\"description\", \"\"),\n \"type\": meta.get(\"type\", \"user\"),\n \"body\": body,\n })\n return result\n\n\ndef 
select_relevant_memories(messages: list, max_items: int = 5) -> list[str]:\n \"\"\"Select relevant memory filenames by matching recent conversation against\n memory names/descriptions. Uses a simple LLM call (or falls back to keyword\n matching on name+description).\"\"\"\n files = list_memory_files()\n if not files:\n return []\n\n # Collect recent user text for context\n recent_texts = []\n for msg in reversed(messages):\n if msg.get(\"role\") == \"user\":\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str):\n recent_texts.append(content)\n if len(recent_texts) >= 3:\n break\n recent = \" \".join(reversed(recent_texts))[:2000]\n\n if not recent.strip():\n return []\n\n # Build catalog of name + description for LLM to choose from\n catalog_lines = []\n for i, f in enumerate(files):\n catalog_lines.append(f\"{i}: {f['name']} — {f['description']}\")\n catalog = \"\\n\".join(catalog_lines)\n\n prompt = (\n \"Given the recent conversation and the memory catalog below, \"\n \"select the indices of memories that are clearly relevant. \"\n \"Return ONLY a JSON array of integers, e.g. [0, 3]. 
\"\n \"If none are relevant, return [].\\n\\n\"\n f\"Recent conversation:\\n{recent}\\n\\n\"\n f\"Memory catalog:\\n{catalog}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=200,\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*?\\]', text, re.DOTALL)\n if match:\n indices = json.loads(match.group())\n selected = []\n for idx in indices:\n if isinstance(idx, int) and 0 <= idx < len(files):\n selected.append(files[idx][\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n except Exception:\n pass\n\n # Fallback: keyword matching on name + description\n keywords = [w.lower() for w in recent.split() if len(w) > 3]\n selected = []\n for f in files:\n text = (f[\"name\"] + \" \" + f[\"description\"]).lower()\n if any(kw in text for kw in keywords):\n selected.append(f[\"filename\"])\n if len(selected) >= max_items:\n break\n return selected\n\n\ndef load_memories(messages: list) -> str:\n \"\"\"Load relevant memory content for injection into context.\"\"\"\n selected_files = select_relevant_memories(messages)\n if not selected_files:\n return \"\"\n\n parts = [\"\"]\n for filename in selected_files:\n content = read_memory_file(filename)\n if content:\n parts.append(content)\n parts.append(\"\")\n return \"\\n\\n\".join(parts)\n\n\ndef extract_memories(messages: list):\n \"\"\"Extract new memories from recent dialogue. 
Runs after each turn.\"\"\"\n # Collect recent conversation text\n dialogue_parts = []\n for msg in messages[-10:]:\n role = msg.get(\"role\", \"?\")\n content = msg.get(\"content\", \"\")\n if isinstance(content, list):\n content = \" \".join(\n str(getattr(b, \"text\", \"\")) for b in content\n if getattr(b, \"type\", None) == \"text\"\n )\n if isinstance(content, str) and content.strip():\n dialogue_parts.append(f\"{role}: {content}\")\n dialogue = \"\\n\".join(dialogue_parts)\n\n if not dialogue.strip():\n return\n\n # Check existing memories to avoid duplicates\n existing = list_memory_files()\n existing_desc = \"\\n\".join(f\"- {m['name']}: {m['description']}\" for m in existing) if existing else \"(none)\"\n\n prompt = (\n \"Extract user preferences, constraints, or project facts from this dialogue.\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\"\n \"- name: short kebab-case identifier (e.g. 'user-preference-tabs')\\n\"\n \"- type: one of 'user' (user preference), 'feedback' (guidance), \"\n \"'project' (project fact), 'reference' (external pointer)\\n\"\n \"- description: one-line summary for index lookup\\n\"\n \"- body: full detail in markdown\\n\"\n \"If nothing new or already covered by existing memories, return [].\\n\\n\"\n f\"Existing memories:\\n{existing_desc}\\n\\n\"\n f\"Dialogue:\\n{dialogue[:4000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=800\n )\n text = extract_text(response.content).strip()\n # Extract JSON array from response\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n if not items:\n return\n count = 0\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, 
body)\n count += 1\n if count:\n print(f\"\\n\\033[33m[Memory: extracted {count} new memories]\\033[0m\")\n except Exception:\n pass\n\n\nCONSOLIDATE_THRESHOLD = 10\n\ndef consolidate_memories():\n \"\"\"Merge duplicate/stale memories. Triggered when file count ≥ threshold.\"\"\"\n files = list_memory_files()\n if len(files) < CONSOLIDATE_THRESHOLD:\n return\n\n catalog = \"\\n\\n\".join(\n f\"## {f['filename']}\\nname: {f['name']}\\ndescription: {f['description']}\\n{f['body']}\"\n for f in files\n )\n\n prompt = (\n \"Consolidate the following memory files. Rules:\\n\"\n \"1. Merge duplicates into one\\n\"\n \"2. Remove outdated/contradicted memories\\n\"\n \"3. Keep the total under 30 memories\\n\"\n \"4. Preserve important user preferences above all\\n\"\n \"Return a JSON array. Each item: {name, type, description, body}.\\n\\n\"\n f\"{catalog[:16000]}\"\n )\n\n try:\n response = client.messages.create(\n model=MODEL, messages=[{\"role\": \"user\", \"content\": prompt}], max_tokens=3000\n )\n text = extract_text(response.content).strip()\n match = re.search(r'\\[.*\\]', text, re.DOTALL)\n if not match:\n return\n items = json.loads(match.group())\n\n # Remove old memory files (keep MEMORY.md)\n for f in MEMORY_DIR.glob(\"*.md\"):\n if f.name != \"MEMORY.md\":\n f.unlink()\n\n for mem in items:\n name = mem.get(\"name\", f\"memory_{int(time.time())}\")\n mem_type = mem.get(\"type\", \"user\")\n desc = mem.get(\"description\", \"\")\n body = mem.get(\"body\", \"\")\n if desc and body:\n write_memory_file(name, mem_type, desc, body)\n\n print(f\"\\n\\033[33m[Memory: consolidated {len(files)} → {len(items)} memories]\\033[0m\")\n except Exception:\n pass\n\n\n# Build SYSTEM with memory index\ndef build_system() -> str:\n index = read_memory_index()\n memories_section = f\"\\n\\nMemories available:\\n{index}\" if index else \"\"\n return (\n f\"You are a coding agent at {WORKDIR}.\"\n f\"{memories_section}\\n\"\n \"Relevant memories are injected below. 
Respect user preferences from memory.\\n\"\n \"When the user says 'remember' or expresses a clear preference, extract it as a memory.\"\n )\n\nSUB_SYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Complete the task you were given, then return a concise summary. \"\n \"Do not delegate further.\"\n)\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s02-s08 (skeleton): Basic tools\n# ═══════════════════════════════════════════════════════════\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR): raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired: return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines): lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e: return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path); file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content); return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n text = file_path.read_text()\n if old_text not in text: return f\"Error: text not found in {path}\"\n file_path.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e: return f\"Error: {e}\"\n\ndef run_glob(pattern: str) -> str:\n import glob as g\n try:\n results = []\n for match in g.glob(pattern, root_dir=WORKDIR):\n if (WORKDIR / match).resolve().is_relative_to(WORKDIR):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e: return f\"Error: {e}\"\n\ndef extract_text(content) -> str:\n if not isinstance(content, list): return str(content)\n return \"\\n\".join(getattr(b, \"text\", \"\") for b in content if getattr(b, \"type\", None) == \"text\")\n\n# Subagent (simplified from s06-s07)\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n]\nSUB_HANDLERS = {\"bash\": run_bash, \"read_file\": 
run_read, \"write_file\": run_write}\n\ndef spawn_subagent(description: str) -> str:\n print(f\"\\n\\033[35m[Subagent spawned]\\033[0m\")\n messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(model=MODEL, system=SUB_SYSTEM,\n messages=messages, tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\": break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = SUB_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(f\" \\033[90m[sub] {block.name}: {str(output)[:100]}\\033[0m\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n result = extract_text(messages[-1][\"content\"])\n if not result:\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n result = extract_text(msg[\"content\"])\n if result: break\n if not result: result = \"Subagent stopped after 30 turns without final answer.\"\n print(f\"\\033[35m[Subagent done]\\033[0m\")\n return result\n\n\n# ═══════════════════════════════════════════════════════════\n# FROM s08 (skeleton): Compaction pipeline\n# ═══════════════════════════════════════════════════════════\n\nCONTEXT_LIMIT = 50000; KEEP_RECENT = 3; PERSIST_THRESHOLD = 30000\n\ndef estimate_size(msgs): return len(str(msgs))\n\ndef _block_type(block):\n return block.get(\"type\") if isinstance(block, dict) else getattr(block, \"type\", None)\n\ndef _message_has_tool_use(msg):\n if msg.get(\"role\") != \"assistant\":\n return False\n content = msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(_block_type(block) == \"tool_use\" for block in content)\n\ndef _is_tool_result_message(msg):\n if msg.get(\"role\") != \"user\":\n return False\n content = 
msg.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(isinstance(block, dict) and block.get(\"type\") == \"tool_result\" for block in content)\n\ndef snip_compact(msgs, mx=50):\n if len(msgs) <= mx: return msgs\n head_end, tail_start = 3, len(msgs) - (mx - 3)\n if head_end > 0 and _message_has_tool_use(msgs[head_end - 1]):\n while head_end < len(msgs) and _is_tool_result_message(msgs[head_end]):\n head_end += 1\n if (tail_start > 0 and tail_start < len(msgs)\n and _is_tool_result_message(msgs[tail_start])\n and _message_has_tool_use(msgs[tail_start - 1])):\n tail_start -= 1\n if head_end >= tail_start:\n return msgs\n return msgs[:head_end] + [{\"role\": \"user\", \"content\": f\"[snipped {tail_start - head_end} msgs]\"}] + msgs[tail_start:]\n\ndef collect_tool_results(msgs):\n blocks = []\n for mi, msg in enumerate(msgs):\n if msg.get(\"role\") != \"user\" or not isinstance(msg.get(\"content\"), list): continue\n for bi, block in enumerate(msg[\"content\"]):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\": blocks.append((mi, bi, block))\n return blocks\n\ndef micro_compact(msgs):\n tr = collect_tool_results(msgs)\n if len(tr) <= KEEP_RECENT: return msgs\n for _, _, b in tr[:-KEEP_RECENT]:\n if len(b.get(\"content\", \"\")) > 120: b[\"content\"] = \"[Earlier tool result compacted.]\"\n return msgs\n\ndef persist_large(tid, out):\n if len(out) <= PERSIST_THRESHOLD: return out\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n p = TOOL_RESULTS_DIR / f\"{tid}.txt\"\n if not p.exists(): p.write_text(out)\n return f\"\\nFull: {p}\\nPreview:\\n{out[:2000]}\\n\"\n\ndef tool_result_budget(msgs, mx=200_000):\n last = msgs[-1] if msgs else None\n if not last or last.get(\"role\") != \"user\" or not isinstance(last.get(\"content\"), list): return msgs\n blocks = [(i, b) for i, b in enumerate(last[\"content\"]) if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", 
\"\"))) for _, b in blocks)\n if total <= mx: return msgs\n for _, block in sorted(blocks, key=lambda p: len(str(p[1].get(\"content\", \"\"))), reverse=True):\n if total <= mx: break\n c = str(block.get(\"content\", \"\"))\n if len(c) <= PERSIST_THRESHOLD: continue\n block[\"content\"] = persist_large(block.get(\"tool_use_id\", \"?\"), c)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return msgs\n\ndef write_transcript(msgs):\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n p = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with p.open(\"w\") as f:\n for m in msgs: f.write(json.dumps(m, default=str) + \"\\n\")\n return p\n\ndef summarize_history(msgs):\n conv = json.dumps(msgs, default=str)[:80000]\n r = client.messages.create(model=MODEL, messages=[{\"role\": \"user\", \"content\":\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve: 1. current goal, 2. key findings, 3. files changed, 4. remaining work, 5. user constraints.\\n\\n\" + conv}],\n max_tokens=2000)\n return extract_text(r.content).strip()\n\ndef compact_history(msgs):\n write_transcript(msgs)\n summary = summarize_history(msgs)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\ndef reactive_compact(msgs):\n write_transcript(msgs)\n tail_start = max(0, len(msgs) - 5)\n if (tail_start > 0 and tail_start < len(msgs)\n and _is_tool_result_message(msgs[tail_start])\n and _message_has_tool_use(msgs[tail_start - 1])):\n tail_start -= 1\n summary = summarize_history(msgs[:tail_start])\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"}, *msgs[tail_start:]]\n\n\n# ═══════════════════════════════════════════════════════════\n# Tool Definitions (skeleton — fewer tools to focus on memory)\n# ═══════════════════════════════════════════════════════════\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", 
\"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"pattern\": {\"type\": \"string\"}}, \"required\": [\"pattern\"]}},\n {\"name\": \"task\", \"description\": \"Launch a subagent to handle a subtask.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"description\": {\"type\": \"string\"}}, \"required\": [\"description\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob, \"task\": spawn_subagent,\n}\n\n\n# ═══════════════════════════════════════════════════════════\n# agent_loop — s09: inject memories + extract after each turn\n# ═══════════════════════════════════════════════════════════\n\nMAX_REACTIVE_RETRIES = 1\n\ndef agent_loop(messages: list):\n reactive_retries = 0\n # s09: inject relevant memory content into the current user turn\n memories_content = load_memories(messages)\n memory_turn = len(messages) - 1 if messages and isinstance(messages[-1].get(\"content\"), str) else None\n # s09: build system once per user turn; memory is 
updated after the loop returns\n system = build_system()\n\n while True:\n # s09: save pre-compression snapshot for accurate memory extraction\n pre_compress = [m if isinstance(m, dict) else {\"role\": m.get(\"role\",\"\"),\n \"content\": str(m.get(\"content\",\"\"))} for m in messages]\n\n # s08: compression pipeline (budget → snip → micro)\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n\n if estimate_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages)\n\n try:\n request_messages = messages\n if memories_content and memory_turn is not None and memory_turn < len(messages):\n request_messages = messages.copy()\n request_messages[memory_turn] = {\n **messages[memory_turn],\n \"content\": memories_content + \"\\n\\n\" + messages[memory_turn][\"content\"],\n }\n response = client.messages.create(\n model=MODEL, system=system, messages=request_messages, tools=TOOLS, max_tokens=8000\n )\n reactive_retries = 0\n except Exception as e:\n if (\"prompt_too_long\" in str(e).lower() or \"too many tokens\" in str(e).lower()) and reactive_retries < MAX_REACTIVE_RETRIES:\n print(\"[reactive compact]\")\n messages[:] = reactive_compact(messages)\n reactive_retries += 1\n continue\n raise\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # s09: extract from pre-compression snapshot for full fidelity\n extract_memories(pre_compress)\n consolidate_memories()\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\": continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", 
\"content\": results})\n\n\nif __name__ == \"__main__\":\n print(\"s09: Memory — persistent cross-session knowledge\")\n print(\"输入问题,回车发送。输入 q 退出。\\n\")\n history = []\n while True:\n try: query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt): break\n if query.strip().lower() in (\"q\", \"exit\", \"\"): break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\": print(block.text)\n print()\n",
"images": [
{
"src": "/course-assets/s09_memory/memory-overview.svg",
@@ -1200,7 +1230,7 @@
"filename": "s12_task_system/code.py",
"title": "Task System",
"subtitle": "Break Big Goals into Small Tasks",
- "loc": 297,
+ "loc": 299,
"tools": [
"bash",
"read_file",
@@ -1330,7 +1360,7 @@
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns12: Task System — file-persisted task graph with blockedBy dependencies.\n\nRun: python s12_task_system/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s11:\n - Task dataclass (id, subject, description, status, owner, blockedBy)\n - TASKS_DIR = .tasks/ for persistent JSON storage\n - create_task / save_task / load_task / list_tasks / get_task\n - can_start: checks blockedBy all completed (missing deps = blocked)\n - claim_task: set owner + pending -> in_progress\n - complete_task: set completed + report unblocked downstream\n - 5 new tools: create_task, list_tasks, get_task, claim_task, complete_task\n\nNote: Teaching code keeps a basic agent loop to stay focused on the task\nsystem. S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted — in real CC, tasks.ts and\nwithRetry are independent layers that compose naturally.\n\"\"\"\n\nimport os, subprocess, json, time, random\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None # Agent name (multi-agent scenarios)\n blockedBy: list[str] # Dependency task IDs\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / 
f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject,\n description=description,\n status=\"pending\",\n owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = 
[t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = 
None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n 
\"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. 
Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on task system) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s12: task system\")\n 
print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns12: Task System — file-persisted task graph with blockedBy dependencies.\n\nRun: python s12_task_system/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s11:\n - Task dataclass (id, subject, description, status, owner, blockedBy)\n - TASKS_DIR = .tasks/ for persistent JSON storage\n - create_task / save_task / load_task / list_tasks / get_task\n - can_start: checks blockedBy all completed (missing deps = blocked)\n - claim_task: set owner + pending -> in_progress\n - complete_task: set completed + report unblocked downstream\n - 5 new tools: create_task, list_tasks, get_task, claim_task, complete_task\n\nNote: Teaching code keeps a basic agent loop to stay focused on the task\nsystem. S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted — in real CC, tasks.ts and\nwithRetry are independent layers that compose naturally.\n\"\"\"\n\nimport os, subprocess, json, time, random\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None # Agent name (multi-agent scenarios)\n blockedBy: list[str] # Dependency task IDs\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / 
f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject,\n description=description,\n status=\"pending\",\n owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = 
[t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = 
None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n 
\"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. 
Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on task system) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown: {block.name}\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s12: task system\")\n 
print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n print()\n",
"images": [
{
"src": "/course-assets/s12_task_system/task-dag.svg",
@@ -1347,7 +1377,7 @@
"filename": "s13_background_tasks/code.py",
"title": "Background Tasks",
"subtitle": "Slow Operations Go to the Background",
- "loc": 379,
+ "loc": 381,
"tools": [
"bash",
"read_file",
@@ -1496,7 +1526,7 @@
}
],
"layer": "concurrency",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns13: Background Tasks — thread-based async execution + notification injection.\n\nRun: python s13_background_tasks/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s12:\n - threading.Thread for background execution\n - background_tasks dict for lifecycle tracking (bg_id, command, status)\n - background_results dict + threading.Lock for thread-safe storage\n - should_run_background: model explicit request via run_in_background param\n - is_slow_operation: fallback heuristic when model doesn't specify\n - start_background_task: dispatch to daemon thread, return bg task id\n - collect_background_results: gather completed, return as notifications\n - agent_loop: slow ops → background + placeholder, inject notifications\n - Notifications use format, not reused tool_use_id\n\nNote: Teaching code keeps a basic agent loop to stay focused on background\ntasks. S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted.\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef 
_task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = 
\"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if 
out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. 
Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", 
\"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Background Tasks (s13 new) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef 
execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = TOOL_HANDLERS.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": 
str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on background tasks) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Command: {block.input.get('command', '')}. \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Inject tool results + background notifications in one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n print(f\" \\033[32m[inject] {len(bg_notifications)} background \"\n f\"notification(s)\\033[0m\")\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s13: background tasks\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns13: Background Tasks — thread-based async execution + notification injection.\n\nRun: python s13_background_tasks/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s12:\n - threading.Thread for background execution\n - background_tasks dict for lifecycle tracking (bg_id, command, status)\n - background_results dict + threading.Lock for thread-safe storage\n - should_run_background: model explicit request via run_in_background param\n - is_slow_operation: fallback heuristic when model doesn't specify\n - start_background_task: dispatch to daemon thread, return bg task id\n - collect_background_results: gather completed, return as notifications\n - agent_loop: slow ops → background + placeholder, inject notifications\n - Notifications use format, not reused tool_use_id\n\nNote: Teaching code keeps a basic agent loop to stay focused on background\ntasks. S11's full error recovery (RecoveryState, backoff, escalation,\nreactive compact, fallback model) is omitted.\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef 
_task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = 
\"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if 
out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. 
Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", 
\"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n}\n\n\n# ── Background Tasks (s13 new) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {} # bg_id → {tool_use_id, command, status}\nbackground_results: dict[str, str] = {} # bg_id → output\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef 
execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = TOOL_HANDLERS.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n thread = threading.Thread(target=worker, daemon=True)\n thread.start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": list(TOOL_HANDLERS.keys()),\n \"workspace\": 
str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop (simplified, focused on background tasks) ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Command: {block.input.get('command', '')}. \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Inject tool results + background notifications in one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n print(f\" \\033[32m[inject] {len(bg_notifications)} background \"\n f\"notification(s)\\033[0m\")\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s13: background tasks\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n print()\n",
"images": [
{
"src": "/course-assets/s13_background_tasks/background-tasks-overview.svg",
@@ -1758,7 +1788,7 @@
"filename": "s15_agent_teams/code.py",
"title": "Agent Teams",
"subtitle": "One Agent Isn't Enough, Form a Team",
- "loc": 745,
+ "loc": 786,
"tools": [
"bash",
"read_file",
@@ -1790,13 +1820,13 @@
},
{
"name": "CronJob",
- "startLine": 353,
- "endLine": 360
+ "startLine": 360,
+ "endLine": 367
},
{
"name": "MessageBus",
- "startLine": 595,
- "endLine": 620
+ "startLine": 602,
+ "endLine": 634
}
],
"functions": [
@@ -1915,94 +1945,99 @@
"signature": "def collect_background_results()",
"startLine": 324
},
+ {
+ "name": "has_pending_background",
+ "signature": "def has_pending_background()",
+ "startLine": 347
+ },
{
"name": "_cron_field_matches",
"signature": "def _cron_field_matches(field: str, value: int)",
- "startLine": 367
+ "startLine": 374
},
{
"name": "cron_matches",
"signature": "def cron_matches(cron_expr: str, dt: datetime)",
- "startLine": 383
+ "startLine": 390
},
{
"name": "_validate_cron_field",
"signature": "def _validate_cron_field(field: str, lo: int, hi: int)",
- "startLine": 413
+ "startLine": 420
},
{
"name": "validate_cron",
"signature": "def validate_cron(cron_expr: str)",
- "startLine": 448
+ "startLine": 455
},
{
"name": "save_durable_jobs",
"signature": "def save_durable_jobs()",
- "startLine": 462
+ "startLine": 469
},
{
"name": "load_durable_jobs",
"signature": "def load_durable_jobs()",
- "startLine": 468
+ "startLine": 475
},
{
"name": "cancel_job",
"signature": "def cancel_job(job_id: str)",
- "startLine": 507
+ "startLine": 514
},
{
"name": "cron_scheduler_loop",
"signature": "def cron_scheduler_loop()",
- "startLine": 519
+ "startLine": 526
},
{
"name": "consume_cron_queue",
"signature": "def consume_cron_queue()",
- "startLine": 545
+ "startLine": 552
},
{
"name": "run_list_crons",
"signature": "def run_list_crons()",
- "startLine": 569
+ "startLine": 576
},
{
"name": "run_cancel_cron",
"signature": "def run_cancel_cron(job_id: str)",
- "startLine": 583
+ "startLine": 590
},
{
"name": "spawn_teammate_thread",
"signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)",
- "startLine": 629
+ "startLine": 643
},
{
"name": "run_spawn_teammate",
"signature": "def run_spawn_teammate(name: str, role: str, prompt: str)",
- "startLine": 717
+ "startLine": 731
},
{
"name": "run_send_message",
"signature": "def run_send_message(to: str, content: str)",
- "startLine": 721
+ "startLine": 735
},
{
"name": "run_check_inbox",
"signature": "def run_check_inbox()",
- "startLine": 726
+ "startLine": 740
},
{
"name": "update_context",
"signature": "def update_context(context: dict, messages: list)",
- "startLine": 828
+ "startLine": 842
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list, context: dict)",
- "startLine": 847
+ "startLine": 861
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns15: Agent Teams — MessageBus + spawn_teammate_thread + inbox injection.\n\nRun: python s15_agent_teams/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s14:\n - MessageBus class: file-based mailboxes (.mailboxes/*.jsonl)\n - spawn_teammate_thread: creates teammate in background thread\n - Teammate runs own simplified agent_loop (bash, read, write, send_message)\n - Lead tools: spawn_teammate, send_message, check_inbox (3 new)\n - Lead inbox: teammate messages injected into history (not just printed)\n - Teaching version: teammates limited to 10 rounds (real CC uses idle loop)\n\nASCII flow:\n Lead: cron_queue → messages → prompt → LLM → TOOLS ────→ loop\n ↑ ↓ |\n └── inbox ← MessageBus ← teammate.send_message ←┘\n Teammate: inbox → LLM → bash/read/write/send → loop (max 10 turns)\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None 
= None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" 
\\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except 
subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. 
Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": 
run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── Cron Scheduler (from s14, synced) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = 
one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: 
{field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" 
\\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n 
return fired\n\n\n# Load durable jobs on startup, then start scheduler thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# Cron tool handlers\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── MessageBus (s15 new) ──\n# Teaching version uses simple file append + unlink.\n# Real CC uses proper-lockfile for concurrent write safety.\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. 
Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"{content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\n\n# Track spawned teammates\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Teammate Thread (s15 new) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Teaching version: max 10 rounds per teammate.\n Real CC: teammates use idle loop (wait for inbox, work, repeat)\n until shutdown_request.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"Send results via send_message to 'lead'.\")\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n }\n\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": 
\"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\n# ── Team Tool Handlers (s15 new) ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n lines.append(f\" [{m['from']}] {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n 
\"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox for teammate messages.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n 
\"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n# Teaching code keeps a basic agent loop. S11's full error recovery is omitted.\n# Cron queue is consumed when agent_loop is called; real CC auto-wakes via\n# queue processor (useQueueProcessor.ts) when items arrive.\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n # Consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background tool results + notifications into one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n messages.append({\"role\": \"user\", \"content\": user_content})\n 
context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s15: agent teams\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox for teammate results → inject into history\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox)} messages injected]\\033[0m\")\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns15: Agent Teams — MessageBus + spawn_teammate_thread + inbox injection.\n\nRun: python s15_agent_teams/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s14:\n - MessageBus class: file-based mailboxes (.mailboxes/*.jsonl)\n - spawn_teammate_thread: creates teammate in background thread\n - Teammate runs own simplified agent_loop (bash, read, write, send_message)\n - Lead tools: spawn_teammate, send_message, check_inbox (3 new)\n - Lead inbox: teammate messages injected into history (not just printed)\n - Teaching version: teammates limited to 10 rounds (real CC uses idle loop)\n\nASCII flow:\n Lead: cron_queue → messages → prompt → LLM → TOOLS ────→ loop\n ↑ ↓ |\n └── inbox ← MessageBus ← teammate.send_message ←┘\n Teammate: inbox → LLM → bash/read/write/send → loop (max 10 turns)\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, queue\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] 
| None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress (owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" 
\\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except 
subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. 
Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"schedule_cron\": 
run_schedule_cron, \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\ndef has_pending_background() -> bool:\n \"\"\"Non-destructive: True if any background task has completed and is\n waiting to be collected. 
The inbox poller uses this in its wake condition.\"\"\"\n with background_lock:\n return any(t[\"status\"] == \"completed\" for t in background_tasks.values())\n\n\n# ── Cron Scheduler (from s14, synced) ──\n\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str # \"0 9 * * *\"\n prompt: str # message to inject when fired\n recurring: bool # True = recurring, False = one-shot\n durable: bool # True = persist to disk\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {} # job_id → \"YYYY-MM-DD HH:MM\"\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(f.strip(), value)\n for f in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n \"\"\"Check if a 5-field cron expression matches the given datetime.\n Standard cron semantics: DOM and DOW use OR when both are constrained.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7 # Python Monday=0 → cron Sunday=0\n\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n\n # Minute, hour, month must all match\n if not (m and h and month_ok):\n return False\n # DOM and DOW: if both constrained, either matching is enough (OR)\n dom_unconstrained = dom == \"*\"\n dow_unconstrained = dow == \"*\"\n if dom_unconstrained and 
dow_unconstrained:\n return True\n if dom_unconstrained:\n return dow_ok\n if dow_unconstrained:\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n \"\"\"Validate a single cron field value is within [lo, hi].\"\"\"\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step_str = field[2:]\n if not step_str.isdigit():\n return f\"Invalid step: {field}\"\n step = int(step_str)\n if step <= 0:\n return f\"Step must be > 0: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err: return err\n return None\n if \"-\" in field:\n parts = field.split(\"-\", 1)\n if not parts[0].isdigit() or not parts[1].isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(parts[0]), int(parts[1])\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n val = int(field)\n if val < lo or val > hi:\n return f\"Value {val} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n \"\"\"Validate a cron expression. 
Returns error message or None.\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for i, (field, (lo, hi), name) in enumerate(zip(fields, bounds, names)):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n \"\"\"Persist durable jobs to .scheduled_tasks.json.\"\"\"\n durable = [asdict(j) for j in scheduled_jobs.values() if j.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n \"\"\"Load durable jobs from disk on startup.\"\"\"\n if not DURABLE_PATH.exists():\n return\n try:\n jobs = json.loads(DURABLE_PATH.read_text())\n for j in jobs:\n job = CronJob(**j)\n err = validate_cron(job.cron)\n if err:\n print(f\" \\033[31m[cron] skipping invalid job {job.id}: {err}\\033[0m\")\n continue\n scheduled_jobs[job.id] = job\n valid = [j for j in jobs if j[\"id\"] in scheduled_jobs]\n if valid:\n print(f\" \\033[35m[cron] loaded {len(valid)} durable job(s)\\033[0m\")\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str, recurring: bool = True,\n durable: bool = True) -> CronJob | str:\n \"\"\"Register a new cron job. 
Returns CronJob or error string.\"\"\"\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable,\n )\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n print(f\" \\033[35m[cron register] {job.id} '{cron}' → {prompt[:40]}\\033[0m\")\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n \"\"\"Cancel a cron job.\"\"\"\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n print(f\" \\033[31m[cron cancel] {job_id}\\033[0m\")\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n \"\"\"Independent daemon thread: poll every 1s, fire matching jobs.\n Individual job errors are caught to prevent one bad job from\n killing the entire scheduler thread.\"\"\"\n while True:\n time.sleep(1)\n now = datetime.now()\n # Date-aware marker prevents daily jobs from skipping on day 2+\n minute_marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now):\n if _last_fired.get(job.id) != minute_marker:\n cron_queue.append(job)\n _last_fired[job.id] = minute_marker\n print(f\" \\033[35m[cron fire] {job.id} → \"\n f\"{job.prompt[:40]}\\033[0m\")\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n \"\"\"Consume fired jobs from cron_queue (called by agent_loop).\"\"\"\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\n# Load durable jobs on startup, then start scheduler thread\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\nprint(\" \\033[35m[cron] scheduler thread started\\033[0m\")\n\n\n# 
Cron tool handlers\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' → {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs. Use schedule_cron to add one.\"\n lines = []\n for j in jobs:\n tag = \"recurring\" if j.recurring else \"one-shot\"\n dur = \"durable\" if j.durable else \"session\"\n lines.append(f\" {j.id}: '{j.cron}' → {j.prompt[:40]} \"\n f\"[{tag}, {dur}]\")\n return \"\\n\".join(lines)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\n# ── MessageBus (s15 new) ──\n# Teaching version uses simple file append + unlink.\n# Real CC uses proper-lockfile for concurrent write safety.\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. 
Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\"):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time()}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"{content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n def peek(self, agent: str) -> bool:\n \"\"\"Non-destructive: True if the agent has unread inbox messages.\n The Lead's inbox poller uses this to decide whether to wake a turn\n without consuming the mailbox.\"\"\"\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n return inbox.exists() and inbox.stat().st_size > 0\n\n\nBUS = MessageBus()\n\n# Track spawned teammates\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Teammate Thread (s15 new) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Teaching version: max 10 rounds per teammate.\n Real CC: teammates use idle loop (wait for inbox, work, repeat)\n until shutdown_request.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"Send results via send_message to 'lead'.\")\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n }\n\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(inbox)}\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": 
\"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\n# ── Team Tool Handlers (s15 new) ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n lines.append(f\" [{m['from']}] {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n 
\"description\": \"Create a new task with optional blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": \"Schedule a cron job. 
cron is 5-field: min hour dom month dow.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"cron\": {\"type\": \"string\",\n \"description\": \"5-field cron expression\"},\n \"prompt\": {\"type\": \"string\",\n \"description\": \"Message to inject when fired\"},\n \"recurring\": {\"type\": \"boolean\",\n \"description\": \"True=recurring, False=one-shot\"},\n \"durable\": {\"type\": \"boolean\",\n \"description\": \"True=persist to disk\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\",\n \"description\": \"List all registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"cancel_cron\",\n \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send a message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox for teammate messages.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n 
\"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n# Teaching code keeps a basic agent loop. S11's full error recovery is omitted.\n# Cron queue is consumed when agent_loop is called; real CC auto-wakes via\n# queue processor (useQueueProcessor.ts) when items arrive.\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n # Consume fired cron jobs → inject as messages\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[inject cron] {job.prompt[:50]}\\033[0m\")\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background tool results + notifications into one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n messages.append({\"role\": \"user\", \"content\": user_content})\n 
context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s15: agent teams\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n\n # input() and a 1s poller (teammate inbox or background results) feed one\n # event queue (issues #291, #46).\n events = queue.Queue()\n\n def input_reader():\n while True:\n try:\n line = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n events.put((\"quit\", None))\n return\n events.put((\"user\", line))\n\n def inbox_poller():\n # Poll ~1s and wake the Lead when async results are ready: teammate\n # inbox messages or completed background tasks. Don't gate on\n # active_teammates: a teammate sends its result and then removes itself,\n # so the final message can outlive its registry entry.\n while True:\n time.sleep(1)\n if BUS.peek(\"lead\") or has_pending_background():\n events.put((\"wake\", None))\n\n threading.Thread(target=input_reader, daemon=True).start()\n threading.Thread(target=inbox_poller, daemon=True).start()\n\n had_teammates = False\n while True:\n kind, payload = events.get()\n if kind == \"quit\":\n break\n if kind == \"user\":\n if payload.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": payload})\n else: # \"wake\": teammate inbox or background results are ready\n parts = []\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n parts.append(\"[Inbox]\\n\" + \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox))\n bg = collect_background_results()\n parts.extend(bg)\n if not parts:\n continue # already drained by an earlier wake (idempotent)\n history.append({\"role\": \"user\", \"content\": \"\\n\".join(parts)})\n print(f\"\\n\\033[33m[wake: {len(inbox)} inbox + {len(bg)} background \"\n f\"-> new turn]\\033[0m\")\n\n # One turn for whichever source woke us.\n agent_loop(history, 
context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n # Announce once when every teammate has finished and its output drained.\n if active_teammates:\n had_teammates = True\n elif had_teammates and not BUS.peek(\"lead\") and not has_pending_background():\n print(\"\\033[32m[all teammates done]\\033[0m\")\n had_teammates = False\n print()\n",
"images": [
{
"src": "/course-assets/s15_agent_teams/agent-teams-overview.svg",
@@ -2019,7 +2054,7 @@
"filename": "s16_team_protocols/code.py",
"title": "Team Protocols",
"subtitle": "Teammates Need Agreements",
- "loc": 709,
+ "loc": 711,
"tools": [
"bash",
"read_file",
@@ -2245,7 +2280,7 @@
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns16: Team Protocols — request-response protocol + request_id + dispatch + state machine.\n\nRun: python s16_team_protocols/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s15:\n - ProtocolState dataclass (request_id, type, sender, status, created_at)\n - pending_requests dict: tracks in-flight protocol requests\n - dispatch_message: routes incoming messages by type to handlers\n - request_shutdown: Lead sends shutdown protocol request\n - request_plan: Lead asks teammate to submit plan\n - handle_shutdown_request / handle_plan_response: teammate receives & responds\n - match_response: Lead correlates response to request via request_id (with type validation)\n - Teammate idle loop: waits for inbox messages instead of exiting after 10 rounds\n - Unified consume_lead_inbox: protocol routing + injection into history\n - 3 new Lead tools: request_shutdown, request_plan, review_plan\n - 1 new teammate tool: submit_plan\n\nASCII flow:\n Lead: BUS.send(\"shutdown_request\", {request_id}) ──────→ teammate inbox\n Teammate: dispatch → handler → BUS.send(\"shutdown_response\", {request_id}) ─→ Lead inbox\n Lead: consume_lead_inbox → match_response(request_id) → pending_requests[req_id].status = approved\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System 
(from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress 
(owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise 
ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. 
Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. 
Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (s16 new) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str\n target: str\n status: str # pending | approved | rejected\n payload: str # plan text or shutdown reason\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\n Validates that response_type matches the request type.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n # Validate response type matches request type\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n 
f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.status != \"pending\":\n print(f\" \\033[33m[protocol] {request_id} already {state.status}, \"\n f\"ignoring duplicate\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Unified Lead Inbox Consumer (s16 fix) ──\n# Both check_inbox tool and main loop call this function.\n# Protocol responses are routed via match_response before returning.\n\ndef consume_lead_inbox(route_protocol: bool = True) -> list[dict]:\n \"\"\"Read Lead's inbox. Route protocol responses, return all messages.\n Called by both run_check_inbox() and main loop to avoid\n messages being consumed without protocol routing.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return []\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n approve = meta.get(\"approve\", False)\n match_response(msg_type, req_id, approve)\n return msgs\n\n\n# ── Teammate Thread (s16: idle loop + dispatch) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Uses idle loop: after each LLM turn, waits for inbox messages\n (shutdown_request, new task) instead of exiting.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"Check inbox for protocol messages (shutdown_request, etc).\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list) -> bool:\n \"\"\"Dispatch incoming protocol messages by type.\n Returns True if teammate should stop.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True # stop the loop\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n\n return False # continue\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": 
\"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n }\n\n shutdown_requested = False\n while not shutdown_requested:\n # Check inbox for protocol messages\n inbox = BUS.read_inbox(name)\n should_stop = False\n non_protocol = []\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n break\n else:\n non_protocol.append(msg)\n if should_stop:\n shutdown_requested = True\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n\n # LLM turn\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # Idle: wait for inbox messages instead of exiting\n # Real CC sends idle_notification to Lead here\n while not shutdown_requested:\n time.sleep(1)\n inbox = BUS.read_inbox(name)\n if not inbox:\n continue\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n shutdown_requested = True\n break\n else:\n non_protocol.append(msg)\n if shutdown_requested:\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n break # back to LLM turn with 
new messages\n\n # Execute tool calls\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\n\n Note: This is a protocol-level request, not a code-level gate.\n After submitting, the teammate's thread continues running — it can\n still call bash/write/etc. Real enforcement relies on the model\n waiting for the approval response before acting. Code-level tool\n gating would require blocking the teammate's tool dispatch until\n approval arrives.\n \"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). 
Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (s16 new) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan for a task.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool, feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender, feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Other Lead Tool Handlers ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n \"\"\"Check Lead's inbox. 
Routes protocol responses via match_response.\"\"\"\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Dispatch ──\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional 
blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox. 
Routes protocol responses automatically.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan by request_id.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background tool results + notifications into one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s16: team protocols\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Check inbox → route protocol + inject into history\n inbox_msgs = consume_lead_inbox(route_protocol=True)\n if inbox_msgs:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox_msgs)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox_msgs)} messages injected]\\033[0m\")\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns16: Team Protocols — request-response protocol + request_id + dispatch + state machine.\n\nRun: python s16_team_protocols/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s15:\n - ProtocolState dataclass (request_id, type, sender, status, created_at)\n - pending_requests dict: tracks in-flight protocol requests\n - dispatch_message: routes incoming messages by type to handlers\n - request_shutdown: Lead sends shutdown protocol request\n - request_plan: Lead asks teammate to submit plan\n - handle_shutdown_request / handle_plan_response: teammate receives & responds\n - match_response: Lead correlates response to request via request_id (with type validation)\n - Teammate idle loop: waits for inbox messages instead of exiting after 10 rounds\n - Unified consume_lead_inbox: protocol routing + injection into history\n - 3 new Lead tools: request_shutdown, request_plan, review_plan\n - 1 new teammate tool: submit_plan\n\nASCII flow:\n Lead: BUS.send(\"shutdown_request\", {request_id}) ──────→ teammate inbox\n Teammate: dispatch → handler → BUS.send(\"shutdown_response\", {request_id}) ─→ Lead inbox\n Lead: consume_lead_inbox → match_response(request_id) → pending_requests[req_id].status = approved\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System 
(from s12, synced) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str # pending | in_progress | completed\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n \"\"\"Return full task details as JSON.\"\"\"\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n \"\"\"Check if all blockedBy dependencies are completed.\n Missing dependencies are treated as blocked.\"\"\"\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if not _task_path(d).exists() or load_task(d).status != \"completed\"]\n return f\"Blocked by: {deps}\"\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress 
(owner: {owner})\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n print(f\" \\033[33m[unblocked] {', '.join(unblocked)}\\033[0m\")\n return msg\n\n\n# ── Prompt Assembly (from s10, synced) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"get_task, create_task, list_tasks, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n memories = context.get(\"memories\", \"\")\n if memories:\n sections.append(f\"Relevant memories:\\n{memories}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_key, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_key, _last_prompt\n key = json.dumps(context, sort_keys=True, ensure_ascii=False, default=str)\n if key == _last_context_key and _last_prompt:\n return _last_prompt\n _last_context_key = key\n _last_prompt = assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise 
ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, run_in_background: bool = False) -> str:\n # run_in_background is handled by agent_loop dispatch, not here\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Task tools\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks. 
Use create_task to add some.\"\n lines = []\n for t in tasks:\n icon = {\"pending\": \"○\", \"in_progress\": \"●\",\n \"completed\": \"✓\"}.get(t.status, \"?\")\n deps = f\" (blockedBy: {', '.join(t.blockedBy)})\" if t.blockedBy else \"\"\n owner = f\" [{t.owner}]\" if t.owner else \"\"\n lines.append(f\" {icon} {t.id}: {t.subject} \"\n f\"[{t.status}]{owner}{deps}\")\n return \"\\n\".join(lines)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task(task_id)\n except FileNotFoundError:\n return f\"Error: Task {task_id} not found\"\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\n# ── Background Tasks (from s13, synced) ──\n\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Fallback heuristic: commands likely to take > 30s.\"\"\"\n if tool_name != \"bash\":\n return False\n cmd = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(kw in cmd for kw in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n \"\"\"Model explicit request takes priority; fallback to heuristic.\"\"\"\n if tool_input.get(\"run_in_background\"):\n return True\n return is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block) -> str:\n \"\"\"Run tool in a daemon thread. 
Returns background task ID.\"\"\"\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n cmd = block.input.get(\"command\", block.name)\n\n def worker():\n result = execute_tool(block)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = result\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": cmd,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] dispatched {bg_id}: {cmd[:40]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n \"\"\"Collect completed background results as task_notification messages.\"\"\"\n with background_lock:\n ready_ids = [bid for bid, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready_ids:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n print(f\" \\033[32m[background done] {bg_id}: \"\n f\"{task['command'][:40]} ({len(output)} chars)\\033[0m\")\n return notifications\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n \"\"\"File-based message bus. 
Each agent has a .jsonl inbox.\n Read is destructive: read_text + unlink (consumes messages).\n Teaching version: no file locking; real CC uses proper-lockfile.\"\"\"\n\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink() # consume: read + delete\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (s16 new) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str # \"shutdown\" | \"plan_approval\"\n sender: str\n target: str\n status: str # pending | approved | rejected\n payload: str # plan text or shutdown reason\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\n Validates that response_type matches the request type.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n # Validate response type matches request type\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n 
f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.status != \"pending\":\n print(f\" \\033[33m[protocol] {request_id} already {state.status}, \"\n f\"ignoring duplicate\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Unified Lead Inbox Consumer (s16 fix) ──\n# Both check_inbox tool and main loop call this function.\n# Protocol responses are routed via match_response before returning.\n\ndef consume_lead_inbox(route_protocol: bool = True) -> list[dict]:\n \"\"\"Read Lead's inbox. Route protocol responses, return all messages.\n Called by both run_check_inbox() and main loop to avoid\n messages being consumed without protocol routing.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if not msgs:\n return []\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n approve = meta.get(\"approve\", False)\n match_response(msg_type, req_id, approve)\n return msgs\n\n\n# ── Teammate Thread (s16: idle loop + dispatch) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n \"\"\"Spawn a teammate agent in a background thread.\n Uses idle loop: after each LLM turn, waits for inbox messages\n (shutdown_request, new task) instead of exiting.\"\"\"\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"Check inbox for protocol messages (shutdown_request, etc).\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list) -> bool:\n \"\"\"Dispatch incoming protocol messages by type.\n Returns True if teammate should stop.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True # stop the loop\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n\n return False # continue\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": 
\"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n ]\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n }\n\n shutdown_requested = False\n while not shutdown_requested:\n # Check inbox for protocol messages\n inbox = BUS.read_inbox(name)\n should_stop = False\n non_protocol = []\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n break\n else:\n non_protocol.append(msg)\n if should_stop:\n shutdown_requested = True\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n\n # LLM turn\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n # Idle: wait for inbox messages instead of exiting\n # Real CC sends idle_notification to Lead here\n while not shutdown_requested:\n time.sleep(1)\n inbox = BUS.read_inbox(name)\n if not inbox:\n continue\n for msg in inbox:\n if msg.get(\"type\") in (\"shutdown_request\", \"plan_approval_response\"):\n should_stop = handle_inbox_message(name, msg, messages)\n if should_stop:\n shutdown_requested = True\n break\n else:\n non_protocol.append(msg)\n if shutdown_requested:\n break\n if non_protocol:\n inbox_json = json.dumps(non_protocol)\n messages.append({\"role\": \"user\",\n \"content\": \"\" + inbox_json + \"\"})\n break # back to LLM turn with 
new messages\n\n # Execute tool calls\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Send final summary to Lead\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\n\n Note: This is a protocol-level request, not a code-level gate.\n After submitting, the teammate's thread continues running — it can\n still call bash/write/etc. Real enforcement relies on the model\n waiting for the approval response before acting. Code-level tool\n gating would require blocking the teammate's tool dispatch until\n approval arrives.\n \"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). 
Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (s16 new) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan for a task.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool, feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender, feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Other Lead Tool Handlers ──\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n \"\"\"Check Lead's inbox. 
Routes protocol responses via match_response.\"\"\"\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Dispatch ──\n\ndef execute_tool(block) -> str:\n \"\"\"Execute a tool call block, return output.\"\"\"\n handler = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task, \"claim_task\": run_claim_task,\n \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n }.get(block.name)\n if handler:\n return handler(**block.input)\n return f\"Unknown tool: {block.name}\"\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a new task with optional 
blockedBy dependencies.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks with status, owner, and dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task. Sets owner, changes status to in_progress.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task. Reports unblocked downstream tasks.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn a teammate agent in a background thread.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate via MessageBus.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check Lead's inbox. 
Routes protocol responses automatically.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan by request_id.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\n# ── Context ──\n\ndef update_context(context: dict, messages: list) -> dict:\n \"\"\"Derive context from real state.\"\"\"\n memories = \"\"\n if MEMORY_INDEX.exists():\n content = MEMORY_INDEX.read_text().strip()\n if content:\n memories = content\n return {\n \"enabled_tools\": [t[\"name\"] for t in TOOLS],\n \"workspace\": str(WORKDIR),\n \"memories\": memories,\n }\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\",\n \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": f\"[Background task {bg_id} started] \"\n f\"Result will be available when complete.\"})\n else:\n output = execute_tool(block)\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n\n # Merge background tool results + notifications into one user message\n user_content = list(results)\n bg_notifications = collect_background_results()\n if bg_notifications:\n for notif in bg_notifications:\n user_content.append({\"type\": \"text\", \"text\": notif})\n messages.append({\"role\": \"user\", \"content\": user_content})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s16: team protocols\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n # Check inbox → route protocol + inject into history\n inbox_msgs = consume_lead_inbox(route_protocol=True)\n if inbox_msgs:\n inbox_text = \"\\n\".join(\n f\"From {m['from']}: {m['content'][:200]}\" for m in inbox_msgs)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print(f\"\\n\\033[33m[Inbox: {len(inbox_msgs)} messages injected]\\033[0m\")\n print()\n",
"images": [
{
"src": "/course-assets/s16_team_protocols/team-protocols-overview.svg",
@@ -2258,7 +2293,7 @@
"filename": "s17_autonomous_agents/code.py",
"title": "Autonomous Agents",
"subtitle": "Check the Board, Claim the Task",
- "loc": 648,
+ "loc": 649,
"tools": [
"bash",
"read_file",
@@ -2382,79 +2417,84 @@
"signature": "def scan_unclaimed_tasks()",
"startLine": 292
},
+ {
+ "name": "idle_poll",
+ "signature": "def idle_poll(name: str, messages: list, role: str)",
+ "startLine": 304
+ },
{
"name": "spawn_teammate_thread",
"signature": "def spawn_teammate_thread(name: str, role: str, prompt: str)",
- "startLine": 351
+ "startLine": 350
},
{
"name": "_teammate_submit_plan",
"signature": "def _teammate_submit_plan(from_name: str, plan: str)",
- "startLine": 528
+ "startLine": 527
},
{
"name": "run_request_shutdown",
"signature": "def run_request_shutdown(teammate: str)",
- "startLine": 543
+ "startLine": 542
},
{
"name": "run_request_plan",
"signature": "def run_request_plan(teammate: str, task: str)",
- "startLine": 557
+ "startLine": 556
},
{
"name": "run_list_tasks",
"signature": "def run_list_tasks()",
- "startLine": 591
+ "startLine": 590
},
{
"name": "run_get_task",
"signature": "def run_get_task(task_id: str)",
- "startLine": 600
+ "startLine": 599
},
{
"name": "run_claim_task",
"signature": "def run_claim_task(task_id: str)",
- "startLine": 604
+ "startLine": 603
},
{
"name": "run_complete_task",
"signature": "def run_complete_task(task_id: str)",
- "startLine": 608
+ "startLine": 607
},
{
"name": "run_spawn_teammate",
"signature": "def run_spawn_teammate(name: str, role: str, prompt: str)",
- "startLine": 612
+ "startLine": 611
},
{
"name": "run_send_message",
"signature": "def run_send_message(to: str, content: str)",
- "startLine": 616
+ "startLine": 615
},
{
"name": "consume_lead_inbox",
"signature": "def consume_lead_inbox(route_protocol=True)",
- "startLine": 621
+ "startLine": 620
},
{
"name": "run_check_inbox",
"signature": "def run_check_inbox()",
- "startLine": 634
+ "startLine": 633
},
{
"name": "update_context",
"signature": "def update_context(context: dict, messages: list)",
- "startLine": 745
+ "startLine": 744
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list, context: dict)",
- "startLine": 754
+ "startLine": 753
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns17: Autonomous Agents — idle poll + auto-claim + WORK/IDLE lifecycle.\n\nRun: python s17_autonomous_agents/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s16:\n - scan_unclaimed_tasks: find pending, unowned tasks with deps completed\n - idle_poll: 60s polling loop (inbox + task board), dispatches shutdown in IDLE\n - claim_task: owner check + return value verification\n - Teammate lifecycle: WORK → IDLE → SHUTDOWN\n - Teammate tools: + list_tasks, claim_task, complete_task (5→8)\n - consume_lead_inbox: unified inbox consumer for protocol + context injection\n - Identity re-injection after context compression\n\nASCII lifecycle:\n WORK: inbox → LLM → tools → (tool_use? loop) → (done? → IDLE)\n IDLE: 5s poll → inbox? → WORK / unclaimed? → claim → WORK / 60s? → SHUTDOWN\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, 
description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] 
{task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Prompt Assembly (from s10) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_hash, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_hash, _last_prompt\n h = json.dumps(context, sort_keys=True)\n if h == _last_context_hash and _last_prompt:\n return _last_prompt\n _last_context_hash, _last_prompt = h, assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools (from s15) ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Protocol State (from s16) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n if state.type == \"shutdown\" and 
response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Autonomous Agent (s17 new) ──\n\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n \"\"\"Find pending, unowned tasks with all dependencies completed.\"\"\"\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n \"\"\"Poll for 60s. 
Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # Check inbox — dispatch protocol messages first\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n # Check for shutdown_request\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"in idle ({req_id})\\033[0m\")\n return \"shutdown\"\n\n # Non-protocol inbox: inject and resume work\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n print(f\" \\033[36m[idle] {name} found inbox messages\\033[0m\")\n return \"work\"\n\n # Scan task board\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], agent_name)\n if \"Claimed\" in result:\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task['id']}: \"\n f\"{task['subject']}\"})\n print(f\" \\033[32m[idle] {name} auto-claimed: \"\n f\"{task['subject']}\\033[0m\")\n return \"work\"\n print(f\" \\033[33m[idle] {name} claim failed: \"\n f\"{result}\\033[0m\")\n\n print(f\" \\033[31m[idle] {name} timeout ({IDLE_TIMEOUT}s)\\033[0m\")\n return \"timeout\"\n\n\n# ── Teammate Thread (from s15 + s16 + s17) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"You can list and claim tasks from the board. 
\"\n f\"Check inbox for protocol messages.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n \"\"\"Dispatch incoming protocol messages by type.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n return False\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n # s17 new: teammates can list, claim, and complete tasks\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks on the board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n return claim_task(task_id, owner=name)\n\n def _run_complete_task(task_id: str):\n return complete_task(task_id)\n\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n # Outer loop: WORK → IDLE cycle\n while True:\n # Identity re-injection (s17)\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. 
\"\n f\"Continue your work.\"})\n\n # WORK phase\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(non_protocol)}\"})\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if should_shutdown:\n break\n\n # IDLE phase (s17 new)\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break\n\n # Summary\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role} (autonomous)\"\n\n\ndef 
_teammate_submit_plan(from_name: str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (from s16) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Basic tool handlers 
──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task(task_id)\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n \"\"\"Read Lead inbox: route protocol responses, return all messages.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n 
\"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn an autonomous teammate agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": 
\"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, 
messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s17: autonomous agents\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Consume lead inbox: route protocol + inject into history\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns17: Autonomous Agents — idle poll + auto-claim + WORK/IDLE lifecycle.\n\nRun: python s17_autonomous_agents/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s16:\n - scan_unclaimed_tasks: find pending, unowned tasks with deps completed\n - idle_poll: 60s polling loop (inbox + task board), dispatches shutdown in IDLE\n - claim_task: owner check + return value verification\n - Teammate lifecycle: WORK → IDLE → SHUTDOWN\n - Teammate tools: + list_tasks, claim_task, complete_task (5→8)\n - consume_lead_inbox: unified inbox consumer for protocol + context injection\n - Identity re-injection after context compression\n\nASCII lifecycle:\n WORK: inbox → LLM → tools → (tool_use? loop) → (done? → IDLE)\n IDLE: 5s poll → inbox? → WORK / unclaimed? → claim → WORK / 60s? → SHUTDOWN\n\"\"\"\n\nimport os, subprocess, json, time, random, threading\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, 
description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] 
{task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Prompt Assembly (from s10) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_hash, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_hash, _last_prompt\n h = json.dumps(context, sort_keys=True)\n if h == _last_context_hash and _last_prompt:\n return _last_prompt\n _last_context_hash, _last_prompt = h, assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Tools (from s15) ──\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n\n# ── Protocol State (from s16) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n \"\"\"Correlate a response to the original request via request_id.\"\"\"\n state = pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n if state.type == \"shutdown\" and 
response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\n# ── Autonomous Agent (s17 new) ──\n\nIDLE_POLL_INTERVAL = 5 # seconds\nIDLE_TIMEOUT = 60 # seconds\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n \"\"\"Find pending, unowned tasks with all dependencies completed.\"\"\"\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(name: str, messages: list, role: str) -> str:\n \"\"\"Poll for 60s. 
Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n # Check inbox — dispatch protocol messages first\n inbox = BUS.read_inbox(name)\n if inbox:\n # Check for shutdown_request\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"in idle ({req_id})\\033[0m\")\n return \"shutdown\"\n\n # Non-protocol inbox: inject and resume work\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n print(f\" \\033[36m[idle] {name} found inbox messages\\033[0m\")\n return \"work\"\n\n # Scan task board\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n result = claim_task(task[\"id\"], name)\n if \"Claimed\" in result:\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task['id']}: \"\n f\"{task['subject']}\"})\n print(f\" \\033[32m[idle] {name} auto-claimed: \"\n f\"{task['subject']}\\033[0m\")\n return \"work\"\n print(f\" \\033[33m[idle] {name} claim failed: \"\n f\"{result}\\033[0m\")\n\n print(f\" \\033[31m[idle] {name} timeout ({IDLE_TIMEOUT}s)\\033[0m\")\n return \"timeout\"\n\n\n# ── Teammate Thread (from s15 + s16 + s17) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"You can list and claim tasks from the board. 
\"\n f\"Check inbox for protocol messages.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n \"\"\"Dispatch incoming protocol messages by type.\"\"\"\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n return False\n\n def run():\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n # s17 new: teammates can list, claim, and complete tasks\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks on the board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n return claim_task(task_id, owner=name)\n\n def _run_complete_task(task_id: str):\n return complete_task(task_id)\n\n sub_handlers = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n # Outer loop: WORK → IDLE cycle\n while True:\n # Identity re-injection (s17)\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. 
\"\n f\"Continue your work.\"})\n\n # WORK phase\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": f\"{json.dumps(non_protocol)}\"})\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if should_shutdown:\n break\n\n # IDLE phase (s17 new)\n idle_result = idle_poll(name, messages, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break\n\n # Summary\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role} (autonomous)\"\n\n\ndef _teammate_submit_plan(from_name: 
str, plan: str) -> str:\n \"\"\"Teammate submits a plan to Lead for approval.\"\"\"\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (from s16) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n \"\"\"Lead asks a teammate to submit a plan.\"\"\"\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan {icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Basic tool handlers ──\n\ndef 
run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task(task_id)\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n \"\"\"Read Lead inbox: route protocol responses, return all messages.\"\"\"\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions ──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": 
{\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn an autonomous teammate agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n 
\"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> 
dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s17: autonomous agents\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n # Consume lead inbox: route protocol + inject into history\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
"images": [
{
"src": "/course-assets/s17_autonomous_agents/autonomous-agents-overview.svg",
@@ -2467,7 +2507,7 @@
"filename": "s18_worktree_isolation/code.py",
"title": "Worktree Isolation",
"subtitle": "Separate Directories, No Conflicts",
- "loc": 802,
+ "loc": 804,
"tools": [
"bash",
"read_file",
@@ -2725,7 +2765,7 @@
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns18: Worktree Isolation — git worktree + task-directory binding + event log.\n\nRun: python s18_worktree_isolation/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s17:\n - Task dataclass gains worktree field (str | None)\n - validate_worktree_name: reject path traversal and illegal chars\n - create_worktree: validate name, git worktree add, optional task binding\n - bind_task_to_worktree: write worktree field only, keep task pending\n - remove_worktree: safety check before force, no auto-complete\n - run_git returns (ok, output), events only on success\n - Teammate tools: + complete_task, run in worktree cwd when bound\n - scan_unclaimed_tasks: uses can_start() for dependency checking\n - idle_poll: checks claim result, dispatches shutdown in IDLE\n - consume_lead_inbox: unified inbox consumer\n - 3 new Lead tools: create_worktree, remove_worktree, keep_worktree\n\nASCII topology:\n Main repo (/)\n ├── .worktrees/auth/ (branch: wt/auth) ← Task #1\n ├── .worktrees/ui/ (branch: wt/ui) ← Task #2\n ├── .tasks/task_xxx.json (worktree: \"auth\")\n └── .worktrees/events.jsonl\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12 + s18 worktree field) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str 
| None\n blockedBy: list[str]\n worktree: str | None = None # s18: bound worktree name\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} 
({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System (s18 new) ──\n\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n \"\"\"Return error message if invalid, None if valid.\"\"\"\n if not name:\n return \"Worktree name cannot be empty\"\n if name == \".\" or name == \"..\":\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n \"\"\"Run git command. 
Return (ok, output).\"\"\"\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n out = out[:5000] if out else \"(no output)\"\n return r.returncode == 0, out\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n \"\"\"Append a lifecycle event to events.jsonl.\"\"\"\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n \"\"\"Create a git worktree with a dedicated branch. Optionally bind to a task.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n \"\"\"Write worktree field to task. 
Keep status as pending for auto-claim.\"\"\"\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n print(f\" \\033[33m[bind] {task.subject} → worktree:{worktree_name}\\033[0m\")\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n \"\"\"Count uncommitted files and commits in a worktree.\"\"\"\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n \"\"\"Remove worktree. Refuses if uncommitted changes unless discard_changes.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return (f\"Cannot verify worktree '{name}' status. \"\n \"Use discard_changes=true to force removal.\")\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} uncommitted file(s) \"\n f\"and {commits} unpushed commit(s). \"\n \"Use discard_changes=true to force removal, \"\n \"or keep_worktree to preserve for review.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree directory for '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n \"\"\"Keep worktree for manual review. 
Branch preserved.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n print(f\" \\033[36m[worktree] kept: {name}\\033[0m\")\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Prompt Assembly (from s10) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_hash, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_hash, _last_prompt\n h = json.dumps(context, sort_keys=True)\n if h == _last_context_hash and _last_prompt:\n return _last_prompt\n _last_context_hash, _last_prompt = h, assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout 
(120s)\"\n\n\ndef run_read(path: str, limit: int | None = None, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (from s16) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n state = 
pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent (from s17, + worktree cwd) ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n \"\"\"Find pending, unowned tasks with all dependencies completed.\"\"\"\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n \"\"\"Poll for 60s. 
Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"in idle ({req_id})\\033[0m\")\n return \"shutdown\"\n\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n print(f\" \\033[36m[idle] {name} found inbox messages\\033[0m\")\n return \"work\"\n\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n print(f\" \\033[32m[idle] {name} auto-claimed: \"\n f\"{task_data['subject']}\\033[0m\")\n return \"work\"\n print(f\" \\033[33m[idle] {name} claim failed: \"\n f\"{result}\\033[0m\")\n\n print(f\" \\033[31m[idle] {name} timeout ({IDLE_TIMEOUT}s)\\033[0m\")\n return \"timeout\"\n\n\n# ── Teammate Thread (from s15 + s16 + s17 + s18) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"You can list and claim tasks from the board. 
\"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n return False\n\n def run():\n # Track current worktree for this teammate's cwd\n wt_ctx = {\"path\": None}\n\n def _wt_cwd() -> Path | None:\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n # Set worktree cwd if task has one\n task = load_task(task_id)\n if task.worktree:\n wt_ctx[\"path\"] = str(WORKTREES_DIR / task.worktree)\n else:\n wt_ctx[\"path\"] = None\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n 
sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks on the board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": 
_run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n # Outer loop: WORK → IDLE cycle\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n\n # WORK phase\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if should_shutdown:\n break\n\n # IDLE phase\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break\n\n # Summary\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} 
finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role} (autonomous)\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (from s16) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan 
{icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Lead Worktree Tools (s18 new) ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\n\ndef run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task_json(task_id)\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions 
──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn an autonomous teammate agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": 
{\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n # s18 new: worktree tools\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree with its own branch.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. 
Refuses if uncommitted changes unless discard_changes=true.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s18: worktree isolation\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms18 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n # Consume lead inbox: route protocol + inject into history\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns18: Worktree Isolation — git worktree + task-directory binding + event log.\n\nRun: python s18_worktree_isolation/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s17:\n - Task dataclass gains worktree field (str | None)\n - validate_worktree_name: reject path traversal and illegal chars\n - create_worktree: validate name, git worktree add, optional task binding\n - bind_task_to_worktree: write worktree field only, keep task pending\n - remove_worktree: safety check before force, no auto-complete\n - run_git returns (ok, output), events only on success\n - Teammate tools: + complete_task, run in worktree cwd when bound\n - scan_unclaimed_tasks: uses can_start() for dependency checking\n - idle_poll: checks claim result, dispatches shutdown in IDLE\n - consume_lead_inbox: unified inbox consumer\n - 3 new Lead tools: create_worktree, remove_worktree, keep_worktree\n\nASCII topology:\n Main repo (/)\n ├── .worktrees/auth/ (branch: wt/auth) ← Task #1\n ├── .worktrees/ui/ (branch: wt/ui) ← Task #2\n ├── .tasks/task_xxx.json (worktree: \"auth\")\n └── .worktrees/events.jsonl\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System (from s12 + s18 worktree field) ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str 
| None\n blockedBy: list[str]\n worktree: str | None = None # s18: bound worktree name\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n task = load_task(task_id)\n return json.dumps(asdict(task), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} 
({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System (s18 new) ──\n\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n \"\"\"Return error message if invalid, None if valid.\"\"\"\n if not name:\n return \"Worktree name cannot be empty\"\n if name == \".\" or name == \"..\":\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n \"\"\"Run git command. 
Return (ok, output).\"\"\"\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n out = out[:5000] if out else \"(no output)\"\n return r.returncode == 0, out\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n \"\"\"Append a lifecycle event to events.jsonl.\"\"\"\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n \"\"\"Create a git worktree with a dedicated branch. Optionally bind to a task.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n \"\"\"Write worktree field to task. 
Keep status as pending for auto-claim.\"\"\"\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n print(f\" \\033[33m[bind] {task.subject} → worktree:{worktree_name}\\033[0m\")\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n \"\"\"Count uncommitted files and commits in a worktree.\"\"\"\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n \"\"\"Remove worktree. Refuses if uncommitted changes unless discard_changes.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return (f\"Cannot verify worktree '{name}' status. \"\n \"Use discard_changes=true to force removal.\")\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} uncommitted file(s) \"\n f\"and {commits} unpushed commit(s). \"\n \"Use discard_changes=true to force removal, \"\n \"or keep_worktree to preserve for review.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree directory for '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n \"\"\"Keep worktree for manual review. 
Branch preserved.\"\"\"\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n print(f\" \\033[36m[worktree] kept: {name}\\033[0m\")\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Prompt Assembly (from s10) ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n return \"\\n\\n\".join(sections)\n\n\n_last_context_hash, _last_prompt = None, None\n\n\ndef get_system_prompt(context: dict) -> str:\n global _last_context_hash, _last_prompt\n h = json.dumps(context, sort_keys=True)\n if h == _last_context_hash and _last_prompt:\n return _last_prompt\n _last_context_hash, _last_prompt = h, assemble_system_prompt(context)\n return _last_prompt\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout 
(120s)\"\n\n\ndef run_read(path: str, limit: int | None = None, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus (from s15) ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State (from s16) ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n state = 
pending_requests.get(request_id)\n if not state:\n print(f\" \\033[31m[protocol] unknown request_id: {request_id}\\033[0m\")\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected shutdown_response, \"\n f\"got {response_type}\\033[0m\")\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n print(f\" \\033[31m[protocol] type mismatch: expected plan_approval_response, \"\n f\"got {response_type}\\033[0m\")\n return\n state.status = \"approved\" if approve else \"rejected\"\n icon = \"✓\" if approve else \"✗\"\n color = \"32\" if approve else \"31\"\n print(f\" \\033[{color}m[protocol] {state.type} {icon} \"\n f\"({request_id}: {state.status})\\033[0m\")\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent (from s17, + worktree cwd) ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n \"\"\"Find pending, unowned tasks with all dependencies completed.\"\"\"\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n \"\"\"Poll for 60s. 
Return 'work', 'shutdown', or 'timeout'.\"\"\"\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"in idle ({req_id})\\033[0m\")\n return \"shutdown\"\n\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n print(f\" \\033[36m[idle] {name} found inbox messages\\033[0m\")\n return \"work\"\n\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n print(f\" \\033[32m[idle] {name} auto-claimed: \"\n f\"{task_data['subject']}\\033[0m\")\n return \"work\"\n print(f\" \\033[33m[idle] {name} claim failed: \"\n f\"{result}\\033[0m\")\n\n print(f\" \\033[31m[idle] {name} timeout ({IDLE_TIMEOUT}s)\\033[0m\")\n return \"timeout\"\n\n\n# ── Teammate Thread (from s15 + s16 + s17 + s18) ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"You can list and claim tasks from the board. 
\"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down gracefully.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n print(f\" \\033[35m[protocol] {name} approved shutdown \"\n f\"({req_id})\\033[0m\")\n return True\n\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if approve:\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved] Proceed with the task.\"})\n else:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Plan rejected] Feedback: {msg['content']}\"})\n return False\n\n def run():\n # Track current worktree for this teammate's cwd\n wt_ctx = {\"path\": None}\n\n def _wt_cwd() -> Path | None:\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n # Set worktree cwd if task has one\n task = load_task(task_id)\n if task.worktree:\n wt_ctx[\"path\"] = str(WORKTREES_DIR / task.worktree)\n else:\n wt_ctx[\"path\"] = None\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n 
sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks on the board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": 
_run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n # Outer loop: WORK → IDLE cycle\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n\n # WORK phase\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if should_shutdown:\n break\n\n # IDLE phase\n idle_result = idle_poll(name, messages, name, role)\n if idle_result == \"shutdown\":\n break\n if idle_result == \"timeout\":\n break\n\n # Summary\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n print(f\" \\033[32m[teammate] {name} 
finished\\033[0m\")\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n print(f\" \\033[36m[teammate] {name} spawned as {role}\\033[0m\")\n return f\"Teammate '{name}' spawned as {role} (autonomous)\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id}). Waiting for approval...\"\n\n\n# ── Lead Protocol Tools (from s16) ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": req_id})\n print(f\" \\033[35m[protocol] shutdown_request → {teammate} \"\n f\"({req_id})\\033[0m\")\n return f\"Shutdown request sent to {teammate} (req: {req_id})\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Please submit a plan for: {task}\",\n \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n if state.status != \"pending\":\n return f\"Request {request_id} already {state.status}\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n icon = \"✓\" if approve else \"✗\"\n print(f\" \\033[32m[protocol] plan 
{icon} ({request_id})\\033[0m\")\n return f\"Plan {'approved' if approve else 'rejected'} ({request_id})\"\n\n\n# ── Lead Worktree Tools (s18 new) ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\n\ndef run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task_json(task_id)\n\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\n\n# ── Tool Definitions 
──\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\",\n \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\",\n \"description\": \"Get full details of a specific task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\",\n \"description\": \"Spawn an autonomous teammate agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": 
{\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down gracefully.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan for review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\n \"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n # s18 new: worktree tools\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree with its own branch.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. 
Refuses if uncommitted changes unless discard_changes=true.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nTOOL_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop ──\n\ndef agent_loop(messages: list, context: dict):\n system = get_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n context = update_context(context, messages)\n system = get_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s18: worktree isolation\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms18 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n # Consume lead inbox: route protocol + inject into history\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
"images": [
{
"src": "/course-assets/s18_worktree_isolation/worktree-overview.svg",
@@ -2738,7 +2778,7 @@
"filename": "s19_mcp_plugin/code.py",
"title": "MCP Tools",
"subtitle": "External Tools, Standard Protocol",
- "loc": 835,
+ "loc": 837,
"tools": [
"bash",
"read_file",
@@ -3033,7 +3073,7 @@
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns19: MCP Tools — MCPClient + tool discovery + assemble_tool_pool.\n\nRun: python s19_mcp_plugin/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s18:\n - MCPClient class: discovers tools, calls tools via mock handler\n - normalize_mcp_name: normalize tool/server names\n - assemble_tool_pool: assembles builtin + MCP tools into one pool\n - connect_mcp: connect to an MCP server, discover tools\n - Tool naming: mcp__{server}__{tool} with normalization\n - MCP tools have readOnly/destructive annotations\n - agent_loop uses dynamic tool pool (builtin + MCP), no prompt cache\n - Teammate tools: complete_task, worktree cwd (from s17/s18 fixes)\n\nASCII flow:\n connect_mcp(\"docs\") → MCPClient discovers tools →\n assemble_tool_pool → [builtin... , mcp__docs__search, mcp__docs__get_version]\n agent_loop uses assembled pool\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n 
id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == 
\"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' 
created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = 
\"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_info = f\"\\nWork directory: {WORKTREES_DIR / task_data['worktree']}\"\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate 
'{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = (str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n 
{\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. 
\"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n if should_shutdown:\n break\n idle_result = idle_poll(name, messages, name, role)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", 
payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan {'approved' if approve else 'rejected'}\"\n\n\n# ── MCP System (s19 new) ──\n\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef 
normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. 
Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Assemble builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in 
tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task_json(task_id)\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": 
[\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n 
\"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n 
\"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop (s19: dynamic tool pool, no prompt cache) ──\n\ndef agent_loop(messages: list, context: dict):\n tools, handlers = assemble_tool_pool()\n system = assemble_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=tools, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if any(b.name == \"connect_mcp\" for b in response.content\n if b.type == \"tool_use\"):\n tools, handlers = assemble_tool_pool()\n context = update_context(context, messages)\n system = assemble_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s19: mcp tools\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms19 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns19: MCP Tools — MCPClient + tool discovery + assemble_tool_pool.\n\nRun: python s19_mcp_plugin/code.py\nNeed: pip install anthropic python-dotenv + .env with ANTHROPIC_API_KEY\n\nChanges from s18:\n - MCPClient class: discovers tools, calls tools via mock handler\n - normalize_mcp_name: normalize tool/server names\n - assemble_tool_pool: assembles builtin + MCP tools into one pool\n - connect_mcp: connect to an MCP server, discover tools\n - Tool naming: mcp__{server}__{tool} with normalization\n - MCP tools have readOnly/destructive annotations\n - agent_loop uses dynamic tool pool (builtin + MCP), no prompt cache\n - Teammate tools: complete_task, worktree cwd (from s17/s18 fixes)\n\nASCII flow:\n connect_mcp(\"docs\") → MCPClient discovers tools →\n assemble_tool_pool → [builtin... , mcp__docs__search, mcp__docs__get_version]\n agent_loop uses assembled pool\n\"\"\"\n\nimport os, subprocess, json, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# ── Task System ──\n\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n 
id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == 
\"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' 
created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. 
Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# ── MessageBus ──\n\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n state = pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = 
\"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str) -> str:\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_info = f\"\\nWork directory: {WORKTREES_DIR / task_data['worktree']}\"\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate 
'{name}' already exists\"\n\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. \"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = (str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n 
{\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"submit_plan\": lambda plan: _teammate_submit_plan(name, plan),\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. 
\"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = sub_handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n if should_shutdown:\n break\n idle_result = idle_poll(name, messages, name, role)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", 
payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan {'approved' if approve else 'rejected'}\"\n\n\n# ── MCP System (s19 new) ──\n\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef 
normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. 
Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Assemble builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in 
tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n return get_task_json(task_id)\n\ndef run_claim_task(task_id: str) -> str:\n return claim_task(task_id, owner=\"agent\")\n\ndef run_complete_task(task_id: str) -> str:\n return complete_task(task_id)\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": 
[\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n 
\"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n \"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, \"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n 
\"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\"memories\": memories}\n\n\n# ── Agent Loop (s19: dynamic tool pool, no prompt cache) ──\n\ndef agent_loop(messages: list, context: dict):\n tools, handlers = assemble_tool_pool()\n system = assemble_system_prompt(context)\n while True:\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=tools, max_tokens=8000)\n except Exception as e:\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n handler = handlers.get(block.name)\n output = handler(**block.input) if handler else \"Unknown\"\n print(str(output)[:300])\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n if any(b.name == \"connect_mcp\" for b in response.content\n if b.type == \"tool_use\"):\n tools, handlers = assemble_tool_pool()\n context = update_context(context, messages)\n system = assemble_system_prompt(context)\n\n\nif __name__ == \"__main__\":\n print(\"s19: mcp tools\")\n print(\"Enter a question, press Enter to send. 
Type q to quit.\\n\")\n history = []\n context = {\"memories\": \"\"}\n while True:\n try:\n query = input(\"\\033[36ms19 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, context)\n context = update_context(context, history)\n for block in history[-1][\"content\"]:\n if getattr(block, \"type\", None) == \"text\":\n print(block.text)\n elif isinstance(block, dict) and block.get(\"type\") == \"text\":\n print(block.get(\"text\", \"\"))\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{m.get('type', 'message')}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
"images": [
{
"src": "/course-assets/s19_mcp_plugin/mcp-architecture.svg",
@@ -3046,7 +3086,7 @@
"filename": "s20_comprehensive/code.py",
"title": "Comprehensive Agent",
"subtitle": "All Mechanisms, One Loop",
- "loc": 1677,
+ "loc": 1708,
"tools": [
"bash",
"read_file",
@@ -3112,18 +3152,18 @@
},
{
"name": "RecoveryState",
- "startLine": 1172,
- "endLine": 1180
+ "startLine": 1208,
+ "endLine": 1216
},
{
"name": "CronJob",
- "startLine": 1302,
- "endLine": 1309
+ "startLine": 1338,
+ "endLine": 1345
},
{
"name": "MCPClient",
- "startLine": 1499,
- "endLine": 1521
+ "startLine": 1535,
+ "endLine": 1557
}
],
"functions": [
@@ -3362,259 +3402,274 @@
"signature": "def estimate_size(messages: list)",
"startLine": 1060
},
+ {
+ "name": "block_type",
+ "signature": "def block_type(block)",
+ "startLine": 1063
+ },
+ {
+ "name": "message_has_tool_use",
+ "signature": "def message_has_tool_use(message: dict)",
+ "startLine": 1067
+ },
+ {
+ "name": "is_tool_result_message",
+ "signature": "def is_tool_result_message(message: dict)",
+ "startLine": 1076
+ },
{
"name": "collect_tool_results",
"signature": "def collect_tool_results(messages: list)",
- "startLine": 1064
+ "startLine": 1086
},
{
"name": "persist_large_output",
"signature": "def persist_large_output(tool_use_id: str, output: str)",
- "startLine": 1076
+ "startLine": 1098
},
{
"name": "tool_result_budget",
"signature": "def tool_result_budget(messages: list, max_bytes: int = 200_000)",
- "startLine": 1087
+ "startLine": 1109
},
{
"name": "snip_compact",
"signature": "def snip_compact(messages: list, max_messages: int = 50)",
- "startLine": 1111
+ "startLine": 1133
},
{
"name": "micro_compact",
"signature": "def micro_compact(messages: list)",
- "startLine": 1121
+ "startLine": 1152
},
{
"name": "write_transcript",
"signature": "def write_transcript(messages: list)",
- "startLine": 1131
+ "startLine": 1162
},
{
"name": "summarize_history",
"signature": "def summarize_history(messages: list)",
- "startLine": 1140
+ "startLine": 1171
},
{
"name": "compact_history",
"signature": "def compact_history(messages: list)",
- "startLine": 1152
+ "startLine": 1183
},
{
"name": "reactive_compact",
"signature": "def reactive_compact(messages: list)",
- "startLine": 1159
+ "startLine": 1190
},
{
"name": "retry_delay",
"signature": "def retry_delay(attempt: int)",
- "startLine": 1181
+ "startLine": 1217
},
{
"name": "with_retry",
"signature": "def with_retry(fn, state: RecoveryState)",
- "startLine": 1186
+ "startLine": 1222
},
{
"name": "is_prompt_too_long_error",
"signature": "def is_prompt_too_long_error(e: Exception)",
- "startLine": 1216
+ "startLine": 1252
},
{
"name": "is_slow_operation",
"signature": "def is_slow_operation(tool_name: str, tool_input: dict)",
- "startLine": 1233
+ "startLine": 1269
},
{
"name": "should_run_background",
"signature": "def should_run_background(tool_name: str, tool_input: dict)",
- "startLine": 1243
+ "startLine": 1279
},
{
"name": "start_background_task",
"signature": "def start_background_task(block, handlers: dict)",
- "startLine": 1249
+ "startLine": 1285
},
{
"name": "collect_background_results",
"signature": "def collect_background_results()",
- "startLine": 1274
+ "startLine": 1310
},
{
"name": "_cron_field_matches",
"signature": "def _cron_field_matches(field: str, value: int)",
- "startLine": 1316
+ "startLine": 1352
},
{
"name": "cron_matches",
"signature": "def cron_matches(cron_expr: str, dt: datetime)",
- "startLine": 1331
+ "startLine": 1367
},
{
"name": "_validate_cron_field",
"signature": "def _validate_cron_field(field: str, lo: int, hi: int)",
- "startLine": 1353
+ "startLine": 1389
},
{
"name": "validate_cron",
"signature": "def validate_cron(cron_expr: str)",
- "startLine": 1385
+ "startLine": 1421
},
{
"name": "save_durable_jobs",
"signature": "def save_durable_jobs()",
- "startLine": 1398
+ "startLine": 1434
},
{
"name": "load_durable_jobs",
"signature": "def load_durable_jobs()",
- "startLine": 1403
+ "startLine": 1439
},
{
"name": "cancel_job",
"signature": "def cancel_job(job_id: str)",
- "startLine": 1431
+ "startLine": 1467
},
{
"name": "cron_scheduler_loop",
"signature": "def cron_scheduler_loop()",
- "startLine": 1441
+ "startLine": 1477
},
{
"name": "consume_cron_queue",
"signature": "def consume_cron_queue()",
- "startLine": 1460
+ "startLine": 1496
},
{
"name": "run_list_crons",
"signature": "def run_list_crons()",
- "startLine": 1475
+ "startLine": 1511
},
{
"name": "run_cancel_cron",
"signature": "def run_cancel_cron(job_id: str)",
- "startLine": 1487
+ "startLine": 1523
},
{
"name": "normalize_mcp_name",
"signature": "def normalize_mcp_name(name: str)",
- "startLine": 1527
+ "startLine": 1563
},
{
"name": "_mock_server_docs",
"signature": "def _mock_server_docs()",
- "startLine": 1532
+ "startLine": 1568
},
{
"name": "_mock_server_deploy",
"signature": "def _mock_server_deploy()",
- "startLine": 1551
+ "startLine": 1587
},
{
"name": "connect_mcp",
"signature": "def connect_mcp(name: str)",
- "startLine": 1578
+ "startLine": 1614
},
{
"name": "assemble_tool_pool",
"signature": "def assemble_tool_pool()",
- "startLine": 1593
+ "startLine": 1629
},
{
"name": "run_create_worktree",
"signature": "def run_create_worktree(name: str, task_id: str = \"\")",
- "startLine": 1614
+ "startLine": 1650
},
{
"name": "run_remove_worktree",
"signature": "def run_remove_worktree(name: str, discard_changes: bool = False)",
- "startLine": 1617
+ "startLine": 1653
},
{
"name": "run_keep_worktree",
"signature": "def run_keep_worktree(name: str)",
- "startLine": 1620
+ "startLine": 1656
},
{
"name": "run_list_tasks",
"signature": "def run_list_tasks()",
- "startLine": 1634
+ "startLine": 1670
},
{
"name": "run_get_task",
"signature": "def run_get_task(task_id: str)",
- "startLine": 1644
+ "startLine": 1680
},
{
"name": "run_claim_task",
"signature": "def run_claim_task(task_id: str)",
- "startLine": 1650
+ "startLine": 1686
},
{
"name": "run_complete_task",
"signature": "def run_complete_task(task_id: str)",
- "startLine": 1656
+ "startLine": 1692
},
{
"name": "run_spawn_teammate",
"signature": "def run_spawn_teammate(name: str, role: str, prompt: str)",
- "startLine": 1662
+ "startLine": 1698
},
{
"name": "run_send_message",
"signature": "def run_send_message(to: str, content: str)",
- "startLine": 1665
+ "startLine": 1701
},
{
"name": "run_check_inbox",
"signature": "def run_check_inbox()",
- "startLine": 1669
+ "startLine": 1705
},
{
"name": "run_connect_mcp",
"signature": "def run_connect_mcp(name: str)",
- "startLine": 1681
+ "startLine": 1717
},
{
"name": "update_context",
"signature": "def update_context(context: dict, messages: list)",
- "startLine": 1863
+ "startLine": 1899
},
{
"name": "prepare_context",
"signature": "def prepare_context(messages: list)",
- "startLine": 1880
+ "startLine": 1916
},
{
"name": "build_user_content",
"signature": "def build_user_content(results: list[dict])",
- "startLine": 1890
+ "startLine": 1926
},
{
"name": "inject_background_notifications",
"signature": "def inject_background_notifications(messages: list)",
- "startLine": 1899
+ "startLine": 1935
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list, context: dict)",
- "startLine": 1919
+ "startLine": 1955
},
{
"name": "print_turn_assistants",
"signature": "def print_turn_assistants(messages: list, turn_start: int)",
- "startLine": 2025
+ "startLine": 2061
},
{
"name": "cron_autorun_loop",
"signature": "def cron_autorun_loop(history: list, context: dict)",
- "startLine": 2034
+ "startLine": 2070
}
],
"layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns20: Comprehensive Agent — all teaching components in one loop.\n\nRun: python s20_comprehensive/code.py\nNeed: pip install anthropic python-dotenv pyyaml + .env with ANTHROPIC_API_KEY\n\nThis final chapter intentionally puts the earlier teaching mechanisms back\ntogether: dispatch, permission, hooks, todo, subagent, skills, compaction,\nmemory, prompt assembly, error recovery, task graph, background tasks, cron,\nteams, protocols, autonomous agents, worktrees, and MCP.\n\"\"\"\n\nimport ast, json, os, subprocess, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\nimport yaml\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n READLINE_AVAILABLE = True\nexcept ImportError:\n READLINE_AVAILABLE = False\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPRIMARY_MODEL = MODEL\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\nDEFAULT_MAX_TOKENS = 8000\nESCALATED_MAX_TOKENS = 16000\nMAX_RETRIES = 3\nMAX_CONSECUTIVE_529 = 2\nMAX_RECOVERY_RETRIES = 2\nBASE_DELAY_MS = 500\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nCONTINUATION_PROMPT = \"Continue from the previous response. 
Do not repeat completed work.\"\nPROMPT = \"\\033[36ms20 >> \\033[0m\"\nCLI_ACTIVE = False\n\n\ndef terminal_print(text: str):\n if threading.current_thread() is threading.main_thread() or not CLI_ACTIVE:\n print(text)\n return\n line = \"\"\n if READLINE_AVAILABLE:\n try:\n line = readline.get_line_buffer()\n except Exception:\n line = \"\"\n print(f\"\\r\\033[K{text}\")\n print(PROMPT + line, end=\"\", flush=True)\n\n# ── Task System ──\n\n# Tasks are tiny durable records. Later systems add ownership, dependencies,\n# worktrees, and teammates on top of this same file-backed state.\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\nCURRENT_TODOS: list[dict] = []\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n # Dependencies are intentionally simple: every blocker must exist and be\n # completed before the task can be claimed.\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if 
load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\n# Worktree names become filesystem paths, so the teaching version keeps the\n# validation rules strict and reuses them for create/remove/keep.\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree 
name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n # Tool-layer validation is part of the safety boundary; do it before git\n # sees the name, not only after git happens to reject something.\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n if task_id:\n try:\n load_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if 
l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Skill Loading ──\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n try:\n meta = yaml.safe_load(parts[1]) or {}\n except yaml.YAMLError:\n meta = {}\n return meta, parts[2].strip()\n\n\ndef scan_skills():\n SKILL_REGISTRY.clear()\n if not SKILLS_DIR.exists():\n return\n for directory in sorted(SKILLS_DIR.iterdir()):\n if not directory.is_dir():\n continue\n manifest = directory / \"SKILL.md\"\n if not manifest.exists():\n continue\n raw = manifest.read_text()\n meta, _ = 
_parse_frontmatter(raw)\n name = meta.get(\"name\", directory.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\n \"name\": name,\n \"description\": desc,\n \"content\": raw,\n }\n\n\nscan_skills()\n\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(\n f\"- {skill['name']}: {skill['description']}\"\n for skill in SKILL_REGISTRY.values())\n\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n available = \", \".join(SKILL_REGISTRY.keys()) or \"(none)\"\n return f\"Skill not found: {name}. Available: {available}\"\n return skill[\"content\"]\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, edit_file, glob, \"\n \"todo_write, task, load_skill, compact, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n # The system prompt is rebuilt each turn from live context. 
This is where\n # memory, skill catalog, MCP state, and active teammates become visible.\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n sections.append(f\"Current time: {datetime.now().isoformat(timespec='seconds')}\")\n sections.append(\"Skills catalog:\\n\" + list_skills() +\n \"\\nUse load_skill(name) when a skill is relevant.\")\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n # File tools stay inside the workspace or teammate worktree. Bash remains\n # powerful on purpose and is controlled by the permission hook instead.\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None,\n run_in_background: bool = False) -> str:\n # run_in_background is consumed by the dispatcher; direct execution ignores it.\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None,\n offset: int = 0, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n offset = max(int(offset or 0), 0)\n limit = int(limit) if limit is not None else None\n lines = lines[offset:]\n if limit is not None and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str,\n cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n text = fp.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n fp.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str, cwd: Path = None) -> str:\n import glob as g\n try:\n base = cwd or WORKDIR\n results = []\n for match in g.glob(pattern, root_dir=base):\n if (base / match).resolve().is_relative_to(base):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef call_tool_handler(handler, args: dict, name: str) -> str:\n if not handler:\n return f\"Unknown: {name}\"\n try:\n return handler(**(args or {}))\n except TypeError as e:\n return f\"Error: {e}\"\n\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, todo in enumerate(todos):\n if not isinstance(todo, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in todo or \"status\" not in todo:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if todo[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return 
None, f\"Error: todos[{i}] has invalid status '{todo['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n print(f\" \\033[33m[todo] updated {len(CURRENT_TODOS)} item(s)\\033[0m\")\n return f\"Updated {len(CURRENT_TODOS)} todos\"\n\n\n# ── MessageBus ──\n\n# Team communication is append-only JSONL mailboxes. This keeps the protocol\n# inspectable on disk and lets background teammates send messages.\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n terminal_print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n # Responses are matched by request_id so one protocol reply cannot approve\n # a different pending request.\n state = 
pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = \"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str,\n worktree_context: dict | None = None) -> str:\n # Autonomous teammates wake up for inbox messages first, then look for\n # unclaimed tasks. 
This keeps direct protocol messages higher priority.\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n if worktree_context is not None:\n worktree_context[\"path\"] = str(wt_path)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n # Plan approval is a real gate: after submit_plan, the teammate stops\n # taking model/tool steps until lead sends plan_approval_response.\n protocol_ctx = {\"waiting_plan\": None}\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if req_id == protocol_ctx[\"waiting_plan\"]:\n protocol_ctx[\"waiting_plan\"] = None\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n # Once a task with a worktree is claimed, all teammate file tools\n # transparently run inside that isolated directory.\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = (str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if 
len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n # Poll only for protocol replies while the approval gate is\n # closed; do not let the model continue with the task.\n time.sleep(IDLE_POLL_INTERVAL)\n continue\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"submit_plan\":\n output = _teammate_submit_plan(\n name, block.input.get(\"plan\", \"\"))\n match = re.search(r\"\\((req_\\d+)\\)\", output)\n protocol_ctx[\"waiting_plan\"] = (\n match.group(1) if match else output)\n else:\n handler = sub_handlers.get(block.name)\n output = call_tool_handler(handler, block.input,\n block.name)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n if protocol_ctx[\"waiting_plan\"]:\n # Ignore later tool_use blocks from the same model\n # response; they belong after approval, not before.\n break\n messages.append({\"role\": \"user\", \"content\": results})\n if protocol_ctx[\"waiting_plan\"]:\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n continue\n idle_result = idle_poll(name, messages, 
name, role, wt_ctx)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan 
{'approved' if approve else 'rejected'}\"\n\n\n# ── Hooks + Permission Pipeline ──\n\n# Hooks are intentionally outside tool handlers. The loop can add permission,\n# logging, and stop behavior without changing each individual tool.\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [],\n \"PostToolUse\": [], \"Stop\": []}\n\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\n\ndef permission_hook(block):\n # The permission layer sees the raw tool_use before dispatch. It can deny,\n # ask the user, or allow execution to continue.\n if block.name == \"bash\":\n command = block.input.get(\"command\", \"\")\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Permission denied: '{pattern}' is on the deny list\"\n if any(token in command for token in DESTRUCTIVE):\n print(f\"\\n\\033[33m[permission] destructive command\\033[0m\")\n print(f\" {command}\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n try:\n safe_path(path)\n except Exception:\n return f\"Permission denied: path escapes workspace: {path}\"\n if block.name.startswith(\"mcp__\") and \"deploy\" in block.name:\n print(f\"\\n\\033[33m[permission] MCP destructive-looking tool: {block.name}\\033[0m\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\n\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] large output from {block.name}: \"\n f\"{len(str(output))} chars\\033[0m\")\n return None\n\n\ndef user_prompt_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: {WORKDIR}\\033[0m\")\n return None\n\n\ndef stop_hook(messages: list):\n tool_count = 0\n for msg in messages:\n content = msg.get(\"content\")\n if isinstance(content, list):\n tool_count += sum(1 for item in content\n if isinstance(item, dict)\n and item.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: {tool_count} tool result(s)\\033[0m\")\n return None\n\n\nregister_hook(\"UserPromptSubmit\", user_prompt_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", stop_hook)\n\n\n# ── Subagent Tool ──\n\nSUB_SYSTEM = (\n f\"You are a coding subagent at {WORKDIR}. \"\n \"Complete the task, then return a concise final summary. 
\"\n \"Do not spawn more agents.\"\n)\n\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n]\n\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read,\n \"write_file\": run_write, \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in content\n if getattr(block, \"type\", None) == \"text\").strip()\n\n\ndef has_tool_use(content) -> bool:\n # Do not rely on stop_reason alone; the concrete tool_use block is the\n # continuation signal used by the loop.\n return any(getattr(block, \"type\", None) == \"tool_use\"\n for block in content)\n\n\ndef spawn_subagent(description: str) -> str:\n 
messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM, messages=messages,\n tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n output = str(blocked)\n else:\n handler = SUB_HANDLERS.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n text = extract_text(msg[\"content\"])\n if text:\n return text\n return \"Subagent finished without a text summary.\"\n\n\n# ── Context Compaction ──\n\n# Compaction is layered: first shrink oversized tool results, then trim old\n# message ranges, and only call the model for a summary when the context is\n# still too large or the model explicitly asks for compact.\ndef estimate_size(messages: list) -> int:\n return len(json.dumps(messages, default=str))\n\n\ndef collect_tool_results(messages: list):\n found = []\n for mi, msg in enumerate(messages):\n content = msg.get(\"content\")\n if msg.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for bi, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n found.append((mi, bi, block))\n return found\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists():\n 
path.write_text(output)\n return (f\"\\nFull output: {path}\\n\"\n f\"Preview:\\n{output[:2000]}\\n\")\n\n\ndef tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:\n if not messages:\n return messages\n last = messages[-1]\n content = last.get(\"content\")\n if last.get(\"role\") != \"user\" or not isinstance(content, list):\n return messages\n blocks = [(i, b) for i, b in enumerate(content)\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n for _, block in sorted(blocks,\n key=lambda pair: len(str(pair[1].get(\"content\", \"\"))),\n reverse=True):\n if total <= max_bytes:\n break\n text = str(block.get(\"content\", \"\"))\n block[\"content\"] = persist_large_output(\n block.get(\"tool_use_id\", \"unknown\"), text)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\ndef snip_compact(messages: list, max_messages: int = 50) -> list:\n if len(messages) <= max_messages:\n return messages\n keep_head, keep_tail = 3, max_messages - 3\n snipped = len(messages) - keep_head - keep_tail\n return (messages[:keep_head]\n + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}]\n + messages[-keep_tail:])\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(str(block.get(\"content\", \"\"))) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run if needed.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue. \"\n \"Preserve current goal, key findings, changed files, remaining work, \"\n \"and user constraints.\\n\\n\" + conversation)\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000)\n return extract_text(response.content) or \"(empty summary)\"\n\n\ndef compact_history(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[36m[compact] transcript saved: {transcript}\\033[0m\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\ndef reactive_compact(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[31m[reactive compact] transcript saved: {transcript}\\033[0m\")\n try:\n summary = summarize_history(messages)\n except Exception:\n summary = \"Earlier conversation was trimmed after a prompt-too-long error.\"\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"},\n *messages[-5:]]\n\n\n# ── Error Recovery ──\n\nclass RecoveryState:\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt: int) -> float:\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\n\ndef with_retry(fn, state: RecoveryState):\n for attempt in 
range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__.lower()\n msg = str(e).lower()\n if \"ratelimit\" in name or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n if \"overloaded\" in name or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529] switching to {FALLBACK_MODEL}\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\n# ── Background Tasks ──\n\n# Slow tools return a placeholder tool_result immediately. 
Their real output is\n# later injected as a task_notification, so the main loop can keep moving.\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n command = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(keyword in command for keyword in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n return bool(tool_input.get(\"run_in_background\")) or is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block, handlers: dict) -> str:\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n command = block.input.get(\"command\", block.name)\n\n def worker():\n handler = handlers.get(block.name)\n result = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, result)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = str(result)\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": command,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] {bg_id}: {str(command)[:60]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n with background_lock:\n ready = [bg_id for bg_id, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n 
notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n return notifications\n\n\n# ── Cron Scheduler ──\n\n# Cron jobs are stored separately from conversation history. When a job fires,\n# it becomes a scheduled prompt that is injected back into the same agent loop.\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str\n prompt: str\n recurring: bool\n durable: bool\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {}\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(part.strip(), value)\n for part in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n if not (m and h and month_ok):\n return False\n if dom == \"*\" and dow == \"*\":\n return True\n if dom == \"*\":\n return dow_ok\n if dow == \"*\":\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step = field[2:]\n if not step.isdigit() or int(step) <= 0:\n return f\"Invalid step: {field}\"\n return None\n if \",\" in field:\n for part in 
field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err:\n return err\n return None\n if \"-\" in field:\n left, right = field.split(\"-\", 1)\n if not left.isdigit() or not right.isdigit():\n return f\"Invalid range: {field}\"\n a, b = int(left), int(right)\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n value = int(field)\n if value < lo or value > hi:\n return f\"Value {value} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for field, (lo, hi), name in zip(fields, bounds, names):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n durable = [asdict(job) for job in scheduled_jobs.values() if job.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n if not DURABLE_PATH.exists():\n return\n try:\n for item in json.loads(DURABLE_PATH.read_text()):\n job = CronJob(**item)\n if not validate_cron(job.cron):\n scheduled_jobs[job.id] = job\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> CronJob | str:\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable)\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not 
job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now) and _last_fired.get(job.id) != marker:\n cron_queue.append(job)\n _last_fired[job.id] = marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' -> {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs.\"\n return \"\\n\".join(\n f\" {job.id}: '{job.cron}' -> {job.prompt[:40]} \"\n f\"[{'recurring' if job.recurring else 'one-shot'}, \"\n f\"{'durable' if job.durable else 'session'}]\"\n for job in jobs)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\n\n\n# ── MCP System ──\n\n# MCP is modeled as late-bound tools: connect first, then discovered server\n# tools are merged into the normal tool pool with mcp__server__tool names.\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: 
dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. 
(readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Merge builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef 
run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task_json(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_claim_task(task_id: str) -> str:\n try:\n return claim_task(task_id, owner=\"agent\")\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_complete_task(task_id: str) -> str:\n try:\n return complete_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\n# The model sees tool schemas; Python executes handlers. 
S20 keeps both tables\n# explicit so every added capability is visible in one place.\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\",\n \"description\": \"Create and manage a task list for the current session.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"todos\": {\"type\": \"array\",\n \"items\": {\"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"]}},\n \"required\": [\"content\", \"status\"]}}},\n \"required\": [\"todos\"]}},\n {\"name\": \"task\",\n \"description\": \"Launch a focused subagent. 
Returns only its final summary.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"description\": {\"type\": \"string\"}},\n \"required\": [\"description\"]}},\n {\"name\": \"load_skill\",\n \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"compact\",\n \"description\": \"Summarize earlier conversation and continue with compacted context.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"focus\": {\"type\": \"string\"}},\n \"required\": []}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": (\"Schedule a cron job. cron is 5-field: min hour dom \"\n \"month dow. 
For one-shot reminders, compute the target \"\n \"minute and set recurring=false.\"),\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"cron\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"},\n \"recurring\": {\"type\": \"boolean\"},\n \"durable\": {\"type\": \"boolean\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\", \"description\": \"List registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"cancel_cron\", \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n 
\"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n \"todo_write\": run_todo_write, \"task\": spawn_subagent,\n \"load_skill\": load_skill,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron,\n \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, 
\"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n \"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\n \"memories\": memories,\n \"connected_mcp\": list(mcp_clients.keys()),\n \"active_teammates\": list(active_teammates.keys()),\n }\n\n\n# ── Agent Loop ──\n\nrounds_since_todo = 0\nagent_lock = threading.Lock()\n\n\ndef prepare_context(messages: list) -> list:\n # Every LLM turn enters through the same context budget pipeline.\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n if estimate_size(messages) > CONTEXT_LIMIT:\n messages[:] = compact_history(messages)\n return messages\n\n\ndef build_user_content(results: list[dict]) -> list[dict]:\n # Tool results and completed background notifications are both returned to\n # the model as user-side content, matching the tool_result feedback loop.\n content = list(results)\n for note in collect_background_results():\n content.append({\"type\": \"text\", \"text\": note})\n return content\n\n\ndef inject_background_notifications(messages: list):\n notes = collect_background_results()\n if notes:\n messages.append({\"role\": \"user\", \"content\": [\n {\"type\": \"text\", \"text\": note} for note in notes]})\n\n\ndef call_llm(messages: list, context: dict, tools: list,\n state: RecoveryState, max_tokens: int):\n system = assemble_system_prompt(context)\n return with_retry(\n lambda: client.messages.create(\n model=state.current_model,\n system=system,\n messages=messages,\n tools=tools,\n max_tokens=max_tokens),\n state)\n\n\ndef agent_loop(messages: list, context: dict):\n global 
rounds_since_todo\n tools, handlers = assemble_tool_pool()\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # One cycle: inject scheduled/background work, prepare context, call\n # the model, execute tool_use blocks, append tool_results, repeat.\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[cron inject] {job.prompt[:60]}\\033[0m\")\n\n inject_background_notifications(messages)\n\n if rounds_since_todo >= 3:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n prepare_context(messages)\n context = update_context(context, messages)\n tools, handlers = assemble_tool_pool()\n\n try:\n response = call_llm(messages, context, tools, state, max_tokens)\n except Exception as e:\n if is_prompt_too_long_error(e) and not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] retry with {max_tokens}\\033[0m\")\n continue\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n continue\n return\n\n max_tokens = DEFAULT_MAX_TOKENS\n state.has_escalated = False\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n trigger_hooks(\"Stop\", messages)\n return\n\n results = []\n compacted_now = False\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n messages.append({\"role\": \"user\",\n \"content\": \"[Compacted. Continue with summarized context.]\"})\n compacted_now = True\n break\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block, handlers)\n output = (f\"[Background task {bg_id} started] \"\n \"Result will arrive as a task_notification.\")\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n continue\n\n handler = handlers.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:300])\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n else:\n rounds_since_todo += 1\n\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n if compacted_now:\n continue\n\n messages.append({\"role\": \"user\", \"content\": build_user_content(results)})\n\n\ndef print_turn_assistants(messages: list, turn_start: int):\n for msg in messages[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg.get(\"content\", []):\n if getattr(block, \"type\", None) == \"text\":\n terminal_print(block.text)\n\n\ndef cron_autorun_loop(history: list, context: dict):\n while True:\n time.sleep(1)\n fired = consume_cron_queue()\n if not fired:\n continue\n with agent_lock:\n turn_start = len(history)\n for job in fired:\n history.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n terminal_print(\n f\" \\033[35m[cron auto] {job.prompt[:60]}\\033[0m\")\n agent_loop(history, context)\n context.update(update_context(context, history))\n 
print_turn_assistants(history, turn_start)\n\n\nif __name__ == \"__main__\":\n CLI_ACTIVE = True\n print(\"s20: comprehensive agent\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n threading.Thread(target=cron_autorun_loop,\n args=(history, context), daemon=True).start()\n while True:\n try:\n query = input(PROMPT)\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n with agent_lock:\n agent_loop(history, context)\n context = update_context(context, history)\n print_turn_assistants(history, turn_start)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n def inbox_label(msg):\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n suffix = f\" req:{req_id}\" if req_id else \"\"\n return f\"{msg.get('type', 'message')}{suffix}\"\n\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{inbox_label(m)}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
+ "source": "#!/usr/bin/env python3\n\"\"\"\ns20: Comprehensive Agent — all teaching components in one loop.\n\nRun: python s20_comprehensive/code.py\nNeed: pip install anthropic python-dotenv pyyaml + .env with ANTHROPIC_API_KEY\n\nThis final chapter intentionally puts the earlier teaching mechanisms back\ntogether: dispatch, permission, hooks, todo, subagent, skills, compaction,\nmemory, prompt assembly, error recovery, task graph, background tasks, cron,\nteams, protocols, autonomous agents, worktrees, and MCP.\n\"\"\"\n\nimport ast, json, os, subprocess, time, random, threading, re\nfrom pathlib import Path\nfrom datetime import datetime\nfrom dataclasses import dataclass, asdict, field\nimport yaml\n\ntry:\n import readline\n readline.parse_and_bind('set bind-tty-special-chars off')\n READLINE_AVAILABLE = True\nexcept ImportError:\n READLINE_AVAILABLE = False\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPRIMARY_MODEL = MODEL\nFALLBACK_MODEL = os.getenv(\"FALLBACK_MODEL_ID\")\n\nSKILLS_DIR = WORKDIR / \"skills\"\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\nDEFAULT_MAX_TOKENS = 8000\nESCALATED_MAX_TOKENS = 16000\nMAX_RETRIES = 3\nMAX_CONSECUTIVE_529 = 2\nMAX_RECOVERY_RETRIES = 2\nBASE_DELAY_MS = 500\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nCONTINUATION_PROMPT = \"Continue from the previous response. 
Do not repeat completed work.\"\nPROMPT = \"\\033[36ms20 >> \\033[0m\"\nCLI_ACTIVE = False\n\n\ndef terminal_print(text: str):\n if threading.current_thread() is threading.main_thread() or not CLI_ACTIVE:\n print(text)\n return\n line = \"\"\n if READLINE_AVAILABLE:\n try:\n line = readline.get_line_buffer()\n except Exception:\n line = \"\"\n print(f\"\\r\\033[K{text}\")\n print(PROMPT + line, end=\"\", flush=True)\n\n# ── Task System ──\n\n# Tasks are tiny durable records. Later systems add ownership, dependencies,\n# worktrees, and teammates on top of this same file-backed state.\nTASKS_DIR = WORKDIR / \".tasks\"\nTASKS_DIR.mkdir(exist_ok=True)\nCURRENT_TODOS: list[dict] = []\n\n\n@dataclass\nclass Task:\n id: str\n subject: str\n description: str\n status: str\n owner: str | None\n blockedBy: list[str]\n worktree: str | None = None\n\n\ndef _task_path(task_id: str) -> Path:\n return TASKS_DIR / f\"{task_id}.json\"\n\n\ndef create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> Task:\n task = Task(\n id=f\"task_{int(time.time())}_{random.randint(0, 9999):04d}\",\n subject=subject, description=description,\n status=\"pending\", owner=None,\n blockedBy=blockedBy or [],\n )\n save_task(task)\n return task\n\n\ndef save_task(task: Task):\n _task_path(task.id).write_text(json.dumps(asdict(task), indent=2))\n\n\ndef load_task(task_id: str) -> Task:\n return Task(**json.loads(_task_path(task_id).read_text()))\n\n\ndef list_tasks() -> list[Task]:\n return [Task(**json.loads(p.read_text()))\n for p in sorted(TASKS_DIR.glob(\"task_*.json\"))]\n\n\ndef get_task_json(task_id: str) -> str:\n return json.dumps(asdict(load_task(task_id)), indent=2)\n\n\ndef can_start(task_id: str) -> bool:\n # Dependencies are intentionally simple: every blocker must exist and be\n # completed before the task can be claimed.\n task = load_task(task_id)\n for dep_id in task.blockedBy:\n if not _task_path(dep_id).exists():\n return False\n if 
load_task(dep_id).status != \"completed\":\n return False\n return True\n\n\ndef claim_task(task_id: str, owner: str = \"agent\") -> str:\n task = load_task(task_id)\n if task.status != \"pending\":\n return f\"Task {task_id} is {task.status}, cannot claim\"\n if task.owner:\n return f\"Task {task_id} already owned by {task.owner}\"\n if not can_start(task_id):\n deps = [d for d in task.blockedBy\n if _task_path(d).exists() and load_task(d).status != \"completed\"]\n missing = [d for d in task.blockedBy if not _task_path(d).exists()]\n parts = []\n if deps: parts.append(f\"blocked by: {deps}\")\n if missing: parts.append(f\"missing deps: {missing}\")\n return \"Cannot start — \" + \", \".join(parts)\n task.owner = owner\n task.status = \"in_progress\"\n save_task(task)\n print(f\" \\033[36m[claim] {task.subject} → in_progress\\033[0m\")\n return f\"Claimed {task.id} ({task.subject})\"\n\n\ndef complete_task(task_id: str) -> str:\n task = load_task(task_id)\n if task.status != \"in_progress\":\n return f\"Task {task_id} is {task.status}, cannot complete\"\n task.status = \"completed\"\n save_task(task)\n unblocked = [t.subject for t in list_tasks()\n if t.status == \"pending\" and t.blockedBy and can_start(t.id)]\n print(f\" \\033[32m[complete] {task.subject} ✓\\033[0m\")\n msg = f\"Completed {task.id} ({task.subject})\"\n if unblocked:\n msg += f\"\\nUnblocked: {', '.join(unblocked)}\"\n return msg\n\n\n# ── Worktree System ──\n\n# Worktree names become filesystem paths, so the teaching version keeps the\n# validation rules strict and reuses them for create/remove/keep.\nWORKTREES_DIR = WORKDIR / \".worktrees\"\nWORKTREES_DIR.mkdir(exist_ok=True)\n\nVALID_WT_NAME = re.compile(r'^[A-Za-z0-9._-]{1,64}$')\n\n\ndef validate_worktree_name(name: str) -> str | None:\n if not name:\n return \"Worktree name cannot be empty\"\n if name in (\".\", \"..\"):\n return f\"'{name}' is not a valid worktree name\"\n if not VALID_WT_NAME.match(name):\n return (f\"Invalid worktree 
name '{name}': \"\n \"only letters, digits, dots, underscores, dashes (1-64 chars)\")\n return None\n\n\ndef run_git(args: list[str]) -> tuple[bool, str]:\n try:\n r = subprocess.run([\"git\"] + args, cwd=WORKDIR,\n capture_output=True, text=True, timeout=30)\n out = (r.stdout + r.stderr).strip()\n return r.returncode == 0, out[:5000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return False, \"Error: git timeout\"\n\n\ndef log_event(event_type: str, worktree_name: str, task_id: str = \"\"):\n event = {\"type\": event_type, \"worktree\": worktree_name,\n \"task_id\": task_id, \"ts\": time.time()}\n events_file = WORKTREES_DIR / \"events.jsonl\"\n with open(events_file, \"a\") as f:\n f.write(json.dumps(event) + \"\\n\")\n\n\ndef create_worktree(name: str, task_id: str = \"\") -> str:\n # Tool-layer validation is part of the safety boundary; do it before git\n # sees the name, not only after git happens to reject something.\n err = validate_worktree_name(name)\n if err:\n return f\"Error: {err}\"\n if task_id:\n try:\n load_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n path = WORKTREES_DIR / name\n if path.exists():\n return f\"Worktree '{name}' already exists at {path}\"\n ok, result = run_git([\"worktree\", \"add\", str(path), \"-b\", f\"wt/{name}\", \"HEAD\"])\n if not ok:\n return f\"Git error: {result}\"\n if task_id:\n bind_task_to_worktree(task_id, name)\n log_event(\"create\", name, task_id)\n print(f\" \\033[33m[worktree] created: {name} at {path}\\033[0m\")\n return f\"Worktree '{name}' created at {path}\"\n\n\ndef bind_task_to_worktree(task_id: str, worktree_name: str):\n task = load_task(task_id)\n task.worktree = worktree_name\n save_task(task)\n\n\ndef _count_worktree_changes(path: Path) -> tuple[int, int]:\n try:\n r1 = subprocess.run([\"git\", \"status\", \"--porcelain\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n files = len([l for l in r1.stdout.strip().splitlines() if 
l.strip()])\n r2 = subprocess.run([\"git\", \"log\", \"@{push}..HEAD\", \"--oneline\"],\n cwd=path, capture_output=True, text=True, timeout=10)\n commits = len([l for l in r2.stdout.strip().splitlines() if l.strip()])\n return files, commits\n except Exception:\n return -1, -1\n\n\ndef remove_worktree(name: str, discard_changes: bool = False) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n path = WORKTREES_DIR / name\n if not path.exists():\n return f\"Worktree '{name}' not found\"\n if not discard_changes:\n files, commits = _count_worktree_changes(path)\n if files < 0:\n return \"Cannot verify status. Use discard_changes=true to force.\"\n if files > 0 or commits > 0:\n return (f\"Worktree '{name}' has {files} file(s), {commits} commit(s). \"\n \"Use discard_changes=true or keep_worktree.\")\n ok1, _ = run_git([\"worktree\", \"remove\", str(path), \"--force\"])\n if not ok1:\n return f\"Failed to remove worktree '{name}'\"\n run_git([\"branch\", \"-D\", f\"wt/{name}\"])\n log_event(\"remove\", name)\n print(f\" \\033[33m[worktree] removed: {name}\\033[0m\")\n return f\"Worktree '{name}' removed\"\n\n\ndef keep_worktree(name: str) -> str:\n err = validate_worktree_name(name)\n if err:\n return err\n log_event(\"keep\", name)\n return f\"Worktree '{name}' kept for review (branch: wt/{name})\"\n\n\n# ── Skill Loading ──\n\nSKILL_REGISTRY: dict[str, dict] = {}\n\n\ndef _parse_frontmatter(text: str) -> tuple[dict, str]:\n if not text.startswith(\"---\"):\n return {}, text\n parts = text.split(\"---\", 2)\n if len(parts) < 3:\n return {}, text\n try:\n meta = yaml.safe_load(parts[1]) or {}\n except yaml.YAMLError:\n meta = {}\n return meta, parts[2].strip()\n\n\ndef scan_skills():\n SKILL_REGISTRY.clear()\n if not SKILLS_DIR.exists():\n return\n for directory in sorted(SKILLS_DIR.iterdir()):\n if not directory.is_dir():\n continue\n manifest = directory / \"SKILL.md\"\n if not manifest.exists():\n continue\n raw = manifest.read_text()\n meta, _ = 
_parse_frontmatter(raw)\n name = meta.get(\"name\", directory.name)\n desc = meta.get(\"description\", raw.split(\"\\n\")[0].lstrip(\"#\").strip())\n SKILL_REGISTRY[name] = {\n \"name\": name,\n \"description\": desc,\n \"content\": raw,\n }\n\n\nscan_skills()\n\n\ndef list_skills() -> str:\n if not SKILL_REGISTRY:\n return \"(no skills found)\"\n return \"\\n\".join(\n f\"- {skill['name']}: {skill['description']}\"\n for skill in SKILL_REGISTRY.values())\n\n\ndef load_skill(name: str) -> str:\n skill = SKILL_REGISTRY.get(name)\n if not skill:\n available = \", \".join(SKILL_REGISTRY.keys()) or \"(none)\"\n return f\"Skill not found: {name}. Available: {available}\"\n return skill[\"content\"]\n\n\n# ── Prompt Assembly ──\n\nPROMPT_SECTIONS = {\n \"identity\": \"You are a coding agent. Act, don't explain.\",\n \"tools\": \"Available tools: bash, read_file, write_file, edit_file, glob, \"\n \"todo_write, task, load_skill, compact, \"\n \"create_task, list_tasks, get_task, claim_task, complete_task, \"\n \"schedule_cron, list_crons, cancel_cron, \"\n \"spawn_teammate, send_message, check_inbox, \"\n \"request_shutdown, request_plan, review_plan, \"\n \"create_worktree, remove_worktree, keep_worktree, \"\n \"connect_mcp. MCP tools are prefixed mcp__{server}__{tool}.\",\n \"workspace\": f\"Working directory: {WORKDIR}\",\n \"memory\": \"Relevant memories are injected below when available.\",\n}\n\n\ndef assemble_system_prompt(context: dict) -> str:\n # The system prompt is rebuilt each turn from live context. 
This is where\n # memory, skill catalog, MCP state, and active teammates become visible.\n sections = [PROMPT_SECTIONS[\"identity\"],\n PROMPT_SECTIONS[\"tools\"],\n PROMPT_SECTIONS[\"workspace\"]]\n sections.append(f\"Current time: {datetime.now().isoformat(timespec='seconds')}\")\n sections.append(\"Skills catalog:\\n\" + list_skills() +\n \"\\nUse load_skill(name) when a skill is relevant.\")\n if context.get(\"memories\"):\n sections.append(f\"Relevant memories:\\n{context['memories']}\")\n mcp_names = list(mcp_clients.keys())\n if mcp_names:\n sections.append(f\"Connected MCP servers: {', '.join(mcp_names)}\")\n return \"\\n\\n\".join(sections)\n\n\n# ── Basic Tools ──\n\ndef safe_path(p: str, cwd: Path = None) -> Path:\n # File tools stay inside the workspace or teammate worktree. Bash remains\n # powerful on purpose and is controlled by the permission hook instead.\n base = cwd or WORKDIR\n path = (base / p).resolve()\n if not path.is_relative_to(base):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str, cwd: Path = None,\n run_in_background: bool = False) -> str:\n # run_in_background is consumed by the dispatcher; direct execution ignores it.\n try:\n r = subprocess.run(command, shell=True, cwd=cwd or WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int | None = None,\n offset: int = 0, cwd: Path = None) -> str:\n try:\n lines = safe_path(path, cwd).read_text().splitlines()\n offset = max(int(offset or 0), 0)\n limit = int(limit) if limit is not None else None\n lines = lines[offset:]\n if limit is not None and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str, cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str,\n cwd: Path = None) -> str:\n try:\n fp = safe_path(path, cwd)\n text = fp.read_text()\n if old_text not in text:\n return f\"Error: text not found in {path}\"\n fp.write_text(text.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_glob(pattern: str, cwd: Path = None) -> str:\n import glob as g\n try:\n base = cwd or WORKDIR\n results = []\n for match in g.glob(pattern, root_dir=base):\n if (base / match).resolve().is_relative_to(base):\n results.append(match)\n return \"\\n\".join(results) if results else \"(no matches)\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef call_tool_handler(handler, args: dict, name: str) -> str:\n if not handler:\n return f\"Unknown: {name}\"\n try:\n return handler(**(args or {}))\n except TypeError as e:\n return f\"Error: {e}\"\n\n\ndef _normalize_todos(todos):\n if isinstance(todos, str):\n try:\n todos = json.loads(todos)\n except json.JSONDecodeError:\n try:\n todos = ast.literal_eval(todos)\n except (SyntaxError, ValueError):\n return None, \"Error: todos must be a list or JSON array string\"\n if not isinstance(todos, list):\n return None, \"Error: todos must be a list\"\n for i, todo in enumerate(todos):\n if not isinstance(todo, dict):\n return None, f\"Error: todos[{i}] must be an object\"\n if \"content\" not in todo or \"status\" not in todo:\n return None, f\"Error: todos[{i}] missing 'content' or 'status'\"\n if todo[\"status\"] not in (\"pending\", \"in_progress\", \"completed\"):\n return 
None, f\"Error: todos[{i}] has invalid status '{todo['status']}'\"\n return todos, None\n\ndef run_todo_write(todos: list) -> str:\n global CURRENT_TODOS\n todos, error = _normalize_todos(todos)\n if error:\n return error\n CURRENT_TODOS = todos\n print(f\" \\033[33m[todo] updated {len(CURRENT_TODOS)} item(s)\\033[0m\")\n return f\"Updated {len(CURRENT_TODOS)} todos\"\n\n\n# ── MessageBus ──\n\n# Team communication is append-only JSONL mailboxes. This keeps the protocol\n# inspectable on disk and lets background teammates send messages.\nMAILBOX_DIR = WORKDIR / \".mailboxes\"\nMAILBOX_DIR.mkdir(exist_ok=True)\n\n\nclass MessageBus:\n def send(self, from_agent: str, to_agent: str, content: str,\n msg_type: str = \"message\", metadata: dict = None):\n msg = {\"from\": from_agent, \"to\": to_agent,\n \"content\": content, \"type\": msg_type,\n \"ts\": time.time(), \"metadata\": metadata or {}}\n inbox = MAILBOX_DIR / f\"{to_agent}.jsonl\"\n with open(inbox, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n terminal_print(f\" \\033[33m[bus] {from_agent} → {to_agent}: \"\n f\"({msg_type}) {content[:50]}\\033[0m\")\n\n def read_inbox(self, agent: str) -> list[dict]:\n inbox = MAILBOX_DIR / f\"{agent}.jsonl\"\n if not inbox.exists():\n return []\n msgs = [json.loads(line) for line in inbox.read_text().splitlines()\n if line.strip()]\n inbox.unlink()\n return msgs\n\n\nBUS = MessageBus()\nactive_teammates: dict[str, bool] = {}\n\n# ── Protocol State ──\n\n@dataclass\nclass ProtocolState:\n request_id: str\n type: str\n sender: str\n target: str\n status: str\n payload: str\n created_at: float = field(default_factory=time.time)\n\n\npending_requests: dict[str, ProtocolState] = {}\n\n\ndef new_request_id() -> str:\n return f\"req_{random.randint(0, 999999):06d}\"\n\n\ndef match_response(response_type: str, request_id: str, approve: bool):\n # Responses are matched by request_id so one protocol reply cannot approve\n # a different pending request.\n state = 
pending_requests.get(request_id)\n if not state:\n return\n if state.type == \"shutdown\" and response_type != \"shutdown_response\":\n return\n if state.type == \"plan_approval\" and response_type != \"plan_approval_response\":\n return\n state.status = \"approved\" if approve else \"rejected\"\n\n\ndef consume_lead_inbox(route_protocol=True) -> list[dict]:\n msgs = BUS.read_inbox(\"lead\")\n if route_protocol:\n for msg in msgs:\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n msg_type = msg.get(\"type\", \"\")\n if req_id and msg_type.endswith(\"_response\"):\n match_response(msg_type, req_id, meta.get(\"approve\", False))\n return msgs\n\n\n# ── Autonomous Agent ──\n\nIDLE_POLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\n\ndef scan_unclaimed_tasks() -> list[dict]:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and can_start(task[\"id\"])):\n unclaimed.append(task)\n return unclaimed\n\n\ndef idle_poll(agent_name: str, messages: list,\n name: str, role: str,\n worktree_context: dict | None = None) -> str:\n # Autonomous teammates wake up for inbox messages first, then look for\n # unclaimed tasks. 
This keeps direct protocol messages higher priority.\n for _ in range(IDLE_TIMEOUT // IDLE_POLL_INTERVAL):\n time.sleep(IDLE_POLL_INTERVAL)\n inbox = BUS.read_inbox(agent_name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return \"shutdown\"\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(inbox) + \"\"})\n return \"work\"\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task_data = unclaimed[0]\n result = claim_task(task_data[\"id\"], agent_name)\n if \"Claimed\" in result:\n wt_info = \"\"\n if task_data.get(\"worktree\"):\n wt_path = WORKTREES_DIR / task_data[\"worktree\"]\n wt_info = f\"\\nWork directory: {wt_path}\"\n if worktree_context is not None:\n worktree_context[\"path\"] = str(wt_path)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task {task_data['id']}: \"\n f\"{task_data['subject']}{wt_info}\"})\n return \"work\"\n return \"timeout\"\n\n\n# ── Teammate Thread ──\n\ndef spawn_teammate_thread(name: str, role: str, prompt: str) -> str:\n if name in active_teammates:\n return f\"Teammate '{name}' already exists\"\n\n # Plan approval is a real gate: after submit_plan, the teammate stops\n # taking model/tool steps until lead sends plan_approval_response.\n protocol_ctx = {\"waiting_plan\": None}\n system = (f\"You are '{name}', a {role}. \"\n f\"Use tools to complete tasks. 
\"\n f\"If a task has a worktree, work in that directory.\")\n\n def handle_inbox_message(name: str, msg: dict, messages: list):\n msg_type = msg.get(\"type\", \"message\")\n meta = msg.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n if msg_type == \"shutdown_request\":\n BUS.send(name, \"lead\", \"Shutting down.\",\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": True})\n return True\n if msg_type == \"plan_approval_response\":\n approve = meta.get(\"approve\", False)\n if req_id == protocol_ctx[\"waiting_plan\"]:\n protocol_ctx[\"waiting_plan\"] = None\n messages.append({\"role\": \"user\",\n \"content\": \"[Plan approved]\" if approve\n else f\"[Plan rejected] {msg['content']}\"})\n return False\n\n def run():\n wt_ctx = {\"path\": None}\n\n def _wt_cwd():\n # Once a task with a worktree is claimed, all teammate file tools\n # transparently run inside that isolated directory.\n p = wt_ctx[\"path\"]\n return Path(p) if p else None\n\n def _run_bash(command: str) -> str:\n return run_bash(command, cwd=_wt_cwd())\n\n def _run_read(path: str) -> str:\n return run_read(path, cwd=_wt_cwd())\n\n def _run_write(path: str, content: str) -> str:\n return run_write(path, content, cwd=_wt_cwd())\n\n def _run_list_tasks():\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n def _run_claim_task(task_id: str):\n result = claim_task(task_id, owner=name)\n if \"Claimed\" in result:\n task = load_task(task_id)\n wt_ctx[\"path\"] = (str(WORKTREES_DIR / task.worktree)\n if task.worktree else None)\n return result\n\n def _run_complete_task(task_id: str):\n result = complete_task(task_id)\n wt_ctx[\"path\"] = None\n return result\n\n messages = [{\"role\": \"user\", \"content\": prompt}]\n sub_tools = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": 
\"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"send_message\",\n \"description\": \"Send message to another agent.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"submit_plan\",\n \"description\": \"Submit a plan for Lead approval.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"plan\": {\"type\": \"string\"}},\n \"required\": [\"plan\"]}},\n {\"name\": \"list_tasks\",\n \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n {\"name\": \"claim_task\",\n \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\",\n \"description\": \"Mark an in-progress task as completed.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n ]\n\n sub_handlers = {\n \"bash\": _run_bash, \"read_file\": _run_read,\n \"write_file\": _run_write,\n \"send_message\": lambda to, content: (BUS.send(name, to, content),\n \"Sent\")[1],\n \"list_tasks\": _run_list_tasks,\n \"claim_task\": _run_claim_task,\n \"complete_task\": _run_complete_task,\n }\n\n while True:\n if 
len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}. \"\n f\"Continue your work.\"})\n should_shutdown = False\n for _ in range(10):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n stopped = handle_inbox_message(name, msg, messages)\n if stopped:\n should_shutdown = True\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n # Poll only for protocol replies while the approval gate is\n # closed; do not let the model continue with the task.\n time.sleep(IDLE_POLL_INTERVAL)\n continue\n if inbox and not should_shutdown:\n non_protocol = [m for m in inbox\n if m.get(\"type\") == \"message\"]\n if non_protocol:\n messages.append({\"role\": \"user\",\n \"content\": \"\" + json.dumps(non_protocol) + \"\"})\n try:\n response = client.messages.create(\n model=MODEL, system=system, messages=messages[-20:],\n tools=sub_tools, max_tokens=8000)\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"submit_plan\":\n output = _teammate_submit_plan(\n name, block.input.get(\"plan\", \"\"))\n match = re.search(r\"\\((req_\\d+)\\)\", output)\n protocol_ctx[\"waiting_plan\"] = (\n match.group(1) if match else output)\n else:\n handler = sub_handlers.get(block.name)\n output = call_tool_handler(handler, block.input,\n block.name)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n if protocol_ctx[\"waiting_plan\"]:\n # Ignore later tool_use blocks from the same model\n # response; they belong after approval, not before.\n break\n messages.append({\"role\": \"user\", \"content\": results})\n if protocol_ctx[\"waiting_plan\"]:\n break\n if should_shutdown:\n break\n if protocol_ctx[\"waiting_plan\"]:\n continue\n idle_result = idle_poll(name, messages, 
name, role, wt_ctx)\n if idle_result in (\"shutdown\", \"timeout\"):\n break\n\n summary = \"Done.\"\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\" and isinstance(msg[\"content\"], list):\n for b in msg[\"content\"]:\n if getattr(b, \"type\", None) == \"text\":\n summary = b.text\n break\n else:\n continue\n break\n BUS.send(name, \"lead\", summary, \"result\")\n active_teammates.pop(name, None)\n\n active_teammates[name] = True\n threading.Thread(target=run, daemon=True).start()\n return f\"Teammate '{name}' spawned as {role}\"\n\n\ndef _teammate_submit_plan(from_name: str, plan: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"plan_approval\",\n sender=from_name, target=\"lead\",\n status=\"pending\", payload=plan)\n BUS.send(from_name, \"lead\", plan,\n \"plan_approval_request\",\n {\"request_id\": req_id})\n return f\"Plan submitted ({req_id})\"\n\n\n# ── Lead Protocol Tools ──\n\ndef run_request_shutdown(teammate: str) -> str:\n req_id = new_request_id()\n pending_requests[req_id] = ProtocolState(\n request_id=req_id, type=\"shutdown\",\n sender=\"lead\", target=teammate,\n status=\"pending\", payload=\"\")\n BUS.send(\"lead\", teammate, \"Shut down.\", \"shutdown_request\",\n {\"request_id\": req_id})\n return f\"Shutdown request sent to {teammate}\"\n\n\ndef run_request_plan(teammate: str, task: str) -> str:\n BUS.send(\"lead\", teammate, f\"Submit plan for: {task}\", \"message\")\n return f\"Asked {teammate} to submit a plan\"\n\n\ndef run_review_plan(request_id: str, approve: bool,\n feedback: str = \"\") -> str:\n state = pending_requests.get(request_id)\n if not state:\n return f\"Request {request_id} not found\"\n state.status = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", state.sender,\n feedback or (\"Approved\" if approve else \"Rejected\"),\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n return f\"Plan 
{'approved' if approve else 'rejected'}\"\n\n\n# ── Hooks + Permission Pipeline ──\n\n# Hooks are intentionally outside tool handlers. The loop can add permission,\n# logging, and stop behavior without changing each individual tool.\nHOOKS = {\"UserPromptSubmit\": [], \"PreToolUse\": [],\n \"PostToolUse\": [], \"Stop\": []}\n\n\ndef register_hook(event: str, callback):\n HOOKS[event].append(callback)\n\n\ndef trigger_hooks(event: str, *args):\n for callback in HOOKS[event]:\n result = callback(*args)\n if result is not None:\n return result\n return None\n\n\nDENY_LIST = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"mkfs\", \"dd if=\"]\nDESTRUCTIVE = [\"rm \", \"> /etc/\", \"chmod 777\"]\n\n\ndef permission_hook(block):\n # The permission layer sees the raw tool_use before dispatch. It can deny,\n # ask the user, or allow execution to continue.\n if block.name == \"bash\":\n command = block.input.get(\"command\", \"\")\n for pattern in DENY_LIST:\n if pattern in command:\n return f\"Permission denied: '{pattern}' is on the deny list\"\n if any(token in command for token in DESTRUCTIVE):\n print(f\"\\n\\033[33m[permission] destructive command\\033[0m\")\n print(f\" {command}\")\n choice = input(\" Allow? [y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n if block.name in (\"write_file\", \"edit_file\"):\n path = block.input.get(\"path\", \"\")\n try:\n safe_path(path)\n except Exception:\n return f\"Permission denied: path escapes workspace: {path}\"\n if block.name.startswith(\"mcp__\") and \"deploy\" in block.name:\n print(f\"\\n\\033[33m[permission] MCP destructive-looking tool: {block.name}\\033[0m\")\n choice = input(\" Allow? 
[y/N] \").strip().lower()\n if choice not in (\"y\", \"yes\"):\n return \"Permission denied by user\"\n return None\n\n\ndef log_hook(block):\n print(f\"\\033[90m[HOOK] {block.name}\\033[0m\")\n return None\n\n\ndef large_output_hook(block, output):\n if len(str(output)) > 100000:\n print(f\"\\033[33m[HOOK] large output from {block.name}: \"\n f\"{len(str(output))} chars\\033[0m\")\n return None\n\n\ndef user_prompt_hook(query: str):\n print(f\"\\033[90m[HOOK] UserPromptSubmit: {WORKDIR}\\033[0m\")\n return None\n\n\ndef stop_hook(messages: list):\n tool_count = 0\n for msg in messages:\n content = msg.get(\"content\")\n if isinstance(content, list):\n tool_count += sum(1 for item in content\n if isinstance(item, dict)\n and item.get(\"type\") == \"tool_result\")\n print(f\"\\033[90m[HOOK] Stop: {tool_count} tool result(s)\\033[0m\")\n return None\n\n\nregister_hook(\"UserPromptSubmit\", user_prompt_hook)\nregister_hook(\"PreToolUse\", permission_hook)\nregister_hook(\"PreToolUse\", log_hook)\nregister_hook(\"PostToolUse\", large_output_hook)\nregister_hook(\"Stop\", stop_hook)\n\n\n# ── Subagent Tool ──\n\nSUB_SYSTEM = (\n f\"You are a coding subagent at {WORKDIR}. \"\n \"Complete the task, then return a concise final summary. 
\"\n \"Do not spawn more agents.\"\n)\n\n\nSUB_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n]\n\n\nSUB_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read,\n \"write_file\": run_write, \"edit_file\": run_edit,\n \"glob\": run_glob,\n}\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return str(content)\n return \"\\n\".join(\n getattr(block, \"text\", \"\")\n for block in content\n if getattr(block, \"type\", None) == \"text\").strip()\n\n\ndef has_tool_use(content) -> bool:\n # Do not rely on stop_reason alone; the concrete tool_use block is the\n # continuation signal used by the loop.\n return any(getattr(block, \"type\", None) == \"tool_use\"\n for block in content)\n\n\ndef spawn_subagent(description: str) -> str:\n 
messages = [{\"role\": \"user\", \"content\": description}]\n for _ in range(30):\n response = client.messages.create(\n model=MODEL, system=SUB_SYSTEM, messages=messages,\n tools=SUB_TOOLS, max_tokens=8000)\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n break\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n output = str(blocked)\n else:\n handler = SUB_HANDLERS.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n for msg in reversed(messages):\n if msg[\"role\"] == \"assistant\":\n text = extract_text(msg[\"content\"])\n if text:\n return text\n return \"Subagent finished without a text summary.\"\n\n\n# ── Context Compaction ──\n\n# Compaction is layered: first shrink oversized tool results, then trim old\n# message ranges, and only call the model for a summary when the context is\n# still too large or the model explicitly asks for compact.\ndef estimate_size(messages: list) -> int:\n return len(json.dumps(messages, default=str))\n\ndef block_type(block):\n return block.get(\"type\") if isinstance(block, dict) else getattr(block, \"type\", None)\n\n\ndef message_has_tool_use(message: dict) -> bool:\n if message.get(\"role\") != \"assistant\":\n return False\n content = message.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(block_type(block) == \"tool_use\" for block in content)\n\n\ndef is_tool_result_message(message: dict) -> bool:\n if message.get(\"role\") != \"user\":\n return False\n content = message.get(\"content\")\n if not isinstance(content, list):\n return False\n return any(isinstance(block, dict) and 
block.get(\"type\") == \"tool_result\"\n for block in content)\n\n\ndef collect_tool_results(messages: list):\n found = []\n for mi, msg in enumerate(messages):\n content = msg.get(\"content\")\n if msg.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for bi, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n found.append((mi, bi, block))\n return found\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not path.exists():\n path.write_text(output)\n return (f\"\\nFull output: {path}\\n\"\n f\"Preview:\\n{output[:2000]}\\n\")\n\n\ndef tool_result_budget(messages: list, max_bytes: int = 200_000) -> list:\n if not messages:\n return messages\n last = messages[-1]\n content = last.get(\"content\")\n if last.get(\"role\") != \"user\" or not isinstance(content, list):\n return messages\n blocks = [(i, b) for i, b in enumerate(content)\n if isinstance(b, dict) and b.get(\"type\") == \"tool_result\"]\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n if total <= max_bytes:\n return messages\n for _, block in sorted(blocks,\n key=lambda pair: len(str(pair[1].get(\"content\", \"\"))),\n reverse=True):\n if total <= max_bytes:\n break\n text = str(block.get(\"content\", \"\"))\n block[\"content\"] = persist_large_output(\n block.get(\"tool_use_id\", \"unknown\"), text)\n total = sum(len(str(b.get(\"content\", \"\"))) for _, b in blocks)\n return messages\n\n\ndef snip_compact(messages: list, max_messages: int = 50) -> list:\n if len(messages) <= max_messages:\n return messages\n head_end, tail_start = 3, len(messages) - (max_messages - 3)\n if head_end > 0 and message_has_tool_use(messages[head_end - 1]):\n while head_end < len(messages) and is_tool_result_message(messages[head_end]):\n head_end += 
1\n if (tail_start > 0 and tail_start < len(messages)\n and is_tool_result_message(messages[tail_start])\n and message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n if head_end >= tail_start:\n return messages\n snipped = tail_start - head_end\n return (messages[:head_end]\n + [{\"role\": \"user\", \"content\": f\"[snipped {snipped} messages]\"}]\n + messages[tail_start:])\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n if len(str(block.get(\"content\", \"\"))) > 120:\n block[\"content\"] = \"[Earlier tool result compacted. Re-run if needed.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\"Summarize this coding-agent conversation so work can continue. 
\"\n \"Preserve current goal, key findings, changed files, remaining work, \"\n \"and user constraints.\\n\\n\" + conversation)\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000)\n return extract_text(response.content) or \"(empty summary)\"\n\n\ndef compact_history(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[36m[compact] transcript saved: {transcript}\\033[0m\")\n summary = summarize_history(messages)\n return [{\"role\": \"user\", \"content\": f\"[Compacted]\\n\\n{summary}\"}]\n\n\ndef reactive_compact(messages: list) -> list:\n transcript = write_transcript(messages)\n print(f\" \\033[31m[reactive compact] transcript saved: {transcript}\\033[0m\")\n tail_start = max(0, len(messages) - 5)\n if (tail_start > 0 and tail_start < len(messages)\n and is_tool_result_message(messages[tail_start])\n and message_has_tool_use(messages[tail_start - 1])):\n tail_start -= 1\n try:\n summary = summarize_history(messages[:tail_start])\n except Exception:\n summary = \"Earlier conversation was trimmed after a prompt-too-long error.\"\n return [{\"role\": \"user\", \"content\": f\"[Reactive compact]\\n\\n{summary}\"},\n *messages[tail_start:]]\n\n\n# ── Error Recovery ──\n\nclass RecoveryState:\n def __init__(self):\n self.has_escalated = False\n self.recovery_count = 0\n self.consecutive_529 = 0\n self.has_attempted_reactive_compact = False\n self.current_model = PRIMARY_MODEL\n\n\ndef retry_delay(attempt: int) -> float:\n base = min(BASE_DELAY_MS * (2 ** attempt), 32000) / 1000\n return base + random.uniform(0, base * 0.25)\n\n\ndef with_retry(fn, state: RecoveryState):\n for attempt in range(MAX_RETRIES):\n try:\n result = fn()\n state.consecutive_529 = 0\n return result\n except Exception as e:\n name = type(e).__name__.lower()\n msg = str(e).lower()\n if \"ratelimit\" in name or \"429\" in msg:\n delay = retry_delay(attempt)\n print(f\" \\033[33m[429] retry 
{attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n if \"overloaded\" in name or \"529\" in msg or \"overloaded\" in msg:\n state.consecutive_529 += 1\n if state.consecutive_529 >= MAX_CONSECUTIVE_529 and FALLBACK_MODEL:\n state.current_model = FALLBACK_MODEL\n state.consecutive_529 = 0\n print(f\" \\033[31m[529] switching to {FALLBACK_MODEL}\\033[0m\")\n delay = retry_delay(attempt)\n print(f\" \\033[33m[529] retry {attempt + 1}/{MAX_RETRIES} \"\n f\"after {delay:.1f}s\\033[0m\")\n time.sleep(delay)\n continue\n raise\n raise RuntimeError(f\"Max retries ({MAX_RETRIES}) exceeded\")\n\n\ndef is_prompt_too_long_error(e: Exception) -> bool:\n msg = str(e).lower()\n return ((\"prompt\" in msg and \"long\" in msg)\n or \"context_length_exceeded\" in msg\n or \"max_context_window\" in msg)\n\n\n# ── Background Tasks ──\n\n# Slow tools return a placeholder tool_result immediately. Their real output is\n# later injected as a task_notification, so the main loop can keep moving.\n_bg_counter = 0\nbackground_tasks: dict[str, dict] = {}\nbackground_results: dict[str, str] = {}\nbackground_lock = threading.Lock()\n\n\ndef is_slow_operation(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n command = tool_input.get(\"command\", \"\").lower()\n slow_keywords = [\"install\", \"build\", \"test\", \"deploy\", \"compile\",\n \"docker build\", \"pip install\", \"npm install\",\n \"cargo build\", \"pytest\", \"make\"]\n return any(keyword in command for keyword in slow_keywords)\n\n\ndef should_run_background(tool_name: str, tool_input: dict) -> bool:\n if tool_name != \"bash\":\n return False\n return bool(tool_input.get(\"run_in_background\")) or is_slow_operation(tool_name, tool_input)\n\n\ndef start_background_task(block, handlers: dict) -> str:\n global _bg_counter\n _bg_counter += 1\n bg_id = f\"bg_{_bg_counter:04d}\"\n command = block.input.get(\"command\", block.name)\n\n def worker():\n handler 
= handlers.get(block.name)\n result = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, result)\n with background_lock:\n background_tasks[bg_id][\"status\"] = \"completed\"\n background_results[bg_id] = str(result)\n\n with background_lock:\n background_tasks[bg_id] = {\n \"tool_use_id\": block.id,\n \"command\": command,\n \"status\": \"running\",\n }\n threading.Thread(target=worker, daemon=True).start()\n print(f\" \\033[33m[background] {bg_id}: {str(command)[:60]}\\033[0m\")\n return bg_id\n\n\ndef collect_background_results() -> list[str]:\n with background_lock:\n ready = [bg_id for bg_id, task in background_tasks.items()\n if task[\"status\"] == \"completed\"]\n notifications = []\n for bg_id in ready:\n with background_lock:\n task = background_tasks.pop(bg_id)\n output = background_results.pop(bg_id, \"\")\n summary = output[:200] if len(output) > 200 else output\n notifications.append(\n f\"\\n\"\n f\" {bg_id}\\n\"\n f\" completed\\n\"\n f\" {task['command']}\\n\"\n f\" {summary}\\n\"\n f\"\")\n return notifications\n\n\n# ── Cron Scheduler ──\n\n# Cron jobs are stored separately from conversation history. 
When a job fires,\n# it becomes a scheduled prompt that is injected back into the same agent loop.\nDURABLE_PATH = WORKDIR / \".scheduled_tasks.json\"\n\n\n@dataclass\nclass CronJob:\n id: str\n cron: str\n prompt: str\n recurring: bool\n durable: bool\n\n\nscheduled_jobs: dict[str, CronJob] = {}\ncron_queue: list[CronJob] = []\ncron_lock = threading.Lock()\n_last_fired: dict[str, str] = {}\n\n\ndef _cron_field_matches(field: str, value: int) -> bool:\n if field == \"*\":\n return True\n if field.startswith(\"*/\"):\n step = int(field[2:])\n return step > 0 and value % step == 0\n if \",\" in field:\n return any(_cron_field_matches(part.strip(), value)\n for part in field.split(\",\"))\n if \"-\" in field:\n lo, hi = field.split(\"-\", 1)\n return int(lo) <= value <= int(hi)\n return value == int(field)\n\n\ndef cron_matches(cron_expr: str, dt: datetime) -> bool:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return False\n minute, hour, dom, month, dow = fields\n dow_val = (dt.weekday() + 1) % 7\n m = _cron_field_matches(minute, dt.minute)\n h = _cron_field_matches(hour, dt.hour)\n dom_ok = _cron_field_matches(dom, dt.day)\n month_ok = _cron_field_matches(month, dt.month)\n dow_ok = _cron_field_matches(dow, dow_val)\n if not (m and h and month_ok):\n return False\n if dom == \"*\" and dow == \"*\":\n return True\n if dom == \"*\":\n return dow_ok\n if dow == \"*\":\n return dom_ok\n return dom_ok or dow_ok\n\n\ndef _validate_cron_field(field: str, lo: int, hi: int) -> str | None:\n if field == \"*\":\n return None\n if field.startswith(\"*/\"):\n step = field[2:]\n if not step.isdigit() or int(step) <= 0:\n return f\"Invalid step: {field}\"\n return None\n if \",\" in field:\n for part in field.split(\",\"):\n err = _validate_cron_field(part.strip(), lo, hi)\n if err:\n return err\n return None\n if \"-\" in field:\n left, right = field.split(\"-\", 1)\n if not left.isdigit() or not right.isdigit():\n return f\"Invalid range: {field}\"\n a, b = 
int(left), int(right)\n if a < lo or a > hi or b < lo or b > hi:\n return f\"Range {field} out of bounds [{lo}-{hi}]\"\n if a > b:\n return f\"Range start > end: {field}\"\n return None\n if not field.isdigit():\n return f\"Invalid field: {field}\"\n value = int(field)\n if value < lo or value > hi:\n return f\"Value {value} out of bounds [{lo}-{hi}]\"\n return None\n\n\ndef validate_cron(cron_expr: str) -> str | None:\n fields = cron_expr.strip().split()\n if len(fields) != 5:\n return f\"Expected 5 fields, got {len(fields)}\"\n bounds = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n names = [\"minute\", \"hour\", \"day-of-month\", \"month\", \"day-of-week\"]\n for field, (lo, hi), name in zip(fields, bounds, names):\n err = _validate_cron_field(field, lo, hi)\n if err:\n return f\"{name}: {err}\"\n return None\n\n\ndef save_durable_jobs():\n durable = [asdict(job) for job in scheduled_jobs.values() if job.durable]\n DURABLE_PATH.write_text(json.dumps(durable, indent=2))\n\n\ndef load_durable_jobs():\n if not DURABLE_PATH.exists():\n return\n try:\n for item in json.loads(DURABLE_PATH.read_text()):\n job = CronJob(**item)\n if not validate_cron(job.cron):\n scheduled_jobs[job.id] = job\n except Exception:\n pass\n\n\ndef schedule_job(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> CronJob | str:\n err = validate_cron(cron)\n if err:\n return err\n job = CronJob(\n id=f\"cron_{random.randint(0, 999999):06d}\",\n cron=cron, prompt=prompt,\n recurring=recurring, durable=durable)\n with cron_lock:\n scheduled_jobs[job.id] = job\n if durable:\n save_durable_jobs()\n return job\n\n\ndef cancel_job(job_id: str) -> str:\n with cron_lock:\n job = scheduled_jobs.pop(job_id, None)\n if not job:\n return f\"Job {job_id} not found\"\n if job.durable:\n save_durable_jobs()\n return f\"Cancelled {job_id}\"\n\n\ndef cron_scheduler_loop():\n while True:\n time.sleep(1)\n now = datetime.now()\n marker = now.strftime(\"%Y-%m-%d %H:%M\")\n with cron_lock:\n 
for job in list(scheduled_jobs.values()):\n try:\n if cron_matches(job.cron, now) and _last_fired.get(job.id) != marker:\n cron_queue.append(job)\n _last_fired[job.id] = marker\n if not job.recurring:\n scheduled_jobs.pop(job.id, None)\n if job.durable:\n save_durable_jobs()\n except Exception as e:\n print(f\" \\033[31m[cron error] {job.id}: {e}\\033[0m\")\n\n\ndef consume_cron_queue() -> list[CronJob]:\n with cron_lock:\n fired = list(cron_queue)\n cron_queue.clear()\n return fired\n\n\ndef run_schedule_cron(cron: str, prompt: str,\n recurring: bool = True, durable: bool = True) -> str:\n result = schedule_job(cron, prompt, recurring, durable)\n if isinstance(result, str):\n return f\"Error: {result}\"\n return f\"Scheduled {result.id}: '{cron}' -> {prompt}\"\n\n\ndef run_list_crons() -> str:\n with cron_lock:\n jobs = list(scheduled_jobs.values())\n if not jobs:\n return \"No cron jobs.\"\n return \"\\n\".join(\n f\" {job.id}: '{job.cron}' -> {job.prompt[:40]} \"\n f\"[{'recurring' if job.recurring else 'one-shot'}, \"\n f\"{'durable' if job.durable else 'session'}]\"\n for job in jobs)\n\n\ndef run_cancel_cron(job_id: str) -> str:\n return cancel_job(job_id)\n\n\nload_durable_jobs()\nthreading.Thread(target=cron_scheduler_loop, daemon=True).start()\n\n\n# ── MCP System ──\n\n# MCP is modeled as late-bound tools: connect first, then discovered server\n# tools are merged into the normal tool pool with mcp__server__tool names.\nclass MCPClient:\n \"\"\"Discovers and calls tools on an MCP server (mock for teaching).\"\"\"\n\n def __init__(self, name: str):\n self.name = name\n self.tools: list[dict] = []\n self._handlers: dict[str, callable] = {}\n\n def register(self, tool_defs: list[dict],\n handlers: dict[str, callable]):\n self.tools = tool_defs\n self._handlers = handlers\n\n def call_tool(self, tool_name: str, args: dict) -> str:\n handler = self._handlers.get(tool_name)\n if not handler:\n return f\"MCP error: unknown tool '{tool_name}'\"\n try:\n return 
handler(**args)\n except Exception as e:\n return f\"MCP error: {e}\"\n\n\nmcp_clients: dict[str, MCPClient] = {}\n\n_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9_-]')\n\n\ndef normalize_mcp_name(name: str) -> str:\n \"\"\"Replace non [a-zA-Z0-9_-] with underscore.\"\"\"\n return _DISALLOWED_CHARS.sub('_', name)\n\n\ndef _mock_server_docs():\n client = MCPClient(\"docs\")\n client.register(\n tool_defs=[\n {\"name\": \"search\", \"description\": \"Search documentation. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"query\": {\"type\": \"string\"}},\n \"required\": [\"query\"]}},\n {\"name\": \"get_version\", \"description\": \"Get API version. (readOnly)\",\n \"inputSchema\": {\"type\": \"object\", \"properties\": {},\n \"required\": []}},\n ],\n handlers={\n \"search\": lambda query: f\"[docs] Found 3 results for '{query}'\",\n \"get_version\": lambda: \"[docs] API v2.1.0\",\n })\n return client\n\n\ndef _mock_server_deploy():\n client = MCPClient(\"deploy\")\n client.register(\n tool_defs=[\n {\"name\": \"trigger\",\n \"description\": \"Trigger a deployment. (destructive — requires approval in real CC)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n {\"name\": \"status\", \"description\": \"Check deployment status. 
(readOnly)\",\n \"inputSchema\": {\"type\": \"object\",\n \"properties\": {\"service\": {\"type\": \"string\"}},\n \"required\": [\"service\"]}},\n ],\n handlers={\n \"trigger\": lambda service: f\"[deploy] Triggered: {service}\",\n \"status\": lambda service: f\"[deploy] {service}: running (v1.4.2)\",\n })\n return client\n\n\nMOCK_SERVERS = {\n \"docs\": _mock_server_docs,\n \"deploy\": _mock_server_deploy,\n}\n\n\ndef connect_mcp(name: str) -> str:\n if name in mcp_clients:\n return f\"MCP server '{name}' already connected\"\n factory = MOCK_SERVERS.get(name)\n if not factory:\n available = \", \".join(MOCK_SERVERS.keys())\n return f\"Unknown server '{name}'. Available: {available}\"\n mcp_client = factory()\n mcp_clients[name] = mcp_client\n tool_names = [t[\"name\"] for t in mcp_client.tools]\n print(f\" \\033[31m[mcp] connected: {name} → {tool_names}\\033[0m\")\n return (f\"Connected to MCP server '{name}'. \"\n f\"Discovered {len(mcp_client.tools)} tools: {', '.join(tool_names)}\")\n\n\ndef assemble_tool_pool() -> tuple[list[dict], dict]:\n \"\"\"Merge builtin tools + all MCP tools into one pool.\"\"\"\n tools = list(BUILTIN_TOOLS)\n handlers = dict(BUILTIN_HANDLERS)\n for server_name, mcp_client in mcp_clients.items():\n safe_server = normalize_mcp_name(server_name)\n for tool_def in mcp_client.tools:\n safe_tool = normalize_mcp_name(tool_def[\"name\"])\n prefixed = f\"mcp__{safe_server}__{safe_tool}\"\n tools.append({\n \"name\": prefixed,\n \"description\": tool_def.get(\"description\", \"\"),\n \"input_schema\": tool_def.get(\"inputSchema\", {}),\n })\n handlers[prefixed] = (\n lambda *, c=mcp_client, t=tool_def[\"name\"], **kw: c.call_tool(t, kw))\n return tools, handlers\n\n\n# ── Lead Worktree Tools ──\n\ndef run_create_worktree(name: str, task_id: str = \"\") -> str:\n return create_worktree(name, task_id)\n\ndef run_remove_worktree(name: str, discard_changes: bool = False) -> str:\n return remove_worktree(name, discard_changes)\n\ndef 
run_keep_worktree(name: str) -> str:\n return keep_worktree(name)\n\n\n# ── Basic tool handlers ──\n\ndef run_create_task(subject: str, description: str = \"\",\n blockedBy: list[str] | None = None) -> str:\n task = create_task(subject, description, blockedBy)\n deps = f\" (blockedBy: {', '.join(blockedBy)})\" if blockedBy else \"\"\n print(f\" \\033[34m[create] {task.subject}{deps}\\033[0m\")\n return f\"Created {task.id}: {task.subject}{deps}\"\n\n\ndef run_list_tasks() -> str:\n tasks = list_tasks()\n if not tasks:\n return \"No tasks.\"\n return \"\\n\".join(\n f\" {t.id}: {t.subject} [{t.status}]\"\n + (f\" (wt:{t.worktree})\" if t.worktree else \"\")\n for t in tasks)\n\n\ndef run_get_task(task_id: str) -> str:\n try:\n return get_task_json(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_claim_task(task_id: str) -> str:\n try:\n return claim_task(task_id, owner=\"agent\")\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_complete_task(task_id: str) -> str:\n try:\n return complete_task(task_id)\n except FileNotFoundError:\n return f\"Error: task {task_id} not found\"\n\ndef run_spawn_teammate(name: str, role: str, prompt: str) -> str:\n return spawn_teammate_thread(name, role, prompt)\n\ndef run_send_message(to: str, content: str) -> str:\n BUS.send(\"lead\", to, content)\n return f\"Sent to {to}\"\n\ndef run_check_inbox() -> str:\n msgs = consume_lead_inbox(route_protocol=True)\n if not msgs:\n return \"(inbox empty)\"\n lines = []\n for m in msgs:\n meta = m.get(\"metadata\", {})\n req_id = meta.get(\"request_id\", \"\")\n tag = f\" [{m['type']} req:{req_id}]\" if req_id else f\" [{m['type']}]\"\n lines.append(f\" [{m['from']}]{tag} {m['content'][:200]}\")\n return \"\\n\".join(lines)\n\ndef run_connect_mcp(name: str) -> str:\n return connect_mcp(name)\n\n\n# ── Tool Definitions ──\n\n# The model sees tool schemas; Python executes handlers. 
S20 keeps both tables\n# explicit so every added capability is visible in one place.\nBUILTIN_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"},\n \"run_in_background\": {\"type\": \"boolean\"}},\n \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n \"offset\": {\"type\": \"integer\"}},\n \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to a file.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"}},\n \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"glob\", \"description\": \"Find files matching a glob pattern.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"pattern\": {\"type\": \"string\"}},\n \"required\": [\"pattern\"]}},\n {\"name\": \"todo_write\",\n \"description\": \"Create and manage a task list for the current session.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"todos\": {\"type\": \"array\",\n \"items\": {\"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"]}},\n \"required\": [\"content\", \"status\"]}}},\n \"required\": [\"todos\"]}},\n {\"name\": \"task\",\n \"description\": \"Launch a focused subagent. 
Returns only its final summary.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"description\": {\"type\": \"string\"}},\n \"required\": [\"description\"]}},\n {\"name\": \"load_skill\",\n \"description\": \"Load the full content of a skill by name.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"compact\",\n \"description\": \"Summarize earlier conversation and continue with compacted context.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"focus\": {\"type\": \"string\"}},\n \"required\": []}},\n {\"name\": \"create_task\", \"description\": \"Create a task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n \"blockedBy\": {\"type\": \"array\",\n \"items\": {\"type\": \"string\"}}},\n \"required\": [\"subject\"]}},\n {\"name\": \"list_tasks\", \"description\": \"List all tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"get_task\", \"description\": \"Get full task details.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"claim_task\", \"description\": \"Claim a pending task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"complete_task\", \"description\": \"Complete an in-progress task.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"string\"}},\n \"required\": [\"task_id\"]}},\n {\"name\": \"schedule_cron\",\n \"description\": (\"Schedule a cron job. cron is 5-field: min hour dom \"\n \"month dow. 
For one-shot reminders, compute the target \"\n \"minute and set recurring=false.\"),\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"cron\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"},\n \"recurring\": {\"type\": \"boolean\"},\n \"durable\": {\"type\": \"boolean\"}},\n \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"list_crons\", \"description\": \"List registered cron jobs.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"cancel_cron\", \"description\": \"Cancel a cron job by ID.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"job_id\": {\"type\": \"string\"}},\n \"required\": [\"job_id\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"role\": {\"type\": \"string\"},\n \"prompt\": {\"type\": \"string\"}},\n \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"to\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"}},\n \"required\": [\"to\", \"content\"]}},\n {\"name\": \"check_inbox\",\n \"description\": \"Check inbox for messages and protocol responses.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}, \"required\": []}},\n {\"name\": \"request_shutdown\",\n \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"}},\n \"required\": [\"teammate\"]}},\n {\"name\": \"request_plan\",\n \"description\": \"Ask a teammate to submit a plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"teammate\": {\"type\": \"string\"},\n \"task\": {\"type\": \"string\"}},\n \"required\": [\"teammate\", \"task\"]}},\n {\"name\": \"review_plan\",\n 
\"description\": \"Approve or reject a submitted plan.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"request_id\": {\"type\": \"string\"},\n \"approve\": {\"type\": \"boolean\"},\n \"feedback\": {\"type\": \"string\"}},\n \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"create_worktree\",\n \"description\": \"Create an isolated git worktree.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"remove_worktree\",\n \"description\": \"Remove a worktree. Refuses if changes exist.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"},\n \"discard_changes\": {\"type\": \"boolean\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"keep_worktree\",\n \"description\": \"Keep a worktree for manual review.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n {\"name\": \"connect_mcp\",\n \"description\": \"Connect to an MCP server (docs, deploy) and discover tools.\",\n \"input_schema\": {\"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"]}},\n]\n\nBUILTIN_HANDLERS = {\n \"bash\": run_bash, \"read_file\": run_read, \"write_file\": run_write,\n \"edit_file\": run_edit, \"glob\": run_glob,\n \"todo_write\": run_todo_write, \"task\": spawn_subagent,\n \"load_skill\": load_skill,\n \"create_task\": run_create_task, \"list_tasks\": run_list_tasks,\n \"get_task\": run_get_task,\n \"claim_task\": run_claim_task, \"complete_task\": run_complete_task,\n \"schedule_cron\": run_schedule_cron,\n \"list_crons\": run_list_crons,\n \"cancel_cron\": run_cancel_cron,\n \"spawn_teammate\": run_spawn_teammate,\n \"send_message\": run_send_message, \"check_inbox\": run_check_inbox,\n \"request_shutdown\": run_request_shutdown,\n \"request_plan\": run_request_plan, 
\"review_plan\": run_review_plan,\n \"create_worktree\": run_create_worktree,\n \"remove_worktree\": run_remove_worktree,\n \"keep_worktree\": run_keep_worktree,\n \"connect_mcp\": run_connect_mcp,\n}\n\n\n# ── Context ──\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\n\n\ndef update_context(context: dict, messages: list) -> dict:\n memories = \"\"\n if MEMORY_INDEX.exists():\n memories = MEMORY_INDEX.read_text()[:2000]\n return {\n \"memories\": memories,\n \"connected_mcp\": list(mcp_clients.keys()),\n \"active_teammates\": list(active_teammates.keys()),\n }\n\n\n# ── Agent Loop ──\n\nrounds_since_todo = 0\nagent_lock = threading.Lock()\n\n\ndef prepare_context(messages: list) -> list:\n # Every LLM turn enters through the same context budget pipeline.\n messages[:] = tool_result_budget(messages)\n messages[:] = snip_compact(messages)\n messages[:] = micro_compact(messages)\n if estimate_size(messages) > CONTEXT_LIMIT:\n messages[:] = compact_history(messages)\n return messages\n\n\ndef build_user_content(results: list[dict]) -> list[dict]:\n # Tool results and completed background notifications are both returned to\n # the model as user-side content, matching the tool_result feedback loop.\n content = list(results)\n for note in collect_background_results():\n content.append({\"type\": \"text\", \"text\": note})\n return content\n\n\ndef inject_background_notifications(messages: list):\n notes = collect_background_results()\n if notes:\n messages.append({\"role\": \"user\", \"content\": [\n {\"type\": \"text\", \"text\": note} for note in notes]})\n\n\ndef call_llm(messages: list, context: dict, tools: list,\n state: RecoveryState, max_tokens: int):\n system = assemble_system_prompt(context)\n return with_retry(\n lambda: client.messages.create(\n model=state.current_model,\n system=system,\n messages=messages,\n tools=tools,\n max_tokens=max_tokens),\n state)\n\n\ndef agent_loop(messages: list, context: dict):\n global 
rounds_since_todo\n tools, handlers = assemble_tool_pool()\n state = RecoveryState()\n max_tokens = DEFAULT_MAX_TOKENS\n\n while True:\n # One cycle: inject scheduled/background work, prepare context, call\n # the model, execute tool_use blocks, append tool_results, repeat.\n fired = consume_cron_queue()\n for job in fired:\n messages.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n print(f\" \\033[35m[cron inject] {job.prompt[:60]}\\033[0m\")\n\n inject_background_notifications(messages)\n\n if rounds_since_todo >= 3:\n messages.append({\"role\": \"user\",\n \"content\": \"Update your todos.\"})\n rounds_since_todo = 0\n\n prepare_context(messages)\n context = update_context(context, messages)\n tools, handlers = assemble_tool_pool()\n\n try:\n response = call_llm(messages, context, tools, state, max_tokens)\n except Exception as e:\n if is_prompt_too_long_error(e) and not state.has_attempted_reactive_compact:\n messages[:] = reactive_compact(messages)\n state.has_attempted_reactive_compact = True\n continue\n messages.append({\"role\": \"assistant\", \"content\": [\n {\"type\": \"text\", \"text\": f\"[Error] {type(e).__name__}: {e}\"}]})\n return\n\n if response.stop_reason == \"max_tokens\":\n if not state.has_escalated:\n max_tokens = ESCALATED_MAX_TOKENS\n state.has_escalated = True\n print(f\" \\033[33m[max_tokens] retry with {max_tokens}\\033[0m\")\n continue\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if state.recovery_count < MAX_RECOVERY_RETRIES:\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_PROMPT})\n state.recovery_count += 1\n continue\n return\n\n max_tokens = DEFAULT_MAX_TOKENS\n state.has_escalated = False\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if not has_tool_use(response.content):\n trigger_hooks(\"Stop\", messages)\n return\n\n results = []\n compacted_now = False\n for block in response.content:\n if block.type != 
\"tool_use\":\n continue\n print(f\"\\033[36m> {block.name}\\033[0m\")\n\n if block.name == \"compact\":\n messages[:] = compact_history(messages)\n messages.append({\"role\": \"user\",\n \"content\": \"[Compacted. Continue with summarized context.]\"})\n compacted_now = True\n break\n\n blocked = trigger_hooks(\"PreToolUse\", block)\n if blocked:\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(blocked)})\n continue\n\n if should_run_background(block.name, block.input):\n bg_id = start_background_task(block, handlers)\n output = (f\"[Background task {bg_id} started] \"\n \"Result will arrive as a task_notification.\")\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output})\n continue\n\n handler = handlers.get(block.name)\n output = call_tool_handler(handler, block.input, block.name)\n trigger_hooks(\"PostToolUse\", block, output)\n print(str(output)[:300])\n\n if block.name == \"todo_write\":\n rounds_since_todo = 0\n else:\n rounds_since_todo += 1\n\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id, \"content\": output})\n\n if compacted_now:\n continue\n\n messages.append({\"role\": \"user\", \"content\": build_user_content(results)})\n\n\ndef print_turn_assistants(messages: list, turn_start: int):\n for msg in messages[turn_start:]:\n if msg.get(\"role\") != \"assistant\":\n continue\n for block in msg.get(\"content\", []):\n if block_type(block) == \"text\":\n terminal_print(block[\"text\"] if isinstance(block, dict) else block.text)\n\n\ndef cron_autorun_loop(history: list, context: dict):\n while True:\n time.sleep(1)\n fired = consume_cron_queue()\n if not fired:\n continue\n with agent_lock:\n turn_start = len(history)\n for job in fired:\n history.append({\"role\": \"user\",\n \"content\": f\"[Scheduled] {job.prompt}\"})\n terminal_print(\n f\" \\033[35m[cron auto] {job.prompt[:60]}\\033[0m\")\n agent_loop(history, context)\n 
context.update(update_context(context, history))\n print_turn_assistants(history, turn_start)\n\n\nif __name__ == \"__main__\":\n CLI_ACTIVE = True\n print(\"s20: comprehensive agent\")\n print(\"Enter a question, press Enter to send. Type q to quit.\\n\")\n history = []\n context = update_context({}, [])\n threading.Thread(target=cron_autorun_loop,\n args=(history, context), daemon=True).start()\n while True:\n try:\n query = input(PROMPT)\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n trigger_hooks(\"UserPromptSubmit\", query)\n turn_start = len(history)\n history.append({\"role\": \"user\", \"content\": query})\n with agent_lock:\n agent_loop(history, context)\n context = update_context(context, history)\n print_turn_assistants(history, turn_start)\n\n inbox = consume_lead_inbox(route_protocol=True)\n if inbox:\n def inbox_label(msg):\n req_id = msg.get(\"metadata\", {}).get(\"request_id\", \"\")\n suffix = f\" req:{req_id}\" if req_id else \"\"\n return f\"{msg.get('type', 'message')}{suffix}\"\n\n inbox_text = \"\\n\".join(\n f\"From {m['from']} [{inbox_label(m)}]: \"\n f\"{m['content'][:200]}\" for m in inbox)\n history.append({\"role\": \"user\",\n \"content\": f\"[Inbox]\\n{inbox_text}\"})\n print()\n",
"images": [
{
"src": "/course-assets/s20_comprehensive/system-architecture.svg",
@@ -3720,6 +3775,9 @@
"newClasses": [],
"newFunctions": [
"estimate_size",
+ "_block_type",
+ "_message_has_tool_use",
+ "_is_tool_result_message",
"snip_compact",
"collect_tool_results",
"micro_compact",
@@ -3733,7 +3791,7 @@
"newTools": [
"compact"
],
- "locDelta": 47
+ "locDelta": 79
},
{
"from": "s08",
@@ -3752,7 +3810,7 @@
"persist_large"
],
"newTools": [],
- "locDelta": 116
+ "locDelta": 114
},
{
"from": "s09",
@@ -3764,7 +3822,7 @@
"update_context"
],
"newTools": [],
- "locDelta": -332
+ "locDelta": -362
},
{
"from": "s10",
@@ -3808,7 +3866,7 @@
"claim_task",
"complete_task"
],
- "locDelta": 10
+ "locDelta": 12
},
{
"from": "s12",
@@ -3852,7 +3910,7 @@
"list_crons",
"cancel_cron"
],
- "locDelta": 266
+ "locDelta": 264
},
{
"from": "s14",
@@ -3861,6 +3919,7 @@
"MessageBus"
],
"newFunctions": [
+ "has_pending_background",
"spawn_teammate_thread",
"run_spawn_teammate",
"run_send_message",
@@ -3871,7 +3930,7 @@
"spawn_teammate",
"check_inbox"
],
- "locDelta": 100
+ "locDelta": 141
},
{
"from": "s15",
@@ -3894,17 +3953,18 @@
"request_plan",
"review_plan"
],
- "locDelta": -36
+ "locDelta": -75
},
{
"from": "s16",
"to": "s17",
"newClasses": [],
"newFunctions": [
- "scan_unclaimed_tasks"
+ "scan_unclaimed_tasks",
+ "idle_poll"
],
"newTools": [],
- "locDelta": -61
+ "locDelta": -62
},
{
"from": "s17",
@@ -3929,7 +3989,7 @@
"remove_worktree",
"keep_worktree"
],
- "locDelta": 154
+ "locDelta": 155
},
{
"from": "s18",
@@ -3982,6 +4042,9 @@
"has_tool_use",
"spawn_subagent",
"estimate_size",
+ "block_type",
+ "message_has_tool_use",
+ "is_tool_result_message",
"collect_tool_results",
"persist_large_output",
"tool_result_budget",
@@ -4026,7 +4089,7 @@
"list_crons",
"cancel_cron"
],
- "locDelta": 842
+ "locDelta": 871
}
]
-}
+}
\ No newline at end of file