Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions rust/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,51 @@ Or provide an OAuth bearer token directly:
export ANTHROPIC_AUTH_TOKEN="anthropic-oauth-or-proxy-bearer-token"
```

### Local models via Ollama (WSL)

Claw works with any OpenAI-compatible endpoint, including a local
[Ollama](https://ollama.com) server. This is handy for offline use or for
driving the CLI from WSL against models served on the Windows host.

```bash
cd rust/

# 1. Build the release binary
cargo build --release

# 2. Point claw at the local Ollama OpenAI-compatible endpoint
export OPENAI_BASE_URL="http://127.0.0.1:11434/v1"
export OPENAI_API_KEY="ollama" # any non-empty value works; Ollama ignores it

# 3. Run. Prefix the model with `openai/` so prefix routing selects the
# OpenAI-compatible provider, and use the exact Ollama tag.
./target/release/claw --model openai/qwen3.5:9b
```

**The model name must match the Ollama tag exactly.** Ollama tags use a colon
(`name:tag`), e.g. `qwen3.5:9b` — not a hyphen (`qwen3.5-9b`), which the server
rejects with `404 ... model not found`. In the command above the `openai/`
prefix is what selects the OpenAI-compatible provider (the prefix is stripped
before the tag is sent to Ollama). You can also drop the prefix and pass the
bare tag — when `OPENAI_BASE_URL` is set, claw routes a prefix-less model to the
OpenAI-compatible endpoint as long as the name contains a `:` or `.`. Either
way, always pass the full colon tag. List the installed tags with:

```bash
ollama list # native view
curl -s http://127.0.0.1:11434/v1/models # OpenAI-compatible view
```

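> **WSL note:** when Ollama runs on the Windows host, `127.0.0.1:11434` is only
> reachable from WSL 2 if localhost is shared with the host (for example with
> mirrored networking, `networkingMode=mirrored` in `.wslconfig`). If the
> connection is refused, make sure Ollama is running on the host (`ollama serve`)
> or point `OPENAI_BASE_URL` at the host IP instead of `127.0.0.1`.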

Reasoning ("thinking") models such as Qwen3 are supported. Ollama streams the
chain-of-thought in a `reasoning` field while `content` stays empty; claw
surfaces it as a separate thinking block, so both the reasoning trace and the
final answer are rendered.

## Mock parity harness

The workspace now includes a deterministic Anthropic-compatible mock service and a clean-environment CLI harness for end-to-end parity checks.
Expand Down
117 changes: 117 additions & 0 deletions rust/crates/api/src/providers/openai_compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ impl StreamState {
.delta
.reasoning_content
.filter(|value| !value.is_empty())
.or(choice.delta.reasoning.filter(|value| !value.is_empty()))
.or(choice
.delta
.thinking
Expand Down Expand Up @@ -816,6 +817,9 @@ struct ChatMessage {
content: Option<String>,
#[serde(default)]
reasoning_content: Option<String>,
/// Ollama's thinking models (e.g. Qwen3) return the chain-of-thought in this field instead of `reasoning_content`.
#[serde(default)]
reasoning: Option<String>,
#[serde(default)]
tool_calls: Vec<ResponseToolCall>,
}
Expand Down Expand Up @@ -890,6 +894,9 @@ struct ChunkDelta {
/// Some providers (GLM, DeepSeek) emit reasoning in `reasoning_content`
#[serde(default)]
reasoning_content: Option<String>,
/// Ollama's Qwen3 thinking models stream tokens here with `content=""`.
#[serde(default)]
reasoning: Option<String>,
#[serde(default)]
thinking: Option<ThinkingDelta>,
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
Expand Down Expand Up @@ -1500,6 +1507,7 @@ fn normalize_response(
.message
.reasoning_content
.filter(|value| !value.is_empty())
.or_else(|| choice.message.reasoning.filter(|value| !value.is_empty()))
{
content.push(OutputContentBlock::Thinking {
thinking,
Expand Down Expand Up @@ -1982,6 +1990,7 @@ mod tests {
role: "assistant".to_string(),
content: Some("final answer".to_string()),
reasoning_content: Some("hidden thought".to_string()),
reasoning: None,
tool_calls: Vec::new(),
},
finish_reason: Some("stop".to_string()),
Expand All @@ -2007,6 +2016,112 @@ mod tests {
);
}

#[test]
fn non_streaming_response_with_ollama_reasoning_field_emits_thinking_block() {
    // Given: an Ollama-style completion whose chain-of-thought is carried in
    // `reasoning` while `reasoning_content` is absent. The fixture pairs the
    // trace with a non-empty `content` so both output blocks can be checked.
    let message = super::ChatMessage {
        role: String::from("assistant"),
        content: Some(String::from("43")),
        reasoning_content: None,
        reasoning: Some(String::from("17 + 26 = 43")),
        tool_calls: Vec::new(),
    };
    let choice = super::ChatChoice {
        message,
        finish_reason: Some(String::from("stop")),
    };
    let response = super::ChatCompletionResponse {
        id: String::from("chatcmpl_ollama"),
        model: String::from("qwen3.5:9b"),
        choices: vec![choice],
        usage: None,
    };

    // When: the provider response is normalized.
    let normalized = normalize_response("qwen3.5:9b", response).expect("normalized");

    // Then: the `reasoning` text comes out as a Thinking block, followed by the
    // Text block holding the answer.
    let expected = vec![
        OutputContentBlock::Thinking {
            thinking: String::from("17 + 26 = 43"),
            signature: None,
        },
        OutputContentBlock::Text {
            text: String::from("43"),
        },
    ];
    assert_eq!(normalized.content, expected);
}

#[test]
fn streaming_chunks_with_ollama_reasoning_field_emit_thinking_before_text() {
    // Given: two streaming chunks in the shape this test models for Ollama
    // Qwen3 -- first a delta carrying only `reasoning`, then a delta carrying
    // the answer text together with the `stop` finish reason.
    let mut state = StreamState::new("qwen3.5:9b".to_string());
    let mut events = state
        .ingest_chunk(super::ChatCompletionChunk {
            id: "chatcmpl_ollama_stream".to_string(),
            model: Some("qwen3.5:9b".to_string()),
            choices: vec![super::ChunkChoice {
                delta: super::ChunkDelta {
                    content: None,
                    reasoning_content: None,
                    reasoning: Some("thinking".to_string()),
                    thinking: None,
                    tool_calls: Vec::new(),
                },
                finish_reason: None,
            }],
            usage: None,
        })
        .expect("reasoning chunk");
    events.extend(
        state
            .ingest_chunk(super::ChatCompletionChunk {
                id: "chatcmpl_ollama_stream".to_string(),
                model: None,
                choices: vec![super::ChunkChoice {
                    delta: super::ChunkDelta {
                        content: Some("43".to_string()),
                        reasoning_content: None,
                        reasoning: None,
                        thinking: None,
                        tool_calls: Vec::new(),
                    },
                    finish_reason: Some("stop".to_string()),
                }],
                usage: None,
            })
            .expect("text chunk"),
    );
    events.extend(state.finish().expect("finish"));

    // Then: the Thinking block (index 0) starts at the second event. `get`
    // keeps a too-short event list a readable assertion failure instead of an
    // out-of-bounds panic.
    assert!(
        matches!(
            events.get(1),
            Some(StreamEvent::ContentBlockStart(ContentBlockStartEvent {
                index: 0,
                content_block: OutputContentBlock::Thinking { .. },
            }))
        ),
        "expected a Thinking block start (index 0) as the second event"
    );

    // Locate the thinking delta and the Text block start so their relative
    // order can be checked, not just their presence.
    let thinking_delta_at = events
        .iter()
        .position(|event| {
            matches!(
                event,
                StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
                    index: 0,
                    delta: ContentBlockDelta::ThinkingDelta { .. },
                })
            )
        })
        .expect("a thinking delta for block 0");
    let text_start_at = events
        .iter()
        .position(|event| {
            matches!(
                event,
                StreamEvent::ContentBlockStart(ContentBlockStartEvent {
                    index: 1,
                    content_block: OutputContentBlock::Text { .. },
                })
            )
        })
        .expect("a Text block start at index 1");

    // The test name promises "thinking before text": enforce that order.
    assert!(
        thinking_delta_at < text_start_at,
        "thinking delta must be emitted before the Text block starts"
    );
}

#[test]
fn streaming_chunks_with_reasoning_content_emit_thinking_block_events_before_text() {
// Given streaming chunks with reasoning_content followed by text.
Expand All @@ -2019,6 +2134,7 @@ mod tests {
delta: super::ChunkDelta {
content: None,
reasoning_content: Some("think".to_string()),
reasoning: None,
thinking: None,
tool_calls: Vec::new(),
},
Expand All @@ -2036,6 +2152,7 @@ mod tests {
delta: super::ChunkDelta {
content: Some(" answer".to_string()),
reasoning_content: None,
reasoning: None,
thinking: None,
tool_calls: Vec::new(),
},
Expand Down
5 changes: 5 additions & 0 deletions rust/crates/rusty-claude-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2943,6 +2943,11 @@ fn validate_model_syntax(model: &str) -> Result<(), String> {
err_msg.push_str("\nDid you mean `qwen/");
err_msg.push_str(trimmed);
err_msg.push_str("`? (Requires DASHSCOPE_API_KEY env var)");
err_msg.push_str("\nOr for a local Ollama server: use `openai/");
err_msg.push_str(trimmed);
err_msg.push_str(
"` with OPENAI_BASE_URL=http://127.0.0.1:11434/v1 and OPENAI_API_KEY=ollama",
);
} else if trimmed.starts_with("grok") {
err_msg.push_str("\nDid you mean `xai/");
err_msg.push_str(trimmed);
Expand Down
Loading