Skip to main content

harness/
model.rs

1use async_trait::async_trait;
2use eventsource_stream::Eventsource;
3use futures::stream::{BoxStream, StreamExt};
4use serde_json::{json, Value};
5
6use crate::event::HarnessUsage;
7use crate::tools::{ToolInvocation, ToolSpec};
8
/// One token-level event from the model. The harness loop consumes a
/// `Stream<ModelChunk>` and forwards `TextDelta` straight through as
/// `HarnessInternalEvent::AssistantTextChunk` so callers see live
/// generation; tool-call chunks are accumulated internally and only
/// emitted once `ToolCallEnd` lands so the harness can dispatch the call
/// with a complete `serde_json::Value` input.
///
/// Provider-agnostic: `OpenAiCompatibleModelClient` translates from
/// OpenAI's chat-completions SSE shape, and the future Anthropic client
/// will project Messages-API events into the same enum. `ScriptedModelClient`
/// synthesizes whichever sequence its scripted `ModelResponse` would
/// have implied.
#[derive(Debug, Clone, PartialEq)]
pub enum ModelChunk {
    /// Incremental assistant text. `msg_id` names the message the delta
    /// belongs to; `collect_model_response` restarts its text buffer
    /// whenever the id changes.
    TextDelta {
        msg_id: String,
        delta: String,
    },
    /// Incremental thinking text. `collect_model_response` ignores these
    /// chunks; only direct consumers of the stream see them.
    ThinkingDelta {
        thinking_id: String,
        delta: String,
        /// Provider-emitted signature (Anthropic extended thinking) that
        /// MUST be round-tripped to history verbatim, otherwise some
        /// providers reject the next turn. `None` for OpenAI today.
        signature: Option<String>,
    },
    /// First chunk of a tool call. The harness records `(id, name)` and
    /// starts buffering `ToolCallInputDelta`s under this id.
    ToolCallStart {
        id: String,
        name: String,
    },
    /// Streaming JSON arguments for a previously-announced tool call.
    /// OpenAI emits these as string fragments that, when concatenated,
    /// form valid JSON. The harness accumulates them into a single string
    /// per id, then parses on `ToolCallEnd`.
    ToolCallInputDelta {
        id: String,
        delta: String,
    },
    /// Tool call finalised. `input` is `Some(value)` when the provider
    /// sent the full parsed object on this chunk (Anthropic), or `None`
    /// when the harness still needs to parse the accumulated
    /// `ToolCallInputDelta` buffer (OpenAI). When present, `input` is
    /// authoritative and the buffered deltas are not parsed.
    ToolCallEnd {
        id: String,
        input: Option<Value>,
    },
    /// Final chunk. `usage` carries the provider's reported token count
    /// for this call (`None` if the gateway elides it).
    Done {
        stop_reason: String,
        usage: Option<HarnessUsage>,
    },
}
65
/// Verbatim thinking block emitted by Anthropic extended thinking. The
/// `signature` is a provider-supplied opaque token that MUST be
/// round-tripped on subsequent turns — Anthropic rejects modified
/// thinking blocks. OpenAI does not emit thinking blocks at all, so no
/// `AssistantThinking` is produced on that path (the `thinking` field of
/// `ChatMessage::Assistant` stays `None`).
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct AssistantThinking {
    /// Thinking text exactly as the provider emitted it.
    pub text: String,
    /// Opaque provider signature, if one was supplied.
    pub signature: Option<String>,
}
76
/// Image attached to a user message. Both providers accept either inline
/// base64 bytes or a URL the provider fetches; we surface both shapes
/// rather than always inlining (URLs save bandwidth + sandbox upload).
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct ImageSource {
    /// IANA media type. Anthropic accepts `image/jpeg`, `image/png`,
    /// `image/gif`, `image/webp`; OpenAI accepts the same set. Nothing in
    /// this file validates the value — both providers validate at request
    /// time, so junk values surface as an HTTP 400.
    pub media_type: String,
    /// The image payload: inline base64 or a URL (see `ImageData`).
    pub data: ImageData,
}
88
/// Payload of an `ImageSource`: either inline bytes or a remote reference.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ImageData {
    /// Base64-encoded image bytes. Most universal — supported by every
    /// modern multimodal model. Stored WITHOUT the `data:image/...;base64,`
    /// prefix; the projection layer adds it when the provider's wire
    /// format requires it (e.g. OpenAI `image_url`).
    Base64(String),
    /// URL the provider fetches. Provider-side IP egress / latency
    /// tradeoff — convenient for public assets, brittle for private ones.
    Url(String),
}
100
/// One non-text attachment. Used by `ChatMessage::User` and, for
/// tool-produced content, by `ChatMessage::Tool`. `Image` is the only
/// variant today; documents / file_id round-trips are expected to slot in
/// as future variants. Callers destructuring `ChatMessage::User` should
/// use a `, ..` rest pattern to stay forward-compatible with new fields.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum UserAttachment {
    /// An image, inline or by URL.
    Image(ImageSource),
}
109
/// One conversation entry as seen by the model. Mirrors the
/// `user / assistant / tool` roles OpenAI chat/completions expects; the
/// `system` role is not a variant here — it travels separately as
/// `ModelTurnInput::system_prompt`.
/// Anthropic's Messages API uses a different shape (tool_use / tool_result
/// content blocks instead of a separate `tool` role) but consumes the same
/// `ChatMessage` history — the projection lives in the per-provider
/// model client, keeping the harness loop provider-agnostic.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ChatMessage {
    /// User turn: text plus optional attachments.
    User {
        content: String,
        /// Non-text attachments (images today; file_id / documents later).
        /// Rendered as additional content blocks alongside the `content`
        /// text on the wire; each provider client owns its projection
        /// (the "§8 / §9" referenced historically is a design-doc section,
        /// not part of this file).
        attachments: Vec<UserAttachment>,
    },
    /// Assistant turn. May carry any combination of: a `thinking` block
    /// (Anthropic extended thinking), final text, and one or more tool
    /// calls. All three render into the assistant message's content array
    /// on the Anthropic wire; OpenAI uses `tool_calls` for tool calls and
    /// ignores thinking entirely.
    Assistant {
        text: Option<String>,
        tool_calls: Vec<ToolInvocation>,
        /// Thinking block to round-trip verbatim. `None` for OpenAI /
        /// Anthropic-without-extended-thinking turns.
        thinking: Option<AssistantThinking>,
    },
    /// Tool response paired by `tool_call_id`. `content` is the
    /// serialized tool output (the model sees a string regardless of
    /// the underlying JSON shape). `attachments` carries structured
    /// non-text content the tool produced — e.g. a screenshot MCP tool
    /// returning an image. Only providers with a tool-content array
    /// (Anthropic) surface these on the wire; the OpenAI tool role is
    /// strictly string-typed, so attachments degrade to a placeholder
    /// appended to the `content` string there. `is_error` is not sent on
    /// the OpenAI wire at all.
    Tool {
        tool_call_id: String,
        content: String,
        is_error: bool,
        attachments: Vec<UserAttachment>,
    },
}
152
/// Everything a `ModelClient` needs for one model call: system prompt,
/// conversation history, advertised tools, and tool-routing hints.
#[derive(Debug, Clone, PartialEq)]
pub struct ModelTurnInput {
    /// Optional system prompt (already composed: spec_snapshot system_prompt
    /// + driver.append_system_prompt). `None` ⇒ no system message sent; the
    /// OpenAI projection also skips an empty string.
    pub system_prompt: Option<String>,
    /// Full conversation history for this turn. AgentLoopHarness appends
    /// each `Assistant` / `Tool` message as the loop progresses so the
    /// model retains its own prior reasoning across tool round-trips.
    pub messages: Vec<ChatMessage>,
    /// Tool specs available this turn. Sourced from `ToolRuntime::specs()`
    /// so adding / removing a tool changes one place. Empty Vec ⇒ no tools
    /// advertised (final-answer-only mode).
    pub tools: Vec<ToolSpec>,
    /// How the model should pick (or skip) tools. Defaults to `Auto`.
    /// Set via `AgentLoopHarness::with_tool_choice` from
    /// `bootstrap.driver.native_model.tool_choice`.
    pub tool_choice: ToolChoice,
    /// Whether the model may emit multiple tool calls in one response
    /// (OpenAI's `parallel_tool_calls`). `None` ⇒ the field is omitted and
    /// the provider default applies (true for OpenAI). Anthropic is always
    /// implicitly multi-tool-capable, so this field is OpenAI-only. The
    /// OpenAI projection only sends it when tools are advertised.
    pub parallel_tool_calls: Option<bool>,
}
176
/// How the model should route tool selection for the current turn.
/// The OpenAI wire value is produced by `openai_tool_choice_value`; the
/// Anthropic column describes the intended mapping for that provider's
/// client.
///
/// | variant      | OpenAI                                                  | Anthropic                       |
/// |--------------|---------------------------------------------------------|---------------------------------|
/// | `Auto`       | `"auto"`                                                | `{"type":"auto"}` (or omitted)  |
/// | `None`       | (degrades: tools field dropped)                         | (degrades: tools field dropped) |
/// | `Required`   | `"required"` — model MUST call at least one tool        | `{"type":"any"}`                |
/// | `Tool(name)` | `{"type":"function","function":{"name":name}}` — forced | `{"type":"tool","name":name}`   |
///
/// The `None` variant has no direct Anthropic equivalent — Anthropic
/// forces tool consideration whenever `tools` is non-empty. We
/// approximate it by dropping the `tools` field entirely for that turn;
/// the model can't call what it doesn't know about. The OpenAI request
/// builder does the same (fewer prompt tokens than sending `"none"`).
#[derive(Debug, Clone, PartialEq, Default)]
pub enum ToolChoice {
    #[default]
    Auto,
    None,
    Required,
    Tool(String),
}

impl ToolChoice {
    /// Parse from the bootstrap.yaml string form.
    ///
    /// Accepts `"auto"`, `"none"`, `"required"` (alias `"any"`) and
    /// `"tool:<name>"`. Keywords — including the `tool` prefix — are
    /// matched case-insensitively and surrounding whitespace is ignored;
    /// the tool name itself keeps its case because tool names are
    /// case-sensitive identifiers.
    ///
    /// Empty / unknown strings degrade to `Auto` for forward compat —
    /// callers that want strict validation can match before calling.
    /// `"tool:"` with an empty name also degrades to `Auto`: forcing a
    /// call to a nameless tool can never be satisfied by the provider.
    pub fn parse(s: &str) -> Self {
        let trimmed = s.trim();
        // Split on the FIRST colon only so names containing ':' survive.
        if let Some((prefix, rest)) = trimmed.split_once(':') {
            if prefix.eq_ignore_ascii_case("tool") {
                let name = rest.trim();
                return if name.is_empty() {
                    Self::Auto
                } else {
                    Self::Tool(name.to_string())
                };
            }
        }
        match trimmed.to_ascii_lowercase().as_str() {
            "" | "auto" => Self::Auto,
            "none" => Self::None,
            "required" | "any" => Self::Required,
            // Unknown keyword: stay permissive rather than fail the turn.
            _ => Self::Auto,
        }
    }
}
218
/// Response from a single model call. `Message` is a final answer (no
/// follow-up tool needed); `ToolCall` hands control back to the harness
/// loop to execute the tool and feed the result back next turn.
///
/// `usage` carries the provider-reported token tally for *this* call.
/// AgentLoopHarness accumulates these across all steps of a turn and
/// attaches the total to the final `HarnessInternalEvent::TurnEnd`.
/// `None` ⇒ provider didn't report usage on this call (e.g. the gateway
/// elided it, or a scripted fake client produced the response).
#[derive(Debug, Clone, PartialEq)]
pub enum ModelResponse {
    /// Final assistant text for the turn.
    Message {
        text: String,
        stop_reason: String,
        usage: Option<HarnessUsage>,
    },
    /// The model asked for a tool. `preface` is any assistant text that
    /// was streamed alongside the call (`None` when there was none).
    ToolCall {
        preface: Option<String>,
        invocation: ToolInvocation,
        usage: Option<HarnessUsage>,
    },
}
241
242impl ModelResponse {
243    /// Per-call usage, regardless of variant. Pulled out so AgentLoopHarness
244    /// can fold it into the running total without matching on every branch.
245    pub fn usage(&self) -> Option<&HarnessUsage> {
246        match self {
247            ModelResponse::Message { usage, .. } | ModelResponse::ToolCall { usage, .. } => {
248                usage.as_ref()
249            }
250        }
251    }
252}
253
/// Categorised model-client failures. Mirrors `NativeHarnessError`'s
/// `Model*` variants 1:1 so the agent loop can map across without
/// pattern-matching gymnastics.
///
/// Retryability (see `retryable()`):
///   - `RateLimit` / `Network` / `ServerError` → transient, safe to retry with backoff
///   - `Auth` / `ContextOverflow` / `BadRequest` → config error, never retry
///   - `Other` → unclassified, treated as non-retryable
#[derive(Debug, thiserror::Error)]
pub enum ModelClientError {
    /// HTTP 429 — back off and retry.
    #[error("rate limit: {0}")]
    RateLimit(String),
    /// HTTP 401 / 403 — wrong key or no permission; do not retry.
    #[error("auth: {0}")]
    Auth(String),
    /// HTTP 400 that looks like context overflow; do not retry.
    #[error("context overflow: {0}")]
    ContextOverflow(String),
    /// HTTP 400 (invalid model, bad params, etc.) — config error; do not retry.
    #[error("bad request: {0}")]
    BadRequest(String),
    /// HTTP 5xx from the provider — transient; safe to retry.
    /// (Connection-level failures belong in `Network`.)
    #[error("server error: {0}")]
    ServerError(String),
    /// DNS / TCP / TLS failure — transient; safe to retry.
    #[error("network: {0}")]
    Network(String),
    /// Anything else we couldn't bucket; treated as non-retryable.
    #[error("model error: {0}")]
    Other(String),
}
285
286impl ModelClientError {
287    /// Returns `true` if the caller should back off and retry this error.
288    pub fn retryable(&self) -> bool {
289        matches!(
290            self,
291            Self::RateLimit(_) | Self::Network(_) | Self::ServerError(_)
292        )
293    }
294}
295
296#[async_trait]
297pub trait ModelClient: Send + Sync {
298    /// Stream the model's response as a sequence of `ModelChunk`s. Production
299    /// `ScriptedModelClient` / `OpenAiCompatibleModelClient` implement this;
300    /// `next()` is provided as a folding convenience for callers that don't
301    /// need token-level events.
302    async fn stream(
303        &self,
304        input: ModelTurnInput,
305    ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError>;
306
307    /// Buffer the stream into a single `ModelResponse`. Default impl
308    /// `await`s every chunk and then runs `collect_model_response`. Override
309    /// only if a provider has a cheaper non-streaming path (none today —
310    /// even OpenAI's non-stream call still goes through the same SSE-or-not
311    /// branch on our side).
312    async fn next(&self, input: ModelTurnInput) -> Result<ModelResponse, ModelClientError> {
313        let stream = self.stream(input).await?;
314        collect_model_response(stream).await
315    }
316}
317
318/// Fold a `ModelChunk` stream into the legacy `ModelResponse` shape. Used
319/// by tests, by the default `next()` impl, and by any caller that prefers
320/// "one response at a time" over a streaming feed. Provider-agnostic —
321/// the same logic works for OpenAI and Anthropic chunk streams because
322/// `ModelChunk` is the projection target both clients normalise into.
323pub async fn collect_model_response(
324    mut stream: BoxStream<'static, Result<ModelChunk, ModelClientError>>,
325) -> Result<ModelResponse, ModelClientError> {
326    let mut text_buf = String::new();
327    let mut text_msg_id: Option<String> = None;
328    // Per tool-call-id: name + accumulated argument bytes + early input (Anthropic-style).
329    let mut tool_states: Vec<ToolStreamState> = Vec::new();
330    let mut stop_reason: Option<String> = None;
331    let mut usage: Option<HarnessUsage> = None;
332
333    while let Some(item) = stream.next().await {
334        match item? {
335            ModelChunk::TextDelta { msg_id, delta } => {
336                if text_msg_id.as_deref() != Some(&msg_id) {
337                    text_msg_id = Some(msg_id);
338                    text_buf.clear();
339                }
340                text_buf.push_str(&delta);
341            }
342            ModelChunk::ThinkingDelta { .. } => {
343                // collect_model_response is a back-compat surface; thinking
344                // blocks don't fit into the simple Message/ToolCall enum,
345                // so we silently drop them. Callers that care use the
346                // streaming API directly.
347            }
348            ModelChunk::ToolCallStart { id, name } => {
349                tool_states.push(ToolStreamState {
350                    id,
351                    name,
352                    args_buf: String::new(),
353                    early_input: None,
354                });
355            }
356            ModelChunk::ToolCallInputDelta { id, delta } => {
357                if let Some(state) = tool_states.iter_mut().find(|s| s.id == id) {
358                    state.args_buf.push_str(&delta);
359                }
360            }
361            ModelChunk::ToolCallEnd { id, input } => {
362                if let Some(state) = tool_states.iter_mut().find(|s| s.id == id) {
363                    state.early_input = input;
364                }
365            }
366            ModelChunk::Done {
367                stop_reason: sr,
368                usage: u,
369            } => {
370                stop_reason = Some(sr);
371                usage = u;
372            }
373        }
374    }
375
376    // Tool calls take precedence — when the model decides to use a tool,
377    // the loop must execute it before any final-answer text matters. Pick
378    // the first tool call (multiple-tool parallelism is a future
379    // extension; today RD-side serialises them).
380    if let Some(state) = tool_states.into_iter().next() {
381        let parsed_input = match state.early_input {
382            Some(v) => v,
383            None => serde_json::from_str(state.args_buf.as_str().trim()).map_err(|e| {
384                ModelClientError::Other(format!(
385                    "decode tool arguments for {id}: {e}",
386                    id = state.id
387                ))
388            })?,
389        };
390        return Ok(ModelResponse::ToolCall {
391            preface: (!text_buf.is_empty()).then(|| text_buf.clone()),
392            invocation: ToolInvocation {
393                id: state.id,
394                name: state.name,
395                input: parsed_input,
396            },
397            usage,
398        });
399    }
400
401    Ok(ModelResponse::Message {
402        text: text_buf,
403        stop_reason: stop_reason.unwrap_or_else(|| "end_turn".into()),
404        usage,
405    })
406}
407
/// Per-tool-call buffer used while folding a stream in
/// `collect_model_response` (its only user in this file). Pulled out as a
/// named struct only because a Vec of tuples gets noisy fast.
struct ToolStreamState {
    /// Tool call id from `ToolCallStart`; later chunks are matched on it.
    id: String,
    /// Tool name from `ToolCallStart`.
    name: String,
    /// Concatenated `ToolCallInputDelta` text, parsed as JSON at the end.
    args_buf: String,
    /// Input delivered on `ToolCallEnd`, if any; takes priority over `args_buf`.
    early_input: Option<Value>,
}
417
/// Connection and sampling settings for `OpenAiCompatibleModelClient`.
#[derive(Debug, Clone)]
pub struct OpenAiCompatibleConfig {
    /// Full API prefix including any version segment, e.g.
    /// `https://api.openai.com/v1`. The client appends `/chat/completions`
    /// unless the URL already ends with it.
    pub base_url: String,
    /// Provider API key. NOTE(review): how it is attached to requests is
    /// not visible in this part of the file.
    pub api_key: String,
    /// Model identifier sent as the request's `model` field.
    pub model: String,
    /// Sent as `temperature` when set; omitted otherwise.
    pub temperature: Option<f64>,
    /// Sent as `max_tokens` when set; omitted otherwise.
    pub max_tokens: Option<i32>,
    /// Reasoning effort hint: `"low"`, `"medium"`, or `"high"`. Sent as the
    /// top-level `reasoning_effort` field. `None` (or an empty string)
    /// omits it (provider default).
    pub reasoning_effort: Option<String>,
}
429
/// `ModelClient` for OpenAI-compatible chat/completions endpoints.
/// Construct with `OpenAiCompatibleModelClient::new`.
#[derive(Debug, Clone)]
pub struct OpenAiCompatibleModelClient {
    /// HTTP client built once in `new` (15 s connect timeout).
    http: reqwest::Client,
    /// Endpoint, credentials and sampling settings.
    config: OpenAiCompatibleConfig,
}
435
436impl OpenAiCompatibleModelClient {
437    pub fn new(config: OpenAiCompatibleConfig) -> Self {
438        // Fail fast if the TCP handshake takes too long. Do not set a
439        // read_timeout: streaming model responses may legitimately pause
440        // longer than a fixed per-read timeout between SSE frames.
441        let http = reqwest::Client::builder()
442            .connect_timeout(std::time::Duration::from_secs(15))
443            .build()
444            .unwrap_or_else(|_| reqwest::Client::new());
445        Self { http, config }
446    }
447
448    fn endpoint(&self) -> String {
449        // `base_url` is the full API prefix supplied by the caller, including
450        // any version segment — e.g. `https://api.openai.com/v1` or
451        // `https://open.bigmodel.cn/api/paas/v4`. We only append the route;
452        // picking the version is the caller's responsibility, since it differs
453        // across OpenAI-compatible providers.
454        let base = self.config.base_url.trim_end_matches('/');
455        if base.ends_with("/chat/completions") {
456            base.to_string()
457        } else {
458            format!("{base}/chat/completions")
459        }
460    }
461
462    fn request_body(&self, input: &ModelTurnInput) -> Value {
463        let mut messages = Vec::with_capacity(input.messages.len() + 1);
464        if let Some(sys) = input.system_prompt.as_deref().filter(|s| !s.is_empty()) {
465            messages.push(json!({ "role": "system", "content": sys }));
466        }
467        for msg in &input.messages {
468            messages.push(chat_message_to_wire(msg));
469        }
470
471        let mut body = json!({
472            "model": self.config.model,
473            "messages": messages,
474        });
475        // Tool advertising obeys `tool_choice`:
476        //   - Auto / Required / Tool(name) — send tools + matching
477        //     `tool_choice` field; model gets the wire-level constraint.
478        //   - None — drop the tools entirely. OpenAI does accept
479        //     `tool_choice: "none"` to forbid use, but dropping
480        //     `tools` is cheaper (fewer prompt tokens) and the model
481        //     can't call what it doesn't see.
482        let send_tools = !input.tools.is_empty() && !matches!(input.tool_choice, ToolChoice::None);
483        if send_tools {
484            body["tools"] = json!(input
485                .tools
486                .iter()
487                .map(tool_spec_to_openai_function)
488                .collect::<Vec<_>>());
489            body["tool_choice"] = openai_tool_choice_value(&input.tool_choice);
490            if let Some(parallel) = input.parallel_tool_calls {
491                body["parallel_tool_calls"] = json!(parallel);
492            }
493        }
494        if let Some(temperature) = self.config.temperature {
495            body["temperature"] = json!(temperature);
496        }
497        if let Some(max_tokens) = self.config.max_tokens {
498            body["max_tokens"] = json!(max_tokens);
499        }
500        if let Some(effort) = self
501            .config
502            .reasoning_effort
503            .as_deref()
504            .filter(|s| !s.is_empty())
505        {
506            body["reasoning_effort"] = json!(effort);
507        }
508        body
509    }
510}
511
512fn openai_tool_choice_value(c: &ToolChoice) -> Value {
513    match c {
514        ToolChoice::Auto => json!("auto"),
515        // ToolChoice::None lands in the caller's "drop tools" branch
516        // so it never reaches here, but encode it defensively.
517        ToolChoice::None => json!("none"),
518        ToolChoice::Required => json!("required"),
519        ToolChoice::Tool(name) => json!({
520            "type": "function",
521            "function": {"name": name},
522        }),
523    }
524}
525
526/// Pull token counts out of an OpenAI chat/completions `usage` block.
527/// Returns `None` if the field is missing / empty — some compat gateways
528/// (older DeepSeek, certain proxies) elide it. Maps directly:
529///   * `prompt_tokens`        → `input_tokens`
530///   * `completion_tokens`    → `output_tokens`
531///   * `prompt_tokens_details.cached_tokens` → `cache_read_input_tokens`
532///     (OpenAI semantics: cached_tokens is a subset of prompt_tokens — we
533///     keep that view rather than subtracting; matches the proto field's
534///     "展示提示" intent).
535///   * cache_creation: no OpenAI equivalent → 0.
536fn parse_openai_usage(usage: Option<&Value>) -> Option<HarnessUsage> {
537    let u = usage?;
538    let input = u.get("prompt_tokens").and_then(|v| v.as_u64()).unwrap_or(0);
539    let output = u
540        .get("completion_tokens")
541        .and_then(|v| v.as_u64())
542        .unwrap_or(0);
543    let cache_read = u
544        .get("prompt_tokens_details")
545        .and_then(|d| d.get("cached_tokens"))
546        .and_then(|v| v.as_u64())
547        .unwrap_or(0);
548    // Heuristic: if every counter is zero, treat as "no data reported" so
549    // downstream telemetry doesn't mis-attribute a no-op response.
550    if input == 0 && output == 0 && cache_read == 0 {
551        return None;
552    }
553    Some(HarnessUsage {
554        input_tokens: input,
555        output_tokens: output,
556        cache_read_input_tokens: cache_read,
557        cache_creation_input_tokens: 0,
558        // Provider-reported usage on the main step never carries
559        // compaction tokens — agent_loop accumulates those separately
560        // when a compaction summarize call returns its own usage.
561        compaction_input_tokens: 0,
562        compaction_output_tokens: 0,
563    })
564}
565
566/// Render one `ToolSpec` to the OpenAI chat/completions `tools[*]` shape.
567/// Pulled out so future Anthropic client can project the same spec to its
568/// Messages API `input_schema` form without duplicating tool metadata.
569/// Project an `ImageSource` to the OpenAI chat/completions `image_url`
570/// content part shape. Inline base64 gets wrapped in a `data:` URI
571/// because OpenAI's API expects the full URL form there — the model
572/// won't accept raw base64 as a sibling key.
573fn image_to_openai_part(src: &ImageSource) -> Value {
574    let url = match &src.data {
575        ImageData::Base64(b64) => {
576            // `data:image/png;base64,xxx`. OpenAI documents this form
577            // in the vision quickstart. We don't validate media_type
578            // here — invalid values surface as a 400 at request time
579            // (caller saw it in classify_openai_http_error).
580            format!("data:{};base64,{}", src.media_type, b64)
581        }
582        ImageData::Url(u) => u.clone(),
583    };
584    json!({
585        "type": "image_url",
586        "image_url": { "url": url },
587    })
588}
589
590fn tool_spec_to_openai_function(spec: &ToolSpec) -> Value {
591    json!({
592        "type": "function",
593        "function": {
594            "name": spec.name,
595            "description": spec.description,
596            "parameters": spec.input_schema,
597        }
598    })
599}
600
/// Replay-compaction thresholds for a single tool result. A result is
/// compacted only when it exceeds the token estimate or the byte budget
/// AND is longer than the kept window — small results are never touched.
/// The kept window is split half head / half tail so the model sees the
/// command's start AND its trailing errors / summary.
///
/// Upper bound on the estimated token count before compaction kicks in.
const MAX_TOOL_RESULT_REPLAY_TOKENS: u64 = 2_000;
/// Upper bound on the UTF-8 byte length before compaction kicks in (12 KiB).
const MAX_TOOL_RESULT_REPLAY_BYTES: usize = 12 * 1024;
/// Number of characters (not bytes) retained from a compacted result,
/// split between head and tail.
const COMPACTED_TOOL_RESULT_KEEP_CHARS: usize = 3_000;
608const COMPACTED_TOOL_RESULT_KEEP_CHARS: usize = 3_000;
609
610/// Compact one oversized tool result for model replay. Applied at wire
611/// projection time ONLY — the full result stays verbatim in the in-memory
612/// history and in the persisted `messages.jsonl`, so resume and operators
613/// keep the raw data while every subsequent model call stops re-paying
614/// thousands of tokens for it. Deterministic (same input → same output),
615/// which keeps the projected prefix byte-stable for provider prompt caches.
616/// Orthogonal to message-level compaction: this trims per-result on every
617/// call, without waiting for a context-window trigger.
618fn compact_tool_result_for_replay(content: &str) -> std::borrow::Cow<'_, str> {
619    let estimated_tokens = crate::compaction::estimate_tokens(content);
620    if estimated_tokens <= MAX_TOOL_RESULT_REPLAY_TOKENS
621        && content.len() <= MAX_TOOL_RESULT_REPLAY_BYTES
622    {
623        return std::borrow::Cow::Borrowed(content);
624    }
625    let chars: Vec<char> = content.chars().collect();
626    if chars.len() <= COMPACTED_TOOL_RESULT_KEEP_CHARS {
627        return std::borrow::Cow::Borrowed(content);
628    }
629    let head_len = COMPACTED_TOOL_RESULT_KEEP_CHARS / 2;
630    let tail_len = COMPACTED_TOOL_RESULT_KEEP_CHARS - head_len;
631    let head: String = chars[..head_len].iter().collect();
632    let tail: String = chars[chars.len() - tail_len..].iter().collect();
633    let omitted = chars.len() - COMPACTED_TOOL_RESULT_KEEP_CHARS;
634    std::borrow::Cow::Owned(format!(
635        "[tool result compacted for model replay]\n\
636         original_estimated_tokens={estimated_tokens} original_chars={} \
637         retained_head_chars={head_len} retained_tail_chars={tail_len}\n\
638         The full raw tool result remains in session history; this replay is abbreviated.\n\n\
639         --- head ---\n{head}\n\n\
640         --- omitted ---\n[... omitted {omitted} chars from tool result replay ...]\n\n\
641         --- tail ---\n{tail}",
642        chars.len(),
643    ))
644}
645
646/// Render one `ChatMessage` to the OpenAI chat/completions wire shape.
647/// Pulled out so tests and (future) other providers can share / diff the
648/// projection.
649fn chat_message_to_wire(msg: &ChatMessage) -> Value {
650    match msg {
651        ChatMessage::User {
652            content,
653            attachments,
654        } => {
655            // Fast path: no attachments → content stays as a plain
656            // string. Keeps simple text-only requests byte-identical to
657            // pre-multimodal output (Anthropic prompt cache friendliness
658            // on OpenAI-compatible gateways that mirror that behaviour).
659            if attachments.is_empty() {
660                json!({ "role": "user", "content": content })
661            } else {
662                // Mixed multimodal — promote `content` to an array of
663                // OpenAI vision parts. Text always lands first (the
664                // common chat ordering); image_url parts follow. Empty
665                // text is dropped — OpenAI accepts arrays with only
666                // image parts.
667                let mut parts: Vec<Value> = Vec::with_capacity(attachments.len() + 1);
668                if !content.is_empty() {
669                    parts.push(json!({ "type": "text", "text": content }));
670                }
671                for att in attachments {
672                    match att {
673                        UserAttachment::Image(src) => {
674                            parts.push(image_to_openai_part(src));
675                        }
676                    }
677                }
678                json!({ "role": "user", "content": parts })
679            }
680        }
681        ChatMessage::Assistant {
682            text,
683            tool_calls,
684            thinking: _,
685        } => {
686            // OpenAI chat/completions has no equivalent of Anthropic
687            // thinking blocks; we drop the field here. The Anthropic
688            // projection (chat_messages_to_anthropic_messages) consumes
689            // it.
690            let mut obj = json!({ "role": "assistant" });
691            if let Some(t) = text.as_deref().filter(|s| !s.is_empty()) {
692                obj["content"] = json!(t);
693            } else {
694                obj["content"] = Value::Null;
695            }
696            if !tool_calls.is_empty() {
697                let calls: Vec<Value> = tool_calls
698                    .iter()
699                    .map(|tc| {
700                        json!({
701                            "id": tc.id,
702                            "type": "function",
703                            "function": {
704                                "name": tc.name,
705                                "arguments": tc.input.to_string(),
706                            },
707                        })
708                    })
709                    .collect();
710                obj["tool_calls"] = json!(calls);
711            }
712            obj
713        }
714        ChatMessage::Tool {
715            tool_call_id,
716            content,
717            attachments,
718            is_error: _,
719        } => {
720            // OpenAI tool role is strictly string-typed (no content
721            // block array). Non-text attachments (e.g. image returned
722            // by an MCP screenshot tool) can't ride here — we surface
723            // them as a placeholder appended to `content` so the model
724            // is at least aware something visual was attached. Lossy
725            // by design; if the agent needs to see the image, use the
726            // Anthropic provider.
727            let mut content_str = compact_tool_result_for_replay(content).into_owned();
728            for att in attachments {
729                let UserAttachment::Image(src) = att;
730                content_str.push_str(&format!(
731                    "\n[image attached: {} (not visible via OpenAI tool role)]",
732                    src.media_type
733                ));
734            }
735            json!({
736                "role": "tool",
737                "tool_call_id": tool_call_id,
738                "content": content_str,
739            })
740        }
741    }
742}
743
744#[async_trait]
745impl ModelClient for OpenAiCompatibleModelClient {
746    async fn stream(
747        &self,
748        input: ModelTurnInput,
749    ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError> {
750        // Bolt-on streaming flags. `stream_options.include_usage` is a
751        // recent OpenAI addition that makes the final SSE event carry the
752        // usage block; without it streaming responses drop usage entirely.
753        // Compat gateways (DeepSeek, Groq, etc.) mostly accept the field
754        // and either honour it or ignore — passing it is forward-safe.
755        let mut body = self.request_body(&input);
756        body["stream"] = json!(true);
757        body["stream_options"] = json!({ "include_usage": true });
758
759        let resp = match self
760            .http
761            .post(self.endpoint())
762            .bearer_auth(&self.config.api_key)
763            .json(&body)
764            .send()
765            .await
766        {
767            Ok(r) => r,
768            Err(e) => return Err(classify_reqwest_error(&e, e.to_string())),
769        };
770        let status = resp.status();
771        if !status.is_success() {
772            let body_text = resp.text().await.unwrap_or_default();
773            return Err(classify_openai_http_error(status, &body_text));
774        }
775
776        // bytes_stream → SSE event_stream → ModelChunk stream. The SSE
777        // parser handles UTF-8 boundaries, multi-line `data:` reassembly,
778        // and the `[DONE]` sentinel; we layer ModelChunk extraction on
779        // top with an explicit state machine because OpenAI ships `id`
780        // / `name` only on the first delta of each tool call.
781        let event_stream = resp.bytes_stream().eventsource();
782        let (tx, rx) = tokio::sync::mpsc::channel::<Result<ModelChunk, ModelClientError>>(8);
783
784        tokio::spawn(async move {
785            let mut state = OpenAiStreamState::default();
786            futures::pin_mut!(event_stream);
787            while let Some(ev) = event_stream.next().await {
788                let chunks = match ev {
789                    Ok(event) => match state.feed_data(&event.data) {
790                        Ok(c) => c,
791                        Err(e) => {
792                            let _ = tx.send(Err(e)).await;
793                            return;
794                        }
795                    },
796                    Err(e) => {
797                        let _ = tx
798                            .send(Err(ModelClientError::Network(format!(
799                                "SSE transport error: {e}"
800                            ))))
801                            .await;
802                        return;
803                    }
804                };
805                for c in chunks {
806                    if tx.send(Ok(c)).await.is_err() {
807                        return;
808                    }
809                }
810            }
811            // Stream closed. Distinguish a clean end from a premature cut-off:
812            //   * clean  — we saw `[DONE]` or a `finish_reason`. Emit the final
813            //     Done (covers gateways that close without `[DONE]` but DO send
814            //     a finish_reason).
815            //   * cut off — neither marker arrived. The connection dropped mid
816            //     response (proxy/idle timeout, upstream truncation). Surfacing
817            //     this as a (retryable) error instead of fabricating a Done is
818            //     critical: otherwise a truncated answer is silently accepted as
819            //     complete, and the user sees a half-finished reply with no hint
820            //     anything went wrong.
821            if state.ended_cleanly() {
822                if let Some(final_chunk) = state.finalize() {
823                    let _ = tx.send(Ok(final_chunk)).await;
824                }
825            } else {
826                let _ = tx
827                    .send(Err(ModelClientError::Network(
828                        "model stream closed before completion (no finish_reason or [DONE]) \
829                         — connection dropped or upstream truncated the response"
830                            .into(),
831                    )))
832                    .await;
833            }
834        });
835
836        Ok(tokio_stream::wrappers::ReceiverStream::new(rx).boxed())
837    }
838}
839
840/// State machine that turns OpenAI chat.completion.chunk SSE events into
841/// `ModelChunk` stream items. Lives outside the trait impl so the parsing
842/// logic is unit-testable without standing up an HTTP server.
843#[derive(Debug, Default)]
844struct OpenAiStreamState {
845    /// Carries the assistant message id forward from the first chunk that
846    /// supplied it (OpenAI uses `chatcmpl-...`). Falls back to a synthetic
847    /// id if the provider doesn't send one — text deltas without an id
848    /// would otherwise be dropped on the floor by `native_adapter`'s chunk
849    /// accumulator.
850    msg_id: Option<String>,
851    /// Maps the OpenAI `tool_calls[i].index` to the eventual call id so
852    /// subsequent `arguments` deltas can route to the right buffer.
853    tool_call_by_index: std::collections::HashMap<u64, String>,
854    finish_reason: Option<String>,
855    pending_usage: Option<HarnessUsage>,
856    /// Set once `[DONE]` or a finish_reason chunk lands; suppresses the
857    /// duplicate `Done` we'd otherwise emit from `finalize()`.
858    done_emitted: bool,
859}
860
861impl OpenAiStreamState {
862    fn feed_data(&mut self, data: &str) -> Result<Vec<ModelChunk>, ModelClientError> {
863        // `[DONE]` is OpenAI's end-of-stream sentinel; some compat gateways
864        // also emit it, others just close. Either path lands in finalize().
865        if data.trim() == "[DONE]" {
866            if let Some(done) = self.emit_done() {
867                return Ok(vec![done]);
868            }
869            return Ok(vec![]);
870        }
871        let value: Value = serde_json::from_str(data)
872            .map_err(|e| ModelClientError::Other(format!("SSE data not JSON: {e}; raw={data}")))?;
873
874        let mut out: Vec<ModelChunk> = Vec::new();
875
876        // The final chunk on `stream_options.include_usage=true` arrives
877        // with empty `choices` and a populated `usage` block.
878        if let Some(usage) = parse_openai_usage(value.get("usage")) {
879            self.pending_usage = Some(usage);
880        }
881
882        if let Some(id) = value.get("id").and_then(|v| v.as_str()) {
883            if self.msg_id.is_none() && !id.is_empty() {
884                self.msg_id = Some(id.to_string());
885            }
886        }
887
888        let Some(choices) = value.get("choices").and_then(|v| v.as_array()) else {
889            return Ok(out);
890        };
891        let Some(choice) = choices.first() else {
892            return Ok(out);
893        };
894        let Some(delta) = choice.get("delta") else {
895            // Some gateways send a usage-only chunk with no delta — fine.
896            if let Some(reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
897                self.finish_reason = Some(reason.to_string());
898            }
899            return Ok(out);
900        };
901
902        // Text token delta. Falls back to "msg_native_default" if the
903        // provider hasn't emitted a chunk id; `native_adapter`'s
904        // accumulator groups deltas by id so we must keep it stable.
905        if let Some(text) = delta.get("content").and_then(|v| v.as_str()) {
906            if !text.is_empty() {
907                let msg_id = self
908                    .msg_id
909                    .clone()
910                    .unwrap_or_else(|| "msg_native_default".to_string());
911                out.push(ModelChunk::TextDelta {
912                    msg_id,
913                    delta: text.to_string(),
914                });
915            }
916        }
917
918        // Tool call deltas. Each entry in `tool_calls[]` has an `index`
919        // that's stable across chunks; the first chunk for a given index
920        // carries `id` + `function.name`, later chunks just stream
921        // `function.arguments`. We route every arguments delta through
922        // the index→id map.
923        if let Some(tcs) = delta.get("tool_calls").and_then(|v| v.as_array()) {
924            for tc in tcs {
925                let index = tc.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
926                if let Some(id) = tc.get("id").and_then(|v| v.as_str()) {
927                    if !id.is_empty() {
928                        self.tool_call_by_index.insert(index, id.to_string());
929                        let name = tc
930                            .get("function")
931                            .and_then(|f| f.get("name"))
932                            .and_then(|v| v.as_str())
933                            .unwrap_or("")
934                            .to_string();
935                        out.push(ModelChunk::ToolCallStart {
936                            id: id.to_string(),
937                            name,
938                        });
939                    }
940                }
941                // Streaming argument bytes — concatenation across chunks
942                // forms a single JSON object string. Handed back as a
943                // delta so collect_model_response (or any other folder)
944                // can accumulate.
945                if let Some(args) = tc
946                    .get("function")
947                    .and_then(|f| f.get("arguments"))
948                    .and_then(|v| v.as_str())
949                {
950                    if let Some(id) = self.tool_call_by_index.get(&index).cloned() {
951                        if !args.is_empty() {
952                            out.push(ModelChunk::ToolCallInputDelta {
953                                id,
954                                delta: args.to_string(),
955                            });
956                        }
957                    }
958                }
959            }
960        }
961
962        if let Some(reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
963            self.finish_reason = Some(reason.to_string());
964            // OpenAI's tool-call streaming ends with finish_reason="tool_calls";
965            // emit a `ToolCallEnd` for each open tool call (without a parsed
966            // input — `collect_model_response` will parse the accumulated
967            // buffer if it needs the value).
968            if reason == "tool_calls" {
969                for (_idx, id) in self.tool_call_by_index.iter() {
970                    out.push(ModelChunk::ToolCallEnd {
971                        id: id.clone(),
972                        input: None,
973                    });
974                }
975            }
976            // Some gateways then close the stream without `[DONE]`. We
977            // could emit Done eagerly here, but to handle the include_usage
978            // case (usage arrives in a later chunk), defer to finalize().
979        }
980
981        Ok(out)
982    }
983
984    fn finalize(&mut self) -> Option<ModelChunk> {
985        self.emit_done()
986    }
987
988    /// True once we've observed a legitimate end-of-response signal — either
989    /// the `[DONE]` sentinel (sets `done_emitted`) or a chunk carrying a
990    /// `finish_reason` (e.g. `stop` / `length` / `tool_calls`). When a stream
991    /// closes WITHOUT either, the response was cut off mid-flight (dropped
992    /// connection, proxy/idle timeout, upstream truncation) and must NOT be
993    /// treated as a complete answer.
994    fn ended_cleanly(&self) -> bool {
995        self.done_emitted || self.finish_reason.is_some()
996    }
997
998    fn emit_done(&mut self) -> Option<ModelChunk> {
999        if self.done_emitted {
1000            return None;
1001        }
1002        self.done_emitted = true;
1003        let stop_reason = map_openai_finish_reason(self.finish_reason.as_deref());
1004        Some(ModelChunk::Done {
1005            stop_reason,
1006            usage: self.pending_usage.take(),
1007        })
1008    }
1009}
1010
/// Translate OpenAI's `finish_reason` into the harness's stop_reason
/// vocabulary.
///
/// * `stop` and `tool_calls` → `"end_turn"`
/// * `length` → `"max_tokens"`
/// * `content_filter` → `"refusal"`
/// * missing (`None`) or empty → `"end_turn"` — the call succeeded and the
///   model produced something, so it is not reported as an unknown reason
/// * any other non-empty value is passed through unchanged
fn map_openai_finish_reason(reason: Option<&str>) -> String {
    // Collapsing `None` into "" lets one arm cover both "absent" cases.
    let mapped = match reason.unwrap_or_default() {
        "" | "stop" | "tool_calls" => "end_turn",
        "length" => "max_tokens",
        "content_filter" => "refusal",
        passthrough => passthrough,
    };
    mapped.to_owned()
}
1026
1027/// Bucket an HTTP error into the right `ModelClientError` variant. Looks
1028/// at status code first; falls back to the response body for the trickier
1029/// case (BadRequest could be context overflow, malformed prompt, or just
1030/// a bad parameter).
1031fn classify_openai_http_error(status: reqwest::StatusCode, body: &str) -> ModelClientError {
1032    use reqwest::StatusCode;
1033    let snippet = body.chars().take(512).collect::<String>();
1034
1035    // Retryable errors
1036    if status == StatusCode::TOO_MANY_REQUESTS {
1037        return ModelClientError::RateLimit(format!("HTTP {status}: {snippet}"));
1038    }
1039    if status.is_server_error() {
1040        // 5xx are transient — proxy overloaded, upstream error, etc.
1041        return ModelClientError::ServerError(format!("HTTP {status}: {snippet}"));
1042    }
1043
1044    // Non-retryable config/client errors
1045    if status == StatusCode::UNAUTHORIZED || status == StatusCode::FORBIDDEN {
1046        return ModelClientError::Auth(format!("HTTP {status}: {snippet}"));
1047    }
1048    if status == StatusCode::BAD_REQUEST && looks_like_context_overflow(body) {
1049        return ModelClientError::ContextOverflow(format!("HTTP {status}: {snippet}"));
1050    }
1051    if status == StatusCode::BAD_REQUEST {
1052        // "Invalid model name", missing required fields, etc. — config error.
1053        return ModelClientError::BadRequest(format!("HTTP {status}: {snippet}"));
1054    }
1055
1056    ModelClientError::Other(format!("HTTP {status}: {snippet}"))
1057}
1058
/// Heuristic: does this error body say the prompt is too long for the
/// model?
///
/// Gateways phrase this differently, so the body is lowercased and scanned
/// for a handful of known fragments instead of being JSON-parsed (the body
/// may well be a non-JSON HTML page from a proxy).
fn looks_like_context_overflow(body: &str) -> bool {
    const MARKERS: [&str; 5] = [
        "context length",
        "maximum context",
        "context_length_exceeded",
        "too many tokens",
        "exceeds the model",
    ];
    let haystack = body.to_lowercase();
    MARKERS.iter().any(|marker| haystack.contains(*marker))
}
1072
1073/// Map a `reqwest::Error` to either `Network` (transport-layer / DNS /
1074/// connect / timeout / body) or `Other` (everything else — e.g. invalid
1075/// URL built locally, which is a programmer bug rather than transient).
1076fn classify_reqwest_error(err: &reqwest::Error, msg: String) -> ModelClientError {
1077    if err.is_connect() || err.is_timeout() || err.is_request() || err.is_body() {
1078        ModelClientError::Network(msg)
1079    } else {
1080        ModelClientError::Other(msg)
1081    }
1082}
1083
1084/// In-process model client used by tests / dev. Inspects the most recent
1085/// `User` message in `input.messages` and either fires a deterministic
1086/// tool call (if a prior `Tool` result hasn't landed yet) or emits a
1087/// summary text reply (once it has).
1088///
1089/// Implements `stream` natively so the agent loop exercises the
1090/// streaming code path even in tests; the chunk sequence it produces
1091/// matches what `OpenAiCompatibleModelClient` would emit for the same
1092/// logical response.
1093#[derive(Debug, Default, Clone)]
1094pub struct ScriptedModelClient;
1095
1096#[async_trait]
1097impl ModelClient for ScriptedModelClient {
1098    async fn stream(
1099        &self,
1100        input: ModelTurnInput,
1101    ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError> {
1102        let chunks = scripted_chunks_for(&input);
1103        let stream = futures::stream::iter(chunks.into_iter().map(Ok));
1104        Ok(stream.boxed())
1105    }
1106}
1107
1108/// Decide which `ModelChunk` sequence the scripted client would emit for a
1109/// given `ModelTurnInput`. Same heuristics as the old `next()` impl, just
1110/// rendered as a stream so tests that expect tool→summary→done chunking
1111/// see the right shape.
1112fn scripted_chunks_for(input: &ModelTurnInput) -> Vec<ModelChunk> {
1113    // If we already have a tool result in history, produce the final
1114    // summary message — text-only chunk followed by Done.
1115    let last_tool = input.messages.iter().rev().find_map(|m| match m {
1116        ChatMessage::Tool {
1117            tool_call_id,
1118            content,
1119            is_error,
1120            ..
1121        } => Some((tool_call_id.clone(), content.clone(), *is_error)),
1122        _ => None,
1123    });
1124    if let Some((id, content, is_error)) = last_tool {
1125        let summary = if is_error {
1126            format!("tool {id} failed: {content}")
1127        } else {
1128            format!("tool {id} completed: {content}")
1129        };
1130        return vec![
1131            ModelChunk::TextDelta {
1132                msg_id: "scripted_msg".into(),
1133                delta: summary,
1134            },
1135            ModelChunk::Done {
1136                stop_reason: "end_turn".into(),
1137                usage: None,
1138            },
1139        ];
1140    }
1141
1142    // Otherwise look at the latest user prompt and pick a tool.
1143    let user_prompt = input
1144        .messages
1145        .iter()
1146        .rev()
1147        .find_map(|m| match m {
1148            ChatMessage::User { content, .. } => Some(content.clone()),
1149            _ => None,
1150        })
1151        .unwrap_or_default();
1152    let prompt = user_prompt.trim();
1153    let (id, name, args) = if let Some(path) = prompt.strip_prefix("read ") {
1154        ("tc_read_1", "read", json!({"path": path.trim()}))
1155    } else if let Some(rest) = prompt.strip_prefix("write ") {
1156        let (path, content) = rest.split_once(' ').unwrap_or((rest, ""));
1157        (
1158            "tc_write_1",
1159            "write",
1160            json!({"path": path.trim(), "content": content}),
1161        )
1162    } else {
1163        ("tc_bash_1", "bash", json!({"command": prompt}))
1164    };
1165
1166    vec![
1167        ModelChunk::TextDelta {
1168            msg_id: "scripted_msg".into(),
1169            delta: format!("native model selected tool: {name}"),
1170        },
1171        ModelChunk::ToolCallStart {
1172            id: id.into(),
1173            name: name.into(),
1174        },
1175        ModelChunk::ToolCallEnd {
1176            id: id.into(),
1177            input: Some(args),
1178        },
1179        ModelChunk::Done {
1180            stop_reason: "end_turn".into(),
1181            usage: None,
1182        },
1183    ]
1184}
1185
1186// ─── Anthropic Messages API ──────────────────────────────────────────────
1187//
1188// Streaming Messages API differs from OpenAI chat/completions in three
1189// load-bearing places:
1190//
1191//   1. Endpoint + auth   POST /v1/messages with `x-api-key` +
1192//                        `anthropic-version: 2023-06-01`, no Bearer.
1193//   2. Body              `system` is a top-level field (not a chat role);
1194//                        `tool_result` lives as a content block inside the
1195//                        next user message (not a separate `tool` role);
1196//                        `max_tokens` is required.
1197//   3. SSE shape         Uses both `event:` and `data:` lines. Event types
1198//                        partition the chunk types we care about, and tool
1199//                        arguments arrive as `input_json_delta` strings
1200//                        that concatenate into a single JSON object.
1201//
1202// All of the work in this section is wire-shape translation; the harness
1203// loop and `ModelChunk` enum stay provider-agnostic.
1204
/// Connection settings for the Anthropic Messages API: where to send
/// requests, how to authenticate, and the per-request generation knobs
/// that live at deployment level.
#[derive(Debug, Clone)]
pub struct AnthropicConfig {
    /// API prefix the `/messages` route is appended to.
    pub base_url: String,
    /// Sent as the `x-api-key` header.
    pub api_key: String,
    /// Model identifier placed in the request body.
    pub model: String,
    /// Sent as `max_tokens` on every request (Anthropic requires the
    /// field). See [`AnthropicConfig::DEFAULT_MAX_TOKENS`] for the value
    /// to use when the agent recipe doesn't specify one; tunable via
    /// `spec_snapshot.agent_spec_version.model_config.max_tokens`.
    pub max_tokens: i32,
    /// Sampling temperature; omitted from the request body when `None`.
    pub temperature: Option<f64>,
    /// Value of the `anthropic-version` header, pinning the wire format.
    /// See [`AnthropicConfig::DEFAULT_VERSION`].
    pub anthropic_version: String,
}

impl AnthropicConfig {
    /// `anthropic-version` header value to use when nothing overrides it.
    pub const DEFAULT_VERSION: &'static str = "2023-06-01";
    /// `max_tokens` to use when the agent recipe doesn't specify one.
    /// Kept small so the cost of a mistake stays bounded; real workloads
    /// are expected to override it.
    pub const DEFAULT_MAX_TOKENS: i32 = 4096;
}
1233
1234#[derive(Debug, Clone)]
1235pub struct AnthropicModelClient {
1236    http: reqwest::Client,
1237    config: AnthropicConfig,
1238}
1239
1240impl AnthropicModelClient {
1241    pub fn new(config: AnthropicConfig) -> Self {
1242        let http = reqwest::Client::builder()
1243            .connect_timeout(std::time::Duration::from_secs(15))
1244            .build()
1245            .unwrap_or_else(|_| reqwest::Client::new());
1246        Self { http, config }
1247    }
1248
1249    fn endpoint(&self) -> String {
1250        // `base_url` is the full API prefix supplied by the caller, including
1251        // the version segment — e.g. `https://api.anthropic.com/v1`. We only
1252        // append the route; picking the version is the caller's job.
1253        let base = self.config.base_url.trim_end_matches('/');
1254        if base.ends_with("/messages") {
1255            base.to_string()
1256        } else {
1257            format!("{base}/messages")
1258        }
1259    }
1260
1261    /// Build the JSON body for `POST /v1/messages`. Mirrors `OpenAi`'s
1262    /// `request_body` but projects to the Messages API wire shape — see
1263    /// the section header for the differences.
1264    fn request_body(&self, input: &ModelTurnInput) -> Value {
1265        let messages = chat_messages_to_anthropic_messages(&input.messages);
1266        // `tool_choice::None` has no direct Anthropic equivalent
1267        // (Anthropic forces tool consideration once `tools` is non-
1268        // empty). Best approximation is to drop the tools entirely.
1269        let tools = if matches!(input.tool_choice, ToolChoice::None) {
1270            Vec::new()
1271        } else {
1272            input
1273                .tools
1274                .iter()
1275                .map(tool_spec_to_anthropic_tool)
1276                .collect::<Vec<_>>()
1277        };
1278        let system_field = anthropic_system_field(input.system_prompt.as_deref());
1279
1280        // Cache strategy is applied last so it can decorate the final
1281        // serialized shape. Builds up to 4 ephemeral breakpoints (system,
1282        // last tool, last message, optionally mid message for long chats).
1283        let cached = apply_anthropic_cache_strategy(system_field, tools, messages);
1284
1285        let mut body = json!({
1286            "model": self.config.model,
1287            "max_tokens": self.config.max_tokens,
1288            "messages": cached.messages,
1289            "stream": true,
1290        });
1291        if let Some(sys) = cached.system {
1292            body["system"] = sys;
1293        }
1294        if !cached.tools.is_empty() {
1295            body["tools"] = json!(cached.tools);
1296            // tool_choice only meaningful when we're actually sending
1297            // tools; Auto is Anthropic's default so we omit it to keep
1298            // the wire body byte-identical for the common case
1299            // (matters for prompt cache stability).
1300            if !matches!(input.tool_choice, ToolChoice::Auto) {
1301                body["tool_choice"] = anthropic_tool_choice_value(&input.tool_choice);
1302            }
1303        }
1304        // Anthropic doesn't support `parallel_tool_calls` — it's an
1305        // OpenAI-only knob. We silently ignore input.parallel_tool_calls
1306        // on this provider. (Anthropic returns multiple tool_use blocks
1307        // freely when the model decides to.)
1308        if let Some(t) = self.config.temperature {
1309            body["temperature"] = json!(t);
1310        }
1311        body
1312    }
1313}
1314
1315fn anthropic_tool_choice_value(c: &ToolChoice) -> Value {
1316    match c {
1317        ToolChoice::Auto => json!({"type": "auto"}),
1318        ToolChoice::None => json!({"type": "auto"}), // unreachable in practice (caller drops tools)
1319        ToolChoice::Required => json!({"type": "any"}),
1320        ToolChoice::Tool(name) => json!({"type": "tool", "name": name}),
1321    }
1322}
1323
1324#[async_trait]
1325impl ModelClient for AnthropicModelClient {
1326    async fn stream(
1327        &self,
1328        input: ModelTurnInput,
1329    ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError> {
1330        let resp = match self
1331            .http
1332            .post(self.endpoint())
1333            .header("x-api-key", &self.config.api_key)
1334            .header("anthropic-version", &self.config.anthropic_version)
1335            .header("content-type", "application/json")
1336            .json(&self.request_body(&input))
1337            .send()
1338            .await
1339        {
1340            Ok(r) => r,
1341            Err(e) => return Err(classify_reqwest_error(&e, e.to_string())),
1342        };
1343        let status = resp.status();
1344        if !status.is_success() {
1345            let body_text = resp.text().await.unwrap_or_default();
1346            return Err(classify_anthropic_http_error(status, &body_text));
1347        }
1348
1349        let event_stream = resp.bytes_stream().eventsource();
1350        let (tx, rx) = tokio::sync::mpsc::channel::<Result<ModelChunk, ModelClientError>>(8);
1351        tokio::spawn(async move {
1352            let mut state = AnthropicStreamState::default();
1353            futures::pin_mut!(event_stream);
1354            while let Some(ev) = event_stream.next().await {
1355                let chunks = match ev {
1356                    Ok(event) => match state.feed_event(&event.event, &event.data) {
1357                        Ok(c) => c,
1358                        Err(e) => {
1359                            let _ = tx.send(Err(e)).await;
1360                            return;
1361                        }
1362                    },
1363                    Err(e) => {
1364                        let _ = tx
1365                            .send(Err(ModelClientError::Network(format!(
1366                                "SSE transport error: {e}"
1367                            ))))
1368                            .await;
1369                        return;
1370                    }
1371                };
1372                for c in chunks {
1373                    if tx.send(Ok(c)).await.is_err() {
1374                        return;
1375                    }
1376                }
1377            }
1378            if let Some(done) = state.finalize() {
1379                let _ = tx.send(Ok(done)).await;
1380            }
1381        });
1382        Ok(tokio_stream::wrappers::ReceiverStream::new(rx).boxed())
1383    }
1384}
1385
1386/// Render `ChatMessage[]` into Anthropic Messages API `messages[]`. The
1387/// projection has two non-trivial rules:
1388///
1389///   1. `ChatMessage::Tool` doesn't have a dedicated role on Anthropic —
1390///      tool results live as content blocks on the user message that
1391///      follows them. We buffer pending tool results and flush them
1392///      into the next user message (or a synthetic user message at the
1393///      end if the conversation ends on tool results).
1394///   2. `ChatMessage::Assistant` may carry `thinking`, plain `text`, and
1395///      tool calls; all three render as content blocks under one
1396///      assistant message in that exact order (thinking → text →
1397///      tool_use). Anthropic API rejects modified thinking blocks, so
1398///      we round-trip the signature verbatim.
1399fn chat_messages_to_anthropic_messages(messages: &[ChatMessage]) -> Vec<Value> {
1400    let mut out: Vec<Value> = Vec::with_capacity(messages.len());
1401    let mut pending_tool_results: Vec<Value> = Vec::new();
1402
1403    let flush_tool_results = |bucket: &mut Vec<Value>, out: &mut Vec<Value>| {
1404        if !bucket.is_empty() {
1405            let blocks = std::mem::take(bucket);
1406            out.push(json!({"role": "user", "content": blocks}));
1407        }
1408    };
1409
1410    for msg in messages {
1411        match msg {
1412            ChatMessage::User {
1413                content,
1414                attachments,
1415            } => {
1416                // Build the content block list in this order:
1417                //   1. flushed tool_result blocks (if any pending)
1418                //   2. text block (when content non-empty)
1419                //   3. image blocks per attachment, in original order
1420                //
1421                // Anthropic requires the user message body to alternate
1422                // with assistant, so when tool_results are pending they
1423                // merge into THIS user message (saving an extra hop).
1424                let mut blocks: Vec<Value> = std::mem::take(&mut pending_tool_results);
1425                if !content.is_empty() {
1426                    blocks.push(json!({"type":"text","text":content}));
1427                }
1428                for att in attachments {
1429                    match att {
1430                        UserAttachment::Image(src) => {
1431                            blocks.push(image_to_anthropic_block(src));
1432                        }
1433                    }
1434                }
1435                // Defensive: if everything was empty (no tool_results,
1436                // empty content, no attachments), still emit a single
1437                // empty text block — Anthropic API rejects empty
1438                // `content` arrays.
1439                if blocks.is_empty() {
1440                    blocks.push(json!({"type":"text","text":""}));
1441                }
1442                out.push(json!({"role": "user", "content": blocks}));
1443            }
1444            ChatMessage::Assistant {
1445                text,
1446                tool_calls,
1447                thinking,
1448            } => {
1449                // tool_results must be flushed before we can append an
1450                // assistant message (Anthropic rejects two consecutive
1451                // assistant messages).
1452                flush_tool_results(&mut pending_tool_results, &mut out);
1453                let mut blocks: Vec<Value> = Vec::new();
1454                // Thinking first — both as observed in Anthropic's own
1455                // serialisations and as a stable position for cache
1456                // breakpoint placement.
1457                if let Some(t) = thinking {
1458                    let mut tb = json!({"type": "thinking", "thinking": t.text});
1459                    if let Some(sig) = t.signature.as_deref() {
1460                        if !sig.is_empty() {
1461                            tb["signature"] = json!(sig);
1462                        }
1463                    }
1464                    blocks.push(tb);
1465                }
1466                if let Some(t) = text.as_deref() {
1467                    if !t.is_empty() {
1468                        blocks.push(json!({"type": "text", "text": t}));
1469                    }
1470                }
1471                for tc in tool_calls {
1472                    blocks.push(json!({
1473                        "type": "tool_use",
1474                        "id": tc.id,
1475                        "name": tc.name,
1476                        "input": tc.input,
1477                    }));
1478                }
1479                if blocks.is_empty() {
1480                    // Pathological case — assistant turn with nothing
1481                    // to project. Anthropic rejects empty content arrays,
1482                    // so skip the message entirely.
1483                    continue;
1484                }
1485                out.push(json!({"role": "assistant", "content": blocks}));
1486            }
1487            ChatMessage::Tool {
1488                tool_call_id,
1489                content,
1490                is_error,
1491                attachments,
1492            } => {
1493                // Anthropic tool_result CAN carry image content blocks
1494                // (in addition to text). Render text first, then any
1495                // image attachments — same `text → image` ordering used
1496                // for the user message projection (§9 in internals doc).
1497                let mut blocks: Vec<Value> = Vec::new();
1498                if !content.is_empty() {
1499                    let replay = compact_tool_result_for_replay(content);
1500                    blocks.push(json!({"type": "text", "text": replay}));
1501                }
1502                for att in attachments {
1503                    let UserAttachment::Image(src) = att;
1504                    blocks.push(image_to_anthropic_block(src));
1505                }
1506                // tool_result.content can be either a string or a block
1507                // array per Anthropic docs. We use the array form
1508                // uniformly so attachments-or-not codepath is one.
1509                if blocks.is_empty() {
1510                    blocks.push(json!({"type": "text", "text": ""}));
1511                }
1512                pending_tool_results.push(json!({
1513                    "type": "tool_result",
1514                    "tool_use_id": tool_call_id,
1515                    "content": blocks,
1516                    "is_error": is_error,
1517                }));
1518            }
1519        }
1520    }
1521
1522    // Conversation ended on tool results without a follow-up user message
1523    // — synthesize one so Anthropic sees the results.
1524    flush_tool_results(&mut pending_tool_results, &mut out);
1525    out
1526}
1527
1528/// Anthropic accepts `system` as either a string OR a content-block
1529/// array (the latter lets us put `cache_control` on it). We always
1530/// pre-shape it as the array form so the cache strategy layer can apply
1531/// markers without re-allocating. `None` ⇒ no system field on the wire.
1532fn anthropic_system_field(prompt: Option<&str>) -> Option<Value> {
1533    let s = prompt?.trim();
1534    if s.is_empty() {
1535        return None;
1536    }
1537    Some(json!([{"type": "text", "text": s}]))
1538}
1539
1540/// Render a `ToolSpec` into Anthropic's tool definition shape. Same
1541/// `input_schema` field both providers use, just different envelope.
1542/// Project an `ImageSource` to an Anthropic `image` content block.
1543/// Anthropic accepts two source shapes:
1544///   * `{type:"base64", media_type, data}` for inline bytes
1545///   * `{type:"url", url}` for fetched-by-server URLs (added 2024)
1546fn image_to_anthropic_block(src: &ImageSource) -> Value {
1547    let source = match &src.data {
1548        ImageData::Base64(b64) => json!({
1549            "type": "base64",
1550            "media_type": src.media_type,
1551            "data": b64,
1552        }),
1553        ImageData::Url(url) => json!({
1554            "type": "url",
1555            "url": url,
1556        }),
1557    };
1558    json!({"type": "image", "source": source})
1559}
1560
1561fn tool_spec_to_anthropic_tool(spec: &ToolSpec) -> Value {
1562    json!({
1563        "name": spec.name,
1564        "description": spec.description,
1565        "input_schema": spec.input_schema,
1566    })
1567}
1568
/// The three request sections returned by `apply_anthropic_cache_strategy`
/// after `cache_control` markers have been applied to them.
struct AnthropicCached {
    /// System field in content-block array form; `None` when there is no
    /// system prompt.
    system: Option<Value>,
    /// Tool definitions.
    tools: Vec<Value>,
    /// Conversation messages.
    messages: Vec<Value>,
}
1574
1575/// Apply up-to-four `cache_control: ephemeral` breakpoints to the
1576/// request, mirroring OMA `default-loop.ts:997-1039`. Anthropic rate-
1577/// limits cache writes to 4 breakpoints per request; we spend them on:
1578///
1579///   1. **System block (last content)** — caches everything before the
1580///      messages section (system + tools).
1581///   2. **Last tool definition** — defensive when system also has
1582///      dynamic content (a system change shouldn't bust the tools cache).
1583///   3. **Last message** — multi-turn chat tail breakpoint, where most
1584///      hits come from.
1585///   4. **Mid message** (only when `messages.len() > 30`) — Anthropic's
1586///      20-block lookback window means long single turns blow past the
1587///      tail breakpoint; the intermediate one catches reads inside the
1588///      turn.
1589///
1590/// Empty `system` skips its breakpoint (Anthropic API rejects
1591/// `cache_control` on empty text blocks).
1592fn apply_anthropic_cache_strategy(
1593    system: Option<Value>,
1594    tools: Vec<Value>,
1595    messages: Vec<Value>,
1596) -> AnthropicCached {
1597    let mut system = system;
1598    if let Some(sys) = system.as_mut() {
1599        if let Some(arr) = sys.as_array_mut() {
1600            if let Some(last) = arr.last_mut() {
1601                if last
1602                    .get("text")
1603                    .and_then(|v| v.as_str())
1604                    .map(|s| !s.is_empty())
1605                    .unwrap_or(false)
1606                {
1607                    last["cache_control"] = json!({"type": "ephemeral"});
1608                }
1609            }
1610        }
1611    }
1612
1613    let mut tools = tools;
1614    if let Some(last) = tools.last_mut() {
1615        last["cache_control"] = json!({"type": "ephemeral"});
1616    }
1617
1618    let mut messages = messages;
1619    // Last message tail breakpoint — applied to the last content block
1620    // of the last message (mirrors OMA's behaviour and matches what
1621    // Anthropic docs recommend for chat tails).
1622    if let Some(last) = messages.last_mut() {
1623        if let Some(blocks) = last.get_mut("content").and_then(|v| v.as_array_mut()) {
1624            if let Some(last_block) = blocks.last_mut() {
1625                last_block["cache_control"] = json!({"type": "ephemeral"});
1626            }
1627        }
1628    }
1629    // Mid breakpoint for long chats — same shape, applied at the message
1630    // sitting at the midpoint when there are >30 messages.
1631    if messages.len() > 30 {
1632        let mid = messages.len() / 2;
1633        if let Some(blocks) = messages[mid]
1634            .get_mut("content")
1635            .and_then(|v| v.as_array_mut())
1636        {
1637            if let Some(last_block) = blocks.last_mut() {
1638                last_block["cache_control"] = json!({"type": "ephemeral"});
1639            }
1640        }
1641    }
1642
1643    AnthropicCached {
1644        system,
1645        tools,
1646        messages,
1647    }
1648}
1649
/// Anthropic streaming uses `event:` to discriminate frame types. This
/// state machine maps the lifecycle into our `ModelChunk` enum.
///
/// Frames we care about (Anthropic docs § streaming):
///   * `message_start` — carries message id (msg_xxxxx) + initial usage.
///   * `content_block_start` — declares a new block at `index` with
///     `type: text | thinking | tool_use`. tool_use carries id + name.
///   * `content_block_delta` — `delta.type: text_delta | input_json_delta
///     | thinking_delta | signature_delta`.
///   * `content_block_stop` — block at index complete. For tool_use,
///     this is where we emit `ToolCallEnd`.
///   * `message_delta` — final stop_reason + output_tokens.
///   * `message_stop` — frame after which the connection closes.
///   * `ping` — keepalive, ignored.
///   * `error` — provider-side fault.
#[derive(Debug, Default)]
struct AnthropicStreamState {
    /// Message id taken from `message_start`; left `None` when the frame
    /// carries no non-empty id.
    msg_id: Option<String>,
    /// Stream-local block index → kind + tool metadata.
    blocks: std::collections::HashMap<u64, AnthropicBlock>,
    /// Raw `stop_reason` from the most recent `message_delta` that had one.
    stop_reason: Option<String>,
    /// Usage merged from `message_start` and `message_delta`; moved into
    /// the `Done` chunk when it is emitted.
    pending_usage: Option<HarnessUsage>,
    /// Set once `Done` has been produced so it is emitted at most once.
    done_emitted: bool,
}
1674
/// Kind of a content block announced by `content_block_start`, stored per
/// stream index so later deltas can be routed.
#[derive(Debug)]
enum AnthropicBlock {
    /// Plain text block. Also used as the placeholder for block types the
    /// state machine does not recognise.
    Text,
    /// Thinking block; `thinking_id` is synthesized from the message id
    /// (when known) and the block index.
    Thinking { thinking_id: String },
    /// Tool call block; `id` is the provider-supplied tool-use id (empty
    /// when the start frame carried none).
    ToolUse { id: String },
}
1681
1682impl AnthropicStreamState {
1683    fn feed_event(&mut self, event: &str, data: &str) -> Result<Vec<ModelChunk>, ModelClientError> {
1684        // Anthropic sends `ping` for keepalive — ignore. Unknown events
1685        // we also ignore (forward-compat with future event types).
1686        match event {
1687            "ping" | "" => return Ok(vec![]),
1688            "error" => {
1689                return Err(ModelClientError::Other(format!(
1690                    "anthropic stream error event: {data}"
1691                )));
1692            }
1693            _ => {}
1694        }
1695
1696        let value: Value = serde_json::from_str(data).map_err(|e| {
1697            ModelClientError::Other(format!(
1698                "anthropic SSE data not JSON (event={event}): {e}; raw={data}"
1699            ))
1700        })?;
1701        let mut out: Vec<ModelChunk> = Vec::new();
1702
1703        match event {
1704            "message_start" => {
1705                let msg = value.get("message");
1706                if let Some(id) = msg.and_then(|m| m.get("id")).and_then(|v| v.as_str()) {
1707                    if !id.is_empty() {
1708                        self.msg_id = Some(id.to_string());
1709                    }
1710                }
1711                if let Some(u) = msg.and_then(|m| m.get("usage")) {
1712                    self.pending_usage =
1713                        Some(merge_anthropic_usage(self.pending_usage.clone(), u, true));
1714                }
1715            }
1716            "content_block_start" => {
1717                let index = value.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
1718                let block = value.get("content_block");
1719                let kind = block.and_then(|b| b.get("type")).and_then(|v| v.as_str());
1720                match kind {
1721                    Some("text") => {
1722                        self.blocks.insert(index, AnthropicBlock::Text);
1723                    }
1724                    Some("thinking") => {
1725                        let thinking_id = self
1726                            .msg_id
1727                            .clone()
1728                            .map(|m| format!("{m}_t{index}"))
1729                            .unwrap_or_else(|| format!("thinking_{index}"));
1730                        self.blocks
1731                            .insert(index, AnthropicBlock::Thinking { thinking_id });
1732                    }
1733                    Some("tool_use") => {
1734                        let id = block
1735                            .and_then(|b| b.get("id"))
1736                            .and_then(|v| v.as_str())
1737                            .unwrap_or_default()
1738                            .to_string();
1739                        let name = block
1740                            .and_then(|b| b.get("name"))
1741                            .and_then(|v| v.as_str())
1742                            .unwrap_or_default()
1743                            .to_string();
1744                        if !id.is_empty() && !name.is_empty() {
1745                            out.push(ModelChunk::ToolCallStart {
1746                                id: id.clone(),
1747                                name,
1748                            });
1749                        }
1750                        self.blocks.insert(index, AnthropicBlock::ToolUse { id });
1751                    }
1752                    _ => {
1753                        // Unknown block type — record as Text so we
1754                        // don't panic on later deltas; ignoring them
1755                        // is the safer forward-compat path.
1756                        self.blocks.insert(index, AnthropicBlock::Text);
1757                    }
1758                }
1759            }
1760            "content_block_delta" => {
1761                let index = value.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
1762                let delta = match value.get("delta") {
1763                    Some(d) => d,
1764                    None => return Ok(out),
1765                };
1766                let delta_type = delta.get("type").and_then(|v| v.as_str()).unwrap_or("");
1767                match (self.blocks.get(&index), delta_type) {
1768                    (Some(AnthropicBlock::Text), "text_delta") => {
1769                        if let Some(text) = delta.get("text").and_then(|v| v.as_str()) {
1770                            if !text.is_empty() {
1771                                let msg_id = self
1772                                    .msg_id
1773                                    .clone()
1774                                    .unwrap_or_else(|| "msg_anthropic_default".into());
1775                                out.push(ModelChunk::TextDelta {
1776                                    msg_id,
1777                                    delta: text.to_string(),
1778                                });
1779                            }
1780                        }
1781                    }
1782                    (Some(AnthropicBlock::Thinking { thinking_id }), "thinking_delta") => {
1783                        if let Some(text) = delta.get("thinking").and_then(|v| v.as_str()) {
1784                            if !text.is_empty() {
1785                                out.push(ModelChunk::ThinkingDelta {
1786                                    thinking_id: thinking_id.clone(),
1787                                    delta: text.to_string(),
1788                                    signature: None,
1789                                });
1790                            }
1791                        }
1792                    }
1793                    (Some(AnthropicBlock::Thinking { thinking_id }), "signature_delta") => {
1794                        if let Some(sig) = delta.get("signature").and_then(|v| v.as_str()) {
1795                            // Signature deltas land empty-text + signed.
1796                            // We pass them through as ThinkingDelta with
1797                            // empty `delta` so agent_loop's signature
1798                            // latch fires. Empty-delta ThinkingChunk is
1799                            // suppressed downstream by the empty check.
1800                            out.push(ModelChunk::ThinkingDelta {
1801                                thinking_id: thinking_id.clone(),
1802                                delta: String::new(),
1803                                signature: Some(sig.to_string()),
1804                            });
1805                        }
1806                    }
1807                    (Some(AnthropicBlock::ToolUse { id }), "input_json_delta") => {
1808                        if let Some(partial) = delta.get("partial_json").and_then(|v| v.as_str()) {
1809                            if !partial.is_empty() {
1810                                out.push(ModelChunk::ToolCallInputDelta {
1811                                    id: id.clone(),
1812                                    delta: partial.to_string(),
1813                                });
1814                            }
1815                        }
1816                    }
1817                    _ => { /* unknown delta type or block-kind mismatch */ }
1818                }
1819            }
1820            "content_block_stop" => {
1821                let index = value.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
1822                if let Some(AnthropicBlock::ToolUse { id }) = self.blocks.get(&index) {
1823                    // Tool argument bytes accumulated as deltas; harness
1824                    // parses them in `consume_step_stream`. We don't
1825                    // attach an early input here — Anthropic ships the
1826                    // arguments only via deltas.
1827                    out.push(ModelChunk::ToolCallEnd {
1828                        id: id.clone(),
1829                        input: None,
1830                    });
1831                }
1832            }
1833            "message_delta" => {
1834                if let Some(reason) = value
1835                    .get("delta")
1836                    .and_then(|d| d.get("stop_reason"))
1837                    .and_then(|v| v.as_str())
1838                {
1839                    self.stop_reason = Some(reason.to_string());
1840                }
1841                if let Some(u) = value.get("usage") {
1842                    // message_delta only refreshes the output_tokens —
1843                    // input_tokens come from message_start. Merge.
1844                    self.pending_usage =
1845                        Some(merge_anthropic_usage(self.pending_usage.clone(), u, false));
1846                }
1847            }
1848            "message_stop" => {
1849                if let Some(done) = self.emit_done() {
1850                    out.push(done);
1851                }
1852            }
1853            _ => { /* forward-compat: silently ignore unknown event types */ }
1854        }
1855        Ok(out)
1856    }
1857
1858    fn finalize(&mut self) -> Option<ModelChunk> {
1859        self.emit_done()
1860    }
1861
1862    fn emit_done(&mut self) -> Option<ModelChunk> {
1863        if self.done_emitted {
1864            return None;
1865        }
1866        self.done_emitted = true;
1867        let stop_reason = map_anthropic_stop_reason(self.stop_reason.as_deref());
1868        Some(ModelChunk::Done {
1869            stop_reason,
1870            usage: self.pending_usage.take(),
1871        })
1872    }
1873}
1874
1875/// Merge an Anthropic usage block (from `message_start` or `message_delta`)
1876/// into the running tally. When `include_input=true`, accept both
1877/// `input_tokens` and the cache-related fields; otherwise only refresh
1878/// `output_tokens` (per Anthropic's spec — `message_delta` only carries
1879/// output deltas).
1880fn merge_anthropic_usage(
1881    prior: Option<HarnessUsage>,
1882    incoming: &Value,
1883    include_input: bool,
1884) -> HarnessUsage {
1885    let mut u = prior.unwrap_or_default();
1886    if include_input {
1887        if let Some(v) = incoming.get("input_tokens").and_then(|v| v.as_u64()) {
1888            u.input_tokens = v;
1889        }
1890        if let Some(v) = incoming
1891            .get("cache_read_input_tokens")
1892            .and_then(|v| v.as_u64())
1893        {
1894            u.cache_read_input_tokens = v;
1895        }
1896        if let Some(v) = incoming
1897            .get("cache_creation_input_tokens")
1898            .and_then(|v| v.as_u64())
1899        {
1900            u.cache_creation_input_tokens = v;
1901        }
1902    }
1903    if let Some(v) = incoming.get("output_tokens").and_then(|v| v.as_u64()) {
1904        u.output_tokens = v;
1905    }
1906    u
1907}
1908
/// Normalise an Anthropic `stop_reason` into the harness's stop-reason
/// string.
///
/// `end_turn`, `stop_sequence` and `tool_use` all collapse to
/// `"end_turn"`, as does a missing or empty reason. Every other value
/// (`max_tokens`, `refusal`, or anything not listed here) is passed
/// through unchanged.
fn map_anthropic_stop_reason(reason: Option<&str>) -> String {
    let normalized = match reason.unwrap_or("") {
        "" | "end_turn" | "stop_sequence" | "tool_use" => "end_turn",
        passthrough => passthrough,
    };
    normalized.to_owned()
}
1920
1921/// HTTP status → `ModelClientError` for Anthropic. Status codes mostly
1922/// align with OpenAI's so we reuse the classifier logic; the body
1923/// heuristics differ slightly (Anthropic uses `type:"error"` with a
1924/// `message` field rather than nested `error.message`).
1925fn classify_anthropic_http_error(status: reqwest::StatusCode, body: &str) -> ModelClientError {
1926    use reqwest::StatusCode;
1927    let snippet = body.chars().take(512).collect::<String>();
1928    if status == StatusCode::TOO_MANY_REQUESTS {
1929        return ModelClientError::RateLimit(format!("HTTP {status}: {snippet}"));
1930    }
1931    if status == StatusCode::UNAUTHORIZED || status == StatusCode::FORBIDDEN {
1932        return ModelClientError::Auth(format!("HTTP {status}: {snippet}"));
1933    }
1934    if status == StatusCode::BAD_REQUEST && looks_like_context_overflow(body) {
1935        return ModelClientError::ContextOverflow(format!("HTTP {status}: {snippet}"));
1936    }
1937    if status == StatusCode::BAD_REQUEST {
1938        return ModelClientError::BadRequest(format!("HTTP {status}: {snippet}"));
1939    }
1940    if status.is_server_error() {
1941        return ModelClientError::ServerError(format!("HTTP {status}: {snippet}"));
1942    }
1943    ModelClientError::Other(format!("HTTP {status}: {snippet}"))
1944}
1945
1946#[cfg(test)]
1947mod tests {
1948    use super::*;
1949
1950    fn user(prompt: &str) -> ModelTurnInput {
1951        ModelTurnInput {
1952            system_prompt: None,
1953            messages: vec![ChatMessage::User {
1954                content: prompt.into(),
1955                attachments: vec![],
1956            }],
1957            tools: vec![],
1958            tool_choice: ToolChoice::Auto,
1959            parallel_tool_calls: None,
1960        }
1961    }
1962
1963    fn bash_spec() -> ToolSpec {
1964        ToolSpec {
1965            name: "bash".into(),
1966            description: "Run a shell command inside the sandbox.".into(),
1967            input_schema: json!({
1968                "type": "object",
1969                "properties": {"command": {"type": "string"}},
1970                "required": ["command"],
1971                "additionalProperties": false
1972            }),
1973        }
1974    }
1975
1976    #[test]
1977    fn openai_client_builds_chat_completions_request() {
1978        // base_url is the full prefix: the route is appended verbatim, the
1979        // version segment is NOT injected — a trailing slash is tolerated.
1980        let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
1981            base_url: "https://example.test/v1/".into(),
1982            api_key: "sk-test".into(),
1983            model: "gpt-test".into(),
1984            temperature: None,
1985            max_tokens: None,
1986            reasoning_effort: None,
1987        });
1988        assert_eq!(
1989            client.endpoint(),
1990            "https://example.test/v1/chat/completions"
1991        );
1992        let client_with_v1 = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
1993            base_url: "https://example.test/v1".into(),
1994            api_key: "sk-test".into(),
1995            model: "gpt-test".into(),
1996            temperature: None,
1997            max_tokens: None,
1998            reasoning_effort: None,
1999        });
2000        assert_eq!(
2001            client_with_v1.endpoint(),
2002            "https://example.test/v1/chat/completions"
2003        );
2004        // Providers whose version segment is not `/v1` (e.g. GLM's `/v4`) are
2005        // honored verbatim — regression guard for the `/v1`-injection bug.
2006        let glm = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2007            base_url: "https://open.bigmodel.cn/api/coding/paas/v4".into(),
2008            api_key: "sk-test".into(),
2009            model: "glm-4.6".into(),
2010            temperature: None,
2011            max_tokens: None,
2012            reasoning_effort: None,
2013        });
2014        assert_eq!(
2015            glm.endpoint(),
2016            "https://open.bigmodel.cn/api/coding/paas/v4/chat/completions"
2017        );
2018        let body = client.request_body(&user("hello"));
2019        assert_eq!(body["model"], "gpt-test");
2020        assert_eq!(body["messages"][0]["role"], "user");
2021        assert_eq!(body["messages"][0]["content"], "hello");
2022        // tools omitted ⇒ no `tools` / `tool_choice` keys
2023        assert!(body.get("tools").is_none());
2024        assert!(body.get("tool_choice").is_none());
2025
2026        // Now pass an actual ToolSpec and confirm it renders as a function.
2027        let with_tools = ModelTurnInput {
2028            system_prompt: None,
2029            messages: vec![ChatMessage::User {
2030                content: "hello".into(),
2031                attachments: vec![],
2032            }],
2033            tools: vec![bash_spec()],
2034            tool_choice: ToolChoice::Auto,
2035            parallel_tool_calls: None,
2036        };
2037        let body = client.request_body(&with_tools);
2038        assert_eq!(body["tools"][0]["function"]["name"], "bash");
2039        assert_eq!(
2040            body["tools"][0]["function"]["parameters"]["required"][0],
2041            "command"
2042        );
2043        assert_eq!(body["tool_choice"], "auto");
2044        // parallel_tool_calls defaults to None ⇒ field omitted (let
2045        // OpenAI server-side default apply).
2046        assert!(body.get("parallel_tool_calls").is_none());
2047    }
2048
2049    #[test]
2050    fn openai_client_emits_tool_choice_required() {
2051        let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2052            base_url: "https://example.test".into(),
2053            api_key: "sk-test".into(),
2054            model: "gpt-test".into(),
2055            temperature: None,
2056            max_tokens: None,
2057            reasoning_effort: None,
2058        });
2059        let body = client.request_body(&ModelTurnInput {
2060            system_prompt: None,
2061            messages: vec![ChatMessage::User {
2062                content: "go".into(),
2063                attachments: vec![],
2064            }],
2065            tools: vec![bash_spec()],
2066            tool_choice: ToolChoice::Required,
2067            parallel_tool_calls: Some(false),
2068        });
2069        assert_eq!(body["tool_choice"], "required");
2070        assert_eq!(body["parallel_tool_calls"], false);
2071    }
2072
2073    #[test]
2074    fn openai_client_emits_tool_choice_named_tool() {
2075        let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2076            base_url: "https://example.test".into(),
2077            api_key: "sk-test".into(),
2078            model: "gpt-test".into(),
2079            temperature: None,
2080            max_tokens: None,
2081            reasoning_effort: None,
2082        });
2083        let body = client.request_body(&ModelTurnInput {
2084            system_prompt: None,
2085            messages: vec![ChatMessage::User {
2086                content: "go".into(),
2087                attachments: vec![],
2088            }],
2089            tools: vec![bash_spec()],
2090            tool_choice: ToolChoice::Tool("bash".into()),
2091            parallel_tool_calls: None,
2092        });
2093        assert_eq!(body["tool_choice"]["type"], "function");
2094        assert_eq!(body["tool_choice"]["function"]["name"], "bash");
2095    }
2096
2097    #[test]
2098    fn openai_client_drops_tools_when_choice_is_none() {
2099        // tool_choice: None — we drop the tools entirely so the model
2100        // can't call what it doesn't see (cheaper prompt + equivalent
2101        // semantic).
2102        let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2103            base_url: "https://example.test".into(),
2104            api_key: "sk-test".into(),
2105            model: "gpt-test".into(),
2106            temperature: None,
2107            max_tokens: None,
2108            reasoning_effort: None,
2109        });
2110        let body = client.request_body(&ModelTurnInput {
2111            system_prompt: None,
2112            messages: vec![ChatMessage::User {
2113                content: "go".into(),
2114                attachments: vec![],
2115            }],
2116            tools: vec![bash_spec()],
2117            tool_choice: ToolChoice::None,
2118            parallel_tool_calls: None,
2119        });
2120        assert!(body.get("tools").is_none(), "tools should be dropped");
2121        assert!(body.get("tool_choice").is_none());
2122    }
2123
2124    #[test]
2125    fn tool_choice_parse_handles_canonical_strings() {
2126        assert!(matches!(ToolChoice::parse(""), ToolChoice::Auto));
2127        assert!(matches!(ToolChoice::parse("auto"), ToolChoice::Auto));
2128        assert!(matches!(ToolChoice::parse("AUTO"), ToolChoice::Auto));
2129        assert!(matches!(ToolChoice::parse("none"), ToolChoice::None));
2130        assert!(matches!(
2131            ToolChoice::parse("required"),
2132            ToolChoice::Required
2133        ));
2134        assert!(matches!(ToolChoice::parse("any"), ToolChoice::Required));
2135        match ToolChoice::parse("tool:bash") {
2136            ToolChoice::Tool(name) => assert_eq!(name, "bash"),
2137            other => panic!("expected Tool(bash), got {other:?}"),
2138        }
2139        // Unknown degrades to Auto (forward-compat).
2140        assert!(matches!(ToolChoice::parse("garbage"), ToolChoice::Auto));
2141    }
2142
2143    #[test]
2144    fn openai_client_prepends_system_when_set() {
2145        let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2146            base_url: "https://example.test".into(),
2147            api_key: "sk-test".into(),
2148            model: "gpt-test".into(),
2149            temperature: None,
2150            max_tokens: None,
2151            reasoning_effort: None,
2152        });
2153        let input = ModelTurnInput {
2154            system_prompt: Some("you are concise".into()),
2155            messages: vec![ChatMessage::User {
2156                content: "hi".into(),
2157                attachments: vec![],
2158            }],
2159            tools: vec![],
2160            tool_choice: ToolChoice::Auto,
2161            parallel_tool_calls: None,
2162        };
2163        let body = client.request_body(&input);
2164        assert_eq!(body["messages"][0]["role"], "system");
2165        assert_eq!(body["messages"][0]["content"], "you are concise");
2166        assert_eq!(body["messages"][1]["role"], "user");
2167    }
2168
2169    #[test]
2170    fn parse_openai_usage_extracts_token_counts() {
2171        let u = parse_openai_usage(Some(&json!({
2172            "prompt_tokens": 12,
2173            "completion_tokens": 7,
2174            "total_tokens": 19,
2175            "prompt_tokens_details": {"cached_tokens": 4}
2176        })))
2177        .expect("usage parsed");
2178        assert_eq!(u.input_tokens, 12);
2179        assert_eq!(u.output_tokens, 7);
2180        assert_eq!(u.cache_read_input_tokens, 4);
2181        assert_eq!(u.cache_creation_input_tokens, 0);
2182    }
2183
2184    #[test]
2185    fn parse_openai_usage_without_cache_details() {
2186        let u = parse_openai_usage(Some(&json!({
2187            "prompt_tokens": 200,
2188            "completion_tokens": 30
2189        })))
2190        .expect("usage parsed");
2191        assert_eq!(u.input_tokens, 200);
2192        assert_eq!(u.output_tokens, 30);
2193        assert_eq!(u.cache_read_input_tokens, 0);
2194    }
2195
2196    #[test]
2197    fn openai_stream_state_emits_text_deltas_then_done() {
2198        let mut state = OpenAiStreamState::default();
2199        // First chunk seeds id + role.
2200        let out = state
2201            .feed_data(
2202                r#"{"id":"chatcmpl-1","choices":[{"index":0,"delta":{"role":"assistant","content":""}}]}"#,
2203            )
2204            .unwrap();
2205        assert!(out.is_empty(), "empty content shouldn't emit");
2206        // Text deltas.
2207        let out = state
2208            .feed_data(r#"{"choices":[{"index":0,"delta":{"content":"Hello"}}]}"#)
2209            .unwrap();
2210        assert_eq!(out.len(), 1);
2211        match &out[0] {
2212            ModelChunk::TextDelta { msg_id, delta } => {
2213                assert_eq!(msg_id, "chatcmpl-1");
2214                assert_eq!(delta, "Hello");
2215            }
2216            other => panic!("expected TextDelta, got {other:?}"),
2217        }
2218        let out = state
2219            .feed_data(r#"{"choices":[{"index":0,"delta":{"content":" world"}}]}"#)
2220            .unwrap();
2221        assert_eq!(out.len(), 1);
2222        // finish_reason in penultimate chunk (no Done yet — usage may
2223        // follow if include_usage was set).
2224        let out = state
2225            .feed_data(r#"{"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}"#)
2226            .unwrap();
2227        assert!(out.is_empty());
2228        // include_usage final chunk (empty choices, usage populated).
2229        let out = state
2230            .feed_data(r#"{"choices":[],"usage":{"prompt_tokens":10,"completion_tokens":3}}"#)
2231            .unwrap();
2232        assert!(out.is_empty());
2233        // [DONE] sentinel produces the final Done chunk.
2234        let out = state.feed_data("[DONE]").unwrap();
2235        assert_eq!(out.len(), 1);
2236        match &out[0] {
2237            ModelChunk::Done { stop_reason, usage } => {
2238                assert_eq!(stop_reason, "end_turn");
2239                let u = usage.as_ref().expect("usage propagated");
2240                assert_eq!(u.input_tokens, 10);
2241                assert_eq!(u.output_tokens, 3);
2242            }
2243            other => panic!("expected Done, got {other:?}"),
2244        }
2245    }
2246
2247    #[test]
2248    fn openai_stream_state_emits_tool_call_chunks() {
2249        let mut state = OpenAiStreamState::default();
2250        // First tool-call chunk: id + name + initial arguments.
2251        let out = state
2252            .feed_data(
2253                r#"{"id":"c1","choices":[{"index":0,"delta":{"role":"assistant","tool_calls":[{"index":0,"id":"call_x","type":"function","function":{"name":"bash","arguments":""}}]}}]}"#,
2254            )
2255            .unwrap();
2256        assert_eq!(out.len(), 1);
2257        match &out[0] {
2258            ModelChunk::ToolCallStart { id, name } => {
2259                assert_eq!(id, "call_x");
2260                assert_eq!(name, "bash");
2261            }
2262            other => panic!("expected ToolCallStart, got {other:?}"),
2263        }
2264        // Streaming arguments come split into multiple chunks.
2265        let out = state
2266            .feed_data(
2267                r#"{"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]}}]}"#,
2268            )
2269            .unwrap();
2270        assert_eq!(out.len(), 1);
2271        let ModelChunk::ToolCallInputDelta { delta, .. } = &out[0] else {
2272            panic!("expected ToolCallInputDelta");
2273        };
2274        assert_eq!(delta, "{\"");
2275
2276        let out = state
2277            .feed_data(
2278                r#"{"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"cmd\":\"pwd\"}"}}]}}]}"#,
2279            )
2280            .unwrap();
2281        let ModelChunk::ToolCallInputDelta { delta, .. } = &out[0] else {
2282            panic!("expected ToolCallInputDelta");
2283        };
2284        assert_eq!(delta, "cmd\":\"pwd\"}");
2285
2286        // finish_reason="tool_calls" should emit ToolCallEnd for every
2287        // open tool call.
2288        let out = state
2289            .feed_data(r#"{"choices":[{"index":0,"delta":{},"finish_reason":"tool_calls"}]}"#)
2290            .unwrap();
2291        assert_eq!(out.len(), 1);
2292        match &out[0] {
2293            ModelChunk::ToolCallEnd { id, input } => {
2294                assert_eq!(id, "call_x");
2295                assert!(input.is_none(), "OpenAI streaming defers parsing");
2296            }
2297            other => panic!("expected ToolCallEnd, got {other:?}"),
2298        }
2299
2300        // Stream closes (no [DONE] sentinel from some gateways) → finalize().
2301        let final_chunk = state.finalize().expect("finalize emits Done");
2302        match final_chunk {
2303            ModelChunk::Done { stop_reason, .. } => assert_eq!(stop_reason, "end_turn"),
2304            other => panic!("expected Done from finalize, got {other:?}"),
2305        }
2306    }
2307
2308    #[test]
2309    fn map_openai_finish_reason_table() {
2310        assert_eq!(map_openai_finish_reason(Some("stop")), "end_turn");
2311        assert_eq!(map_openai_finish_reason(Some("length")), "max_tokens");
2312        assert_eq!(map_openai_finish_reason(Some("tool_calls")), "end_turn");
2313        assert_eq!(map_openai_finish_reason(Some("content_filter")), "refusal");
2314        assert_eq!(map_openai_finish_reason(None), "end_turn");
2315        assert_eq!(map_openai_finish_reason(Some("")), "end_turn");
2316    }
2317
    // ── OpenAI wire projection: multimodal messages ───────────────────
2319
2320    #[test]
2321    fn chat_message_to_openai_wire_text_only_keeps_string_content() {
2322        // Fast path: no attachments → content stays as a plain string
2323        // (byte-identical to pre-multimodal behaviour).
2324        let msg = ChatMessage::User {
2325            content: "hello".into(),
2326            attachments: vec![],
2327        };
2328        let v = chat_message_to_wire(&msg);
2329        assert_eq!(v["role"], "user");
2330        assert_eq!(v["content"], "hello");
2331        // Not an array — that distinction matters for OpenAI-compatible
2332        // gateways that strict-parse the chat schema.
2333        assert!(v["content"].is_string());
2334    }
2335
2336    #[test]
2337    fn chat_message_to_openai_wire_with_base64_image() {
2338        let msg = ChatMessage::User {
2339            content: "describe this".into(),
2340            attachments: vec![UserAttachment::Image(ImageSource {
2341                media_type: "image/png".into(),
2342                data: ImageData::Base64("iVBORw0KG...".into()),
2343            })],
2344        };
2345        let v = chat_message_to_wire(&msg);
2346        let parts = v["content"].as_array().expect("content array");
2347        assert_eq!(parts.len(), 2);
2348        assert_eq!(parts[0]["type"], "text");
2349        assert_eq!(parts[0]["text"], "describe this");
2350        assert_eq!(parts[1]["type"], "image_url");
2351        // base64 must be wrapped in a data URI for OpenAI.
2352        let url = parts[1]["image_url"]["url"].as_str().unwrap();
2353        assert!(url.starts_with("data:image/png;base64,"));
2354        assert!(url.contains("iVBORw0KG..."));
2355    }
2356
2357    #[test]
2358    fn chat_message_to_openai_wire_with_url_image() {
2359        let msg = ChatMessage::User {
2360            content: "".into(), // empty text, image-only
2361            attachments: vec![UserAttachment::Image(ImageSource {
2362                media_type: "image/jpeg".into(),
2363                data: ImageData::Url("https://cdn.example.com/cat.jpg".into()),
2364            })],
2365        };
2366        let v = chat_message_to_wire(&msg);
2367        let parts = v["content"].as_array().unwrap();
2368        // Empty text dropped — only image part survives.
2369        assert_eq!(parts.len(), 1);
2370        assert_eq!(parts[0]["type"], "image_url");
2371        assert_eq!(
2372            parts[0]["image_url"]["url"],
2373            "https://cdn.example.com/cat.jpg"
2374        );
2375    }
2376
2377    #[test]
2378    fn chat_message_to_openai_tool_role_degrades_image_to_placeholder() {
2379        // OpenAI's `tool` role is strictly string-typed — images coming
2380        // out of MCP tool calls have to degrade. We append a placeholder
2381        // line per attachment so the model still notices something
2382        // visual was returned, even if it can't see it.
2383        let msg = ChatMessage::Tool {
2384            tool_call_id: "call_x".into(),
2385            content: "ok".into(),
2386            is_error: false,
2387            attachments: vec![UserAttachment::Image(ImageSource {
2388                media_type: "image/png".into(),
2389                data: ImageData::Base64("AAA".into()),
2390            })],
2391        };
2392        let v = chat_message_to_wire(&msg);
2393        assert_eq!(v["role"], "tool");
2394        assert_eq!(v["tool_call_id"], "call_x");
2395        let content = v["content"].as_str().unwrap();
2396        assert!(content.starts_with("ok\n"));
2397        assert!(content.contains("image attached: image/png"));
2398        // Base64 bytes must NOT leak into the wire — degradation, not
2399        // smuggling.
2400        assert!(!content.contains("AAA"));
2401    }
2402
2403    // ── E7: tool_result replay compaction ──
2404
2405    #[test]
2406    fn replay_compaction_leaves_small_results_untouched() {
2407        let small = "x".repeat(1_000);
2408        assert!(matches!(
2409            compact_tool_result_for_replay(&small),
2410            std::borrow::Cow::Borrowed(_)
2411        ));
2412        // Over the token budget (10_000 ASCII chars / 4 = 2_500 > 2_000)
2413        // → compacted even though bytes are under 12 KB.
2414        let medium = "word ".repeat(2_000);
2415        let out = compact_tool_result_for_replay(&medium);
2416        assert!(out.contains("compacted for model replay"));
2417    }
2418
2419    #[test]
2420    fn replay_compaction_keeps_head_and_tail_deterministically() {
2421        let body = format!("HEAD_MARK{}TAIL_MARK", "x".repeat(20_000));
2422        let first = compact_tool_result_for_replay(&body).into_owned();
2423        let second = compact_tool_result_for_replay(&body).into_owned();
2424        // Deterministic: byte-identical across calls (prompt-cache safety).
2425        assert_eq!(first, second);
2426        assert!(first.starts_with("[tool result compacted for model replay]"));
2427        assert!(first.contains("HEAD_MARK"), "head survives");
2428        assert!(first.contains("TAIL_MARK"), "tail survives");
2429        assert!(first.contains("omitted"), "omission marker present");
2430        // Massively smaller than the original.
2431        assert!(first.len() < body.len() / 2);
2432    }
2433
2434    #[test]
2435    fn openai_projection_compacts_oversized_tool_result() {
2436        let big = format!("START{}END", "y".repeat(20_000));
2437        let msg = ChatMessage::Tool {
2438            tool_call_id: "call_big".into(),
2439            content: big.clone(),
2440            is_error: false,
2441            attachments: vec![],
2442        };
2443        let v = chat_message_to_wire(&msg);
2444        let content = v["content"].as_str().unwrap();
2445        assert!(content.contains("compacted for model replay"));
2446        assert!(content.contains("START") && content.contains("END"));
2447        // Projection-only: the source message still holds the full result.
2448        match &msg {
2449            ChatMessage::Tool { content, .. } => assert_eq!(content.len(), big.len()),
2450            _ => unreachable!(),
2451        }
2452    }
2453
2454    #[test]
2455    fn anthropic_projection_compacts_oversized_tool_result() {
2456        let big = "z".repeat(20_000);
2457        let msgs = vec![
2458            ChatMessage::Assistant {
2459                text: None,
2460                tool_calls: vec![crate::tools::ToolInvocation {
2461                    id: "tc_big".into(),
2462                    name: "bash".into(),
2463                    input: json!({}),
2464                }],
2465                thinking: None,
2466            },
2467            ChatMessage::Tool {
2468                tool_call_id: "tc_big".into(),
2469                content: big,
2470                is_error: false,
2471                attachments: vec![],
2472            },
2473        ];
2474        let wire = chat_messages_to_anthropic_messages(&msgs);
2475        let rendered = serde_json::to_string(&wire).unwrap();
2476        assert!(rendered.contains("compacted for model replay"));
2477    }
2478
2479    #[test]
2480    fn chat_messages_to_anthropic_tool_result_carries_image_block() {
2481        // After an assistant tool_use, a Tool message that carries an
2482        // image attachment (e.g. MCP screenshot) should project to a
2483        // tool_result whose content array contains both the text and
2484        // the image content block.
2485        let msgs = vec![
2486            ChatMessage::Assistant {
2487                text: None,
2488                tool_calls: vec![ToolInvocation {
2489                    id: "tc_img".into(),
2490                    name: "screenshot".into(),
2491                    input: json!({}),
2492                }],
2493                thinking: None,
2494            },
2495            ChatMessage::Tool {
2496                tool_call_id: "tc_img".into(),
2497                content: "see image".into(),
2498                is_error: false,
2499                attachments: vec![UserAttachment::Image(ImageSource {
2500                    media_type: "image/png".into(),
2501                    data: ImageData::Base64("PNGBYTES".into()),
2502                })],
2503            },
2504        ];
2505        let out = chat_messages_to_anthropic_messages(&msgs);
2506        // [assistant tool_use, user(tool_result containing text+image)]
2507        assert_eq!(out.len(), 2);
2508        let user = &out[1];
2509        assert_eq!(user["role"], "user");
2510        let outer = user["content"].as_array().unwrap();
2511        assert_eq!(outer.len(), 1);
2512        assert_eq!(outer[0]["type"], "tool_result");
2513        assert_eq!(outer[0]["tool_use_id"], "tc_img");
2514        let inner = outer[0]["content"].as_array().unwrap();
2515        // tool_result content is now block-array form (not a string)
2516        // when attachments are present.
2517        assert_eq!(inner.len(), 2);
2518        assert_eq!(inner[0]["type"], "text");
2519        assert_eq!(inner[0]["text"], "see image");
2520        assert_eq!(inner[1]["type"], "image");
2521        assert_eq!(inner[1]["source"]["type"], "base64");
2522        assert_eq!(inner[1]["source"]["media_type"], "image/png");
2523        assert_eq!(inner[1]["source"]["data"], "PNGBYTES");
2524    }
2525
2526    #[test]
2527    fn chat_messages_to_anthropic_renders_user_text_with_image_block() {
2528        let msgs = vec![ChatMessage::User {
2529            content: "what is this".into(),
2530            attachments: vec![UserAttachment::Image(ImageSource {
2531                media_type: "image/png".into(),
2532                data: ImageData::Base64("AAAA".into()),
2533            })],
2534        }];
2535        let out = chat_messages_to_anthropic_messages(&msgs);
2536        assert_eq!(out.len(), 1);
2537        let blocks = out[0]["content"].as_array().unwrap();
2538        // Text first, then image — same ordering as OpenAI parts.
2539        assert_eq!(blocks[0]["type"], "text");
2540        assert_eq!(blocks[0]["text"], "what is this");
2541        assert_eq!(blocks[1]["type"], "image");
2542        // base64 source shape
2543        assert_eq!(blocks[1]["source"]["type"], "base64");
2544        assert_eq!(blocks[1]["source"]["media_type"], "image/png");
2545        assert_eq!(blocks[1]["source"]["data"], "AAAA");
2546    }
2547
2548    #[test]
2549    fn chat_messages_to_anthropic_renders_url_image() {
2550        let msgs = vec![ChatMessage::User {
2551            content: "".into(),
2552            attachments: vec![UserAttachment::Image(ImageSource {
2553                media_type: "image/jpeg".into(),
2554                data: ImageData::Url("https://example.com/x.jpg".into()),
2555            })],
2556        }];
2557        let out = chat_messages_to_anthropic_messages(&msgs);
2558        let blocks = out[0]["content"].as_array().unwrap();
2559        // Empty-text user with image: only one block (the image).
2560        assert_eq!(blocks.len(), 1);
2561        assert_eq!(blocks[0]["type"], "image");
2562        assert_eq!(blocks[0]["source"]["type"], "url");
2563        assert_eq!(blocks[0]["source"]["url"], "https://example.com/x.jpg");
2564    }
2565
2566    #[test]
2567    fn chat_message_to_anthropic_merges_tool_results_and_image() {
2568        // Tool result pending + a user message that also carries an image:
2569        // both must land in the same user message's content array.
2570        let msgs = vec![
2571            ChatMessage::Assistant {
2572                text: None,
2573                tool_calls: vec![ToolInvocation {
2574                    id: "tc_1".into(),
2575                    name: "screenshot".into(),
2576                    input: json!({}),
2577                }],
2578                thinking: None,
2579            },
2580            ChatMessage::Tool {
2581                tool_call_id: "tc_1".into(),
2582                content: "captured".into(),
2583                is_error: false,
2584                attachments: vec![],
2585            },
2586            ChatMessage::User {
2587                content: "what changed?".into(),
2588                attachments: vec![UserAttachment::Image(ImageSource {
2589                    media_type: "image/png".into(),
2590                    data: ImageData::Base64("ZZ".into()),
2591                })],
2592            },
2593        ];
2594        let out = chat_messages_to_anthropic_messages(&msgs);
2595        // [assistant tool_use, user(tool_result + text + image)]
2596        assert_eq!(out.len(), 2);
2597        let blocks = out[1]["content"].as_array().unwrap();
2598        assert_eq!(blocks.len(), 3);
2599        assert_eq!(blocks[0]["type"], "tool_result");
2600        assert_eq!(blocks[0]["tool_use_id"], "tc_1");
2601        assert_eq!(blocks[1]["type"], "text");
2602        assert_eq!(blocks[1]["text"], "what changed?");
2603        assert_eq!(blocks[2]["type"], "image");
2604    }
2605
2606    #[test]
2607    fn chat_messages_to_anthropic_renders_simple_user_assistant() {
2608        let msgs = vec![
2609            ChatMessage::User {
2610                content: "hi".into(),
2611                attachments: vec![],
2612            },
2613            ChatMessage::Assistant {
2614                text: Some("hello".into()),
2615                tool_calls: vec![],
2616                thinking: None,
2617            },
2618        ];
2619        let out = chat_messages_to_anthropic_messages(&msgs);
2620        assert_eq!(out.len(), 2);
2621        assert_eq!(out[0]["role"], "user");
2622        assert_eq!(out[0]["content"][0]["type"], "text");
2623        assert_eq!(out[0]["content"][0]["text"], "hi");
2624        assert_eq!(out[1]["role"], "assistant");
2625        assert_eq!(out[1]["content"][0]["text"], "hello");
2626    }
2627
2628    #[test]
2629    fn chat_messages_to_anthropic_folds_tool_results_into_next_user() {
2630        // After an assistant tool_use, the next user message is the one
2631        // carrying the tool_result content block — no separate user/tool
2632        // hop on the wire.
2633        let msgs = vec![
2634            ChatMessage::User {
2635                content: "do it".into(),
2636                attachments: vec![],
2637            },
2638            ChatMessage::Assistant {
2639                text: None,
2640                tool_calls: vec![ToolInvocation {
2641                    id: "call_1".into(),
2642                    name: "bash".into(),
2643                    input: json!({"command": "pwd"}),
2644                }],
2645                thinking: None,
2646            },
2647            ChatMessage::Tool {
2648                tool_call_id: "call_1".into(),
2649                content: "{\"stdout\":\"/\"}".into(),
2650                is_error: false,
2651                attachments: vec![],
2652            },
2653            ChatMessage::User {
2654                content: "explain".into(),
2655                attachments: vec![],
2656            },
2657        ];
2658        let out = chat_messages_to_anthropic_messages(&msgs);
2659        // user, assistant(tool_use), user(tool_result + "explain")
2660        assert_eq!(out.len(), 3);
2661        assert_eq!(out[1]["role"], "assistant");
2662        assert_eq!(out[1]["content"][0]["type"], "tool_use");
2663        assert_eq!(out[1]["content"][0]["id"], "call_1");
2664        assert_eq!(out[2]["role"], "user");
2665        assert_eq!(out[2]["content"][0]["type"], "tool_result");
2666        assert_eq!(out[2]["content"][0]["tool_use_id"], "call_1");
2667        assert_eq!(out[2]["content"][1]["type"], "text");
2668        assert_eq!(out[2]["content"][1]["text"], "explain");
2669    }
2670
2671    #[test]
2672    fn chat_messages_to_anthropic_renders_thinking_then_text_then_tool_use() {
2673        let msgs = vec![ChatMessage::Assistant {
2674            text: Some("preface".into()),
2675            tool_calls: vec![ToolInvocation {
2676                id: "t".into(),
2677                name: "n".into(),
2678                input: json!({"a": 1}),
2679            }],
2680            thinking: Some(AssistantThinking {
2681                text: "deep thought".into(),
2682                signature: Some("sig123".into()),
2683            }),
2684        }];
2685        let out = chat_messages_to_anthropic_messages(&msgs);
2686        let blocks = out[0]["content"].as_array().unwrap();
2687        // Order: thinking → text → tool_use.
2688        assert_eq!(blocks[0]["type"], "thinking");
2689        assert_eq!(blocks[0]["thinking"], "deep thought");
2690        assert_eq!(blocks[0]["signature"], "sig123");
2691        assert_eq!(blocks[1]["type"], "text");
2692        assert_eq!(blocks[1]["text"], "preface");
2693        assert_eq!(blocks[2]["type"], "tool_use");
2694    }
2695
2696    #[test]
2697    fn chat_messages_to_anthropic_trailing_tool_results_flushed() {
2698        // Conversation ends on tool results without a follow-up user
2699        // turn — we still need to send the results to the model.
2700        let msgs = vec![
2701            ChatMessage::Assistant {
2702                text: None,
2703                tool_calls: vec![ToolInvocation {
2704                    id: "t".into(),
2705                    name: "n".into(),
2706                    input: json!({}),
2707                }],
2708                thinking: None,
2709            },
2710            ChatMessage::Tool {
2711                tool_call_id: "t".into(),
2712                content: "ok".into(),
2713                is_error: false,
2714                attachments: vec![],
2715            },
2716        ];
2717        let out = chat_messages_to_anthropic_messages(&msgs);
2718        assert_eq!(out.len(), 2);
2719        assert_eq!(out[1]["role"], "user");
2720        assert_eq!(out[1]["content"][0]["type"], "tool_result");
2721    }
2722
2723    #[test]
2724    fn apply_anthropic_cache_strategy_marks_system_last_tool_and_last_message() {
2725        let system = anthropic_system_field(Some("system prompt"));
2726        let tools = vec![
2727            json!({"name": "a", "description": "", "input_schema": {"type": "object"}}),
2728            json!({"name": "b", "description": "", "input_schema": {"type": "object"}}),
2729        ];
2730        let messages = vec![
2731            json!({"role": "user", "content": [{"type": "text", "text": "hi"}]}),
2732            json!({"role": "assistant", "content": [{"type": "text", "text": "hello"}]}),
2733        ];
2734        let out = apply_anthropic_cache_strategy(system, tools, messages);
2735        let sys_block = &out.system.as_ref().unwrap()[0];
2736        assert_eq!(sys_block["cache_control"]["type"], "ephemeral");
2737        // Last tool (only the LAST one) cached, not the first.
2738        assert!(out.tools[0].get("cache_control").is_none());
2739        assert_eq!(out.tools[1]["cache_control"]["type"], "ephemeral");
2740        // Last message's last content block cached.
2741        let last_msg_blocks = out.messages.last().unwrap()["content"].as_array().unwrap();
2742        assert_eq!(
2743            last_msg_blocks.last().unwrap()["cache_control"]["type"],
2744            "ephemeral"
2745        );
2746    }
2747
2748    #[test]
2749    fn apply_anthropic_cache_strategy_skips_empty_system() {
2750        // Empty system → omitted entirely (Anthropic rejects cache_control
2751        // on empty text blocks).
2752        let out = apply_anthropic_cache_strategy(None, vec![], vec![]);
2753        assert!(out.system.is_none());
2754    }
2755
2756    fn anthropic_client_for_tool_choice_tests() -> AnthropicModelClient {
2757        AnthropicModelClient::new(AnthropicConfig {
2758            base_url: "https://example.test".into(),
2759            api_key: "sk-test".into(),
2760            model: "claude-test".into(),
2761            max_tokens: 1024,
2762            temperature: None,
2763            anthropic_version: AnthropicConfig::DEFAULT_VERSION.into(),
2764        })
2765    }
2766
2767    #[test]
2768    fn anthropic_client_omits_tool_choice_when_auto() {
2769        // Auto is Anthropic's default — keep the wire byte-identical
2770        // by omitting the field (prompt cache stability).
2771        let client = anthropic_client_for_tool_choice_tests();
2772        let body = client.request_body(&ModelTurnInput {
2773            system_prompt: None,
2774            messages: vec![ChatMessage::User {
2775                content: "go".into(),
2776                attachments: vec![],
2777            }],
2778            tools: vec![bash_spec()],
2779            tool_choice: ToolChoice::Auto,
2780            parallel_tool_calls: None,
2781        });
2782        assert!(body["tools"].as_array().unwrap().len() > 0);
2783        assert!(body.get("tool_choice").is_none());
2784        // parallel_tool_calls is OpenAI-only — Anthropic body must
2785        // never carry it.
2786        assert!(body.get("parallel_tool_calls").is_none());
2787    }
2788
2789    #[test]
2790    fn anthropic_client_emits_tool_choice_required_as_any() {
2791        let client = anthropic_client_for_tool_choice_tests();
2792        let body = client.request_body(&ModelTurnInput {
2793            system_prompt: None,
2794            messages: vec![ChatMessage::User {
2795                content: "go".into(),
2796                attachments: vec![],
2797            }],
2798            tools: vec![bash_spec()],
2799            tool_choice: ToolChoice::Required,
2800            parallel_tool_calls: Some(true),
2801        });
2802        assert_eq!(body["tool_choice"]["type"], "any");
2803        // OpenAI-only knob must NOT leak into Anthropic body even when
2804        // caller set it (harness passes it uniformly to both providers).
2805        assert!(body.get("parallel_tool_calls").is_none());
2806    }
2807
2808    #[test]
2809    fn anthropic_client_emits_tool_choice_named_tool() {
2810        let client = anthropic_client_for_tool_choice_tests();
2811        let body = client.request_body(&ModelTurnInput {
2812            system_prompt: None,
2813            messages: vec![ChatMessage::User {
2814                content: "go".into(),
2815                attachments: vec![],
2816            }],
2817            tools: vec![bash_spec()],
2818            tool_choice: ToolChoice::Tool("bash".into()),
2819            parallel_tool_calls: None,
2820        });
2821        assert_eq!(body["tool_choice"]["type"], "tool");
2822        assert_eq!(body["tool_choice"]["name"], "bash");
2823    }
2824
2825    #[test]
2826    fn anthropic_client_drops_tools_when_choice_is_none() {
2827        // Anthropic has no native "tool_choice: none" — best
2828        // approximation is dropping the tools array. The body's
2829        // `tools` key must be absent and `tool_choice` too.
2830        let client = anthropic_client_for_tool_choice_tests();
2831        let body = client.request_body(&ModelTurnInput {
2832            system_prompt: None,
2833            messages: vec![ChatMessage::User {
2834                content: "go".into(),
2835                attachments: vec![],
2836            }],
2837            tools: vec![bash_spec()],
2838            tool_choice: ToolChoice::None,
2839            parallel_tool_calls: None,
2840        });
2841        assert!(body.get("tools").is_none());
2842        assert!(body.get("tool_choice").is_none());
2843    }
2844
2845    #[test]
2846    fn anthropic_stream_state_text_only() {
2847        let mut s = AnthropicStreamState::default();
2848        // message_start with id + usage.
2849        let _ = s
2850            .feed_event(
2851                "message_start",
2852                r#"{"type":"message_start","message":{"id":"msg_01","usage":{"input_tokens":10,"output_tokens":0}}}"#,
2853            )
2854            .unwrap();
2855        let _ = s
2856            .feed_event(
2857                "content_block_start",
2858                r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#,
2859            )
2860            .unwrap();
2861        let out = s
2862            .feed_event(
2863                "content_block_delta",
2864                r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}"#,
2865            )
2866            .unwrap();
2867        assert_eq!(out.len(), 1);
2868        match &out[0] {
2869            ModelChunk::TextDelta { msg_id, delta } => {
2870                assert_eq!(msg_id, "msg_01");
2871                assert_eq!(delta, "Hello");
2872            }
2873            other => panic!("expected TextDelta, got {other:?}"),
2874        }
2875        let _ = s.feed_event(
2876            "message_delta",
2877            r#"{"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}"#,
2878        );
2879        let out = s
2880            .feed_event("message_stop", r#"{"type":"message_stop"}"#)
2881            .unwrap();
2882        assert_eq!(out.len(), 1);
2883        match &out[0] {
2884            ModelChunk::Done { stop_reason, usage } => {
2885                assert_eq!(stop_reason, "end_turn");
2886                let u = usage.as_ref().unwrap();
2887                assert_eq!(u.input_tokens, 10);
2888                assert_eq!(u.output_tokens, 5);
2889            }
2890            other => panic!("expected Done, got {other:?}"),
2891        }
2892    }
2893
2894    #[test]
2895    fn anthropic_stream_state_thinking_block_emits_delta_and_signature() {
2896        let mut s = AnthropicStreamState::default();
2897        let _ = s.feed_event(
2898            "message_start",
2899            r#"{"type":"message_start","message":{"id":"msg_t"}}"#,
2900        );
2901        let _ = s.feed_event(
2902            "content_block_start",
2903            r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
2904        );
2905        let out = s
2906            .feed_event(
2907                "content_block_delta",
2908                r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"reasoning..."}}"#,
2909            )
2910            .unwrap();
2911        assert_eq!(out.len(), 1);
2912        let ModelChunk::ThinkingDelta {
2913            delta, signature, ..
2914        } = &out[0]
2915        else {
2916            panic!("expected ThinkingDelta");
2917        };
2918        assert_eq!(delta, "reasoning...");
2919        assert!(signature.is_none());
2920
2921        let out = s
2922            .feed_event(
2923                "content_block_delta",
2924                r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"sig_abc"}}"#,
2925            )
2926            .unwrap();
2927        let ModelChunk::ThinkingDelta {
2928            delta, signature, ..
2929        } = &out[0]
2930        else {
2931            panic!("expected ThinkingDelta");
2932        };
2933        assert_eq!(delta, "");
2934        assert_eq!(signature.as_deref(), Some("sig_abc"));
2935    }
2936
2937    #[test]
2938    fn anthropic_stream_state_tool_use_streamed_input() {
2939        let mut s = AnthropicStreamState::default();
2940        let _ = s.feed_event(
2941            "message_start",
2942            r#"{"type":"message_start","message":{"id":"msg_x"}}"#,
2943        );
2944        let out = s
2945            .feed_event(
2946                "content_block_start",
2947                r#"{"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_1","name":"bash","input":{}}}"#,
2948            )
2949            .unwrap();
2950        assert_eq!(out.len(), 1);
2951        match &out[0] {
2952            ModelChunk::ToolCallStart { id, name } => {
2953                assert_eq!(id, "toolu_1");
2954                assert_eq!(name, "bash");
2955            }
2956            other => panic!("expected ToolCallStart, got {other:?}"),
2957        }
2958        let out = s
2959            .feed_event(
2960                "content_block_delta",
2961                r#"{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"cmd\":"}}"#,
2962            )
2963            .unwrap();
2964        let ModelChunk::ToolCallInputDelta { id, delta } = &out[0] else {
2965            panic!("expected ToolCallInputDelta");
2966        };
2967        assert_eq!(id, "toolu_1");
2968        assert_eq!(delta, "{\"cmd\":");
2969
2970        let out = s
2971            .feed_event(
2972                "content_block_stop",
2973                r#"{"type":"content_block_stop","index":0}"#,
2974            )
2975            .unwrap();
2976        match &out[0] {
2977            ModelChunk::ToolCallEnd { id, input } => {
2978                assert_eq!(id, "toolu_1");
2979                assert!(input.is_none());
2980            }
2981            other => panic!("expected ToolCallEnd, got {other:?}"),
2982        }
2983    }
2984
2985    #[test]
2986    fn anthropic_stream_state_finalises_on_close_without_message_stop() {
2987        // Some gateways drop `message_stop`; the harness still needs a
2988        // Done. finalize() bridges that gap.
2989        let mut s = AnthropicStreamState::default();
2990        let _ = s
2991            .feed_event(
2992                "message_delta",
2993                r#"{"type":"message_delta","delta":{"stop_reason":"max_tokens"},"usage":{"output_tokens":100}}"#,
2994            )
2995            .unwrap();
2996        let done = s.finalize().unwrap();
2997        match done {
2998            ModelChunk::Done { stop_reason, .. } => assert_eq!(stop_reason, "max_tokens"),
2999            other => panic!("expected Done, got {other:?}"),
3000        }
3001    }
3002
3003    #[test]
3004    fn anthropic_stop_reason_mapping() {
3005        assert_eq!(map_anthropic_stop_reason(Some("end_turn")), "end_turn");
3006        assert_eq!(map_anthropic_stop_reason(Some("tool_use")), "end_turn");
3007        assert_eq!(map_anthropic_stop_reason(Some("max_tokens")), "max_tokens");
3008        assert_eq!(map_anthropic_stop_reason(Some("stop_sequence")), "end_turn");
3009        assert_eq!(map_anthropic_stop_reason(Some("refusal")), "refusal");
3010        assert_eq!(map_anthropic_stop_reason(None), "end_turn");
3011    }
3012
3013    #[test]
3014    fn classify_anthropic_http_error_buckets_by_status() {
3015        use reqwest::StatusCode;
3016        assert!(matches!(
3017            classify_anthropic_http_error(StatusCode::TOO_MANY_REQUESTS, "{}"),
3018            ModelClientError::RateLimit(_)
3019        ));
3020        assert!(matches!(
3021            classify_anthropic_http_error(StatusCode::UNAUTHORIZED, "{}"),
3022            ModelClientError::Auth(_)
3023        ));
3024        assert!(matches!(
3025            classify_anthropic_http_error(
3026                StatusCode::BAD_REQUEST,
3027                "{\"error\":{\"message\":\"prompt is too long; context_length_exceeded\"}}"
3028            ),
3029            ModelClientError::ContextOverflow(_)
3030        ));
3031        assert!(matches!(
3032            classify_anthropic_http_error(StatusCode::BAD_REQUEST, "invalid model"),
3033            ModelClientError::BadRequest(_)
3034        ));
3035        assert!(matches!(
3036            classify_anthropic_http_error(StatusCode::INTERNAL_SERVER_ERROR, "oops"),
3037            ModelClientError::ServerError(_)
3038        ));
3039    }
3040
3041    #[tokio::test]
3042    async fn collect_model_response_folds_streamed_tool_call_arguments() {
3043        // Simulate an OpenAI-style streaming tool call whose arguments
3044        // arrive in three chunks. collect_model_response should glue them
3045        // back into a parsed JSON value and ignore the leading text.
3046        let chunks = vec![
3047            Ok(ModelChunk::TextDelta {
3048                msg_id: "m".into(),
3049                delta: "ok ".into(),
3050            }),
3051            Ok(ModelChunk::ToolCallStart {
3052                id: "call_1".into(),
3053                name: "bash".into(),
3054            }),
3055            Ok(ModelChunk::ToolCallInputDelta {
3056                id: "call_1".into(),
3057                delta: "{\"command\":".into(),
3058            }),
3059            Ok(ModelChunk::ToolCallInputDelta {
3060                id: "call_1".into(),
3061                delta: "\"pwd\"}".into(),
3062            }),
3063            Ok(ModelChunk::ToolCallEnd {
3064                id: "call_1".into(),
3065                input: None,
3066            }),
3067            Ok(ModelChunk::Done {
3068                stop_reason: "end_turn".into(),
3069                usage: None,
3070            }),
3071        ];
3072        let stream = futures::stream::iter(chunks).boxed();
3073        let response = collect_model_response(stream).await.unwrap();
3074        let ModelResponse::ToolCall {
3075            invocation,
3076            preface,
3077            ..
3078        } = response
3079        else {
3080            panic!("expected ToolCall");
3081        };
3082        assert_eq!(invocation.name, "bash");
3083        assert_eq!(invocation.input["command"], "pwd");
3084        assert_eq!(preface.as_deref(), Some("ok "));
3085    }
3086
3087    #[test]
3088    fn classify_openai_http_error_buckets_by_status_and_body() {
3089        use reqwest::StatusCode;
3090        assert!(matches!(
3091            classify_openai_http_error(StatusCode::TOO_MANY_REQUESTS, "rate limit hit"),
3092            ModelClientError::RateLimit(_)
3093        ));
3094        assert!(matches!(
3095            classify_openai_http_error(StatusCode::UNAUTHORIZED, "bad key"),
3096            ModelClientError::Auth(_)
3097        ));
3098        assert!(matches!(
3099            classify_openai_http_error(StatusCode::FORBIDDEN, "no access"),
3100            ModelClientError::Auth(_)
3101        ));
3102        assert!(matches!(
3103            classify_openai_http_error(
3104                StatusCode::BAD_REQUEST,
3105                "{\"error\":{\"message\":\"this model's maximum context length is 8192\"}}"
3106            ),
3107            ModelClientError::ContextOverflow(_)
3108        ));
3109        // BAD_REQUEST without context_overflow tell-tale → BadRequest.
3110        assert!(matches!(
3111            classify_openai_http_error(StatusCode::BAD_REQUEST, "missing argument"),
3112            ModelClientError::BadRequest(_)
3113        ));
3114        // 5xx → ServerError (retryable with backoff).
3115        assert!(matches!(
3116            classify_openai_http_error(StatusCode::INTERNAL_SERVER_ERROR, "oops"),
3117            ModelClientError::ServerError(_)
3118        ));
3119    }
3120
3121    #[test]
3122    fn looks_like_context_overflow_matches_common_phrasings() {
3123        assert!(looks_like_context_overflow(
3124            "context_length_exceeded: this model has a maximum context length of 8192"
3125        ));
3126        assert!(looks_like_context_overflow("too many tokens in prompt"));
3127        assert!(looks_like_context_overflow(
3128            "Prompt exceeds the model's maximum context"
3129        ));
3130        assert!(!looks_like_context_overflow("invalid api key"));
3131    }
3132
3133    #[test]
3134    fn parse_openai_usage_returns_none_for_missing_or_all_zero() {
3135        // Field absent entirely.
3136        assert!(parse_openai_usage(None).is_none());
3137        // Field present but all zeros — treat as "provider didn't report".
3138        assert!(parse_openai_usage(Some(&json!({
3139            "prompt_tokens": 0,
3140            "completion_tokens": 0
3141        })))
3142        .is_none());
3143    }
3144
3145    #[test]
3146    fn openai_client_renders_multi_turn_history() {
3147        let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
3148            base_url: "https://example.test".into(),
3149            api_key: "sk-test".into(),
3150            model: "gpt-test".into(),
3151            temperature: Some(0.2),
3152            max_tokens: Some(128),
3153            reasoning_effort: None,
3154        });
3155        let body = client.request_body(&ModelTurnInput {
3156            system_prompt: None,
3157            messages: vec![
3158                ChatMessage::User {
3159                    content: "run pwd".into(),
3160                    attachments: vec![],
3161                },
3162                ChatMessage::Assistant {
3163                    text: None,
3164                    tool_calls: vec![ToolInvocation {
3165                        id: "call_1".into(),
3166                        name: "bash".into(),
3167                        input: json!({"command": "pwd"}),
3168                    }],
3169                    thinking: None,
3170                },
3171                ChatMessage::Tool {
3172                    tool_call_id: "call_1".into(),
3173                    content: "{\"stdout\":\"/home/user\"}".into(),
3174                    is_error: false,
3175                    attachments: vec![],
3176                },
3177            ],
3178            tools: vec![],
3179            tool_choice: ToolChoice::Auto,
3180            parallel_tool_calls: None,
3181        });
3182        assert_eq!(body["temperature"], 0.2);
3183        assert_eq!(body["max_tokens"], 128);
3184        assert_eq!(body["messages"][0]["role"], "user");
3185        assert_eq!(body["messages"][1]["role"], "assistant");
3186        assert_eq!(body["messages"][1]["tool_calls"][0]["id"], "call_1");
3187        assert_eq!(
3188            body["messages"][1]["tool_calls"][0]["function"]["name"],
3189            "bash"
3190        );
3191        assert_eq!(body["messages"][2]["role"], "tool");
3192        assert_eq!(body["messages"][2]["tool_call_id"], "call_1");
3193    }
3194
3195    #[tokio::test]
3196    async fn scripted_client_emits_tool_call_then_summary() {
3197        let scripted = ScriptedModelClient;
3198        let first = scripted
3199            .next(user("read README.md"))
3200            .await
3201            .expect("scripted first");
3202        let ModelResponse::ToolCall { invocation, .. } = first else {
3203            panic!("expected tool call on first step");
3204        };
3205        assert_eq!(invocation.name, "read");
3206
3207        // Simulate the loop appending Assistant + Tool messages, then ask
3208        // the scripted client for its next move — should be a summary.
3209        let history = ModelTurnInput {
3210            system_prompt: None,
3211            messages: vec![
3212                ChatMessage::User {
3213                    content: "read README.md".into(),
3214                    attachments: vec![],
3215                },
3216                ChatMessage::Assistant {
3217                    text: None,
3218                    tool_calls: vec![invocation.clone()],
3219                    thinking: None,
3220                },
3221                ChatMessage::Tool {
3222                    tool_call_id: invocation.id.clone(),
3223                    content: "{\"content\":\"hi\"}".into(),
3224                    is_error: false,
3225                    attachments: vec![],
3226                },
3227            ],
3228            tools: vec![],
3229            tool_choice: ToolChoice::Auto,
3230            parallel_tool_calls: None,
3231        };
3232        let second = scripted.next(history).await.expect("scripted second");
3233        let ModelResponse::Message { text, .. } = second else {
3234            panic!("expected final message after tool result");
3235        };
3236        assert!(text.contains("completed"));
3237    }
3238}