1use async_trait::async_trait;
2use eventsource_stream::Eventsource;
3use futures::stream::{BoxStream, StreamExt};
4use serde_json::{json, Value};
5
6use crate::event::HarnessUsage;
7use crate::tools::{ToolInvocation, ToolSpec};
8
/// One incremental event in a model's streamed reply.
///
/// Values of this type are the items yielded by [`ModelClient::stream`];
/// `collect_model_response` folds a whole sequence of them into a single
/// [`ModelResponse`].
#[derive(Debug, Clone, PartialEq)]
pub enum ModelChunk {
    /// A fragment of assistant text belonging to the message `msg_id`.
    TextDelta {
        msg_id: String,
        delta: String,
    },
    /// A fragment of model thinking output, optionally carrying a signature.
    ThinkingDelta {
        thinking_id: String,
        delta: String,
        signature: Option<String>,
    },
    /// A tool call with the given `id` and tool `name` has started.
    ToolCallStart {
        id: String,
        name: String,
    },
    /// A fragment of the argument text for the tool call `id`.
    ToolCallInputDelta {
        id: String,
        delta: String,
    },
    /// The tool call `id` is complete.
    ///
    /// `input` is `Some` when the producer already has the decoded arguments
    /// (the scripted client does this); when it is `None` the consumer decodes
    /// the accumulated `ToolCallInputDelta` text itself.
    ToolCallEnd {
        id: String,
        input: Option<Value>,
    },
    /// End of the reply, with its stop reason and any usage figures.
    Done {
        stop_reason: String,
        usage: Option<HarnessUsage>,
    },
}
65
/// Thinking text attached to an assistant turn, with its optional signature.
///
/// The Anthropic message conversion replays it as a `thinking` content block;
/// the OpenAI wire conversion ignores it.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct AssistantThinking {
    /// The thinking text itself.
    pub text: String,
    /// Signature replayed alongside the text when present and non-empty.
    pub signature: Option<String>,
}
76
/// An image payload together with the media type it is labelled with.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct ImageSource {
    /// Media type string; placed in the `data:` URL for OpenAI and in the
    /// `media_type` field for Anthropic when the data is base64.
    pub media_type: String,
    /// Where the image content comes from (inline base64 or a URL).
    pub data: ImageData,
}
88
/// The content of an image attachment.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ImageData {
    /// Base64 text, inserted verbatim into the provider request.
    Base64(String),
    /// A URL passed through to the provider unchanged.
    Url(String),
}
100
/// A non-text item attached to a user or tool message.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum UserAttachment {
    /// An image attachment (currently the only kind).
    Image(ImageSource),
}
109
/// One turn of conversation history, independent of any provider wire format.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ChatMessage {
    /// A user turn: text plus optional attachments.
    User {
        content: String,
        attachments: Vec<UserAttachment>,
    },
    /// An assistant turn: optional text, any tool calls it made, and optional
    /// thinking (replayed to Anthropic only).
    Assistant {
        text: Option<String>,
        tool_calls: Vec<ToolInvocation>,
        thinking: Option<AssistantThinking>,
    },
    /// The result of the tool call identified by `tool_call_id`.
    ///
    /// `is_error` is forwarded in Anthropic `tool_result` blocks; the OpenAI
    /// conversion does not send it. Image attachments are sent as image blocks
    /// to Anthropic and replaced by a textual note for OpenAI.
    Tool {
        tool_call_id: String,
        content: String,
        is_error: bool,
        attachments: Vec<UserAttachment>,
    },
}
152
/// Everything a [`ModelClient`] needs to produce one model turn.
#[derive(Debug, Clone, PartialEq)]
pub struct ModelTurnInput {
    /// Optional system prompt; empty prompts are not sent.
    pub system_prompt: Option<String>,
    /// Conversation history, oldest first.
    pub messages: Vec<ChatMessage>,
    /// Tools offered to the model on this turn.
    pub tools: Vec<ToolSpec>,
    /// Whether the model may, must, or must not call tools.
    pub tool_choice: ToolChoice,
    /// Forwarded as `parallel_tool_calls` by the OpenAI-compatible client when
    /// tools are sent; the Anthropic request builder in this file does not read it.
    pub parallel_tool_calls: Option<bool>,
}
176
/// How the model is allowed (or forced) to use tools on a turn.
#[derive(Debug, Clone, PartialEq, Default)]
pub enum ToolChoice {
    /// The model decides whether to call a tool (the default).
    #[default]
    Auto,
    /// The model must not call tools; request builders withhold the tool list.
    None,
    /// The model must call some tool.
    Required,
    /// The model must call the tool with this name.
    Tool(String),
}

impl ToolChoice {
    /// Parses a textual tool-choice setting.
    ///
    /// Surrounding whitespace is ignored and keywords are matched
    /// case-insensitively:
    ///
    /// * `""` / `"auto"` -> [`ToolChoice::Auto`]
    /// * `"none"` -> [`ToolChoice::None`]
    /// * `"required"` / `"any"` -> [`ToolChoice::Required`]
    /// * `"tool:<name>"` -> [`ToolChoice::Tool`] with the trimmed name; the name
    ///   itself keeps its case
    ///
    /// Anything unrecognised falls back to [`ToolChoice::Auto`]. That includes
    /// `"tool:"` with no name: an empty name can never identify a tool, so it
    /// is treated like any other unusable value instead of producing
    /// `Tool("")`.
    pub fn parse(s: &str) -> Self {
        const TOOL_PREFIX: &str = "tool:";

        let trimmed = s.trim();

        // `get` returns `None` when the input is shorter than the prefix or the
        // cut would split a multi-byte character, so this never panics.
        let has_tool_prefix = trimmed
            .get(..TOOL_PREFIX.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(TOOL_PREFIX));
        if has_tool_prefix {
            let name = trimmed[TOOL_PREFIX.len()..].trim();
            return if name.is_empty() {
                Self::Auto
            } else {
                Self::Tool(name.to_string())
            };
        }

        match trimmed.to_ascii_lowercase().as_str() {
            "" | "auto" => Self::Auto,
            "none" => Self::None,
            "required" | "any" => Self::Required,
            // Unknown keywords degrade to the default rather than failing.
            _ => Self::Auto,
        }
    }
}
218
/// The collapsed, non-streaming result of one model turn.
#[derive(Debug, Clone, PartialEq)]
pub enum ModelResponse {
    /// The model answered with text and made no tool call.
    Message {
        text: String,
        stop_reason: String,
        usage: Option<HarnessUsage>,
    },
    /// The model asked for a tool to be run.
    ///
    /// `preface` holds any text the model produced alongside the call, or
    /// `None` when there was none.
    ToolCall {
        preface: Option<String>,
        invocation: ToolInvocation,
        usage: Option<HarnessUsage>,
    },
}
241
242impl ModelResponse {
243 pub fn usage(&self) -> Option<&HarnessUsage> {
246 match self {
247 ModelResponse::Message { usage, .. } | ModelResponse::ToolCall { usage, .. } => {
248 usage.as_ref()
249 }
250 }
251 }
252}
253
/// Failure categories reported by a [`ModelClient`].
///
/// Each variant carries a human-readable detail string.
#[derive(Debug, thiserror::Error)]
pub enum ModelClientError {
    /// The provider rejected the request for rate-limit reasons (HTTP 429).
    #[error("rate limit: {0}")]
    RateLimit(String),
    /// Authentication or authorisation failed (HTTP 401 / 403).
    #[error("auth: {0}")]
    Auth(String),
    /// The request did not fit in the model's context window.
    #[error("context overflow: {0}")]
    ContextOverflow(String),
    /// The provider rejected the request as malformed (HTTP 400).
    #[error("bad request: {0}")]
    BadRequest(String),
    /// The provider failed internally (HTTP 5xx).
    #[error("server error: {0}")]
    ServerError(String),
    /// Transport-level failure, including a stream that broke off mid-reply.
    #[error("network: {0}")]
    Network(String),
    /// Anything that fits none of the categories above.
    #[error("model error: {0}")]
    Other(String),
}
285
286impl ModelClientError {
287 pub fn retryable(&self) -> bool {
289 matches!(
290 self,
291 Self::RateLimit(_) | Self::Network(_) | Self::ServerError(_)
292 )
293 }
294}
295
296#[async_trait]
297pub trait ModelClient: Send + Sync {
298 async fn stream(
303 &self,
304 input: ModelTurnInput,
305 ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError>;
306
307 async fn next(&self, input: ModelTurnInput) -> Result<ModelResponse, ModelClientError> {
313 let stream = self.stream(input).await?;
314 collect_model_response(stream).await
315 }
316}
317
318pub async fn collect_model_response(
324 mut stream: BoxStream<'static, Result<ModelChunk, ModelClientError>>,
325) -> Result<ModelResponse, ModelClientError> {
326 let mut text_buf = String::new();
327 let mut text_msg_id: Option<String> = None;
328 let mut tool_states: Vec<ToolStreamState> = Vec::new();
330 let mut stop_reason: Option<String> = None;
331 let mut usage: Option<HarnessUsage> = None;
332
333 while let Some(item) = stream.next().await {
334 match item? {
335 ModelChunk::TextDelta { msg_id, delta } => {
336 if text_msg_id.as_deref() != Some(&msg_id) {
337 text_msg_id = Some(msg_id);
338 text_buf.clear();
339 }
340 text_buf.push_str(&delta);
341 }
342 ModelChunk::ThinkingDelta { .. } => {
343 }
348 ModelChunk::ToolCallStart { id, name } => {
349 tool_states.push(ToolStreamState {
350 id,
351 name,
352 args_buf: String::new(),
353 early_input: None,
354 });
355 }
356 ModelChunk::ToolCallInputDelta { id, delta } => {
357 if let Some(state) = tool_states.iter_mut().find(|s| s.id == id) {
358 state.args_buf.push_str(&delta);
359 }
360 }
361 ModelChunk::ToolCallEnd { id, input } => {
362 if let Some(state) = tool_states.iter_mut().find(|s| s.id == id) {
363 state.early_input = input;
364 }
365 }
366 ModelChunk::Done {
367 stop_reason: sr,
368 usage: u,
369 } => {
370 stop_reason = Some(sr);
371 usage = u;
372 }
373 }
374 }
375
376 if let Some(state) = tool_states.into_iter().next() {
381 let parsed_input = match state.early_input {
382 Some(v) => v,
383 None => serde_json::from_str(state.args_buf.as_str().trim()).map_err(|e| {
384 ModelClientError::Other(format!(
385 "decode tool arguments for {id}: {e}",
386 id = state.id
387 ))
388 })?,
389 };
390 return Ok(ModelResponse::ToolCall {
391 preface: (!text_buf.is_empty()).then(|| text_buf.clone()),
392 invocation: ToolInvocation {
393 id: state.id,
394 name: state.name,
395 input: parsed_input,
396 },
397 usage,
398 });
399 }
400
401 Ok(ModelResponse::Message {
402 text: text_buf,
403 stop_reason: stop_reason.unwrap_or_else(|| "end_turn".into()),
404 usage,
405 })
406}
407
/// Accumulator for one tool call while `collect_model_response` drains a stream.
struct ToolStreamState {
    /// Tool call id, taken from `ToolCallStart`.
    id: String,
    /// Tool name, taken from `ToolCallStart`.
    name: String,
    /// Concatenation of every `ToolCallInputDelta` received for this id.
    args_buf: String,
    /// Arguments supplied directly by `ToolCallEnd`; preferred over `args_buf`
    /// when present.
    early_input: Option<Value>,
}
417
/// Connection and sampling settings for [`OpenAiCompatibleModelClient`].
#[derive(Debug, Clone)]
pub struct OpenAiCompatibleConfig {
    /// API base URL; `/chat/completions` is appended unless already present.
    pub base_url: String,
    /// Sent as a bearer token on every request.
    pub api_key: String,
    /// Value of the request's `model` field.
    pub model: String,
    /// Sent as `temperature` when set.
    pub temperature: Option<f64>,
    /// Sent as `max_tokens` when set.
    pub max_tokens: Option<i32>,
    /// Sent as `reasoning_effort` when set and non-empty.
    pub reasoning_effort: Option<String>,
}
429
/// [`ModelClient`] that talks to an OpenAI-style `/chat/completions` endpoint.
#[derive(Debug, Clone)]
pub struct OpenAiCompatibleModelClient {
    /// HTTP client used for every request.
    http: reqwest::Client,
    /// Endpoint, credentials and sampling settings.
    config: OpenAiCompatibleConfig,
}
435
436impl OpenAiCompatibleModelClient {
437 pub fn new(config: OpenAiCompatibleConfig) -> Self {
438 let http = reqwest::Client::builder()
442 .connect_timeout(std::time::Duration::from_secs(15))
443 .build()
444 .unwrap_or_else(|_| reqwest::Client::new());
445 Self { http, config }
446 }
447
448 fn endpoint(&self) -> String {
449 let base = self.config.base_url.trim_end_matches('/');
455 if base.ends_with("/chat/completions") {
456 base.to_string()
457 } else {
458 format!("{base}/chat/completions")
459 }
460 }
461
462 fn request_body(&self, input: &ModelTurnInput) -> Value {
463 let mut messages = Vec::with_capacity(input.messages.len() + 1);
464 if let Some(sys) = input.system_prompt.as_deref().filter(|s| !s.is_empty()) {
465 messages.push(json!({ "role": "system", "content": sys }));
466 }
467 for msg in &input.messages {
468 messages.push(chat_message_to_wire(msg));
469 }
470
471 let mut body = json!({
472 "model": self.config.model,
473 "messages": messages,
474 });
475 let send_tools = !input.tools.is_empty() && !matches!(input.tool_choice, ToolChoice::None);
483 if send_tools {
484 body["tools"] = json!(input
485 .tools
486 .iter()
487 .map(tool_spec_to_openai_function)
488 .collect::<Vec<_>>());
489 body["tool_choice"] = openai_tool_choice_value(&input.tool_choice);
490 if let Some(parallel) = input.parallel_tool_calls {
491 body["parallel_tool_calls"] = json!(parallel);
492 }
493 }
494 if let Some(temperature) = self.config.temperature {
495 body["temperature"] = json!(temperature);
496 }
497 if let Some(max_tokens) = self.config.max_tokens {
498 body["max_tokens"] = json!(max_tokens);
499 }
500 if let Some(effort) = self
501 .config
502 .reasoning_effort
503 .as_deref()
504 .filter(|s| !s.is_empty())
505 {
506 body["reasoning_effort"] = json!(effort);
507 }
508 body
509 }
510}
511
512fn openai_tool_choice_value(c: &ToolChoice) -> Value {
513 match c {
514 ToolChoice::Auto => json!("auto"),
515 ToolChoice::None => json!("none"),
518 ToolChoice::Required => json!("required"),
519 ToolChoice::Tool(name) => json!({
520 "type": "function",
521 "function": {"name": name},
522 }),
523 }
524}
525
526fn parse_openai_usage(usage: Option<&Value>) -> Option<HarnessUsage> {
537 let u = usage?;
538 let input = u.get("prompt_tokens").and_then(|v| v.as_u64()).unwrap_or(0);
539 let output = u
540 .get("completion_tokens")
541 .and_then(|v| v.as_u64())
542 .unwrap_or(0);
543 let cache_read = u
544 .get("prompt_tokens_details")
545 .and_then(|d| d.get("cached_tokens"))
546 .and_then(|v| v.as_u64())
547 .unwrap_or(0);
548 if input == 0 && output == 0 && cache_read == 0 {
551 return None;
552 }
553 Some(HarnessUsage {
554 input_tokens: input,
555 output_tokens: output,
556 cache_read_input_tokens: cache_read,
557 cache_creation_input_tokens: 0,
558 compaction_input_tokens: 0,
562 compaction_output_tokens: 0,
563 })
564}
565
566fn image_to_openai_part(src: &ImageSource) -> Value {
574 let url = match &src.data {
575 ImageData::Base64(b64) => {
576 format!("data:{};base64,{}", src.media_type, b64)
581 }
582 ImageData::Url(u) => u.clone(),
583 };
584 json!({
585 "type": "image_url",
586 "image_url": { "url": url },
587 })
588}
589
590fn tool_spec_to_openai_function(spec: &ToolSpec) -> Value {
591 json!({
592 "type": "function",
593 "function": {
594 "name": spec.name,
595 "description": spec.description,
596 "parameters": spec.input_schema,
597 }
598 })
599}
600
/// Estimated-token budget above which a tool result is compacted before replay.
const MAX_TOOL_RESULT_REPLAY_TOKENS: u64 = 2_000;
/// Byte-length budget above which a tool result is compacted before replay.
const MAX_TOOL_RESULT_REPLAY_BYTES: usize = 12 * 1024;
/// Total number of characters (head plus tail) kept from a compacted tool result.
const COMPACTED_TOOL_RESULT_KEEP_CHARS: usize = 3_000;
609
610fn compact_tool_result_for_replay(content: &str) -> std::borrow::Cow<'_, str> {
619 let estimated_tokens = crate::compaction::estimate_tokens(content);
620 if estimated_tokens <= MAX_TOOL_RESULT_REPLAY_TOKENS
621 && content.len() <= MAX_TOOL_RESULT_REPLAY_BYTES
622 {
623 return std::borrow::Cow::Borrowed(content);
624 }
625 let chars: Vec<char> = content.chars().collect();
626 if chars.len() <= COMPACTED_TOOL_RESULT_KEEP_CHARS {
627 return std::borrow::Cow::Borrowed(content);
628 }
629 let head_len = COMPACTED_TOOL_RESULT_KEEP_CHARS / 2;
630 let tail_len = COMPACTED_TOOL_RESULT_KEEP_CHARS - head_len;
631 let head: String = chars[..head_len].iter().collect();
632 let tail: String = chars[chars.len() - tail_len..].iter().collect();
633 let omitted = chars.len() - COMPACTED_TOOL_RESULT_KEEP_CHARS;
634 std::borrow::Cow::Owned(format!(
635 "[tool result compacted for model replay]\n\
636 original_estimated_tokens={estimated_tokens} original_chars={} \
637 retained_head_chars={head_len} retained_tail_chars={tail_len}\n\
638 The full raw tool result remains in session history; this replay is abbreviated.\n\n\
639 --- head ---\n{head}\n\n\
640 --- omitted ---\n[... omitted {omitted} chars from tool result replay ...]\n\n\
641 --- tail ---\n{tail}",
642 chars.len(),
643 ))
644}
645
646fn chat_message_to_wire(msg: &ChatMessage) -> Value {
650 match msg {
651 ChatMessage::User {
652 content,
653 attachments,
654 } => {
655 if attachments.is_empty() {
660 json!({ "role": "user", "content": content })
661 } else {
662 let mut parts: Vec<Value> = Vec::with_capacity(attachments.len() + 1);
668 if !content.is_empty() {
669 parts.push(json!({ "type": "text", "text": content }));
670 }
671 for att in attachments {
672 match att {
673 UserAttachment::Image(src) => {
674 parts.push(image_to_openai_part(src));
675 }
676 }
677 }
678 json!({ "role": "user", "content": parts })
679 }
680 }
681 ChatMessage::Assistant {
682 text,
683 tool_calls,
684 thinking: _,
685 } => {
686 let mut obj = json!({ "role": "assistant" });
691 if let Some(t) = text.as_deref().filter(|s| !s.is_empty()) {
692 obj["content"] = json!(t);
693 } else {
694 obj["content"] = Value::Null;
695 }
696 if !tool_calls.is_empty() {
697 let calls: Vec<Value> = tool_calls
698 .iter()
699 .map(|tc| {
700 json!({
701 "id": tc.id,
702 "type": "function",
703 "function": {
704 "name": tc.name,
705 "arguments": tc.input.to_string(),
706 },
707 })
708 })
709 .collect();
710 obj["tool_calls"] = json!(calls);
711 }
712 obj
713 }
714 ChatMessage::Tool {
715 tool_call_id,
716 content,
717 attachments,
718 is_error: _,
719 } => {
720 let mut content_str = compact_tool_result_for_replay(content).into_owned();
728 for att in attachments {
729 let UserAttachment::Image(src) = att;
730 content_str.push_str(&format!(
731 "\n[image attached: {} (not visible via OpenAI tool role)]",
732 src.media_type
733 ));
734 }
735 json!({
736 "role": "tool",
737 "tool_call_id": tool_call_id,
738 "content": content_str,
739 })
740 }
741 }
742}
743
#[async_trait]
impl ModelClient for OpenAiCompatibleModelClient {
    /// Sends the turn to the chat-completions endpoint with streaming enabled
    /// and returns the reply as a stream of [`ModelChunk`]s.
    ///
    /// Failures before the response body starts (transport errors, non-success
    /// HTTP statuses) are returned directly. After that, a spawned task decodes
    /// the SSE events and forwards chunks through a bounded channel; decoding
    /// and transport failures arrive as a final `Err` item on the stream.
    async fn stream(
        &self,
        input: ModelTurnInput,
    ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError> {
        let mut body = self.request_body(&input);
        // Ask for a streamed reply, with usage figures included in the stream.
        body["stream"] = json!(true);
        body["stream_options"] = json!({ "include_usage": true });

        let resp = match self
            .http
            .post(self.endpoint())
            .bearer_auth(&self.config.api_key)
            .json(&body)
            .send()
            .await
        {
            Ok(r) => r,
            Err(e) => return Err(classify_reqwest_error(&e, e.to_string())),
        };
        let status = resp.status();
        if !status.is_success() {
            // The error body is best-effort: an unreadable body becomes "".
            let body_text = resp.text().await.unwrap_or_default();
            return Err(classify_openai_http_error(status, &body_text));
        }

        let event_stream = resp.bytes_stream().eventsource();
        // Bounded channel between the decoding task and the returned stream.
        let (tx, rx) = tokio::sync::mpsc::channel::<Result<ModelChunk, ModelClientError>>(8);

        tokio::spawn(async move {
            let mut state = OpenAiStreamState::default();
            futures::pin_mut!(event_stream);
            while let Some(ev) = event_stream.next().await {
                let chunks = match ev {
                    Ok(event) => match state.feed_data(&event.data) {
                        Ok(c) => c,
                        Err(e) => {
                            // Undecodable payload: report it and stop.
                            let _ = tx.send(Err(e)).await;
                            return;
                        }
                    },
                    Err(e) => {
                        let _ = tx
                            .send(Err(ModelClientError::Network(format!(
                                "SSE transport error: {e}"
                            ))))
                            .await;
                        return;
                    }
                };
                for c in chunks {
                    // A failed send means the receiver was dropped; stop working.
                    if tx.send(Ok(c)).await.is_err() {
                        return;
                    }
                }
            }
            // The byte stream ended. It only counts as a complete reply if a
            // finish reason or `[DONE]` was seen; otherwise report truncation.
            if state.ended_cleanly() {
                if let Some(final_chunk) = state.finalize() {
                    let _ = tx.send(Ok(final_chunk)).await;
                }
            } else {
                let _ = tx
                    .send(Err(ModelClientError::Network(
                        "model stream closed before completion (no finish_reason or [DONE]) \
                         — connection dropped or upstream truncated the response"
                            .into(),
                    )))
                    .await;
            }
        });

        Ok(tokio_stream::wrappers::ReceiverStream::new(rx).boxed())
    }
}
839
/// Decoder state carried across the SSE events of one OpenAI streamed reply.
#[derive(Debug, Default)]
struct OpenAiStreamState {
    /// First non-empty top-level `id` seen; used as the `msg_id` of text deltas.
    msg_id: Option<String>,
    /// Maps a tool call's `index` to its id, so later argument fragments
    /// (which carry only the index) can be attributed.
    tool_call_by_index: std::collections::HashMap<u64, String>,
    /// Most recent `finish_reason` reported by the provider.
    finish_reason: Option<String>,
    /// Latest usage figures seen; handed out with the `Done` chunk.
    pending_usage: Option<HarnessUsage>,
    /// Set once `Done` has been produced, so it is emitted at most once.
    done_emitted: bool,
}
860
861impl OpenAiStreamState {
862 fn feed_data(&mut self, data: &str) -> Result<Vec<ModelChunk>, ModelClientError> {
863 if data.trim() == "[DONE]" {
866 if let Some(done) = self.emit_done() {
867 return Ok(vec![done]);
868 }
869 return Ok(vec![]);
870 }
871 let value: Value = serde_json::from_str(data)
872 .map_err(|e| ModelClientError::Other(format!("SSE data not JSON: {e}; raw={data}")))?;
873
874 let mut out: Vec<ModelChunk> = Vec::new();
875
876 if let Some(usage) = parse_openai_usage(value.get("usage")) {
879 self.pending_usage = Some(usage);
880 }
881
882 if let Some(id) = value.get("id").and_then(|v| v.as_str()) {
883 if self.msg_id.is_none() && !id.is_empty() {
884 self.msg_id = Some(id.to_string());
885 }
886 }
887
888 let Some(choices) = value.get("choices").and_then(|v| v.as_array()) else {
889 return Ok(out);
890 };
891 let Some(choice) = choices.first() else {
892 return Ok(out);
893 };
894 let Some(delta) = choice.get("delta") else {
895 if let Some(reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
897 self.finish_reason = Some(reason.to_string());
898 }
899 return Ok(out);
900 };
901
902 if let Some(text) = delta.get("content").and_then(|v| v.as_str()) {
906 if !text.is_empty() {
907 let msg_id = self
908 .msg_id
909 .clone()
910 .unwrap_or_else(|| "msg_native_default".to_string());
911 out.push(ModelChunk::TextDelta {
912 msg_id,
913 delta: text.to_string(),
914 });
915 }
916 }
917
918 if let Some(tcs) = delta.get("tool_calls").and_then(|v| v.as_array()) {
924 for tc in tcs {
925 let index = tc.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
926 if let Some(id) = tc.get("id").and_then(|v| v.as_str()) {
927 if !id.is_empty() {
928 self.tool_call_by_index.insert(index, id.to_string());
929 let name = tc
930 .get("function")
931 .and_then(|f| f.get("name"))
932 .and_then(|v| v.as_str())
933 .unwrap_or("")
934 .to_string();
935 out.push(ModelChunk::ToolCallStart {
936 id: id.to_string(),
937 name,
938 });
939 }
940 }
941 if let Some(args) = tc
946 .get("function")
947 .and_then(|f| f.get("arguments"))
948 .and_then(|v| v.as_str())
949 {
950 if let Some(id) = self.tool_call_by_index.get(&index).cloned() {
951 if !args.is_empty() {
952 out.push(ModelChunk::ToolCallInputDelta {
953 id,
954 delta: args.to_string(),
955 });
956 }
957 }
958 }
959 }
960 }
961
962 if let Some(reason) = choice.get("finish_reason").and_then(|v| v.as_str()) {
963 self.finish_reason = Some(reason.to_string());
964 if reason == "tool_calls" {
969 for (_idx, id) in self.tool_call_by_index.iter() {
970 out.push(ModelChunk::ToolCallEnd {
971 id: id.clone(),
972 input: None,
973 });
974 }
975 }
976 }
980
981 Ok(out)
982 }
983
984 fn finalize(&mut self) -> Option<ModelChunk> {
985 self.emit_done()
986 }
987
988 fn ended_cleanly(&self) -> bool {
995 self.done_emitted || self.finish_reason.is_some()
996 }
997
998 fn emit_done(&mut self) -> Option<ModelChunk> {
999 if self.done_emitted {
1000 return None;
1001 }
1002 self.done_emitted = true;
1003 let stop_reason = map_openai_finish_reason(self.finish_reason.as_deref());
1004 Some(ModelChunk::Done {
1005 stop_reason,
1006 usage: self.pending_usage.take(),
1007 })
1008 }
1009}
1010
/// Translates an OpenAI `finish_reason` into this crate's stop-reason strings.
///
/// `stop`, `tool_calls`, an empty string and a missing reason all become
/// `"end_turn"`; `length` becomes `"max_tokens"`; `content_filter` becomes
/// `"refusal"`. Any other non-empty value is passed through unchanged.
fn map_openai_finish_reason(reason: Option<&str>) -> String {
    let mapped = match reason.unwrap_or("") {
        "" | "stop" | "tool_calls" => "end_turn",
        "length" => "max_tokens",
        "content_filter" => "refusal",
        other => other,
    };
    mapped.to_string()
}
1026
1027fn classify_openai_http_error(status: reqwest::StatusCode, body: &str) -> ModelClientError {
1032 use reqwest::StatusCode;
1033 let snippet = body.chars().take(512).collect::<String>();
1034
1035 if status == StatusCode::TOO_MANY_REQUESTS {
1037 return ModelClientError::RateLimit(format!("HTTP {status}: {snippet}"));
1038 }
1039 if status.is_server_error() {
1040 return ModelClientError::ServerError(format!("HTTP {status}: {snippet}"));
1042 }
1043
1044 if status == StatusCode::UNAUTHORIZED || status == StatusCode::FORBIDDEN {
1046 return ModelClientError::Auth(format!("HTTP {status}: {snippet}"));
1047 }
1048 if status == StatusCode::BAD_REQUEST && looks_like_context_overflow(body) {
1049 return ModelClientError::ContextOverflow(format!("HTTP {status}: {snippet}"));
1050 }
1051 if status == StatusCode::BAD_REQUEST {
1052 return ModelClientError::BadRequest(format!("HTTP {status}: {snippet}"));
1054 }
1055
1056 ModelClientError::Other(format!("HTTP {status}: {snippet}"))
1057}
1058
/// Heuristically decides whether an error body describes a context-window
/// overflow, by case-insensitive search for a handful of known phrases.
fn looks_like_context_overflow(body: &str) -> bool {
    const MARKERS: [&str; 5] = [
        "context length",
        "maximum context",
        "context_length_exceeded",
        "too many tokens",
        "exceeds the model",
    ];

    let haystack = body.to_lowercase();
    MARKERS.iter().any(|marker| haystack.contains(*marker))
}
1072
1073fn classify_reqwest_error(err: &reqwest::Error, msg: String) -> ModelClientError {
1077 if err.is_connect() || err.is_timeout() || err.is_request() || err.is_body() {
1078 ModelClientError::Network(msg)
1079 } else {
1080 ModelClientError::Other(msg)
1081 }
1082}
1083
/// A [`ModelClient`] that makes no network calls: its reply is computed from
/// the turn input by `scripted_chunks_for`.
#[derive(Debug, Default, Clone)]
pub struct ScriptedModelClient;
1095
1096#[async_trait]
1097impl ModelClient for ScriptedModelClient {
1098 async fn stream(
1099 &self,
1100 input: ModelTurnInput,
1101 ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError> {
1102 let chunks = scripted_chunks_for(&input);
1103 let stream = futures::stream::iter(chunks.into_iter().map(Ok));
1104 Ok(stream.boxed())
1105 }
1106}
1107
1108fn scripted_chunks_for(input: &ModelTurnInput) -> Vec<ModelChunk> {
1113 let last_tool = input.messages.iter().rev().find_map(|m| match m {
1116 ChatMessage::Tool {
1117 tool_call_id,
1118 content,
1119 is_error,
1120 ..
1121 } => Some((tool_call_id.clone(), content.clone(), *is_error)),
1122 _ => None,
1123 });
1124 if let Some((id, content, is_error)) = last_tool {
1125 let summary = if is_error {
1126 format!("tool {id} failed: {content}")
1127 } else {
1128 format!("tool {id} completed: {content}")
1129 };
1130 return vec![
1131 ModelChunk::TextDelta {
1132 msg_id: "scripted_msg".into(),
1133 delta: summary,
1134 },
1135 ModelChunk::Done {
1136 stop_reason: "end_turn".into(),
1137 usage: None,
1138 },
1139 ];
1140 }
1141
1142 let user_prompt = input
1144 .messages
1145 .iter()
1146 .rev()
1147 .find_map(|m| match m {
1148 ChatMessage::User { content, .. } => Some(content.clone()),
1149 _ => None,
1150 })
1151 .unwrap_or_default();
1152 let prompt = user_prompt.trim();
1153 let (id, name, args) = if let Some(path) = prompt.strip_prefix("read ") {
1154 ("tc_read_1", "read", json!({"path": path.trim()}))
1155 } else if let Some(rest) = prompt.strip_prefix("write ") {
1156 let (path, content) = rest.split_once(' ').unwrap_or((rest, ""));
1157 (
1158 "tc_write_1",
1159 "write",
1160 json!({"path": path.trim(), "content": content}),
1161 )
1162 } else {
1163 ("tc_bash_1", "bash", json!({"command": prompt}))
1164 };
1165
1166 vec![
1167 ModelChunk::TextDelta {
1168 msg_id: "scripted_msg".into(),
1169 delta: format!("native model selected tool: {name}"),
1170 },
1171 ModelChunk::ToolCallStart {
1172 id: id.into(),
1173 name: name.into(),
1174 },
1175 ModelChunk::ToolCallEnd {
1176 id: id.into(),
1177 input: Some(args),
1178 },
1179 ModelChunk::Done {
1180 stop_reason: "end_turn".into(),
1181 usage: None,
1182 },
1183 ]
1184}
1185
/// Connection and sampling settings for [`AnthropicModelClient`].
#[derive(Debug, Clone)]
pub struct AnthropicConfig {
    /// API base URL; `/messages` is appended unless already present.
    pub base_url: String,
    /// Sent in the `x-api-key` header.
    pub api_key: String,
    /// Value of the request's `model` field.
    pub model: String,
    /// Always sent as `max_tokens`.
    pub max_tokens: i32,
    /// Sent as `temperature` when set.
    pub temperature: Option<f64>,
    /// Sent in the `anthropic-version` header.
    pub anthropic_version: String,
}
1223
impl AnthropicConfig {
    /// API version string; presumably the intended default for
    /// `anthropic_version` (not referenced in the code visible here).
    pub const DEFAULT_VERSION: &'static str = "2023-06-01";
    /// Presumably the intended default for `max_tokens` (not referenced in
    /// the code visible here).
    pub const DEFAULT_MAX_TOKENS: i32 = 4096;
}
1233
/// [`ModelClient`] that talks to an Anthropic-style `/messages` endpoint.
#[derive(Debug, Clone)]
pub struct AnthropicModelClient {
    /// HTTP client used for every request.
    http: reqwest::Client,
    /// Endpoint, credentials and sampling settings.
    config: AnthropicConfig,
}
1239
1240impl AnthropicModelClient {
1241 pub fn new(config: AnthropicConfig) -> Self {
1242 let http = reqwest::Client::builder()
1243 .connect_timeout(std::time::Duration::from_secs(15))
1244 .build()
1245 .unwrap_or_else(|_| reqwest::Client::new());
1246 Self { http, config }
1247 }
1248
1249 fn endpoint(&self) -> String {
1250 let base = self.config.base_url.trim_end_matches('/');
1254 if base.ends_with("/messages") {
1255 base.to_string()
1256 } else {
1257 format!("{base}/messages")
1258 }
1259 }
1260
1261 fn request_body(&self, input: &ModelTurnInput) -> Value {
1265 let messages = chat_messages_to_anthropic_messages(&input.messages);
1266 let tools = if matches!(input.tool_choice, ToolChoice::None) {
1270 Vec::new()
1271 } else {
1272 input
1273 .tools
1274 .iter()
1275 .map(tool_spec_to_anthropic_tool)
1276 .collect::<Vec<_>>()
1277 };
1278 let system_field = anthropic_system_field(input.system_prompt.as_deref());
1279
1280 let cached = apply_anthropic_cache_strategy(system_field, tools, messages);
1284
1285 let mut body = json!({
1286 "model": self.config.model,
1287 "max_tokens": self.config.max_tokens,
1288 "messages": cached.messages,
1289 "stream": true,
1290 });
1291 if let Some(sys) = cached.system {
1292 body["system"] = sys;
1293 }
1294 if !cached.tools.is_empty() {
1295 body["tools"] = json!(cached.tools);
1296 if !matches!(input.tool_choice, ToolChoice::Auto) {
1301 body["tool_choice"] = anthropic_tool_choice_value(&input.tool_choice);
1302 }
1303 }
1304 if let Some(t) = self.config.temperature {
1309 body["temperature"] = json!(t);
1310 }
1311 body
1312 }
1313}
1314
1315fn anthropic_tool_choice_value(c: &ToolChoice) -> Value {
1316 match c {
1317 ToolChoice::Auto => json!({"type": "auto"}),
1318 ToolChoice::None => json!({"type": "auto"}), ToolChoice::Required => json!({"type": "any"}),
1320 ToolChoice::Tool(name) => json!({"type": "tool", "name": name}),
1321 }
1322}
1323
#[async_trait]
impl ModelClient for AnthropicModelClient {
    /// Sends the turn to the messages endpoint and returns the reply as a
    /// stream of [`ModelChunk`]s.
    ///
    /// Failures before the response body starts (transport errors, non-success
    /// HTTP statuses) are returned directly. After that, a spawned task decodes
    /// the SSE events and forwards chunks through a bounded channel; decoding
    /// and transport failures arrive as a final `Err` item on the stream.
    async fn stream(
        &self,
        input: ModelTurnInput,
    ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError> {
        let resp = match self
            .http
            .post(self.endpoint())
            .header("x-api-key", &self.config.api_key)
            .header("anthropic-version", &self.config.anthropic_version)
            .header("content-type", "application/json")
            .json(&self.request_body(&input))
            .send()
            .await
        {
            Ok(r) => r,
            Err(e) => return Err(classify_reqwest_error(&e, e.to_string())),
        };
        let status = resp.status();
        if !status.is_success() {
            // The error body is best-effort: an unreadable body becomes "".
            let body_text = resp.text().await.unwrap_or_default();
            return Err(classify_anthropic_http_error(status, &body_text));
        }

        let event_stream = resp.bytes_stream().eventsource();
        // Bounded channel between the decoding task and the returned stream.
        let (tx, rx) = tokio::sync::mpsc::channel::<Result<ModelChunk, ModelClientError>>(8);
        tokio::spawn(async move {
            let mut state = AnthropicStreamState::default();
            futures::pin_mut!(event_stream);
            while let Some(ev) = event_stream.next().await {
                let chunks = match ev {
                    // Anthropic events are dispatched on the SSE event name.
                    Ok(event) => match state.feed_event(&event.event, &event.data) {
                        Ok(c) => c,
                        Err(e) => {
                            let _ = tx.send(Err(e)).await;
                            return;
                        }
                    },
                    Err(e) => {
                        let _ = tx
                            .send(Err(ModelClientError::Network(format!(
                                "SSE transport error: {e}"
                            ))))
                            .await;
                        return;
                    }
                };
                for c in chunks {
                    // A failed send means the receiver was dropped; stop working.
                    if tx.send(Ok(c)).await.is_err() {
                        return;
                    }
                }
            }
            // NOTE(review): the OpenAI client checks for a clean end of stream
            // before finalizing; here `finalize` is called unconditionally.
            // Confirm that `finalize` itself handles a truncated stream.
            if let Some(done) = state.finalize() {
                let _ = tx.send(Ok(done)).await;
            }
        });
        Ok(tokio_stream::wrappers::ReceiverStream::new(rx).boxed())
    }
}
1385
1386fn chat_messages_to_anthropic_messages(messages: &[ChatMessage]) -> Vec<Value> {
1400 let mut out: Vec<Value> = Vec::with_capacity(messages.len());
1401 let mut pending_tool_results: Vec<Value> = Vec::new();
1402
1403 let flush_tool_results = |bucket: &mut Vec<Value>, out: &mut Vec<Value>| {
1404 if !bucket.is_empty() {
1405 let blocks = std::mem::take(bucket);
1406 out.push(json!({"role": "user", "content": blocks}));
1407 }
1408 };
1409
1410 for msg in messages {
1411 match msg {
1412 ChatMessage::User {
1413 content,
1414 attachments,
1415 } => {
1416 let mut blocks: Vec<Value> = std::mem::take(&mut pending_tool_results);
1425 if !content.is_empty() {
1426 blocks.push(json!({"type":"text","text":content}));
1427 }
1428 for att in attachments {
1429 match att {
1430 UserAttachment::Image(src) => {
1431 blocks.push(image_to_anthropic_block(src));
1432 }
1433 }
1434 }
1435 if blocks.is_empty() {
1440 blocks.push(json!({"type":"text","text":""}));
1441 }
1442 out.push(json!({"role": "user", "content": blocks}));
1443 }
1444 ChatMessage::Assistant {
1445 text,
1446 tool_calls,
1447 thinking,
1448 } => {
1449 flush_tool_results(&mut pending_tool_results, &mut out);
1453 let mut blocks: Vec<Value> = Vec::new();
1454 if let Some(t) = thinking {
1458 let mut tb = json!({"type": "thinking", "thinking": t.text});
1459 if let Some(sig) = t.signature.as_deref() {
1460 if !sig.is_empty() {
1461 tb["signature"] = json!(sig);
1462 }
1463 }
1464 blocks.push(tb);
1465 }
1466 if let Some(t) = text.as_deref() {
1467 if !t.is_empty() {
1468 blocks.push(json!({"type": "text", "text": t}));
1469 }
1470 }
1471 for tc in tool_calls {
1472 blocks.push(json!({
1473 "type": "tool_use",
1474 "id": tc.id,
1475 "name": tc.name,
1476 "input": tc.input,
1477 }));
1478 }
1479 if blocks.is_empty() {
1480 continue;
1484 }
1485 out.push(json!({"role": "assistant", "content": blocks}));
1486 }
1487 ChatMessage::Tool {
1488 tool_call_id,
1489 content,
1490 is_error,
1491 attachments,
1492 } => {
1493 let mut blocks: Vec<Value> = Vec::new();
1498 if !content.is_empty() {
1499 let replay = compact_tool_result_for_replay(content);
1500 blocks.push(json!({"type": "text", "text": replay}));
1501 }
1502 for att in attachments {
1503 let UserAttachment::Image(src) = att;
1504 blocks.push(image_to_anthropic_block(src));
1505 }
1506 if blocks.is_empty() {
1510 blocks.push(json!({"type": "text", "text": ""}));
1511 }
1512 pending_tool_results.push(json!({
1513 "type": "tool_result",
1514 "tool_use_id": tool_call_id,
1515 "content": blocks,
1516 "is_error": is_error,
1517 }));
1518 }
1519 }
1520 }
1521
1522 flush_tool_results(&mut pending_tool_results, &mut out);
1525 out
1526}
1527
1528fn anthropic_system_field(prompt: Option<&str>) -> Option<Value> {
1533 let s = prompt?.trim();
1534 if s.is_empty() {
1535 return None;
1536 }
1537 Some(json!([{"type": "text", "text": s}]))
1538}
1539
1540fn image_to_anthropic_block(src: &ImageSource) -> Value {
1547 let source = match &src.data {
1548 ImageData::Base64(b64) => json!({
1549 "type": "base64",
1550 "media_type": src.media_type,
1551 "data": b64,
1552 }),
1553 ImageData::Url(url) => json!({
1554 "type": "url",
1555 "url": url,
1556 }),
1557 };
1558 json!({"type": "image", "source": source})
1559}
1560
1561fn tool_spec_to_anthropic_tool(spec: &ToolSpec) -> Value {
1562 json!({
1563 "name": spec.name,
1564 "description": spec.description,
1565 "input_schema": spec.input_schema,
1566 })
1567}
1568
/// The request parts returned by `apply_anthropic_cache_strategy`, with
/// `cache_control` markers added where that function places them.
struct AnthropicCached {
    /// The `system` field, if any.
    system: Option<Value>,
    /// Tool definitions.
    tools: Vec<Value>,
    /// Conversation messages.
    messages: Vec<Value>,
}
1574
1575fn apply_anthropic_cache_strategy(
1593 system: Option<Value>,
1594 tools: Vec<Value>,
1595 messages: Vec<Value>,
1596) -> AnthropicCached {
1597 let mut system = system;
1598 if let Some(sys) = system.as_mut() {
1599 if let Some(arr) = sys.as_array_mut() {
1600 if let Some(last) = arr.last_mut() {
1601 if last
1602 .get("text")
1603 .and_then(|v| v.as_str())
1604 .map(|s| !s.is_empty())
1605 .unwrap_or(false)
1606 {
1607 last["cache_control"] = json!({"type": "ephemeral"});
1608 }
1609 }
1610 }
1611 }
1612
1613 let mut tools = tools;
1614 if let Some(last) = tools.last_mut() {
1615 last["cache_control"] = json!({"type": "ephemeral"});
1616 }
1617
1618 let mut messages = messages;
1619 if let Some(last) = messages.last_mut() {
1623 if let Some(blocks) = last.get_mut("content").and_then(|v| v.as_array_mut()) {
1624 if let Some(last_block) = blocks.last_mut() {
1625 last_block["cache_control"] = json!({"type": "ephemeral"});
1626 }
1627 }
1628 }
1629 if messages.len() > 30 {
1632 let mid = messages.len() / 2;
1633 if let Some(blocks) = messages[mid]
1634 .get_mut("content")
1635 .and_then(|v| v.as_array_mut())
1636 {
1637 if let Some(last_block) = blocks.last_mut() {
1638 last_block["cache_control"] = json!({"type": "ephemeral"});
1639 }
1640 }
1641 }
1642
1643 AnthropicCached {
1644 system,
1645 tools,
1646 messages,
1647 }
1648}
1649
1650#[derive(Debug, Default)]
1666struct AnthropicStreamState {
1667 msg_id: Option<String>,
1668 blocks: std::collections::HashMap<u64, AnthropicBlock>,
1670 stop_reason: Option<String>,
1671 pending_usage: Option<HarnessUsage>,
1672 done_emitted: bool,
1673}
1674
/// The kind of content block registered for a stream index.
#[derive(Debug)]
enum AnthropicBlock {
    /// A `text` block; also used for block kinds that are not recognised.
    Text,
    /// A `thinking` block, with the id attached to its emitted deltas.
    Thinking { thinking_id: String },
    /// A `tool_use` block, with the tool-call id reported by the stream.
    ToolUse { id: String },
}
1681
1682impl AnthropicStreamState {
1683 fn feed_event(&mut self, event: &str, data: &str) -> Result<Vec<ModelChunk>, ModelClientError> {
1684 match event {
1687 "ping" | "" => return Ok(vec![]),
1688 "error" => {
1689 return Err(ModelClientError::Other(format!(
1690 "anthropic stream error event: {data}"
1691 )));
1692 }
1693 _ => {}
1694 }
1695
1696 let value: Value = serde_json::from_str(data).map_err(|e| {
1697 ModelClientError::Other(format!(
1698 "anthropic SSE data not JSON (event={event}): {e}; raw={data}"
1699 ))
1700 })?;
1701 let mut out: Vec<ModelChunk> = Vec::new();
1702
1703 match event {
1704 "message_start" => {
1705 let msg = value.get("message");
1706 if let Some(id) = msg.and_then(|m| m.get("id")).and_then(|v| v.as_str()) {
1707 if !id.is_empty() {
1708 self.msg_id = Some(id.to_string());
1709 }
1710 }
1711 if let Some(u) = msg.and_then(|m| m.get("usage")) {
1712 self.pending_usage =
1713 Some(merge_anthropic_usage(self.pending_usage.clone(), u, true));
1714 }
1715 }
1716 "content_block_start" => {
1717 let index = value.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
1718 let block = value.get("content_block");
1719 let kind = block.and_then(|b| b.get("type")).and_then(|v| v.as_str());
1720 match kind {
1721 Some("text") => {
1722 self.blocks.insert(index, AnthropicBlock::Text);
1723 }
1724 Some("thinking") => {
1725 let thinking_id = self
1726 .msg_id
1727 .clone()
1728 .map(|m| format!("{m}_t{index}"))
1729 .unwrap_or_else(|| format!("thinking_{index}"));
1730 self.blocks
1731 .insert(index, AnthropicBlock::Thinking { thinking_id });
1732 }
1733 Some("tool_use") => {
1734 let id = block
1735 .and_then(|b| b.get("id"))
1736 .and_then(|v| v.as_str())
1737 .unwrap_or_default()
1738 .to_string();
1739 let name = block
1740 .and_then(|b| b.get("name"))
1741 .and_then(|v| v.as_str())
1742 .unwrap_or_default()
1743 .to_string();
1744 if !id.is_empty() && !name.is_empty() {
1745 out.push(ModelChunk::ToolCallStart {
1746 id: id.clone(),
1747 name,
1748 });
1749 }
1750 self.blocks.insert(index, AnthropicBlock::ToolUse { id });
1751 }
1752 _ => {
1753 self.blocks.insert(index, AnthropicBlock::Text);
1757 }
1758 }
1759 }
1760 "content_block_delta" => {
1761 let index = value.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
1762 let delta = match value.get("delta") {
1763 Some(d) => d,
1764 None => return Ok(out),
1765 };
1766 let delta_type = delta.get("type").and_then(|v| v.as_str()).unwrap_or("");
1767 match (self.blocks.get(&index), delta_type) {
1768 (Some(AnthropicBlock::Text), "text_delta") => {
1769 if let Some(text) = delta.get("text").and_then(|v| v.as_str()) {
1770 if !text.is_empty() {
1771 let msg_id = self
1772 .msg_id
1773 .clone()
1774 .unwrap_or_else(|| "msg_anthropic_default".into());
1775 out.push(ModelChunk::TextDelta {
1776 msg_id,
1777 delta: text.to_string(),
1778 });
1779 }
1780 }
1781 }
1782 (Some(AnthropicBlock::Thinking { thinking_id }), "thinking_delta") => {
1783 if let Some(text) = delta.get("thinking").and_then(|v| v.as_str()) {
1784 if !text.is_empty() {
1785 out.push(ModelChunk::ThinkingDelta {
1786 thinking_id: thinking_id.clone(),
1787 delta: text.to_string(),
1788 signature: None,
1789 });
1790 }
1791 }
1792 }
1793 (Some(AnthropicBlock::Thinking { thinking_id }), "signature_delta") => {
1794 if let Some(sig) = delta.get("signature").and_then(|v| v.as_str()) {
1795 out.push(ModelChunk::ThinkingDelta {
1801 thinking_id: thinking_id.clone(),
1802 delta: String::new(),
1803 signature: Some(sig.to_string()),
1804 });
1805 }
1806 }
1807 (Some(AnthropicBlock::ToolUse { id }), "input_json_delta") => {
1808 if let Some(partial) = delta.get("partial_json").and_then(|v| v.as_str()) {
1809 if !partial.is_empty() {
1810 out.push(ModelChunk::ToolCallInputDelta {
1811 id: id.clone(),
1812 delta: partial.to_string(),
1813 });
1814 }
1815 }
1816 }
1817 _ => { }
1818 }
1819 }
1820 "content_block_stop" => {
1821 let index = value.get("index").and_then(|v| v.as_u64()).unwrap_or(0);
1822 if let Some(AnthropicBlock::ToolUse { id }) = self.blocks.get(&index) {
1823 out.push(ModelChunk::ToolCallEnd {
1828 id: id.clone(),
1829 input: None,
1830 });
1831 }
1832 }
1833 "message_delta" => {
1834 if let Some(reason) = value
1835 .get("delta")
1836 .and_then(|d| d.get("stop_reason"))
1837 .and_then(|v| v.as_str())
1838 {
1839 self.stop_reason = Some(reason.to_string());
1840 }
1841 if let Some(u) = value.get("usage") {
1842 self.pending_usage =
1845 Some(merge_anthropic_usage(self.pending_usage.clone(), u, false));
1846 }
1847 }
1848 "message_stop" => {
1849 if let Some(done) = self.emit_done() {
1850 out.push(done);
1851 }
1852 }
1853 _ => { }
1854 }
1855 Ok(out)
1856 }
1857
1858 fn finalize(&mut self) -> Option<ModelChunk> {
1859 self.emit_done()
1860 }
1861
1862 fn emit_done(&mut self) -> Option<ModelChunk> {
1863 if self.done_emitted {
1864 return None;
1865 }
1866 self.done_emitted = true;
1867 let stop_reason = map_anthropic_stop_reason(self.stop_reason.as_deref());
1868 Some(ModelChunk::Done {
1869 stop_reason,
1870 usage: self.pending_usage.take(),
1871 })
1872 }
1873}
1874
1875fn merge_anthropic_usage(
1881 prior: Option<HarnessUsage>,
1882 incoming: &Value,
1883 include_input: bool,
1884) -> HarnessUsage {
1885 let mut u = prior.unwrap_or_default();
1886 if include_input {
1887 if let Some(v) = incoming.get("input_tokens").and_then(|v| v.as_u64()) {
1888 u.input_tokens = v;
1889 }
1890 if let Some(v) = incoming
1891 .get("cache_read_input_tokens")
1892 .and_then(|v| v.as_u64())
1893 {
1894 u.cache_read_input_tokens = v;
1895 }
1896 if let Some(v) = incoming
1897 .get("cache_creation_input_tokens")
1898 .and_then(|v| v.as_u64())
1899 {
1900 u.cache_creation_input_tokens = v;
1901 }
1902 }
1903 if let Some(v) = incoming.get("output_tokens").and_then(|v| v.as_u64()) {
1904 u.output_tokens = v;
1905 }
1906 u
1907}
1908
/// Normalises an Anthropic `stop_reason` for the `Done` chunk.
///
/// `end_turn`, `stop_sequence` and `tool_use` all collapse to `"end_turn"`, as
/// does a missing or empty reason. Every other value (including `max_tokens`
/// and `refusal`) is passed through unchanged.
fn map_anthropic_stop_reason(reason: Option<&str>) -> String {
    let normalized = match reason.unwrap_or("") {
        "" | "end_turn" | "stop_sequence" | "tool_use" => "end_turn",
        other => other,
    };
    normalized.to_string()
}
1920
1921fn classify_anthropic_http_error(status: reqwest::StatusCode, body: &str) -> ModelClientError {
1926 use reqwest::StatusCode;
1927 let snippet = body.chars().take(512).collect::<String>();
1928 if status == StatusCode::TOO_MANY_REQUESTS {
1929 return ModelClientError::RateLimit(format!("HTTP {status}: {snippet}"));
1930 }
1931 if status == StatusCode::UNAUTHORIZED || status == StatusCode::FORBIDDEN {
1932 return ModelClientError::Auth(format!("HTTP {status}: {snippet}"));
1933 }
1934 if status == StatusCode::BAD_REQUEST && looks_like_context_overflow(body) {
1935 return ModelClientError::ContextOverflow(format!("HTTP {status}: {snippet}"));
1936 }
1937 if status == StatusCode::BAD_REQUEST {
1938 return ModelClientError::BadRequest(format!("HTTP {status}: {snippet}"));
1939 }
1940 if status.is_server_error() {
1941 return ModelClientError::ServerError(format!("HTTP {status}: {snippet}"));
1942 }
1943 ModelClientError::Other(format!("HTTP {status}: {snippet}"))
1944}
1945
1946#[cfg(test)]
1947mod tests {
1948 use super::*;
1949
1950 fn user(prompt: &str) -> ModelTurnInput {
1951 ModelTurnInput {
1952 system_prompt: None,
1953 messages: vec![ChatMessage::User {
1954 content: prompt.into(),
1955 attachments: vec![],
1956 }],
1957 tools: vec![],
1958 tool_choice: ToolChoice::Auto,
1959 parallel_tool_calls: None,
1960 }
1961 }
1962
1963 fn bash_spec() -> ToolSpec {
1964 ToolSpec {
1965 name: "bash".into(),
1966 description: "Run a shell command inside the sandbox.".into(),
1967 input_schema: json!({
1968 "type": "object",
1969 "properties": {"command": {"type": "string"}},
1970 "required": ["command"],
1971 "additionalProperties": false
1972 }),
1973 }
1974 }
1975
1976 #[test]
1977 fn openai_client_builds_chat_completions_request() {
1978 let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
1981 base_url: "https://example.test/v1/".into(),
1982 api_key: "sk-test".into(),
1983 model: "gpt-test".into(),
1984 temperature: None,
1985 max_tokens: None,
1986 reasoning_effort: None,
1987 });
1988 assert_eq!(
1989 client.endpoint(),
1990 "https://example.test/v1/chat/completions"
1991 );
1992 let client_with_v1 = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
1993 base_url: "https://example.test/v1".into(),
1994 api_key: "sk-test".into(),
1995 model: "gpt-test".into(),
1996 temperature: None,
1997 max_tokens: None,
1998 reasoning_effort: None,
1999 });
2000 assert_eq!(
2001 client_with_v1.endpoint(),
2002 "https://example.test/v1/chat/completions"
2003 );
2004 let glm = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2007 base_url: "https://open.bigmodel.cn/api/coding/paas/v4".into(),
2008 api_key: "sk-test".into(),
2009 model: "glm-4.6".into(),
2010 temperature: None,
2011 max_tokens: None,
2012 reasoning_effort: None,
2013 });
2014 assert_eq!(
2015 glm.endpoint(),
2016 "https://open.bigmodel.cn/api/coding/paas/v4/chat/completions"
2017 );
2018 let body = client.request_body(&user("hello"));
2019 assert_eq!(body["model"], "gpt-test");
2020 assert_eq!(body["messages"][0]["role"], "user");
2021 assert_eq!(body["messages"][0]["content"], "hello");
2022 assert!(body.get("tools").is_none());
2024 assert!(body.get("tool_choice").is_none());
2025
2026 let with_tools = ModelTurnInput {
2028 system_prompt: None,
2029 messages: vec![ChatMessage::User {
2030 content: "hello".into(),
2031 attachments: vec![],
2032 }],
2033 tools: vec![bash_spec()],
2034 tool_choice: ToolChoice::Auto,
2035 parallel_tool_calls: None,
2036 };
2037 let body = client.request_body(&with_tools);
2038 assert_eq!(body["tools"][0]["function"]["name"], "bash");
2039 assert_eq!(
2040 body["tools"][0]["function"]["parameters"]["required"][0],
2041 "command"
2042 );
2043 assert_eq!(body["tool_choice"], "auto");
2044 assert!(body.get("parallel_tool_calls").is_none());
2047 }
2048
2049 #[test]
2050 fn openai_client_emits_tool_choice_required() {
2051 let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2052 base_url: "https://example.test".into(),
2053 api_key: "sk-test".into(),
2054 model: "gpt-test".into(),
2055 temperature: None,
2056 max_tokens: None,
2057 reasoning_effort: None,
2058 });
2059 let body = client.request_body(&ModelTurnInput {
2060 system_prompt: None,
2061 messages: vec![ChatMessage::User {
2062 content: "go".into(),
2063 attachments: vec![],
2064 }],
2065 tools: vec![bash_spec()],
2066 tool_choice: ToolChoice::Required,
2067 parallel_tool_calls: Some(false),
2068 });
2069 assert_eq!(body["tool_choice"], "required");
2070 assert_eq!(body["parallel_tool_calls"], false);
2071 }
2072
2073 #[test]
2074 fn openai_client_emits_tool_choice_named_tool() {
2075 let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2076 base_url: "https://example.test".into(),
2077 api_key: "sk-test".into(),
2078 model: "gpt-test".into(),
2079 temperature: None,
2080 max_tokens: None,
2081 reasoning_effort: None,
2082 });
2083 let body = client.request_body(&ModelTurnInput {
2084 system_prompt: None,
2085 messages: vec![ChatMessage::User {
2086 content: "go".into(),
2087 attachments: vec![],
2088 }],
2089 tools: vec![bash_spec()],
2090 tool_choice: ToolChoice::Tool("bash".into()),
2091 parallel_tool_calls: None,
2092 });
2093 assert_eq!(body["tool_choice"]["type"], "function");
2094 assert_eq!(body["tool_choice"]["function"]["name"], "bash");
2095 }
2096
2097 #[test]
2098 fn openai_client_drops_tools_when_choice_is_none() {
2099 let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2103 base_url: "https://example.test".into(),
2104 api_key: "sk-test".into(),
2105 model: "gpt-test".into(),
2106 temperature: None,
2107 max_tokens: None,
2108 reasoning_effort: None,
2109 });
2110 let body = client.request_body(&ModelTurnInput {
2111 system_prompt: None,
2112 messages: vec![ChatMessage::User {
2113 content: "go".into(),
2114 attachments: vec![],
2115 }],
2116 tools: vec![bash_spec()],
2117 tool_choice: ToolChoice::None,
2118 parallel_tool_calls: None,
2119 });
2120 assert!(body.get("tools").is_none(), "tools should be dropped");
2121 assert!(body.get("tool_choice").is_none());
2122 }
2123
2124 #[test]
2125 fn tool_choice_parse_handles_canonical_strings() {
2126 assert!(matches!(ToolChoice::parse(""), ToolChoice::Auto));
2127 assert!(matches!(ToolChoice::parse("auto"), ToolChoice::Auto));
2128 assert!(matches!(ToolChoice::parse("AUTO"), ToolChoice::Auto));
2129 assert!(matches!(ToolChoice::parse("none"), ToolChoice::None));
2130 assert!(matches!(
2131 ToolChoice::parse("required"),
2132 ToolChoice::Required
2133 ));
2134 assert!(matches!(ToolChoice::parse("any"), ToolChoice::Required));
2135 match ToolChoice::parse("tool:bash") {
2136 ToolChoice::Tool(name) => assert_eq!(name, "bash"),
2137 other => panic!("expected Tool(bash), got {other:?}"),
2138 }
2139 assert!(matches!(ToolChoice::parse("garbage"), ToolChoice::Auto));
2141 }
2142
2143 #[test]
2144 fn openai_client_prepends_system_when_set() {
2145 let client = OpenAiCompatibleModelClient::new(OpenAiCompatibleConfig {
2146 base_url: "https://example.test".into(),
2147 api_key: "sk-test".into(),
2148 model: "gpt-test".into(),
2149 temperature: None,
2150 max_tokens: None,
2151 reasoning_effort: None,
2152 });
2153 let input = ModelTurnInput {
2154 system_prompt: Some("you are concise".into()),
2155 messages: vec![ChatMessage::User {
2156 content: "hi".into(),
2157 attachments: vec![],
2158 }],
2159 tools: vec![],
2160 tool_choice: ToolChoice::Auto,
2161 parallel_tool_calls: None,
2162 };
2163 let body = client.request_body(&input);
2164 assert_eq!(body["messages"][0]["role"], "system");
2165 assert_eq!(body["messages"][0]["content"], "you are concise");
2166 assert_eq!(body["messages"][1]["role"], "user");
2167 }
2168
2169 #[test]
2170 fn parse_openai_usage_extracts_token_counts() {
2171 let u = parse_openai_usage(Some(&json!({
2172 "prompt_tokens": 12,
2173 "completion_tokens": 7,
2174 "total_tokens": 19,
2175 "prompt_tokens_details": {"cached_tokens": 4}
2176 })))
2177 .expect("usage parsed");
2178 assert_eq!(u.input_tokens, 12);
2179 assert_eq!(u.output_tokens, 7);
2180 assert_eq!(u.cache_read_input_tokens, 4);
2181 assert_eq!(u.cache_creation_input_tokens, 0);
2182 }
2183
2184 #[test]
2185 fn parse_openai_usage_without_cache_details() {
2186 let u = parse_openai_usage(Some(&json!({
2187 "prompt_tokens": 200,
2188 "completion_tokens": 30
2189 })))
2190 .expect("usage parsed");
2191 assert_eq!(u.input_tokens, 200);
2192 assert_eq!(u.output_tokens, 30);
2193 assert_eq!(u.cache_read_input_tokens, 0);
2194 }
2195
2196 #[test]
2197 fn openai_stream_state_emits_text_deltas_then_done() {
2198 let mut state = OpenAiStreamState::default();
2199 let out = state
2201 .feed_data(
2202 r#"{"id":"chatcmpl-1","choices":[{"index":0,"delta":{"role":"assistant","content":""}}]}"#,
2203 )
2204 .unwrap();
2205 assert!(out.is_empty(), "empty content shouldn't emit");
2206 let out = state
2208 .feed_data(r#"{"choices":[{"index":0,"delta":{"content":"Hello"}}]}"#)
2209 .unwrap();
2210 assert_eq!(out.len(), 1);
2211 match &out[0] {
2212 ModelChunk::TextDelta { msg_id, delta } => {
2213 assert_eq!(msg_id, "chatcmpl-1");
2214 assert_eq!(delta, "Hello");
2215 }
2216 other => panic!("expected TextDelta, got {other:?}"),
2217 }
2218 let out = state
2219 .feed_data(r#"{"choices":[{"index":0,"delta":{"content":" world"}}]}"#)
2220 .unwrap();
2221 assert_eq!(out.len(), 1);
2222 let out = state
2225 .feed_data(r#"{"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}"#)
2226 .unwrap();
2227 assert!(out.is_empty());
2228 let out = state
2230 .feed_data(r#"{"choices":[],"usage":{"prompt_tokens":10,"completion_tokens":3}}"#)
2231 .unwrap();
2232 assert!(out.is_empty());
2233 let out = state.feed_data("[DONE]").unwrap();
2235 assert_eq!(out.len(), 1);
2236 match &out[0] {
2237 ModelChunk::Done { stop_reason, usage } => {
2238 assert_eq!(stop_reason, "end_turn");
2239 let u = usage.as_ref().expect("usage propagated");
2240 assert_eq!(u.input_tokens, 10);
2241 assert_eq!(u.output_tokens, 3);
2242 }
2243 other => panic!("expected Done, got {other:?}"),
2244 }
2245 }
2246
2247 #[test]
2248 fn openai_stream_state_emits_tool_call_chunks() {
2249 let mut state = OpenAiStreamState::default();
2250 let out = state
2252 .feed_data(
2253 r#"{"id":"c1","choices":[{"index":0,"delta":{"role":"assistant","tool_calls":[{"index":0,"id":"call_x","type":"function","function":{"name":"bash","arguments":""}}]}}]}"#,
2254 )
2255 .unwrap();
2256 assert_eq!(out.len(), 1);
2257 match &out[0] {
2258 ModelChunk::ToolCallStart { id, name } => {
2259 assert_eq!(id, "call_x");
2260 assert_eq!(name, "bash");
2261 }
2262 other => panic!("expected ToolCallStart, got {other:?}"),
2263 }
2264 let out = state
2266 .feed_data(
2267 r#"{"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]}}]}"#,
2268 )
2269 .unwrap();
2270 assert_eq!(out.len(), 1);
2271 let ModelChunk::ToolCallInputDelta { delta, .. } = &out[0] else {
2272 panic!("expected ToolCallInputDelta");
2273 };
2274 assert_eq!(delta, "{\"");
2275
2276 let out = state
2277 .feed_data(
2278 r#"{"choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"cmd\":\"pwd\"}"}}]}}]}"#,
2279 )
2280 .unwrap();
2281 let ModelChunk::ToolCallInputDelta { delta, .. } = &out[0] else {
2282 panic!("expected ToolCallInputDelta");
2283 };
2284 assert_eq!(delta, "cmd\":\"pwd\"}");
2285
2286 let out = state
2289 .feed_data(r#"{"choices":[{"index":0,"delta":{},"finish_reason":"tool_calls"}]}"#)
2290 .unwrap();
2291 assert_eq!(out.len(), 1);
2292 match &out[0] {
2293 ModelChunk::ToolCallEnd { id, input } => {
2294 assert_eq!(id, "call_x");
2295 assert!(input.is_none(), "OpenAI streaming defers parsing");
2296 }
2297 other => panic!("expected ToolCallEnd, got {other:?}"),
2298 }
2299
2300 let final_chunk = state.finalize().expect("finalize emits Done");
2302 match final_chunk {
2303 ModelChunk::Done { stop_reason, .. } => assert_eq!(stop_reason, "end_turn"),
2304 other => panic!("expected Done from finalize, got {other:?}"),
2305 }
2306 }
2307
2308 #[test]
2309 fn map_openai_finish_reason_table() {
2310 assert_eq!(map_openai_finish_reason(Some("stop")), "end_turn");
2311 assert_eq!(map_openai_finish_reason(Some("length")), "max_tokens");
2312 assert_eq!(map_openai_finish_reason(Some("tool_calls")), "end_turn");
2313 assert_eq!(map_openai_finish_reason(Some("content_filter")), "refusal");
2314 assert_eq!(map_openai_finish_reason(None), "end_turn");
2315 assert_eq!(map_openai_finish_reason(Some("")), "end_turn");
2316 }
2317
2318 #[test]
2321 fn chat_message_to_openai_wire_text_only_keeps_string_content() {
2322 let msg = ChatMessage::User {
2325 content: "hello".into(),
2326 attachments: vec![],
2327 };
2328 let v = chat_message_to_wire(&msg);
2329 assert_eq!(v["role"], "user");
2330 assert_eq!(v["content"], "hello");
2331 assert!(v["content"].is_string());
2334 }
2335
2336 #[test]
2337 fn chat_message_to_openai_wire_with_base64_image() {
2338 let msg = ChatMessage::User {
2339 content: "describe this".into(),
2340 attachments: vec![UserAttachment::Image(ImageSource {
2341 media_type: "image/png".into(),
2342 data: ImageData::Base64("iVBORw0KG...".into()),
2343 })],
2344 };
2345 let v = chat_message_to_wire(&msg);
2346 let parts = v["content"].as_array().expect("content array");
2347 assert_eq!(parts.len(), 2);
2348 assert_eq!(parts[0]["type"], "text");
2349 assert_eq!(parts[0]["text"], "describe this");
2350 assert_eq!(parts[1]["type"], "image_url");
2351 let url = parts[1]["image_url"]["url"].as_str().unwrap();
2353 assert!(url.starts_with("data:image/png;base64,"));
2354 assert!(url.contains("iVBORw0KG..."));
2355 }
2356
2357 #[test]
2358 fn chat_message_to_openai_wire_with_url_image() {
2359 let msg = ChatMessage::User {
2360 content: "".into(), attachments: vec![UserAttachment::Image(ImageSource {
2362 media_type: "image/jpeg".into(),
2363 data: ImageData::Url("https://cdn.example.com/cat.jpg".into()),
2364 })],
2365 };
2366 let v = chat_message_to_wire(&msg);
2367 let parts = v["content"].as_array().unwrap();
2368 assert_eq!(parts.len(), 1);
2370 assert_eq!(parts[0]["type"], "image_url");
2371 assert_eq!(
2372 parts[0]["image_url"]["url"],
2373 "https://cdn.example.com/cat.jpg"
2374 );
2375 }
2376
2377 #[test]
2378 fn chat_message_to_openai_tool_role_degrades_image_to_placeholder() {
2379 let msg = ChatMessage::Tool {
2384 tool_call_id: "call_x".into(),
2385 content: "ok".into(),
2386 is_error: false,
2387 attachments: vec![UserAttachment::Image(ImageSource {
2388 media_type: "image/png".into(),
2389 data: ImageData::Base64("AAA".into()),
2390 })],
2391 };
2392 let v = chat_message_to_wire(&msg);
2393 assert_eq!(v["role"], "tool");
2394 assert_eq!(v["tool_call_id"], "call_x");
2395 let content = v["content"].as_str().unwrap();
2396 assert!(content.starts_with("ok\n"));
2397 assert!(content.contains("image attached: image/png"));
2398 assert!(!content.contains("AAA"));
2401 }
2402
2403 #[test]
2406 fn replay_compaction_leaves_small_results_untouched() {
2407 let small = "x".repeat(1_000);
2408 assert!(matches!(
2409 compact_tool_result_for_replay(&small),
2410 std::borrow::Cow::Borrowed(_)
2411 ));
2412 let medium = "word ".repeat(2_000);
2415 let out = compact_tool_result_for_replay(&medium);
2416 assert!(out.contains("compacted for model replay"));
2417 }
2418
2419 #[test]
2420 fn replay_compaction_keeps_head_and_tail_deterministically() {
2421 let body = format!("HEAD_MARK{}TAIL_MARK", "x".repeat(20_000));
2422 let first = compact_tool_result_for_replay(&body).into_owned();
2423 let second = compact_tool_result_for_replay(&body).into_owned();
2424 assert_eq!(first, second);
2426 assert!(first.starts_with("[tool result compacted for model replay]"));
2427 assert!(first.contains("HEAD_MARK"), "head survives");
2428 assert!(first.contains("TAIL_MARK"), "tail survives");
2429 assert!(first.contains("omitted"), "omission marker present");
2430 assert!(first.len() < body.len() / 2);
2432 }
2433
2434 #[test]
2435 fn openai_projection_compacts_oversized_tool_result() {
2436 let big = format!("START{}END", "y".repeat(20_000));
2437 let msg = ChatMessage::Tool {
2438 tool_call_id: "call_big".into(),
2439 content: big.clone(),
2440 is_error: false,
2441 attachments: vec![],
2442 };
2443 let v = chat_message_to_wire(&msg);
2444 let content = v["content"].as_str().unwrap();
2445 assert!(content.contains("compacted for model replay"));
2446 assert!(content.contains("START") && content.contains("END"));
2447 match &msg {
2449 ChatMessage::Tool { content, .. } => assert_eq!(content.len(), big.len()),
2450 _ => unreachable!(),
2451 }
2452 }
2453
2454 #[test]
2455 fn anthropic_projection_compacts_oversized_tool_result() {
2456 let big = "z".repeat(20_000);
2457 let msgs = vec![
2458 ChatMessage::Assistant {
2459 text: None,
2460 tool_calls: vec![crate::tools::ToolInvocation {
2461 id: "tc_big".into(),
2462 name: "bash".into(),
2463 input: json!({}),
2464 }],
2465 thinking: None,
2466 },
2467 ChatMessage::Tool {
2468 tool_call_id: "tc_big".into(),
2469 content: big,
2470 is_error: false,
2471 attachments: vec![],
2472 },
2473 ];
2474 let wire = chat_messages_to_anthropic_messages(&msgs);
2475 let rendered = serde_json::to_string(&wire).unwrap();
2476 assert!(rendered.contains("compacted for model replay"));
2477 }
2478
2479 #[test]
2480 fn chat_messages_to_anthropic_tool_result_carries_image_block() {
2481 let msgs = vec![
2486 ChatMessage::Assistant {
2487 text: None,
2488 tool_calls: vec![ToolInvocation {
2489 id: "tc_img".into(),
2490 name: "screenshot".into(),
2491 input: json!({}),
2492 }],
2493 thinking: None,
2494 },
2495 ChatMessage::Tool {
2496 tool_call_id: "tc_img".into(),
2497 content: "see image".into(),
2498 is_error: false,
2499 attachments: vec![UserAttachment::Image(ImageSource {
2500 media_type: "image/png".into(),
2501 data: ImageData::Base64("PNGBYTES".into()),
2502 })],
2503 },
2504 ];
2505 let out = chat_messages_to_anthropic_messages(&msgs);
2506 assert_eq!(out.len(), 2);
2508 let user = &out[1];
2509 assert_eq!(user["role"], "user");
2510 let outer = user["content"].as_array().unwrap();
2511 assert_eq!(outer.len(), 1);
2512 assert_eq!(outer[0]["type"], "tool_result");
2513 assert_eq!(outer[0]["tool_use_id"], "tc_img");
2514 let inner = outer[0]["content"].as_array().unwrap();
2515 assert_eq!(inner.len(), 2);
2518 assert_eq!(inner[0]["type"], "text");
2519 assert_eq!(inner[0]["text"], "see image");
2520 assert_eq!(inner[1]["type"], "image");
2521 assert_eq!(inner[1]["source"]["type"], "base64");
2522 assert_eq!(inner[1]["source"]["media_type"], "image/png");
2523 assert_eq!(inner[1]["source"]["data"], "PNGBYTES");
2524 }
2525
2526 #[test]
2527 fn chat_messages_to_anthropic_renders_user_text_with_image_block() {
2528 let msgs = vec![ChatMessage::User {
2529 content: "what is this".into(),
2530 attachments: vec![UserAttachment::Image(ImageSource {
2531 media_type: "image/png".into(),
2532 data: ImageData::Base64("AAAA".into()),
2533 })],
2534 }];
2535 let out = chat_messages_to_anthropic_messages(&msgs);
2536 assert_eq!(out.len(), 1);
2537 let blocks = out[0]["content"].as_array().unwrap();
2538 assert_eq!(blocks[0]["type"], "text");
2540 assert_eq!(blocks[0]["text"], "what is this");
2541 assert_eq!(blocks[1]["type"], "image");
2542 assert_eq!(blocks[1]["source"]["type"], "base64");
2544 assert_eq!(blocks[1]["source"]["media_type"], "image/png");
2545 assert_eq!(blocks[1]["source"]["data"], "AAAA");
2546 }
2547
2548 #[test]
2549 fn chat_messages_to_anthropic_renders_url_image() {
2550 let msgs = vec![ChatMessage::User {
2551 content: "".into(),
2552 attachments: vec![UserAttachment::Image(ImageSource {
2553 media_type: "image/jpeg".into(),
2554 data: ImageData::Url("https://example.com/x.jpg".into()),
2555 })],
2556 }];
2557 let out = chat_messages_to_anthropic_messages(&msgs);
2558 let blocks = out[0]["content"].as_array().unwrap();
2559 assert_eq!(blocks.len(), 1);
2561 assert_eq!(blocks[0]["type"], "image");
2562 assert_eq!(blocks[0]["source"]["type"], "url");
2563 assert_eq!(blocks[0]["source"]["url"], "https://example.com/x.jpg");
2564 }
2565
2566 #[test]
2567 fn chat_message_to_anthropic_merges_tool_results_and_image() {
2568 let msgs = vec![
2571 ChatMessage::Assistant {
2572 text: None,
2573 tool_calls: vec![ToolInvocation {
2574 id: "tc_1".into(),
2575 name: "screenshot".into(),
2576 input: json!({}),
2577 }],
2578 thinking: None,
2579 },
2580 ChatMessage::Tool {
2581 tool_call_id: "tc_1".into(),
2582 content: "captured".into(),
2583 is_error: false,
2584 attachments: vec![],
2585 },
2586 ChatMessage::User {
2587 content: "what changed?".into(),
2588 attachments: vec![UserAttachment::Image(ImageSource {
2589 media_type: "image/png".into(),
2590 data: ImageData::Base64("ZZ".into()),
2591 })],
2592 },
2593 ];
2594 let out = chat_messages_to_anthropic_messages(&msgs);
2595 assert_eq!(out.len(), 2);
2597 let blocks = out[1]["content"].as_array().unwrap();
2598 assert_eq!(blocks.len(), 3);
2599 assert_eq!(blocks[0]["type"], "tool_result");
2600 assert_eq!(blocks[0]["tool_use_id"], "tc_1");
2601 assert_eq!(blocks[1]["type"], "text");
2602 assert_eq!(blocks[1]["text"], "what changed?");
2603 assert_eq!(blocks[2]["type"], "image");
2604 }
2605
2606 #[test]
2607 fn chat_messages_to_anthropic_renders_simple_user_assistant() {
2608 let msgs = vec![
2609 ChatMessage::User {
2610 content: "hi".into(),
2611 attachments: vec![],
2612 },
2613 ChatMessage::Assistant {
2614 text: Some("hello".into()),
2615 tool_calls: vec![],
2616 thinking: None,
2617 },
2618 ];
2619 let out = chat_messages_to_anthropic_messages(&msgs);
2620 assert_eq!(out.len(), 2);
2621 assert_eq!(out[0]["role"], "user");
2622 assert_eq!(out[0]["content"][0]["type"], "text");
2623 assert_eq!(out[0]["content"][0]["text"], "hi");
2624 assert_eq!(out[1]["role"], "assistant");
2625 assert_eq!(out[1]["content"][0]["text"], "hello");
2626 }
2627
2628 #[test]
2629 fn chat_messages_to_anthropic_folds_tool_results_into_next_user() {
2630 let msgs = vec![
2634 ChatMessage::User {
2635 content: "do it".into(),
2636 attachments: vec![],
2637 },
2638 ChatMessage::Assistant {
2639 text: None,
2640 tool_calls: vec![ToolInvocation {
2641 id: "call_1".into(),
2642 name: "bash".into(),
2643 input: json!({"command": "pwd"}),
2644 }],
2645 thinking: None,
2646 },
2647 ChatMessage::Tool {
2648 tool_call_id: "call_1".into(),
2649 content: "{\"stdout\":\"/\"}".into(),
2650 is_error: false,
2651 attachments: vec![],
2652 },
2653 ChatMessage::User {
2654 content: "explain".into(),
2655 attachments: vec![],
2656 },
2657 ];
2658 let out = chat_messages_to_anthropic_messages(&msgs);
2659 assert_eq!(out.len(), 3);
2661 assert_eq!(out[1]["role"], "assistant");
2662 assert_eq!(out[1]["content"][0]["type"], "tool_use");
2663 assert_eq!(out[1]["content"][0]["id"], "call_1");
2664 assert_eq!(out[2]["role"], "user");
2665 assert_eq!(out[2]["content"][0]["type"], "tool_result");
2666 assert_eq!(out[2]["content"][0]["tool_use_id"], "call_1");
2667 assert_eq!(out[2]["content"][1]["type"], "text");
2668 assert_eq!(out[2]["content"][1]["text"], "explain");
2669 }
2670
2671 #[test]
2672 fn chat_messages_to_anthropic_renders_thinking_then_text_then_tool_use() {
2673 let msgs = vec![ChatMessage::Assistant {
2674 text: Some("preface".into()),
2675 tool_calls: vec![ToolInvocation {
2676 id: "t".into(),
2677 name: "n".into(),
2678 input: json!({"a": 1}),
2679 }],
2680 thinking: Some(AssistantThinking {
2681 text: "deep thought".into(),
2682 signature: Some("sig123".into()),
2683 }),
2684 }];
2685 let out = chat_messages_to_anthropic_messages(&msgs);
2686 let blocks = out[0]["content"].as_array().unwrap();
2687 assert_eq!(blocks[0]["type"], "thinking");
2689 assert_eq!(blocks[0]["thinking"], "deep thought");
2690 assert_eq!(blocks[0]["signature"], "sig123");
2691 assert_eq!(blocks[1]["type"], "text");
2692 assert_eq!(blocks[1]["text"], "preface");
2693 assert_eq!(blocks[2]["type"], "tool_use");
2694 }
2695
2696 #[test]
2697 fn chat_messages_to_anthropic_trailing_tool_results_flushed() {
2698 let msgs = vec![
2701 ChatMessage::Assistant {
2702 text: None,
2703 tool_calls: vec![ToolInvocation {
2704 id: "t".into(),
2705 name: "n".into(),
2706 input: json!({}),
2707 }],
2708 thinking: None,
2709 },
2710 ChatMessage::Tool {
2711 tool_call_id: "t".into(),
2712 content: "ok".into(),
2713 is_error: false,
2714 attachments: vec![],
2715 },
2716 ];
2717 let out = chat_messages_to_anthropic_messages(&msgs);
2718 assert_eq!(out.len(), 2);
2719 assert_eq!(out[1]["role"], "user");
2720 assert_eq!(out[1]["content"][0]["type"], "tool_result");
2721 }
2722
/// With a system prompt, two tools and two messages, the cache strategy is
/// expected to put an `ephemeral` `cache_control` marker on the first system
/// block, on the last tool only, and on the last content block of the last
/// message.
#[test]
fn apply_anthropic_cache_strategy_marks_system_last_tool_and_last_message() {
    let system = anthropic_system_field(Some("system prompt"));
    let tools = ["a", "b"]
        .iter()
        .map(|name| json!({"name": name, "description": "", "input_schema": {"type": "object"}}))
        .collect::<Vec<_>>();
    let user_turn = json!({"role": "user", "content": [{"type": "text", "text": "hi"}]});
    let assistant_turn =
        json!({"role": "assistant", "content": [{"type": "text", "text": "hello"}]});

    let cached = apply_anthropic_cache_strategy(system, tools, vec![user_turn, assistant_turn]);

    // System prompt: first block is marked.
    let system_blocks = cached.system.as_ref().unwrap();
    assert_eq!(system_blocks[0]["cache_control"]["type"], "ephemeral");

    // Tools: only the last one is marked.
    assert!(cached.tools[0].get("cache_control").is_none());
    assert_eq!(cached.tools[1]["cache_control"]["type"], "ephemeral");

    // Messages: the last block of the last message is marked.
    let tail_message = cached.messages.last().unwrap();
    let tail_blocks = tail_message["content"].as_array().unwrap();
    let tail_block = tail_blocks.last().unwrap();
    assert_eq!(tail_block["cache_control"]["type"], "ephemeral");
}
2747
/// With no system prompt, no tools and no messages, the strategy must leave
/// `system` as `None`.
#[test]
fn apply_anthropic_cache_strategy_skips_empty_system() {
    let cached = apply_anthropic_cache_strategy(None, Vec::new(), Vec::new());
    assert!(cached.system.is_none());
}
2755
2756 fn anthropic_client_for_tool_choice_tests() -> AnthropicModelClient {
2757 AnthropicModelClient::new(AnthropicConfig {
2758 base_url: "https://example.test".into(),
2759 api_key: "sk-test".into(),
2760 model: "claude-test".into(),
2761 max_tokens: 1024,
2762 temperature: None,
2763 anthropic_version: AnthropicConfig::DEFAULT_VERSION.into(),
2764 })
2765 }
2766
/// With `ToolChoice::Auto` and no parallel-tool-call preference, the request
/// body must still carry the tool list but must contain neither a
/// `tool_choice` nor a `parallel_tool_calls` key.
#[test]
fn anthropic_client_omits_tool_choice_when_auto() {
    let client = anthropic_client_for_tool_choice_tests();
    let body = client.request_body(&ModelTurnInput {
        system_prompt: None,
        messages: vec![ChatMessage::User {
            content: "go".into(),
            attachments: vec![],
        }],
        tools: vec![bash_spec()],
        tool_choice: ToolChoice::Auto,
        parallel_tool_calls: None,
    });
    // The supplied tool spec must be present in the body.
    assert!(!body["tools"].as_array().unwrap().is_empty());
    // Both keys must be absent entirely, not merely null.
    assert!(body.get("tool_choice").is_none());
    assert!(body.get("parallel_tool_calls").is_none());
}
2788
/// `ToolChoice::Required` must be rendered as `tool_choice.type == "any"`.
/// The body must also have no `parallel_tool_calls` key even though
/// `Some(true)` was requested on the input.
#[test]
fn anthropic_client_emits_tool_choice_required_as_any() {
    let turn = ModelTurnInput {
        system_prompt: None,
        messages: vec![ChatMessage::User {
            content: "go".into(),
            attachments: vec![],
        }],
        tools: vec![bash_spec()],
        tool_choice: ToolChoice::Required,
        parallel_tool_calls: Some(true),
    };

    let request = anthropic_client_for_tool_choice_tests().request_body(&turn);

    assert_eq!(request["tool_choice"]["type"], "any");
    assert!(request.get("parallel_tool_calls").is_none());
}
2807
/// `ToolChoice::Tool("bash")` must be rendered as a `tool_choice` object with
/// `type == "tool"` and `name == "bash"`.
#[test]
fn anthropic_client_emits_tool_choice_named_tool() {
    let turn = ModelTurnInput {
        system_prompt: None,
        messages: vec![ChatMessage::User {
            content: "go".into(),
            attachments: vec![],
        }],
        tools: vec![bash_spec()],
        tool_choice: ToolChoice::Tool("bash".into()),
        parallel_tool_calls: None,
    };

    let request = anthropic_client_for_tool_choice_tests().request_body(&turn);

    let choice = &request["tool_choice"];
    assert_eq!(choice["type"], "tool");
    assert_eq!(choice["name"], "bash");
}
2824
/// With `ToolChoice::None`, the request body must contain neither a `tools`
/// key nor a `tool_choice` key, even though a tool spec was supplied.
#[test]
fn anthropic_client_drops_tools_when_choice_is_none() {
    let turn = ModelTurnInput {
        system_prompt: None,
        messages: vec![ChatMessage::User {
            content: "go".into(),
            attachments: vec![],
        }],
        tools: vec![bash_spec()],
        tool_choice: ToolChoice::None,
        parallel_tool_calls: None,
    };

    let request = anthropic_client_for_tool_choice_tests().request_body(&turn);

    for absent_key in ["tools", "tool_choice"].iter() {
        assert!(request.get(*absent_key).is_none());
    }
}
2844
/// Drives the stream state through a text-only message:
/// `message_start` -> `content_block_start` -> `content_block_delta` ->
/// `message_delta` -> `message_stop`.
///
/// Checks that the text delta is emitted with the message id taken from
/// `message_start`, and that `message_stop` yields a single `Done` chunk
/// whose usage combines the input tokens from `message_start` with the
/// output tokens from `message_delta`.
#[test]
fn anthropic_stream_state_text_only() {
    let mut s = AnthropicStreamState::default();
    let _ = s
        .feed_event(
            "message_start",
            r#"{"type":"message_start","message":{"id":"msg_01","usage":{"input_tokens":10,"output_tokens":0}}}"#,
        )
        .unwrap();
    let _ = s
        .feed_event(
            "content_block_start",
            r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#,
        )
        .unwrap();
    let out = s
        .feed_event(
            "content_block_delta",
            r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}"#,
        )
        .unwrap();
    assert_eq!(out.len(), 1);
    match &out[0] {
        ModelChunk::TextDelta { msg_id, delta } => {
            assert_eq!(msg_id, "msg_01");
            assert_eq!(delta, "Hello");
        }
        other => panic!("expected TextDelta, got {other:?}"),
    }
    // Unwrap like every other feed in this test: a parse failure here should
    // be reported at this call rather than as a confusing stop-reason/usage
    // mismatch in the `Done` assertions below.
    let _ = s
        .feed_event(
            "message_delta",
            r#"{"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":5}}"#,
        )
        .unwrap();
    let out = s
        .feed_event("message_stop", r#"{"type":"message_stop"}"#)
        .unwrap();
    assert_eq!(out.len(), 1);
    match &out[0] {
        ModelChunk::Done { stop_reason, usage } => {
            assert_eq!(stop_reason, "end_turn");
            let u = usage.as_ref().unwrap();
            assert_eq!(u.input_tokens, 10);
            assert_eq!(u.output_tokens, 5);
        }
        other => panic!("expected Done, got {other:?}"),
    }
}
2893
/// Streams a thinking block and checks both delta flavours:
/// a `thinking_delta` yields a `ThinkingDelta` with the text and no
/// signature, and a `signature_delta` yields a `ThinkingDelta` with an empty
/// text delta and the signature set.
#[test]
fn anthropic_stream_state_thinking_block_emits_delta_and_signature() {
    let mut state = AnthropicStreamState::default();

    // Setup events; their results are intentionally not inspected.
    let _ = state.feed_event(
        "message_start",
        r#"{"type":"message_start","message":{"id":"msg_t"}}"#,
    );
    let _ = state.feed_event(
        "content_block_start",
        r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
    );

    let text_chunks = state
        .feed_event(
            "content_block_delta",
            r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"reasoning..."}}"#,
        )
        .unwrap();
    assert_eq!(text_chunks.len(), 1);
    match &text_chunks[0] {
        ModelChunk::ThinkingDelta {
            delta, signature, ..
        } => {
            assert_eq!(delta, "reasoning...");
            assert!(signature.is_none());
        }
        _ => panic!("expected ThinkingDelta"),
    }

    let signature_chunks = state
        .feed_event(
            "content_block_delta",
            r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"sig_abc"}}"#,
        )
        .unwrap();
    match &signature_chunks[0] {
        ModelChunk::ThinkingDelta {
            delta, signature, ..
        } => {
            assert_eq!(delta, "");
            assert_eq!(signature.as_deref(), Some("sig_abc"));
        }
        _ => panic!("expected ThinkingDelta"),
    }
}
2936
/// Streams a `tool_use` block and checks the three chunks it produces:
/// `ToolCallStart` (id and name from the block start), `ToolCallInputDelta`
/// (the raw partial JSON, attributed to the same id), and `ToolCallEnd` with
/// `input` left as `None` on block stop.
#[test]
fn anthropic_stream_state_tool_use_streamed_input() {
    let mut state = AnthropicStreamState::default();
    let _ = state.feed_event(
        "message_start",
        r#"{"type":"message_start","message":{"id":"msg_x"}}"#,
    );

    // Block start announces the tool call.
    let started = state
        .feed_event(
            "content_block_start",
            r#"{"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_1","name":"bash","input":{}}}"#,
        )
        .unwrap();
    assert_eq!(started.len(), 1);
    match &started[0] {
        ModelChunk::ToolCallStart { id, name } => {
            assert_eq!(id, "toolu_1");
            assert_eq!(name, "bash");
        }
        other => panic!("expected ToolCallStart, got {other:?}"),
    }

    // Partial JSON is forwarded verbatim.
    let streamed = state
        .feed_event(
            "content_block_delta",
            r#"{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"cmd\":"}}"#,
        )
        .unwrap();
    match &streamed[0] {
        ModelChunk::ToolCallInputDelta { id, delta } => {
            assert_eq!(id, "toolu_1");
            assert_eq!(delta, "{\"cmd\":");
        }
        _ => panic!("expected ToolCallInputDelta"),
    }

    // Block stop closes the call without a parsed input.
    let stopped = state
        .feed_event(
            "content_block_stop",
            r#"{"type":"content_block_stop","index":0}"#,
        )
        .unwrap();
    match &stopped[0] {
        ModelChunk::ToolCallEnd { id, input } => {
            assert_eq!(id, "toolu_1");
            assert!(input.is_none());
        }
        other => panic!("expected ToolCallEnd, got {other:?}"),
    }
}
2984
/// Feeds only a `message_delta` (no `message_stop`) and checks that
/// `finalize()` still produces a `Done` chunk carrying the stop reason from
/// that delta.
#[test]
fn anthropic_stream_state_finalises_on_close_without_message_stop() {
    let mut state = AnthropicStreamState::default();
    let _ = state
        .feed_event(
            "message_delta",
            r#"{"type":"message_delta","delta":{"stop_reason":"max_tokens"},"usage":{"output_tokens":100}}"#,
        )
        .unwrap();

    match state.finalize().unwrap() {
        ModelChunk::Done { stop_reason, .. } => assert_eq!(stop_reason, "max_tokens"),
        other => panic!("expected Done, got {other:?}"),
    }
}
3002
/// Table of raw stop reasons and the value `map_anthropic_stop_reason` must
/// return for each; a missing reason (`None`) is included.
#[test]
fn anthropic_stop_reason_mapping() {
    let cases = [
        (Some("end_turn"), "end_turn"),
        (Some("tool_use"), "end_turn"),
        (Some("max_tokens"), "max_tokens"),
        (Some("stop_sequence"), "end_turn"),
        (Some("refusal"), "refusal"),
        (None, "end_turn"),
    ];
    for (raw, expected) in cases {
        assert_eq!(map_anthropic_stop_reason(raw), expected);
    }
}
3012
/// Checks which `ModelClientError` variant each HTTP status/body pair is
/// classified as: 429 -> `RateLimit`, 401 -> `Auth`, 400 with a
/// context-length message -> `ContextOverflow`, any other 400 ->
/// `BadRequest`, 500 -> `ServerError`.
#[test]
fn classify_anthropic_http_error_buckets_by_status() {
    use reqwest::StatusCode;

    let rate_limited = classify_anthropic_http_error(StatusCode::TOO_MANY_REQUESTS, "{}");
    assert!(matches!(rate_limited, ModelClientError::RateLimit(_)));

    let unauthorized = classify_anthropic_http_error(StatusCode::UNAUTHORIZED, "{}");
    assert!(matches!(unauthorized, ModelClientError::Auth(_)));

    // A 400 whose body mentions the context length is singled out.
    let overflow = classify_anthropic_http_error(
        StatusCode::BAD_REQUEST,
        "{\"error\":{\"message\":\"prompt is too long; context_length_exceeded\"}}",
    );
    assert!(matches!(overflow, ModelClientError::ContextOverflow(_)));

    let bad_request = classify_anthropic_http_error(StatusCode::BAD_REQUEST, "invalid model");
    assert!(matches!(bad_request, ModelClientError::BadRequest(_)));

    let server_side = classify_anthropic_http_error(StatusCode::INTERNAL_SERVER_ERROR, "oops");
    assert!(matches!(server_side, ModelClientError::ServerError(_)));
}
3040
3041 #[tokio::test]
3042 async fn collect_model_response_folds_streamed_tool_call_arguments() {
3043 let chunks = vec![
3047 Ok(ModelChunk::TextDelta {
3048 msg_id: "m".into(),
3049 delta: "ok ".into(),
3050 }),
3051 Ok(ModelChunk::ToolCallStart {
3052 id: "call_1".into(),
3053 name: "bash".into(),
3054 }),
3055 Ok(ModelChunk::ToolCallInputDelta {
3056 id: "call_1".into(),
3057 delta: "{\"command\":".into(),
3058 }),
3059 Ok(ModelChunk::ToolCallInputDelta {
3060 id: "call_1".into(),
3061 delta: "\"pwd\"}".into(),
3062 }),
3063 Ok(ModelChunk::ToolCallEnd {
3064 id: "call_1".into(),
3065 input: None,
3066 }),
3067 Ok(ModelChunk::Done {
3068 stop_reason: "end_turn".into(),
3069 usage: None,
3070 }),
3071 ];
3072 let stream = futures::stream::iter(chunks).boxed();
3073 let response = collect_model_response(stream).await.unwrap();
3074 let ModelResponse::ToolCall {
3075 invocation,
3076 preface,
3077 ..
3078 } = response
3079 else {
3080 panic!("expected ToolCall");
3081 };
3082 assert_eq!(invocation.name, "bash");
3083 assert_eq!(invocation.input["command"], "pwd");
3084 assert_eq!(preface.as_deref(), Some("ok "));
3085 }
3086
/// Checks which `ModelClientError` variant each HTTP status/body pair is
/// classified as: 429 -> `RateLimit`, 401 and 403 -> `Auth`, 400 with a
/// maximum-context-length message -> `ContextOverflow`, any other 400 ->
/// `BadRequest`, 500 -> `ServerError`.
#[test]
fn classify_openai_http_error_buckets_by_status_and_body() {
    use reqwest::StatusCode;

    let rate_limited = classify_openai_http_error(StatusCode::TOO_MANY_REQUESTS, "rate limit hit");
    assert!(matches!(rate_limited, ModelClientError::RateLimit(_)));

    // Both 401 and 403 land in the auth bucket.
    let unauthorized = classify_openai_http_error(StatusCode::UNAUTHORIZED, "bad key");
    assert!(matches!(unauthorized, ModelClientError::Auth(_)));
    let forbidden = classify_openai_http_error(StatusCode::FORBIDDEN, "no access");
    assert!(matches!(forbidden, ModelClientError::Auth(_)));

    // A 400 whose body mentions the maximum context length is singled out.
    let overflow = classify_openai_http_error(
        StatusCode::BAD_REQUEST,
        "{\"error\":{\"message\":\"this model's maximum context length is 8192\"}}",
    );
    assert!(matches!(overflow, ModelClientError::ContextOverflow(_)));

    let bad_request = classify_openai_http_error(StatusCode::BAD_REQUEST, "missing argument");
    assert!(matches!(bad_request, ModelClientError::BadRequest(_)));

    let server_side = classify_openai_http_error(StatusCode::INTERNAL_SERVER_ERROR, "oops");
    assert!(matches!(server_side, ModelClientError::ServerError(_)));
}
3120
/// Three differently worded overflow messages must be recognised, and an
/// unrelated error message must not be.
#[test]
fn looks_like_context_overflow_matches_common_phrasings() {
    let overflow_messages = [
        "context_length_exceeded: this model has a maximum context length of 8192",
        "too many tokens in prompt",
        "Prompt exceeds the model's maximum context",
    ];
    for message in overflow_messages {
        assert!(looks_like_context_overflow(message));
    }

    assert!(!looks_like_context_overflow("invalid api key"));
}
3132
/// `parse_openai_usage` must return `None` both when no usage object is
/// supplied and when the supplied object reports zero prompt and completion
/// tokens.
#[test]
fn parse_openai_usage_returns_none_for_missing_or_all_zero() {
    // Missing usage object.
    assert!(parse_openai_usage(None).is_none());

    // Usage object present but all counters are zero.
    let zeroed = json!({
        "prompt_tokens": 0,
        "completion_tokens": 0
    });
    assert!(parse_openai_usage(Some(&zeroed)).is_none());
}
3144
/// Builds a request body from a user -> assistant(tool call) -> tool history
/// and checks that the configured `temperature` and `max_tokens` are
/// forwarded and that each message is rendered with the expected role, tool
/// call id and function name.
#[test]
fn openai_client_renders_multi_turn_history() {
    let config = OpenAiCompatibleConfig {
        base_url: "https://example.test".into(),
        api_key: "sk-test".into(),
        model: "gpt-test".into(),
        temperature: Some(0.2),
        max_tokens: Some(128),
        reasoning_effort: None,
    };
    let client = OpenAiCompatibleModelClient::new(config);

    let history = vec![
        ChatMessage::User {
            content: "run pwd".into(),
            attachments: vec![],
        },
        ChatMessage::Assistant {
            text: None,
            tool_calls: vec![ToolInvocation {
                id: "call_1".into(),
                name: "bash".into(),
                input: json!({"command": "pwd"}),
            }],
            thinking: None,
        },
        ChatMessage::Tool {
            tool_call_id: "call_1".into(),
            content: "{\"stdout\":\"/home/user\"}".into(),
            is_error: false,
            attachments: vec![],
        },
    ];
    let turn = ModelTurnInput {
        system_prompt: None,
        messages: history,
        tools: vec![],
        tool_choice: ToolChoice::Auto,
        parallel_tool_calls: None,
    };

    let body = client.request_body(&turn);

    // Sampling settings come straight from the config.
    assert_eq!(body["temperature"], 0.2);
    assert_eq!(body["max_tokens"], 128);

    // One rendered message per history entry, in order.
    let rendered = &body["messages"];
    assert_eq!(rendered[0]["role"], "user");

    let assistant = &rendered[1];
    assert_eq!(assistant["role"], "assistant");
    let call = &assistant["tool_calls"][0];
    assert_eq!(call["id"], "call_1");
    assert_eq!(call["function"]["name"], "bash");

    let tool_reply = &rendered[2];
    assert_eq!(tool_reply["role"], "tool");
    assert_eq!(tool_reply["tool_call_id"], "call_1");
}
3194
3195 #[tokio::test]
3196 async fn scripted_client_emits_tool_call_then_summary() {
3197 let scripted = ScriptedModelClient;
3198 let first = scripted
3199 .next(user("read README.md"))
3200 .await
3201 .expect("scripted first");
3202 let ModelResponse::ToolCall { invocation, .. } = first else {
3203 panic!("expected tool call on first step");
3204 };
3205 assert_eq!(invocation.name, "read");
3206
3207 let history = ModelTurnInput {
3210 system_prompt: None,
3211 messages: vec![
3212 ChatMessage::User {
3213 content: "read README.md".into(),
3214 attachments: vec![],
3215 },
3216 ChatMessage::Assistant {
3217 text: None,
3218 tool_calls: vec![invocation.clone()],
3219 thinking: None,
3220 },
3221 ChatMessage::Tool {
3222 tool_call_id: invocation.id.clone(),
3223 content: "{\"content\":\"hi\"}".into(),
3224 is_error: false,
3225 attachments: vec![],
3226 },
3227 ],
3228 tools: vec![],
3229 tool_choice: ToolChoice::Auto,
3230 parallel_tool_calls: None,
3231 };
3232 let second = scripted.next(history).await.expect("scripted second");
3233 let ModelResponse::Message { text, .. } = second else {
3234 panic!("expected final message after tool result");
3235 };
3236 assert!(text.contains("completed"));
3237 }
3238}