Skip to main content

a3s_code_core/llm/
anthropic.rs

1//! Anthropic Claude LLM client
2
3use super::http::{default_http_client, normalize_base_url, HttpClient};
4use super::structured;
5use super::types::*;
6use super::LlmClient;
7use crate::retry::{AttemptOutcome, RetryConfig};
8use anyhow::{Context, Result};
9use async_trait::async_trait;
10use futures::StreamExt;
11use serde::Deserialize;
12use std::sync::Arc;
13use std::time::Instant;
14use tokio::sync::mpsc;
15use tokio_util::sync::CancellationToken;
16
/// Value used for the `max_tokens` request field when the caller does not
/// override it through `AnthropicClient::with_max_tokens`.
pub(crate) const DEFAULT_MAX_TOKENS: usize = 8_192;
19
20/// Anthropic Claude client
21pub struct AnthropicClient {
22    pub(crate) provider_name: String,
23    pub(crate) api_key: SecretString,
24    pub(crate) model: String,
25    pub(crate) base_url: String,
26    pub(crate) max_tokens: usize,
27    pub(crate) temperature: Option<f32>,
28    pub(crate) thinking_budget: Option<usize>,
29    pub(crate) http: Arc<dyn HttpClient>,
30    pub(crate) retry_config: RetryConfig,
31}
32
33impl AnthropicClient {
34    pub fn new(api_key: String, model: String) -> Self {
35        Self {
36            provider_name: "anthropic".to_string(),
37            api_key: SecretString::new(api_key),
38            model,
39            base_url: "https://api.anthropic.com".to_string(),
40            max_tokens: DEFAULT_MAX_TOKENS,
41            temperature: None,
42            thinking_budget: None,
43            http: default_http_client(),
44            retry_config: RetryConfig::default(),
45        }
46    }
47
48    pub fn with_base_url(mut self, base_url: String) -> Self {
49        self.base_url = normalize_base_url(&base_url);
50        self
51    }
52
53    pub fn with_provider_name(mut self, provider_name: impl Into<String>) -> Self {
54        self.provider_name = provider_name.into();
55        self
56    }
57
58    pub fn with_max_tokens(mut self, max_tokens: usize) -> Self {
59        self.max_tokens = max_tokens;
60        self
61    }
62
63    pub fn with_temperature(mut self, temperature: f32) -> Self {
64        self.temperature = Some(temperature);
65        self
66    }
67
68    pub fn with_thinking_budget(mut self, budget: usize) -> Self {
69        self.thinking_budget = Some(budget);
70        self
71    }
72
73    pub fn with_retry_config(mut self, retry_config: RetryConfig) -> Self {
74        self.retry_config = retry_config;
75        self
76    }
77
78    pub fn with_http_client(mut self, http: Arc<dyn HttpClient>) -> Self {
79        self.http = http;
80        self
81    }
82
83    fn initial_tool_input_json(input: &serde_json::Value) -> Option<String> {
84        match input {
85            serde_json::Value::Object(map) if map.is_empty() => None,
86            serde_json::Value::Null => None,
87            value => serde_json::to_string(value).ok(),
88        }
89    }
90
91    pub(crate) fn build_request(
92        &self,
93        messages: &[Message],
94        system: Option<&str>,
95        tools: &[ToolDefinition],
96    ) -> serde_json::Value {
97        let mut request = serde_json::json!({
98            "model": self.model,
99            "max_tokens": self.max_tokens,
100            "messages": messages,
101        });
102
103        // System prompt with cache_control for prompt caching.
104        // Anthropic caches system content blocks marked with
105        // `cache_control: { type: "ephemeral" }`.
106        if let Some(sys) = system {
107            request["system"] = serde_json::json!([
108                {
109                    "type": "text",
110                    "text": sys,
111                    "cache_control": { "type": "ephemeral" }
112                }
113            ]);
114        }
115
116        if !tools.is_empty() {
117            let mut tool_defs: Vec<serde_json::Value> = tools
118                .iter()
119                .map(|t| {
120                    serde_json::json!({
121                        "name": t.name,
122                        "description": t.description,
123                        "input_schema": t.parameters,
124                    })
125                })
126                .collect();
127
128            // Mark the last tool definition with cache_control so the
129            // entire tool block is cached on subsequent requests.
130            if let Some(last) = tool_defs.last_mut() {
131                last["cache_control"] = serde_json::json!({ "type": "ephemeral" });
132            }
133
134            request["tools"] = serde_json::json!(tool_defs);
135        }
136
137        // Apply optional sampling parameters
138        if let Some(temp) = self.temperature {
139            request["temperature"] = serde_json::json!(temp);
140        }
141
142        // Extended thinking (Anthropic-specific)
143        if let Some(budget) = self.thinking_budget {
144            request["thinking"] = serde_json::json!({
145                "type": "enabled",
146                "budget_tokens": budget
147            });
148            // Thinking requires temperature=1 per Anthropic docs
149            request["temperature"] = serde_json::json!(1.0);
150        }
151
152        request
153    }
154}
155
156impl AnthropicClient {
157    /// Apply a structured-output directive to an Anthropic request.
158    ///
159    /// Anthropic supports forced tool choice (`tool_choice`) but has no
160    /// `response_format`, so only `force_tool` is honored.
161    fn apply_directive(
162        request: &mut serde_json::Value,
163        directive: &structured::StructuredDirective,
164    ) {
165        if let Some(tool) = &directive.force_tool {
166            request["tool_choice"] = serde_json::json!({ "type": "tool", "name": tool });
167        }
168    }
169
170    /// Execute a fully-built (non-streaming) request body.
171    async fn send_request(&self, request_body: serde_json::Value) -> Result<LlmResponse> {
172        {
173            let request_started_at = Instant::now();
174            let url = format!("{}/v1/messages", self.base_url);
175
176            let headers = vec![
177                ("x-api-key", self.api_key.expose()),
178                ("anthropic-version", "2023-06-01"),
179                ("anthropic-beta", "prompt-caching-2024-07-31"),
180            ];
181
182            let response = crate::retry::with_retry(&self.retry_config, |_attempt| {
183                let http = &self.http;
184                let url = &url;
185                let headers = headers.clone();
186                let request_body = &request_body;
187                async move {
188                    match http
189                        .post(url, headers, request_body, CancellationToken::new())
190                        .await
191                    {
192                        Ok(resp) => {
193                            let status = reqwest::StatusCode::from_u16(resp.status)
194                                .unwrap_or(reqwest::StatusCode::INTERNAL_SERVER_ERROR);
195                            if status.is_success() {
196                                AttemptOutcome::Success(resp.body)
197                            } else if self.retry_config.is_retryable_status(status) {
198                                AttemptOutcome::Retryable {
199                                    status,
200                                    body: resp.body,
201                                    retry_after: None,
202                                }
203                            } else {
204                                AttemptOutcome::Fatal(anyhow::anyhow!(
205                                    "Anthropic API error at {} ({}): {}",
206                                    url,
207                                    status,
208                                    resp.body
209                                ))
210                            }
211                        }
212                        Err(e) => AttemptOutcome::Fatal(e),
213                    }
214                }
215            })
216            .await?;
217
218            let parsed: AnthropicResponse =
219                serde_json::from_str(&response).context("Failed to parse Anthropic response")?;
220
221            tracing::debug!("Anthropic response: {:?}", parsed);
222
223            let content: Vec<ContentBlock> = parsed
224                .content
225                .into_iter()
226                .map(|block| match block {
227                    AnthropicContentBlock::Text { text } => ContentBlock::Text { text },
228                    AnthropicContentBlock::ToolUse { id, name, input } => {
229                        ContentBlock::ToolUse { id, name, input }
230                    }
231                })
232                .collect();
233
234            let llm_response = LlmResponse {
235                message: Message {
236                    role: "assistant".to_string(),
237                    content,
238                    reasoning_content: None,
239                },
240                usage: TokenUsage {
241                    prompt_tokens: parsed.usage.input_tokens,
242                    completion_tokens: parsed.usage.output_tokens,
243                    total_tokens: parsed.usage.input_tokens + parsed.usage.output_tokens,
244                    cache_read_tokens: parsed.usage.cache_read_input_tokens,
245                    cache_write_tokens: parsed.usage.cache_creation_input_tokens,
246                },
247                stop_reason: Some(parsed.stop_reason),
248                meta: Some(LlmResponseMeta {
249                    provider: Some(self.provider_name.clone()),
250                    request_model: Some(self.model.clone()),
251                    request_url: Some(url.clone()),
252                    response_id: parsed.id,
253                    response_model: parsed.model,
254                    response_object: parsed.response_type,
255                    first_token_ms: None,
256                    duration_ms: Some(request_started_at.elapsed().as_millis() as u64),
257                }),
258            };
259
260            crate::telemetry::record_llm_usage(
261                llm_response.usage.prompt_tokens,
262                llm_response.usage.completion_tokens,
263                llm_response.usage.total_tokens,
264                llm_response.stop_reason.as_deref(),
265            );
266
267            Ok(llm_response)
268        }
269    }
270}
271
272#[async_trait]
273impl LlmClient for AnthropicClient {
274    async fn complete(
275        &self,
276        messages: &[Message],
277        system: Option<&str>,
278        tools: &[ToolDefinition],
279    ) -> Result<LlmResponse> {
280        self.send_request(self.build_request(messages, system, tools))
281            .await
282    }
283
284    async fn complete_structured(
285        &self,
286        messages: &[Message],
287        system: Option<&str>,
288        tools: &[ToolDefinition],
289        directive: &structured::StructuredDirective,
290    ) -> Result<LlmResponse> {
291        let mut request_body = self.build_request(messages, system, tools);
292        Self::apply_directive(&mut request_body, directive);
293        self.send_request(request_body).await
294    }
295
296    fn native_structured_support(&self) -> structured::NativeStructuredSupport {
297        structured::NativeStructuredSupport::ForcedTool
298    }
299
300    async fn complete_streaming(
301        &self,
302        messages: &[Message],
303        system: Option<&str>,
304        tools: &[ToolDefinition],
305        cancel_token: CancellationToken,
306    ) -> Result<mpsc::Receiver<StreamEvent>> {
307        self.send_streaming(self.build_request(messages, system, tools), cancel_token)
308            .await
309    }
310
311    async fn complete_streaming_structured(
312        &self,
313        messages: &[Message],
314        system: Option<&str>,
315        tools: &[ToolDefinition],
316        directive: &structured::StructuredDirective,
317        cancel_token: CancellationToken,
318    ) -> Result<mpsc::Receiver<StreamEvent>> {
319        let mut request_body = self.build_request(messages, system, tools);
320        Self::apply_directive(&mut request_body, directive);
321        self.send_streaming(request_body, cancel_token).await
322    }
323}
324
325impl AnthropicClient {
326    /// Execute a fully-built streaming request body (sets `stream: true`).
327    async fn send_streaming(
328        &self,
329        mut request_body: serde_json::Value,
330        cancel_token: CancellationToken,
331    ) -> Result<mpsc::Receiver<StreamEvent>> {
332        {
333            let request_started_at = Instant::now();
334            request_body["stream"] = serde_json::json!(true);
335
336            let url = format!("{}/v1/messages", self.base_url);
337
338            let headers = vec![
339                ("x-api-key", self.api_key.expose()),
340                ("anthropic-version", "2023-06-01"),
341                ("anthropic-beta", "prompt-caching-2024-07-31"),
342            ];
343
344            let streaming_resp = crate::retry::with_retry(&self.retry_config, |_attempt| {
345                let http = &self.http;
346                let url = &url;
347                let headers = headers.clone();
348                let request_body = &request_body;
349                let cancel_token = cancel_token.clone();
350                async move {
351                    let resp = tokio::select! {
352                        _ = cancel_token.cancelled() => {
353                            return AttemptOutcome::Fatal(anyhow::anyhow!("HTTP request cancelled"));
354                        }
355                        result = http.post_streaming(url, headers, request_body, cancel_token.clone()) => {
356                            match result {
357                                Ok(r) => r,
358                                Err(e) => {
359                                    // A transient network error (timeout, reset,
360                                    // mid-flight drop — common on throttled
361                                    // endpoints) carries no HTTP status. Retry it
362                                    // with backoff like 429/5xx instead of failing
363                                    // the turn; a real fatal error still bails.
364                                    return if crate::retry::is_transient_error(&e) {
365                                        AttemptOutcome::Retryable {
366                                            status: reqwest::StatusCode::SERVICE_UNAVAILABLE,
367                                            body: format!("network error: {e}"),
368                                            retry_after: None,
369                                        }
370                                    } else {
371                                        AttemptOutcome::Fatal(anyhow::anyhow!(
372                                            "HTTP request failed: {}",
373                                            e
374                                        ))
375                                    };
376                                }
377                            }
378                        }
379                    };
380                    let status = reqwest::StatusCode::from_u16(resp.status)
381                        .unwrap_or(reqwest::StatusCode::INTERNAL_SERVER_ERROR);
382                    if status.is_success() {
383                        AttemptOutcome::Success(resp)
384                    } else {
385                        let retry_after = resp
386                            .retry_after
387                            .as_deref()
388                            .and_then(|v| RetryConfig::parse_retry_after(Some(v)));
389                        if self.retry_config.is_retryable_status(status) {
390                            AttemptOutcome::Retryable {
391                                status,
392                                body: resp.error_body,
393                                retry_after,
394                            }
395                        } else {
396                            AttemptOutcome::Fatal(anyhow::anyhow!(
397                                "Anthropic API error at {} ({}): {}",
398                                url,
399                                status,
400                                resp.error_body
401                            ))
402                        }
403                    }
404                }
405            })
406            .await?;
407
408            let (tx, rx) = mpsc::channel(100);
409
410            let mut stream = streaming_resp.byte_stream;
411            let provider_name = self.provider_name.clone();
412            let request_model = self.model.clone();
413            let request_url = url.clone();
414            tokio::spawn(async move {
415                let mut buffer = String::new();
416                let mut content_blocks: Vec<ContentBlock> = Vec::new();
417                let mut text_content = String::new();
418                let mut current_tool_id = String::new();
419                let mut current_tool_name = String::new();
420                let mut current_tool_input = String::new();
421                let mut usage = TokenUsage::default();
422                let mut stop_reason = None;
423                let mut response_id = None;
424                let mut response_model = None;
425                let mut response_object = Some("message".to_string());
426                let mut first_token_ms = None;
427
428                while let Some(chunk_result) = stream.next().await {
429                    let chunk = match chunk_result {
430                        Ok(c) => c,
431                        Err(e) => {
432                            tracing::error!("Stream error: {}", e);
433                            break;
434                        }
435                    };
436
437                    buffer.push_str(&String::from_utf8_lossy(&chunk));
438
439                    while let Some(event_end) = buffer.find("\n\n") {
440                        let event_data: String = buffer.drain(..event_end).collect();
441                        buffer.drain(..2);
442
443                        for line in event_data.lines() {
444                            if let Some(data) = line.strip_prefix("data: ") {
445                                if data == "[DONE]" {
446                                    continue;
447                                }
448
449                                if let Ok(event) =
450                                    serde_json::from_str::<AnthropicStreamEvent>(data)
451                                {
452                                    match event {
453                                        AnthropicStreamEvent::ContentBlockStart {
454                                            index: _,
455                                            content_block,
456                                        } => match content_block {
457                                            AnthropicContentBlock::Text { .. } => {}
458                                            AnthropicContentBlock::ToolUse { id, name, input } => {
459                                                if !text_content.is_empty() {
460                                                    content_blocks.push(ContentBlock::Text {
461                                                        text: std::mem::take(&mut text_content),
462                                                    });
463                                                }
464                                                current_tool_id = id.clone();
465                                                current_tool_name = name.clone();
466                                                current_tool_input =
467                                                    Self::initial_tool_input_json(&input)
468                                                        .unwrap_or_default();
469                                                let _ = tx
470                                                    .send(StreamEvent::ToolUseStart { id, name })
471                                                    .await;
472                                                if !current_tool_input.is_empty() {
473                                                    if first_token_ms.is_none() {
474                                                        first_token_ms = Some(
475                                                            request_started_at.elapsed().as_millis()
476                                                                as u64,
477                                                        );
478                                                    }
479                                                    let _ = tx
480                                                        .send(StreamEvent::ToolUseInputDelta(
481                                                            current_tool_input.clone(),
482                                                        ))
483                                                        .await;
484                                                }
485                                            }
486                                        },
487                                        AnthropicStreamEvent::ContentBlockDelta {
488                                            index: _,
489                                            delta,
490                                        } => match delta {
491                                            AnthropicDelta::TextDelta { text } => {
492                                                if first_token_ms.is_none() {
493                                                    first_token_ms = Some(
494                                                        request_started_at.elapsed().as_millis()
495                                                            as u64,
496                                                    );
497                                                }
498                                                text_content.push_str(&text);
499                                                let _ = tx.send(StreamEvent::TextDelta(text)).await;
500                                            }
501                                            AnthropicDelta::InputJsonDelta { partial_json } => {
502                                                if first_token_ms.is_none() {
503                                                    first_token_ms = Some(
504                                                        request_started_at.elapsed().as_millis()
505                                                            as u64,
506                                                    );
507                                                }
508                                                current_tool_input.push_str(&partial_json);
509                                                let _ = tx
510                                                    .send(StreamEvent::ToolUseInputDelta(
511                                                        partial_json,
512                                                    ))
513                                                    .await;
514                                            }
515                                        },
516                                        AnthropicStreamEvent::ContentBlockStop { index: _ }
517                                            if !current_tool_id.is_empty() =>
518                                        {
519                                            let input: serde_json::Value = if current_tool_input
520                                                .trim()
521                                                .is_empty()
522                                            {
523                                                serde_json::Value::Object(Default::default())
524                                            } else {
525                                                serde_json::from_str(&current_tool_input)
526                                                    .unwrap_or_else(|e| {
527                                                        tracing::warn!(
528                                                            "Failed to parse tool input JSON for tool '{}': {}",
529                                                            current_tool_name, e
530                                                        );
531                                                        serde_json::json!({
532                                                            "__parse_error": format!(
533                                                                "Malformed tool arguments: {}. Raw input: {}",
534                                                                e, &current_tool_input
535                                                            )
536                                                        })
537                                                    })
538                                            };
539                                            content_blocks.push(ContentBlock::ToolUse {
540                                                id: current_tool_id.clone(),
541                                                name: current_tool_name.clone(),
542                                                input,
543                                            });
544                                            current_tool_id.clear();
545                                            current_tool_name.clear();
546                                            current_tool_input.clear();
547                                        }
548                                        AnthropicStreamEvent::MessageStart { message } => {
549                                            response_id = message.id;
550                                            response_model = message.model;
551                                            response_object = message.message_type;
552                                            usage.prompt_tokens = message.usage.input_tokens;
553                                        }
554                                        AnthropicStreamEvent::MessageDelta {
555                                            delta,
556                                            usage: msg_usage,
557                                        } => {
558                                            stop_reason = Some(delta.stop_reason);
559                                            usage.completion_tokens = msg_usage.output_tokens;
560                                            usage.total_tokens =
561                                                usage.prompt_tokens + usage.completion_tokens;
562                                        }
563                                        AnthropicStreamEvent::MessageStop => {
564                                            if !text_content.is_empty() {
565                                                content_blocks.push(ContentBlock::Text {
566                                                    text: std::mem::take(&mut text_content),
567                                                });
568                                            }
569                                            crate::telemetry::record_llm_usage(
570                                                usage.prompt_tokens,
571                                                usage.completion_tokens,
572                                                usage.total_tokens,
573                                                stop_reason.as_deref(),
574                                            );
575
576                                            let response = LlmResponse {
577                                                message: Message {
578                                                    role: "assistant".to_string(),
579                                                    content: std::mem::take(&mut content_blocks),
580                                                    reasoning_content: None,
581                                                },
582                                                usage: usage.clone(),
583                                                stop_reason: stop_reason.clone(),
584                                                meta: Some(LlmResponseMeta {
585                                                    provider: Some(provider_name.clone()),
586                                                    request_model: Some(request_model.clone()),
587                                                    request_url: Some(request_url.clone()),
588                                                    response_id: response_id.clone(),
589                                                    response_model: response_model.clone(),
590                                                    response_object: response_object.clone(),
591                                                    first_token_ms,
592                                                    duration_ms: Some(
593                                                        request_started_at.elapsed().as_millis()
594                                                            as u64,
595                                                    ),
596                                                }),
597                                            };
598                                            let _ = tx.send(StreamEvent::Done(response)).await;
599                                        }
600                                        _ => {}
601                                    }
602                                }
603                            }
604                        }
605                    }
606                }
607            });
608
609            Ok(rx)
610        }
611    }
612}
613
// Anthropic API response types (crate-internal)
615#[derive(Debug, Deserialize)]
616pub(crate) struct AnthropicResponse {
617    #[serde(default)]
618    pub(crate) id: Option<String>,
619    #[serde(default)]
620    pub(crate) model: Option<String>,
621    #[serde(rename = "type", default)]
622    pub(crate) response_type: Option<String>,
623    pub(crate) content: Vec<AnthropicContentBlock>,
624    pub(crate) stop_reason: String,
625    pub(crate) usage: AnthropicUsage,
626}
627
628#[derive(Debug, Deserialize)]
629#[serde(tag = "type")]
630pub(crate) enum AnthropicContentBlock {
631    #[serde(rename = "text")]
632    Text { text: String },
633    #[serde(rename = "tool_use")]
634    ToolUse {
635        id: String,
636        name: String,
637        input: serde_json::Value,
638    },
639}
640
641#[derive(Debug, Deserialize)]
642pub(crate) struct AnthropicUsage {
643    pub(crate) input_tokens: usize,
644    pub(crate) output_tokens: usize,
645    pub(crate) cache_read_input_tokens: Option<usize>,
646    pub(crate) cache_creation_input_tokens: Option<usize>,
647}
648
649#[derive(Debug, Deserialize)]
650#[serde(tag = "type")]
651#[allow(dead_code)]
652pub(crate) enum AnthropicStreamEvent {
653    #[serde(rename = "message_start")]
654    MessageStart { message: AnthropicMessageStart },
655    #[serde(rename = "content_block_start")]
656    ContentBlockStart {
657        index: usize,
658        content_block: AnthropicContentBlock,
659    },
660    #[serde(rename = "content_block_delta")]
661    ContentBlockDelta { index: usize, delta: AnthropicDelta },
662    #[serde(rename = "content_block_stop")]
663    ContentBlockStop { index: usize },
664    #[serde(rename = "message_delta")]
665    MessageDelta {
666        delta: AnthropicMessageDeltaData,
667        usage: AnthropicOutputUsage,
668    },
669    #[serde(rename = "message_stop")]
670    MessageStop,
671    #[serde(rename = "ping")]
672    Ping,
673    #[serde(rename = "error")]
674    Error { error: AnthropicError },
675}
676
677#[derive(Debug, Deserialize)]
678pub(crate) struct AnthropicMessageStart {
679    #[serde(default)]
680    pub(crate) id: Option<String>,
681    #[serde(default)]
682    pub(crate) model: Option<String>,
683    #[serde(rename = "type", default)]
684    pub(crate) message_type: Option<String>,
685    pub(crate) usage: AnthropicUsage,
686}
687
688#[derive(Debug, Deserialize)]
689#[serde(tag = "type")]
690pub(crate) enum AnthropicDelta {
691    #[serde(rename = "text_delta")]
692    TextDelta { text: String },
693    #[serde(rename = "input_json_delta")]
694    InputJsonDelta { partial_json: String },
695}
696
697#[derive(Debug, Deserialize)]
698pub(crate) struct AnthropicMessageDeltaData {
699    pub(crate) stop_reason: String,
700}
701
702#[derive(Debug, Deserialize)]
703pub(crate) struct AnthropicOutputUsage {
704    pub(crate) output_tokens: usize,
705}
706
707#[derive(Debug, Deserialize)]
708#[allow(dead_code)]
709pub(crate) struct AnthropicError {
710    #[serde(rename = "type")]
711    pub(crate) error_type: String,
712    pub(crate) message: String,
713}
714
715// ============================================================================
716// Tests
717// ============================================================================
718
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::types::{Message, ToolDefinition};

    /// Builds a client with a dummy API key and model name; all other settings
    /// keep whatever `AnthropicClient::new` assigns.
    fn make_client() -> AnthropicClient {
        AnthropicClient::new("test-key".to_string(), "claude-opus-4-6".to_string())
    }

    /// A plain request carries the model and the default `max_tokens`, and has
    /// no `thinking` entry.
    #[test]
    fn test_build_request_basic() {
        let client = make_client();
        let messages = vec![Message::user("Hello")];
        let req = client.build_request(&messages, None, &[]);

        assert_eq!(req["model"], "claude-opus-4-6");
        assert_eq!(req["max_tokens"], DEFAULT_MAX_TOKENS);
        assert!(req["thinking"].is_null());
    }

    /// Setting a thinking budget adds the `thinking` block and pins temperature to 1.0.
    #[test]
    fn test_build_request_with_thinking_budget() {
        let client = make_client().with_thinking_budget(10_000);
        let messages = vec![Message::user("Think carefully.")];
        let req = client.build_request(&messages, None, &[]);

        // thinking block must be present
        assert_eq!(req["thinking"]["type"], "enabled");
        assert_eq!(req["thinking"]["budget_tokens"], 10_000);
        // temperature must be 1.0 when thinking is enabled
        assert_eq!(req["temperature"], 1.0_f64);
    }

    /// An explicitly configured temperature is overridden once thinking is enabled.
    #[test]
    fn test_build_request_thinking_overrides_temperature() {
        // Even if temperature was set, thinking forces it to 1.0
        let client = make_client()
            .with_temperature(0.5)
            .with_thinking_budget(5_000);
        let messages = vec![Message::user("Test")];
        let req = client.build_request(&messages, None, &[]);

        assert_eq!(req["temperature"], 1.0_f64);
        assert_eq!(req["thinking"]["budget_tokens"], 5_000);
    }

    /// Without thinking, the configured temperature is forwarded and no
    /// `thinking` entry is emitted.
    #[test]
    fn test_build_request_no_thinking_uses_temperature() {
        let client = make_client().with_temperature(0.7);
        let messages = vec![Message::user("Test")];
        let req = client.build_request(&messages, None, &[]);

        // Compare with a tolerance: the value is read back as f64 and need not
        // equal the literal 0.7 exactly.
        let temp = req["temperature"].as_f64().unwrap();
        assert!((temp - 0.7).abs() < 0.01);
        assert!(req["thinking"].is_null());
    }

    /// A system prompt is emitted as an array of text blocks whose first entry
    /// carries the prompt and a `cache_control` object.
    #[test]
    fn test_build_request_with_system_prompt() {
        let client = make_client();
        let messages = vec![Message::user("Hello")];
        let req = client.build_request(&messages, Some("You are helpful."), &[]);

        let system = &req["system"];
        assert!(system.is_array());
        assert_eq!(system[0]["type"], "text");
        assert_eq!(system[0]["text"], "You are helpful.");
        assert!(system[0]["cache_control"].is_object());
    }

    /// Tool definitions are serialized into the `tools` array.
    #[test]
    fn test_build_request_with_tools() {
        let client = make_client();
        let messages = vec![Message::user("Use a tool")];
        let tools = vec![ToolDefinition {
            name: "read_file".to_string(),
            description: "Read a file".to_string(),
            parameters: serde_json::json!({"type": "object", "properties": {}}),
        }];
        let req = client.build_request(&messages, None, &tools);

        assert!(req["tools"].is_array());
        assert_eq!(req["tools"][0]["name"], "read_file");
        // With a single tool, index 0 is also the last tool, which should
        // carry cache_control.
        assert!(req["tools"][0]["cache_control"].is_object());
    }

    /// Enabling thinking leaves an explicitly configured `max_tokens` untouched.
    #[test]
    fn test_build_request_thinking_budget_sets_max_tokens() {
        // max_tokens is still respected when thinking is enabled
        let client = make_client()
            .with_max_tokens(16_000)
            .with_thinking_budget(8_000);
        let messages = vec![Message::user("Test")];
        let req = client.build_request(&messages, None, &[]);

        assert_eq!(req["max_tokens"], 16_000);
        assert_eq!(req["thinking"]["budget_tokens"], 8_000);
    }

    /// A directive naming a tool produces a `tool_choice` of type `tool` with that name.
    #[test]
    fn test_apply_directive_forces_tool_choice() {
        let mut req = serde_json::json!({ "model": "m", "messages": [] });
        let directive = structured::StructuredDirective {
            force_tool: Some("emit_person".to_string()),
            response_format: None,
        };
        AnthropicClient::apply_directive(&mut req, &directive);
        assert_eq!(req["tool_choice"]["type"], "tool");
        assert_eq!(req["tool_choice"]["name"], "emit_person");
    }

    /// Directives without a forced tool add neither `response_format` nor `tool_choice`.
    #[test]
    fn test_apply_directive_ignores_response_format() {
        // Anthropic has no response_format; both a response_format-only and an
        // empty directive must be no-ops. Exercise both cases, each against a
        // fresh request so one cannot mask the other.
        let directives = [
            structured::StructuredDirective {
                force_tool: None,
                response_format: Some(structured::ResponseFormat::JsonObject),
            },
            structured::StructuredDirective {
                force_tool: None,
                response_format: None,
            },
        ];

        for directive in &directives {
            let mut req = serde_json::json!({ "model": "m" });
            AnthropicClient::apply_directive(&mut req, directive);
            assert!(req.get("response_format").is_none());
            assert!(req.get("tool_choice").is_none());
        }
    }

    /// The client advertises forced-tool as its native structured-output mechanism.
    #[test]
    fn test_native_structured_support_is_forced_tool() {
        assert_eq!(
            make_client().native_structured_support(),
            structured::NativeStructuredSupport::ForcedTool
        );
    }
}