Skip to main content

claude_api/messages/
request.rs

//! Request payloads for the Messages API.
//!
//! [`CreateMessageRequest`] is the typed builder for `POST /v1/messages`.
//! [`CountTokensRequest`] is its slimmer sibling for
//! `POST /v1/messages/count_tokens`.
6
7use serde::Serialize;
8
9use crate::error::{Error, Result};
10use crate::messages::cache::CacheControl;
11use crate::messages::content::{ContentBlock, KnownBlock};
12use crate::messages::input::{MessageInput, SystemPrompt};
13
14fn apply_cache_control_to_last_block_with(blocks: &mut [ContentBlock], cc: CacheControl) {
15    let Some(last) = blocks.last_mut() else {
16        return;
17    };
18    if let ContentBlock::Known(
19        KnownBlock::Text { cache_control, .. }
20        | KnownBlock::Image { cache_control, .. }
21        | KnownBlock::Document { cache_control, .. }
22        | KnownBlock::ToolResult { cache_control, .. },
23    ) = last
24    {
25        *cache_control = Some(cc);
26    }
27}
28use crate::messages::mcp::McpServerConfig;
29use crate::messages::metadata::{MessageMetadata, RequestServiceTier};
30use crate::messages::thinking::ThinkingConfig;
31use crate::messages::tools::{Tool, ToolChoice};
32use crate::types::ModelId;
33
/// `skip_serializing_if` predicate: true when the flag is unset.
// serde hands `skip_serializing_if` predicates a reference to the field, so
// the parameter has to be `&bool` even though `bool` is `Copy`.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_false(b: &bool) -> bool {
    let flag = *b;
    !flag
}
38
39/// Request payload for `POST /v1/messages`.
40///
41/// Construct via [`CreateMessageRequest::builder`].
42#[derive(Debug, Clone, Serialize)]
43#[non_exhaustive]
44pub struct CreateMessageRequest {
45    /// Model to query.
46    pub model: ModelId,
47    /// Maximum number of output tokens to generate.
48    pub max_tokens: u32,
49    /// Conversation history.
50    pub messages: Vec<MessageInput>,
51
52    /// Optional system prompt.
53    #[serde(skip_serializing_if = "Option::is_none")]
54    pub system: Option<SystemPrompt>,
55    /// Sampling temperature.
56    #[serde(skip_serializing_if = "Option::is_none")]
57    pub temperature: Option<f32>,
58    /// Nucleus sampling cutoff.
59    #[serde(skip_serializing_if = "Option::is_none")]
60    pub top_p: Option<f32>,
61    /// Top-k sampling cutoff.
62    #[serde(skip_serializing_if = "Option::is_none")]
63    pub top_k: Option<u32>,
64    /// Custom stop sequences.
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub stop_sequences: Option<Vec<String>>,
67    /// Tools the model may invoke.
68    #[serde(skip_serializing_if = "Vec::is_empty")]
69    pub tools: Vec<Tool>,
70    /// Tool-use policy.
71    #[serde(skip_serializing_if = "Option::is_none")]
72    pub tool_choice: Option<ToolChoice>,
73    /// Optional per-request metadata (`user_id`).
74    #[serde(skip_serializing_if = "Option::is_none")]
75    pub metadata: Option<MessageMetadata>,
76    /// Request-side service tier preference.
77    #[serde(skip_serializing_if = "Option::is_none")]
78    pub service_tier: Option<RequestServiceTier>,
79    /// Extended-thinking configuration.
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub thinking: Option<ThinkingConfig>,
82    /// MCP servers exposed to the model on this request.
83    #[serde(skip_serializing_if = "Vec::is_empty")]
84    pub mcp_servers: Vec<McpServerConfig>,
85    /// Container ID for the code-execution built-in tool.
86    #[serde(skip_serializing_if = "Option::is_none")]
87    pub container: Option<String>,
88
89    /// Whether to stream the response. Set internally by `create_stream`;
90    /// not normally touched by callers.
91    #[doc(hidden)]
92    #[serde(default, skip_serializing_if = "is_false")]
93    pub stream: bool,
94}
95
96impl CreateMessageRequest {
97    /// Begin configuring a request.
98    #[must_use]
99    pub fn builder() -> CreateMessageRequestBuilder {
100        CreateMessageRequestBuilder::default()
101    }
102}
103
104/// Builder for [`CreateMessageRequest`].
105#[derive(Debug, Default)]
106pub struct CreateMessageRequestBuilder {
107    model: Option<ModelId>,
108    max_tokens: Option<u32>,
109    messages: Vec<MessageInput>,
110    system: Option<SystemPrompt>,
111    temperature: Option<f32>,
112    top_p: Option<f32>,
113    top_k: Option<u32>,
114    stop_sequences: Option<Vec<String>>,
115    tools: Vec<Tool>,
116    tool_choice: Option<ToolChoice>,
117    metadata: Option<MessageMetadata>,
118    service_tier: Option<RequestServiceTier>,
119    thinking: Option<ThinkingConfig>,
120    mcp_servers: Vec<McpServerConfig>,
121    container: Option<String>,
122}
123
124impl CreateMessageRequestBuilder {
125    /// Set the model. Required.
126    #[must_use]
127    pub fn model(mut self, m: impl Into<ModelId>) -> Self {
128        self.model = Some(m.into());
129        self
130    }
131
132    /// Set the max output tokens. Required.
133    #[must_use]
134    pub fn max_tokens(mut self, n: u32) -> Self {
135        self.max_tokens = Some(n);
136        self
137    }
138
139    /// Set the system prompt.
140    #[must_use]
141    pub fn system(mut self, s: impl Into<SystemPrompt>) -> Self {
142        self.system = Some(s.into());
143        self
144    }
145
146    /// Replace the entire conversation history.
147    #[must_use]
148    pub fn messages(mut self, msgs: Vec<MessageInput>) -> Self {
149        self.messages = msgs;
150        self
151    }
152
153    /// Append a user-authored message to the history.
154    #[must_use]
155    pub fn user(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
156        self.messages.push(MessageInput::user(content));
157        self
158    }
159
160    /// Append an assistant-authored message (typically used for prefill).
161    #[must_use]
162    pub fn assistant(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
163        self.messages.push(MessageInput::assistant(content));
164        self
165    }
166
167    /// Set the available tools.
168    #[must_use]
169    pub fn tools(mut self, tools: Vec<Tool>) -> Self {
170        self.tools = tools;
171        self
172    }
173
174    /// Set the tool-use policy.
175    #[must_use]
176    pub fn tool_choice(mut self, choice: ToolChoice) -> Self {
177        self.tool_choice = Some(choice);
178        self
179    }
180
181    /// Set the sampling temperature.
182    #[must_use]
183    pub fn temperature(mut self, t: f32) -> Self {
184        self.temperature = Some(t);
185        self
186    }
187
188    /// Set the nucleus sampling cutoff.
189    #[must_use]
190    pub fn top_p(mut self, p: f32) -> Self {
191        self.top_p = Some(p);
192        self
193    }
194
195    /// Set the top-k sampling cutoff.
196    #[must_use]
197    pub fn top_k(mut self, k: u32) -> Self {
198        self.top_k = Some(k);
199        self
200    }
201
202    /// Set custom stop sequences.
203    #[must_use]
204    pub fn stop_sequences(mut self, seqs: Vec<String>) -> Self {
205        self.stop_sequences = Some(seqs);
206        self
207    }
208
209    /// Set request metadata (currently `user_id` only).
210    #[must_use]
211    pub fn metadata(mut self, m: MessageMetadata) -> Self {
212        self.metadata = Some(m);
213        self
214    }
215
216    /// Set the request-side service tier.
217    #[must_use]
218    pub fn service_tier(mut self, tier: RequestServiceTier) -> Self {
219        self.service_tier = Some(tier);
220        self
221    }
222
223    /// Set the extended-thinking config.
224    #[must_use]
225    pub fn thinking(mut self, t: ThinkingConfig) -> Self {
226        self.thinking = Some(t);
227        self
228    }
229
230    /// Set the MCP servers exposed on this request.
231    #[must_use]
232    pub fn mcp_servers(mut self, servers: Vec<McpServerConfig>) -> Self {
233        self.mcp_servers = servers;
234        self
235    }
236
237    /// Set the container ID for the code-execution built-in tool.
238    #[must_use]
239    pub fn container(mut self, id: impl Into<String>) -> Self {
240        self.container = Some(id.into());
241        self
242    }
243
244    /// Sugar: apply an ephemeral cache breakpoint at the end of the system prompt.
245    ///
246    /// - `Some(Text(s))` becomes a single text block with `cache_control: ephemeral`.
247    /// - `Some(Blocks(_))` has `cache_control: ephemeral` set on the last text block.
248    /// - `None` is a no-op.
249    #[must_use]
250    pub fn cache_control_on_system(self) -> Self {
251        self.cache_system_inner(CacheControl::ephemeral())
252    }
253
254    /// Shorter alias for [`Self::cache_control_on_system`].
255    #[must_use]
256    pub fn cache_system(self) -> Self {
257        self.cache_control_on_system()
258    }
259
260    /// Like [`Self::cache_system`] but with an explicit TTL (`"5m"`,
261    /// `"1h"`). The `"1h"` form requires the
262    /// `extended-cache-ttl-2025-04-11` beta header.
263    #[must_use]
264    pub fn cache_system_with_ttl(self, ttl: impl Into<String>) -> Self {
265        self.cache_system_inner(CacheControl::ephemeral_ttl(ttl))
266    }
267
268    fn cache_system_inner(mut self, cc: CacheControl) -> Self {
269        let blocks = match self.system.take() {
270            Some(SystemPrompt::Text(text)) => vec![ContentBlock::Known(KnownBlock::Text {
271                text,
272                cache_control: Some(cc),
273                citations: None,
274            })],
275            Some(SystemPrompt::Blocks(mut blocks)) => {
276                if let Some(ContentBlock::Known(KnownBlock::Text { cache_control, .. })) =
277                    blocks.last_mut()
278                {
279                    *cache_control = Some(cc);
280                }
281                blocks
282            }
283            None => return self,
284        };
285        self.system = Some(SystemPrompt::Blocks(blocks));
286        self
287    }
288
289    /// Sugar: apply an ephemeral cache breakpoint to the last user-authored
290    /// message in the history.
291    ///
292    /// String content is converted to a single text block carrying
293    /// `cache_control: ephemeral`. Block content has `cache_control` set on
294    /// the last block that supports it (text, image, document, `tool_result`).
295    /// No-op if there are no user-authored messages.
296    #[must_use]
297    pub fn cache_control_on_last_user(self) -> Self {
298        self.cache_last_user_inner(CacheControl::ephemeral())
299    }
300
301    /// Shorter alias for [`Self::cache_control_on_last_user`].
302    #[must_use]
303    pub fn cache_last_user(self) -> Self {
304        self.cache_control_on_last_user()
305    }
306
307    /// Like [`Self::cache_last_user`] but with an explicit TTL.
308    #[must_use]
309    pub fn cache_last_user_with_ttl(self, ttl: impl Into<String>) -> Self {
310        self.cache_last_user_inner(CacheControl::ephemeral_ttl(ttl))
311    }
312
313    fn cache_last_user_inner(mut self, cc: CacheControl) -> Self {
314        use crate::messages::input::MessageContent;
315        use crate::types::Role;
316
317        let Some(idx) = self.messages.iter().rposition(|m| m.role == Role::User) else {
318            return self;
319        };
320        let target = &mut self.messages[idx];
321        match &mut target.content {
322            MessageContent::Text(text) => {
323                target.content =
324                    MessageContent::Blocks(vec![ContentBlock::Known(KnownBlock::Text {
325                        text: std::mem::take(text),
326                        cache_control: Some(cc),
327                        citations: None,
328                    })]);
329            }
330            MessageContent::Blocks(blocks) => {
331                apply_cache_control_to_last_block_with(blocks, cc);
332            }
333        }
334        self
335    }
336
337    /// Sugar: apply an ephemeral cache breakpoint to the last tool
338    /// definition. The server caches all tool definitions up to that point;
339    /// useful when the same tool list is reused across many requests.
340    /// No-op if no tools are configured.
341    #[must_use]
342    pub fn cache_control_on_tools(self) -> Self {
343        self.cache_tools_inner(CacheControl::ephemeral())
344    }
345
346    /// Shorter alias for [`Self::cache_control_on_tools`].
347    #[must_use]
348    pub fn cache_tools(self) -> Self {
349        self.cache_control_on_tools()
350    }
351
352    /// Like [`Self::cache_tools`] but with an explicit TTL.
353    #[must_use]
354    pub fn cache_tools_with_ttl(self, ttl: impl Into<String>) -> Self {
355        self.cache_tools_inner(CacheControl::ephemeral_ttl(ttl))
356    }
357
358    fn cache_tools_inner(mut self, cc: CacheControl) -> Self {
359        use crate::messages::tools::Tool as MessagesTool;
360        let Some(last) = self.tools.last_mut() else {
361            return self;
362        };
363        if let MessagesTool::Custom(ct) = last {
364            ct.cache_control = Some(cc);
365        }
366        self
367    }
368
369    /// Finalize the request.
370    ///
371    /// # Errors
372    ///
373    /// Returns [`Error::InvalidConfig`] if `model` or `max_tokens` was not set.
374    pub fn build(self) -> Result<CreateMessageRequest> {
375        let model = self
376            .model
377            .ok_or_else(|| Error::InvalidConfig("model is required".into()))?;
378        let max_tokens = self
379            .max_tokens
380            .ok_or_else(|| Error::InvalidConfig("max_tokens is required".into()))?;
381
382        Ok(CreateMessageRequest {
383            model,
384            max_tokens,
385            messages: self.messages,
386            system: self.system,
387            temperature: self.temperature,
388            top_p: self.top_p,
389            top_k: self.top_k,
390            stop_sequences: self.stop_sequences,
391            tools: self.tools,
392            tool_choice: self.tool_choice,
393            metadata: self.metadata,
394            service_tier: self.service_tier,
395            thinking: self.thinking,
396            mcp_servers: self.mcp_servers,
397            container: self.container,
398            stream: false,
399        })
400    }
401}
402
403/// Request payload for `POST /v1/messages/count_tokens`.
404///
405/// Construct via [`CountTokensRequest::builder`].
406#[derive(Debug, Clone, Serialize)]
407#[non_exhaustive]
408pub struct CountTokensRequest {
409    /// Model whose tokenizer to use.
410    pub model: ModelId,
411    /// Conversation history.
412    pub messages: Vec<MessageInput>,
413
414    /// Optional system prompt.
415    #[serde(skip_serializing_if = "Option::is_none")]
416    pub system: Option<SystemPrompt>,
417    /// Tools that would be exposed.
418    #[serde(skip_serializing_if = "Vec::is_empty")]
419    pub tools: Vec<Tool>,
420    /// Tool-use policy.
421    #[serde(skip_serializing_if = "Option::is_none")]
422    pub tool_choice: Option<ToolChoice>,
423    /// Extended-thinking configuration.
424    #[serde(skip_serializing_if = "Option::is_none")]
425    pub thinking: Option<ThinkingConfig>,
426    /// MCP servers exposed.
427    #[serde(skip_serializing_if = "Vec::is_empty")]
428    pub mcp_servers: Vec<McpServerConfig>,
429}
430
431impl CountTokensRequest {
432    /// Begin configuring a token-count request.
433    #[must_use]
434    pub fn builder() -> CountTokensRequestBuilder {
435        CountTokensRequestBuilder::default()
436    }
437}
438
439impl From<&CreateMessageRequest> for CountTokensRequest {
440    /// Project a [`CreateMessageRequest`] onto the subset of fields the
441    /// count-tokens endpoint accepts. Sampling parameters (`temperature`,
442    /// `top_p`, etc.) and `max_tokens` are dropped because they don't
443    /// affect tokenization.
444    fn from(req: &CreateMessageRequest) -> Self {
445        Self {
446            model: req.model.clone(),
447            messages: req.messages.clone(),
448            system: req.system.clone(),
449            tools: req.tools.clone(),
450            tool_choice: req.tool_choice.clone(),
451            thinking: req.thinking,
452            mcp_servers: req.mcp_servers.clone(),
453        }
454    }
455}
456
457/// Builder for [`CountTokensRequest`].
458#[derive(Debug, Default)]
459pub struct CountTokensRequestBuilder {
460    model: Option<ModelId>,
461    messages: Vec<MessageInput>,
462    system: Option<SystemPrompt>,
463    tools: Vec<Tool>,
464    tool_choice: Option<ToolChoice>,
465    thinking: Option<ThinkingConfig>,
466    mcp_servers: Vec<McpServerConfig>,
467}
468
469impl CountTokensRequestBuilder {
470    /// Set the model. Required.
471    #[must_use]
472    pub fn model(mut self, m: impl Into<ModelId>) -> Self {
473        self.model = Some(m.into());
474        self
475    }
476
477    /// Replace the conversation history.
478    #[must_use]
479    pub fn messages(mut self, msgs: Vec<MessageInput>) -> Self {
480        self.messages = msgs;
481        self
482    }
483
484    /// Append a user-authored message.
485    #[must_use]
486    pub fn user(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
487        self.messages.push(MessageInput::user(content));
488        self
489    }
490
491    /// Append an assistant-authored message.
492    #[must_use]
493    pub fn assistant(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
494        self.messages.push(MessageInput::assistant(content));
495        self
496    }
497
498    /// Set the system prompt.
499    #[must_use]
500    pub fn system(mut self, s: impl Into<SystemPrompt>) -> Self {
501        self.system = Some(s.into());
502        self
503    }
504
505    /// Set the available tools.
506    #[must_use]
507    pub fn tools(mut self, tools: Vec<Tool>) -> Self {
508        self.tools = tools;
509        self
510    }
511
512    /// Set the tool-use policy.
513    #[must_use]
514    pub fn tool_choice(mut self, choice: ToolChoice) -> Self {
515        self.tool_choice = Some(choice);
516        self
517    }
518
519    /// Set the extended-thinking config.
520    #[must_use]
521    pub fn thinking(mut self, t: ThinkingConfig) -> Self {
522        self.thinking = Some(t);
523        self
524    }
525
526    /// Set the MCP servers exposed.
527    #[must_use]
528    pub fn mcp_servers(mut self, servers: Vec<McpServerConfig>) -> Self {
529        self.mcp_servers = servers;
530        self
531    }
532
533    /// Finalize the request.
534    ///
535    /// # Errors
536    ///
537    /// Returns [`Error::InvalidConfig`] if `model` was not set.
538    pub fn build(self) -> Result<CountTokensRequest> {
539        let model = self
540            .model
541            .ok_or_else(|| Error::InvalidConfig("model is required".into()))?;
542        Ok(CountTokensRequest {
543            model,
544            messages: self.messages,
545            system: self.system,
546            tools: self.tools,
547            tool_choice: self.tool_choice,
548            thinking: self.thinking,
549            mcp_servers: self.mcp_servers,
550        })
551    }
552}
553
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_json::{json, Value};

    /// Builder with `SONNET_4_6` and the given `max_tokens` already applied,
    /// i.e. the two fields `build` insists on.
    fn sonnet(max_tokens: u32) -> CreateMessageRequestBuilder {
        CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(max_tokens)
    }

    /// Build the request and serialize it to JSON, panicking if either step fails.
    fn wire(builder: CreateMessageRequestBuilder) -> Value {
        let req = builder.build().unwrap();
        serde_json::to_value(&req).unwrap()
    }

    #[test]
    fn build_requires_model_and_max_tokens() {
        let nothing_set = CreateMessageRequest::builder().build();
        assert!(matches!(nothing_set, Err(Error::InvalidConfig(_))));

        let model_only = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .build();
        assert!(matches!(model_only, Err(Error::InvalidConfig(_))));
    }

    #[test]
    fn minimal_request_serializes_cleanly() {
        let v = wire(sonnet(64).user("hello"));
        let expected = json!({
            "model": "claude-sonnet-4-6",
            "max_tokens": 64,
            "messages": [{"role": "user", "content": "hello"}]
        });
        assert_eq!(v, expected);
    }

    #[test]
    fn full_request_serializes_all_fields() {
        let builder = CreateMessageRequest::builder()
            .model(ModelId::OPUS_4_7)
            .max_tokens(1024)
            .system("be concise")
            .user("hi")
            .assistant("hey, what's up")
            .user("tell me a joke")
            .temperature(0.5)
            .top_p(0.75)
            .top_k(40)
            .stop_sequences(vec!["\n\n".into()])
            .metadata(MessageMetadata::with_user("user_42"))
            .service_tier(RequestServiceTier::Auto)
            .thinking(ThinkingConfig::enabled(2048))
            .container("cnt_x");
        let v = wire(builder);

        assert_eq!(v["model"], "claude-opus-4-7");
        assert_eq!(v["max_tokens"], 1024);
        assert_eq!(v["system"], "be concise");
        assert_eq!(v["temperature"], 0.5);
        assert_eq!(v["top_p"], 0.75);
        assert_eq!(v["top_k"], 40);
        assert_eq!(v["stop_sequences"], json!(["\n\n"]));
        assert_eq!(v["metadata"]["user_id"], "user_42");
        assert_eq!(v["service_tier"], "auto");
        assert_eq!(v["thinking"]["type"], "enabled");
        assert_eq!(v["thinking"]["budget_tokens"], 2048);
        assert_eq!(v["container"], "cnt_x");
        assert_eq!(v["messages"].as_array().unwrap().len(), 3);
        // A default (`false`) `stream` must not appear on the wire at all.
        assert!(
            v.get("stream").is_none(),
            "stream must be omitted when false"
        );
    }

    #[test]
    fn cache_control_on_system_converts_text_to_blocks_with_breakpoint() {
        let v = wire(
            sonnet(8)
                .system("you are concise")
                .cache_control_on_system()
                .user("hi"),
        );
        let expected = json!([{
            "type": "text",
            "text": "you are concise",
            "cache_control": {"type": "ephemeral"}
        }]);
        assert_eq!(v["system"], expected);
    }

    #[test]
    fn cache_control_on_system_marks_last_text_block_when_blocks_supplied() {
        let prompt = vec![ContentBlock::text("first"), ContentBlock::text("second")];
        let v = wire(
            sonnet(8)
                .system(prompt)
                .cache_control_on_system()
                .user("hi"),
        );
        let blocks = v["system"].as_array().unwrap();
        assert!(blocks[0].get("cache_control").is_none());
        assert_eq!(blocks[1]["cache_control"]["type"], "ephemeral");
    }

    #[test]
    fn cache_control_on_system_is_noop_when_no_system_set() {
        let v = wire(sonnet(8).cache_control_on_system().user("hi"));
        assert!(v.get("system").is_none());
    }

    #[test]
    fn count_tokens_minimal_request_serializes_cleanly() {
        let req = CountTokensRequest::builder()
            .model(ModelId::HAIKU_4_5)
            .user("hi")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let expected = json!({
            "model": "claude-haiku-4-5-20251001",
            "messages": [{"role": "user", "content": "hi"}]
        });
        assert_eq!(v, expected);
    }

    #[test]
    fn count_tokens_requires_model() {
        let outcome = CountTokensRequest::builder().build();
        assert!(matches!(outcome, Err(Error::InvalidConfig(_))));
    }

    #[test]
    fn cache_control_on_last_user_converts_text_to_blocks() {
        let v = wire(
            sonnet(8)
                .user("first")
                .assistant("response")
                .user("follow-up")
                .cache_control_on_last_user(),
        );
        let msgs = v["messages"].as_array().unwrap();
        // The earlier user turn keeps its plain-string content.
        assert_eq!(msgs[0]["content"], "first");
        // The final user turn is now one cached text block.
        let last = &msgs[2];
        assert_eq!(last["role"], "user");
        assert_eq!(last["content"][0]["type"], "text");
        assert_eq!(last["content"][0]["text"], "follow-up");
        assert_eq!(last["content"][0]["cache_control"]["type"], "ephemeral");
    }

    #[test]
    fn cache_control_on_last_user_marks_last_block_when_blocks_supplied() {
        let content = vec![ContentBlock::text("a"), ContentBlock::text("b")];
        let v = wire(sonnet(8).user(content).cache_control_on_last_user());
        let blocks = v["messages"][0]["content"].as_array().unwrap();
        assert!(blocks[0].get("cache_control").is_none());
        assert_eq!(blocks[1]["cache_control"]["type"], "ephemeral");
    }

    #[test]
    fn cache_control_on_last_user_is_noop_without_user_messages() {
        let v = wire(
            sonnet(8)
                .assistant("orphan prefill")
                .cache_control_on_last_user(),
        );
        // With no user turn present, the assistant prefill stays as it was.
        assert_eq!(v["messages"][0]["content"], "orphan prefill");
    }

    #[test]
    fn cache_control_on_tools_marks_last_tool() {
        use crate::messages::tools::CustomTool;
        let schema = || json!({"type": "object"});
        let tool_list = vec![
            Tool::Custom(CustomTool::new("first", schema())),
            Tool::Custom(CustomTool::new("second", schema())),
        ];
        let v = wire(
            sonnet(8)
                .user("hi")
                .tools(tool_list)
                .cache_control_on_tools(),
        );
        let tools = v["tools"].as_array().unwrap();
        assert!(tools[0].get("cache_control").is_none());
        assert_eq!(tools[1]["cache_control"]["type"], "ephemeral");
    }

    #[test]
    fn cache_control_on_tools_is_noop_without_tools() {
        let v = wire(sonnet(8).user("hi").cache_control_on_tools());
        assert!(v.get("tools").is_none() || v["tools"].as_array().unwrap().is_empty());
    }

    #[test]
    fn cache_system_alias_matches_long_form() {
        let seed = || sonnet(8).system("S").user("u");
        let short = wire(seed().cache_system());
        let long = wire(seed().cache_control_on_system());
        assert_eq!(short, long);
    }

    #[test]
    fn cache_system_with_ttl_emits_ttl_field() {
        let v = wire(
            sonnet(8)
                .system("S")
                .user("u")
                .cache_system_with_ttl("1h"),
        );
        let blocks = v["system"].as_array().unwrap();
        let cc = &blocks[0]["cache_control"];
        assert_eq!(cc["type"], "ephemeral");
        assert_eq!(cc["ttl"], "1h");
    }

    #[test]
    fn cache_last_user_with_ttl_emits_ttl_field() {
        let v = wire(sonnet(8).user("question").cache_last_user_with_ttl("5m"));
        let blocks = v["messages"][0]["content"].as_array().unwrap();
        let cc = &blocks[0]["cache_control"];
        assert_eq!(cc["type"], "ephemeral");
        assert_eq!(cc["ttl"], "5m");
    }

    #[test]
    fn cache_tools_with_ttl_emits_ttl_field() {
        use crate::messages::tools::CustomTool;
        let only_tool = Tool::Custom(CustomTool {
            name: "t".into(),
            description: None,
            input_schema: json!({"type":"object"}),
            cache_control: None,
        });
        let v = wire(
            sonnet(8)
                .user("u")
                .tools(vec![only_tool])
                .cache_tools_with_ttl("1h"),
        );
        let cc = &v["tools"][0]["cache_control"];
        assert_eq!(cc["type"], "ephemeral");
        assert_eq!(cc["ttl"], "1h");
    }
}