Skip to main content

claude_api/messages/
request.rs

//! Request payloads for the Messages API.
//!
//! [`CreateMessageRequest`] is the typed payload for `POST /v1/messages`,
//! assembled through a fluent builder. Every field is optional except
//! `model` and `max_tokens`; the builder exposes convenience methods
//! (`.user()`, `.system()`, `.tools()`, `.cache_control_on_system()`, etc.)
//! alongside the raw field setters.
//!
//! [`CountTokensRequest`] is the slimmer sibling used by
//! `POST /v1/messages/count_tokens`; it accepts the same message list and
//! tools but omits generation parameters.
12
13use serde::Serialize;
14
15use crate::error::{Error, Result};
16use crate::messages::cache::CacheControl;
17use crate::messages::content::{ContentBlock, KnownBlock};
18use crate::messages::input::{MessageInput, SystemPrompt};
19
20fn apply_cache_control_to_last_block_with(blocks: &mut [ContentBlock], cc: CacheControl) {
21    let Some(last) = blocks.last_mut() else {
22        return;
23    };
24    if let ContentBlock::Known(
25        KnownBlock::Text { cache_control, .. }
26        | KnownBlock::Image { cache_control, .. }
27        | KnownBlock::Document { cache_control, .. }
28        | KnownBlock::ToolResult { cache_control, .. },
29    ) = last
30    {
31        *cache_control = Some(cc);
32    }
33}
34use crate::messages::mcp::McpServerConfig;
35use crate::messages::metadata::{MessageMetadata, RequestServiceTier};
36use crate::messages::thinking::ThinkingConfig;
37use crate::messages::tools::{Tool, ToolChoice};
38use crate::types::ModelId;
39
/// Predicate for `#[serde(skip_serializing_if = "is_false")]`: returns `true`
/// exactly when the flag is `false`, so unset flags are left off the wire.
// The by-reference signature is kept because this function is named in a
// `skip_serializing_if` attribute; hence the lint exemption below.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_false(b: &bool) -> bool {
    let flag = *b;
    !flag
}
44
/// Request payload for `POST /v1/messages`.
///
/// Construct via [`CreateMessageRequest::builder`].
///
/// `model`, `max_tokens`, and `messages` are always serialized. Every other
/// field is left out of the serialized payload while it holds its empty
/// value (`None`, an empty `Vec`, or `false`).
#[derive(Debug, Clone, Serialize)]
#[non_exhaustive]
pub struct CreateMessageRequest {
    /// Model to query.
    pub model: ModelId,
    /// Maximum number of output tokens to generate.
    pub max_tokens: u32,
    /// Conversation history. Serialized even when empty.
    pub messages: Vec<MessageInput>,

    /// Optional system prompt. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system: Option<SystemPrompt>,
    /// Sampling temperature. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Nucleus sampling cutoff. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
    /// Top-k sampling cutoff. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Custom stop sequences. Skipped when `None`; `Some(vec![])` is still
    /// serialized as an empty array.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_sequences: Option<Vec<String>>,
    /// Tools the model may invoke. Skipped when the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Tool-use policy. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// Optional per-request metadata (`user_id`). Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<MessageMetadata>,
    /// Request-side service tier preference. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub service_tier: Option<RequestServiceTier>,
    /// Extended-thinking configuration. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking: Option<ThinkingConfig>,
    /// MCP servers exposed to the model on this request. Skipped when the
    /// list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub mcp_servers: Vec<McpServerConfig>,
    /// Container ID for the code-execution built-in tool. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub container: Option<String>,

    /// Whether to stream the response. Set internally by `create_stream`;
    /// not normally touched by callers.
    ///
    /// The builder always produces `false`, and a `false` value is omitted
    /// from the serialized payload.
    #[doc(hidden)]
    #[serde(default, skip_serializing_if = "is_false")]
    pub stream: bool,
}
101
102impl CreateMessageRequest {
103    /// Begin configuring a request.
104    #[must_use]
105    pub fn builder() -> CreateMessageRequestBuilder {
106        CreateMessageRequestBuilder::default()
107    }
108}
109
/// Builder for [`CreateMessageRequest`].
///
/// Mirrors the request's fields, except that `model` and `max_tokens` are
/// held as `Option`s until `build` checks that they were provided, and there
/// is no `stream` field (the built request always starts with
/// `stream: false`).
#[derive(Debug, Default)]
pub struct CreateMessageRequestBuilder {
    // Required at build time; `None` until the matching setter is called.
    model: Option<ModelId>,
    max_tokens: Option<u32>,
    // Conversation history accumulated by `messages`, `user`, `assistant`.
    messages: Vec<MessageInput>,
    // Optional settings, copied into the request unchanged by `build`.
    system: Option<SystemPrompt>,
    temperature: Option<f32>,
    top_p: Option<f32>,
    top_k: Option<u32>,
    stop_sequences: Option<Vec<String>>,
    tools: Vec<Tool>,
    tool_choice: Option<ToolChoice>,
    metadata: Option<MessageMetadata>,
    service_tier: Option<RequestServiceTier>,
    thinking: Option<ThinkingConfig>,
    mcp_servers: Vec<McpServerConfig>,
    container: Option<String>,
}
129
130impl CreateMessageRequestBuilder {
131    /// Set the model. Required.
132    #[must_use]
133    pub fn model(mut self, m: impl Into<ModelId>) -> Self {
134        self.model = Some(m.into());
135        self
136    }
137
138    /// Set the max output tokens. Required.
139    #[must_use]
140    pub fn max_tokens(mut self, n: u32) -> Self {
141        self.max_tokens = Some(n);
142        self
143    }
144
145    /// Set the system prompt.
146    #[must_use]
147    pub fn system(mut self, s: impl Into<SystemPrompt>) -> Self {
148        self.system = Some(s.into());
149        self
150    }
151
152    /// Replace the entire conversation history.
153    #[must_use]
154    pub fn messages(mut self, msgs: Vec<MessageInput>) -> Self {
155        self.messages = msgs;
156        self
157    }
158
159    /// Append a user-authored message to the history.
160    #[must_use]
161    pub fn user(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
162        self.messages.push(MessageInput::user(content));
163        self
164    }
165
166    /// Append an assistant-authored message (typically used for prefill).
167    #[must_use]
168    pub fn assistant(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
169        self.messages.push(MessageInput::assistant(content));
170        self
171    }
172
173    /// Set the available tools.
174    #[must_use]
175    pub fn tools(mut self, tools: Vec<Tool>) -> Self {
176        self.tools = tools;
177        self
178    }
179
180    /// Set the tool-use policy.
181    #[must_use]
182    pub fn tool_choice(mut self, choice: ToolChoice) -> Self {
183        self.tool_choice = Some(choice);
184        self
185    }
186
187    /// Set the sampling temperature.
188    #[must_use]
189    pub fn temperature(mut self, t: f32) -> Self {
190        self.temperature = Some(t);
191        self
192    }
193
194    /// Set the nucleus sampling cutoff.
195    #[must_use]
196    pub fn top_p(mut self, p: f32) -> Self {
197        self.top_p = Some(p);
198        self
199    }
200
201    /// Set the top-k sampling cutoff.
202    #[must_use]
203    pub fn top_k(mut self, k: u32) -> Self {
204        self.top_k = Some(k);
205        self
206    }
207
208    /// Set custom stop sequences.
209    #[must_use]
210    pub fn stop_sequences(mut self, seqs: Vec<String>) -> Self {
211        self.stop_sequences = Some(seqs);
212        self
213    }
214
215    /// Set request metadata (currently `user_id` only).
216    #[must_use]
217    pub fn metadata(mut self, m: MessageMetadata) -> Self {
218        self.metadata = Some(m);
219        self
220    }
221
222    /// Set the request-side service tier.
223    #[must_use]
224    pub fn service_tier(mut self, tier: RequestServiceTier) -> Self {
225        self.service_tier = Some(tier);
226        self
227    }
228
229    /// Set the extended-thinking config.
230    #[must_use]
231    pub fn thinking(mut self, t: ThinkingConfig) -> Self {
232        self.thinking = Some(t);
233        self
234    }
235
236    /// Set the MCP servers exposed on this request.
237    #[must_use]
238    pub fn mcp_servers(mut self, servers: Vec<McpServerConfig>) -> Self {
239        self.mcp_servers = servers;
240        self
241    }
242
243    /// Set the container ID for the code-execution built-in tool.
244    #[must_use]
245    pub fn container(mut self, id: impl Into<String>) -> Self {
246        self.container = Some(id.into());
247        self
248    }
249
250    /// Sugar: apply an ephemeral cache breakpoint at the end of the system prompt.
251    ///
252    /// - `Some(Text(s))` becomes a single text block with `cache_control: ephemeral`.
253    /// - `Some(Blocks(_))` has `cache_control: ephemeral` set on the last text block.
254    /// - `None` is a no-op.
255    #[must_use]
256    pub fn cache_control_on_system(self) -> Self {
257        self.cache_system_inner(CacheControl::ephemeral())
258    }
259
260    /// Shorter alias for [`Self::cache_control_on_system`].
261    #[must_use]
262    pub fn cache_system(self) -> Self {
263        self.cache_control_on_system()
264    }
265
266    /// Like [`Self::cache_system`] but with an explicit TTL (`"5m"`,
267    /// `"1h"`). The `"1h"` form requires the
268    /// `extended-cache-ttl-2025-04-11` beta header.
269    #[must_use]
270    pub fn cache_system_with_ttl(self, ttl: impl Into<String>) -> Self {
271        self.cache_system_inner(CacheControl::ephemeral_ttl(ttl))
272    }
273
274    fn cache_system_inner(mut self, cc: CacheControl) -> Self {
275        let blocks = match self.system.take() {
276            Some(SystemPrompt::Text(text)) => vec![ContentBlock::Known(KnownBlock::Text {
277                text,
278                cache_control: Some(cc),
279                citations: None,
280            })],
281            Some(SystemPrompt::Blocks(mut blocks)) => {
282                if let Some(ContentBlock::Known(KnownBlock::Text { cache_control, .. })) =
283                    blocks.last_mut()
284                {
285                    *cache_control = Some(cc);
286                }
287                blocks
288            }
289            None => return self,
290        };
291        self.system = Some(SystemPrompt::Blocks(blocks));
292        self
293    }
294
295    /// Sugar: apply an ephemeral cache breakpoint to the last user-authored
296    /// message in the history.
297    ///
298    /// String content is converted to a single text block carrying
299    /// `cache_control: ephemeral`. Block content has `cache_control` set on
300    /// the last block that supports it (text, image, document, `tool_result`).
301    /// No-op if there are no user-authored messages.
302    #[must_use]
303    pub fn cache_control_on_last_user(self) -> Self {
304        self.cache_last_user_inner(CacheControl::ephemeral())
305    }
306
307    /// Shorter alias for [`Self::cache_control_on_last_user`].
308    #[must_use]
309    pub fn cache_last_user(self) -> Self {
310        self.cache_control_on_last_user()
311    }
312
313    /// Like [`Self::cache_last_user`] but with an explicit TTL.
314    #[must_use]
315    pub fn cache_last_user_with_ttl(self, ttl: impl Into<String>) -> Self {
316        self.cache_last_user_inner(CacheControl::ephemeral_ttl(ttl))
317    }
318
319    fn cache_last_user_inner(mut self, cc: CacheControl) -> Self {
320        use crate::messages::input::MessageContent;
321        use crate::types::Role;
322
323        let Some(idx) = self.messages.iter().rposition(|m| m.role == Role::User) else {
324            return self;
325        };
326        let target = &mut self.messages[idx];
327        match &mut target.content {
328            MessageContent::Text(text) => {
329                target.content =
330                    MessageContent::Blocks(vec![ContentBlock::Known(KnownBlock::Text {
331                        text: std::mem::take(text),
332                        cache_control: Some(cc),
333                        citations: None,
334                    })]);
335            }
336            MessageContent::Blocks(blocks) => {
337                apply_cache_control_to_last_block_with(blocks, cc);
338            }
339        }
340        self
341    }
342
343    /// Sugar: apply an ephemeral cache breakpoint to the last tool
344    /// definition. The server caches all tool definitions up to that point;
345    /// useful when the same tool list is reused across many requests.
346    /// No-op if no tools are configured.
347    #[must_use]
348    pub fn cache_control_on_tools(self) -> Self {
349        self.cache_tools_inner(CacheControl::ephemeral())
350    }
351
352    /// Shorter alias for [`Self::cache_control_on_tools`].
353    #[must_use]
354    pub fn cache_tools(self) -> Self {
355        self.cache_control_on_tools()
356    }
357
358    /// Like [`Self::cache_tools`] but with an explicit TTL.
359    #[must_use]
360    pub fn cache_tools_with_ttl(self, ttl: impl Into<String>) -> Self {
361        self.cache_tools_inner(CacheControl::ephemeral_ttl(ttl))
362    }
363
364    fn cache_tools_inner(mut self, cc: CacheControl) -> Self {
365        use crate::messages::tools::Tool as MessagesTool;
366        let Some(last) = self.tools.last_mut() else {
367            return self;
368        };
369        if let MessagesTool::Custom(ct) = last {
370            ct.cache_control = Some(cc);
371        }
372        self
373    }
374
375    /// Finalize the request.
376    ///
377    /// # Errors
378    ///
379    /// Returns [`Error::InvalidConfig`] if `model` or `max_tokens` was not set.
380    pub fn build(self) -> Result<CreateMessageRequest> {
381        let model = self
382            .model
383            .ok_or_else(|| Error::InvalidConfig("model is required".into()))?;
384        let max_tokens = self
385            .max_tokens
386            .ok_or_else(|| Error::InvalidConfig("max_tokens is required".into()))?;
387
388        Ok(CreateMessageRequest {
389            model,
390            max_tokens,
391            messages: self.messages,
392            system: self.system,
393            temperature: self.temperature,
394            top_p: self.top_p,
395            top_k: self.top_k,
396            stop_sequences: self.stop_sequences,
397            tools: self.tools,
398            tool_choice: self.tool_choice,
399            metadata: self.metadata,
400            service_tier: self.service_tier,
401            thinking: self.thinking,
402            mcp_servers: self.mcp_servers,
403            container: self.container,
404            stream: false,
405        })
406    }
407}
408
/// Request payload for `POST /v1/messages/count_tokens`.
///
/// Construct via [`CountTokensRequest::builder`], or project an existing
/// [`CreateMessageRequest`] with `CountTokensRequest::from(&req)`.
///
/// `model` and `messages` are always serialized; the remaining fields are
/// left out of the payload while they are `None` or empty.
#[derive(Debug, Clone, Serialize)]
#[non_exhaustive]
pub struct CountTokensRequest {
    /// Model whose tokenizer to use.
    pub model: ModelId,
    /// Conversation history. Serialized even when empty.
    pub messages: Vec<MessageInput>,

    /// Optional system prompt. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system: Option<SystemPrompt>,
    /// Tools that would be exposed. Skipped when the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Tool-use policy. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// Extended-thinking configuration. Skipped when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking: Option<ThinkingConfig>,
    /// MCP servers exposed. Skipped when the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub mcp_servers: Vec<McpServerConfig>,
}
436
437impl CountTokensRequest {
438    /// Begin configuring a token-count request.
439    #[must_use]
440    pub fn builder() -> CountTokensRequestBuilder {
441        CountTokensRequestBuilder::default()
442    }
443}
444
445impl From<&CreateMessageRequest> for CountTokensRequest {
446    /// Project a [`CreateMessageRequest`] onto the subset of fields the
447    /// count-tokens endpoint accepts. Sampling parameters (`temperature`,
448    /// `top_p`, etc.) and `max_tokens` are dropped because they don't
449    /// affect tokenization.
450    fn from(req: &CreateMessageRequest) -> Self {
451        Self {
452            model: req.model.clone(),
453            messages: req.messages.clone(),
454            system: req.system.clone(),
455            tools: req.tools.clone(),
456            tool_choice: req.tool_choice.clone(),
457            thinking: req.thinking,
458            mcp_servers: req.mcp_servers.clone(),
459        }
460    }
461}
462
/// Builder for [`CountTokensRequest`].
///
/// Mirrors the request's fields, except that `model` is held as an `Option`
/// until `build` checks that it was provided.
#[derive(Debug, Default)]
pub struct CountTokensRequestBuilder {
    // Required at build time; `None` until `model` is called.
    model: Option<ModelId>,
    // Conversation history accumulated by `messages`, `user`, `assistant`.
    messages: Vec<MessageInput>,
    // Optional settings, copied into the request unchanged by `build`.
    system: Option<SystemPrompt>,
    tools: Vec<Tool>,
    tool_choice: Option<ToolChoice>,
    thinking: Option<ThinkingConfig>,
    mcp_servers: Vec<McpServerConfig>,
}
474
475impl CountTokensRequestBuilder {
476    /// Set the model. Required.
477    #[must_use]
478    pub fn model(mut self, m: impl Into<ModelId>) -> Self {
479        self.model = Some(m.into());
480        self
481    }
482
483    /// Replace the conversation history.
484    #[must_use]
485    pub fn messages(mut self, msgs: Vec<MessageInput>) -> Self {
486        self.messages = msgs;
487        self
488    }
489
490    /// Append a user-authored message.
491    #[must_use]
492    pub fn user(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
493        self.messages.push(MessageInput::user(content));
494        self
495    }
496
497    /// Append an assistant-authored message.
498    #[must_use]
499    pub fn assistant(mut self, content: impl Into<crate::messages::input::MessageContent>) -> Self {
500        self.messages.push(MessageInput::assistant(content));
501        self
502    }
503
504    /// Set the system prompt.
505    #[must_use]
506    pub fn system(mut self, s: impl Into<SystemPrompt>) -> Self {
507        self.system = Some(s.into());
508        self
509    }
510
511    /// Set the available tools.
512    #[must_use]
513    pub fn tools(mut self, tools: Vec<Tool>) -> Self {
514        self.tools = tools;
515        self
516    }
517
518    /// Set the tool-use policy.
519    #[must_use]
520    pub fn tool_choice(mut self, choice: ToolChoice) -> Self {
521        self.tool_choice = Some(choice);
522        self
523    }
524
525    /// Set the extended-thinking config.
526    #[must_use]
527    pub fn thinking(mut self, t: ThinkingConfig) -> Self {
528        self.thinking = Some(t);
529        self
530    }
531
532    /// Set the MCP servers exposed.
533    #[must_use]
534    pub fn mcp_servers(mut self, servers: Vec<McpServerConfig>) -> Self {
535        self.mcp_servers = servers;
536        self
537    }
538
539    /// Finalize the request.
540    ///
541    /// # Errors
542    ///
543    /// Returns [`Error::InvalidConfig`] if `model` was not set.
544    pub fn build(self) -> Result<CountTokensRequest> {
545        let model = self
546            .model
547            .ok_or_else(|| Error::InvalidConfig("model is required".into()))?;
548        Ok(CountTokensRequest {
549            model,
550            messages: self.messages,
551            system: self.system,
552            tools: self.tools,
553            tool_choice: self.tool_choice,
554            thinking: self.thinking,
555            mcp_servers: self.mcp_servers,
556        })
557    }
558}
559
// Unit tests for the request builders. Each test builds a request and
// inspects its `serde_json` representation (or the builder error).
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_json::json;

    // `build` must fail both with nothing set and with only `model` set.
    #[test]
    fn build_requires_model_and_max_tokens() {
        let err = CreateMessageRequest::builder().build().unwrap_err();
        assert!(matches!(err, Error::InvalidConfig(_)));

        let err = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .build()
            .unwrap_err();
        assert!(matches!(err, Error::InvalidConfig(_)));
    }

    // A minimal request serializes to exactly three keys; every optional
    // field is absent from the JSON.
    #[test]
    fn minimal_request_serializes_cleanly() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(64)
            .user("hello")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        assert_eq!(
            v,
            json!({
                "model": "claude-sonnet-4-6",
                "max_tokens": 64,
                "messages": [{"role": "user", "content": "hello"}]
            })
        );
    }

    // Every scalar setter exercised here must surface under its wire name.
    #[test]
    fn full_request_serializes_all_fields() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::OPUS_4_7)
            .max_tokens(1024)
            .system("be concise")
            .user("hi")
            .assistant("hey, what's up")
            .user("tell me a joke")
            .temperature(0.5)
            .top_p(0.75)
            .top_k(40)
            .stop_sequences(vec!["\n\n".into()])
            .metadata(MessageMetadata::with_user("user_42"))
            .service_tier(RequestServiceTier::Auto)
            .thinking(ThinkingConfig::enabled(2048))
            .container("cnt_x")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        assert_eq!(v["model"], "claude-opus-4-7");
        assert_eq!(v["max_tokens"], 1024);
        assert_eq!(v["system"], "be concise");
        assert_eq!(v["temperature"], 0.5);
        assert_eq!(v["top_p"], 0.75);
        assert_eq!(v["top_k"], 40);
        assert_eq!(v["stop_sequences"], json!(["\n\n"]));
        assert_eq!(v["metadata"]["user_id"], "user_42");
        assert_eq!(v["service_tier"], "auto");
        assert_eq!(v["thinking"]["type"], "enabled");
        assert_eq!(v["thinking"]["budget_tokens"], 2048);
        assert_eq!(v["container"], "cnt_x");
        assert_eq!(v["messages"].as_array().unwrap().len(), 3);
        // `stream` is false by default and must be omitted from the wire payload.
        assert!(
            v.get("stream").is_none(),
            "stream must be omitted when false"
        );
    }

    // A plain-text system prompt is promoted to one cached text block.
    #[test]
    fn cache_control_on_system_converts_text_to_blocks_with_breakpoint() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .system("you are concise")
            .cache_control_on_system()
            .user("hi")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        assert_eq!(
            v["system"],
            json!([{
                "type": "text",
                "text": "you are concise",
                "cache_control": {"type": "ephemeral"}
            }])
        );
    }

    // With a block-list system prompt only the final block is marked.
    #[test]
    fn cache_control_on_system_marks_last_text_block_when_blocks_supplied() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .system(vec![
                ContentBlock::text("first"),
                ContentBlock::text("second"),
            ])
            .cache_control_on_system()
            .user("hi")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let blocks = v["system"].as_array().unwrap();
        assert!(blocks[0].get("cache_control").is_none());
        assert_eq!(blocks[1]["cache_control"]["type"], "ephemeral");
    }

    // Without a system prompt the helper must not invent one.
    #[test]
    fn cache_control_on_system_is_noop_when_no_system_set() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .cache_control_on_system()
            .user("hi")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        assert!(v.get("system").is_none());
    }

    // The count-tokens payload carries only `model` and `messages` when
    // nothing else is configured.
    #[test]
    fn count_tokens_minimal_request_serializes_cleanly() {
        let req = CountTokensRequest::builder()
            .model(ModelId::HAIKU_4_5)
            .user("hi")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        assert_eq!(
            v,
            json!({
                "model": "claude-haiku-4-5-20251001",
                "messages": [{"role": "user", "content": "hi"}]
            })
        );
    }

    // `model` is the only required field on the count-tokens builder.
    #[test]
    fn count_tokens_requires_model() {
        let err = CountTokensRequest::builder().build().unwrap_err();
        assert!(matches!(err, Error::InvalidConfig(_)));
    }

    // String content on the most recent user turn becomes one cached text
    // block; earlier user turns keep their plain-string form.
    #[test]
    fn cache_control_on_last_user_converts_text_to_blocks() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .user("first")
            .assistant("response")
            .user("follow-up")
            .cache_control_on_last_user()
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let msgs = v["messages"].as_array().unwrap();
        // First user untouched.
        assert_eq!(msgs[0]["content"], "first");
        // Last user converted to a single cached text block.
        assert_eq!(msgs[2]["role"], "user");
        assert_eq!(msgs[2]["content"][0]["type"], "text");
        assert_eq!(msgs[2]["content"][0]["text"], "follow-up");
        assert_eq!(msgs[2]["content"][0]["cache_control"]["type"], "ephemeral");
    }

    // With block content only the final block of the user turn is marked.
    #[test]
    fn cache_control_on_last_user_marks_last_block_when_blocks_supplied() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .user(vec![ContentBlock::text("a"), ContentBlock::text("b")])
            .cache_control_on_last_user()
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let blocks = v["messages"][0]["content"].as_array().unwrap();
        assert!(blocks[0].get("cache_control").is_none());
        assert_eq!(blocks[1]["cache_control"]["type"], "ephemeral");
    }

    // Assistant-only history: there is no user turn to mark.
    #[test]
    fn cache_control_on_last_user_is_noop_without_user_messages() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .assistant("orphan prefill")
            .cache_control_on_last_user()
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        // No user message exists; the assistant prefill is left untouched.
        assert_eq!(v["messages"][0]["content"], "orphan prefill");
    }

    // Of two custom tools, only the second (last) receives the breakpoint.
    #[test]
    fn cache_control_on_tools_marks_last_tool() {
        use crate::messages::tools::{CustomTool, Tool as MessagesTool};
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .user("hi")
            .tools(vec![
                MessagesTool::Custom(CustomTool::new("first", json!({"type": "object"}))),
                MessagesTool::Custom(CustomTool::new("second", json!({"type": "object"}))),
            ])
            .cache_control_on_tools()
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let tools = v["tools"].as_array().unwrap();
        assert!(tools[0].get("cache_control").is_none());
        assert_eq!(tools[1]["cache_control"]["type"], "ephemeral");
    }

    // With no tools configured the helper must not add any.
    #[test]
    fn cache_control_on_tools_is_noop_without_tools() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .user("hi")
            .cache_control_on_tools()
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        assert!(v.get("tools").is_none() || v["tools"].as_array().unwrap().is_empty());
    }

    // The short alias and the long-form method must serialize identically.
    #[test]
    fn cache_system_alias_matches_long_form() {
        let short = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .system("S")
            .user("u")
            .cache_system()
            .build()
            .unwrap();
        let long = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .system("S")
            .user("u")
            .cache_control_on_system()
            .build()
            .unwrap();
        assert_eq!(
            serde_json::to_value(&short).unwrap(),
            serde_json::to_value(&long).unwrap(),
        );
    }

    // The TTL variant adds a `ttl` key next to `type` on the system block.
    #[test]
    fn cache_system_with_ttl_emits_ttl_field() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .system("S")
            .user("u")
            .cache_system_with_ttl("1h")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let blocks = v["system"].as_array().unwrap();
        let cc = &blocks[0]["cache_control"];
        assert_eq!(cc["type"], "ephemeral");
        assert_eq!(cc["ttl"], "1h");
    }

    // Same TTL check for the last-user-message helper.
    #[test]
    fn cache_last_user_with_ttl_emits_ttl_field() {
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .user("question")
            .cache_last_user_with_ttl("5m")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let blocks = v["messages"][0]["content"].as_array().unwrap();
        let cc = &blocks[0]["cache_control"];
        assert_eq!(cc["type"], "ephemeral");
        assert_eq!(cc["ttl"], "5m");
    }

    // Same TTL check for the tools helper, using a struct-literal custom tool.
    #[test]
    fn cache_tools_with_ttl_emits_ttl_field() {
        use crate::messages::tools::CustomTool;
        let req = CreateMessageRequest::builder()
            .model(ModelId::SONNET_4_6)
            .max_tokens(8)
            .user("u")
            .tools(vec![Tool::Custom(CustomTool {
                name: "t".into(),
                description: None,
                input_schema: serde_json::json!({"type":"object"}),
                cache_control: None,
            })])
            .cache_tools_with_ttl("1h")
            .build()
            .unwrap();
        let v = serde_json::to_value(&req).unwrap();
        let cc = &v["tools"][0]["cache_control"];
        assert_eq!(cc["type"], "ephemeral");
        assert_eq!(cc["ttl"], "1h");
    }
}