Skip to main content

agent_sdk_foundation/
llm.rs

1//! LLM message and chat data types.
2//!
3//! These are the wire-format types shared between the runtime, providers,
4//! and the server.  The module intentionally contains **no** async traits
5//! or runtime-specific logic so it can be depended on from thin crates.
6
7use std::time::Duration;
8
9use serde::{Deserialize, Serialize};
10
11// ── Thinking ──────────────────────────────────────────────────────────
12
/// The mode of extended thinking.
///
/// Implements `PartialEq`/`Eq` so two modes can be compared directly (for
/// example with `assert_eq!`) rather than destructured with `matches!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ThinkingMode {
    /// Explicitly enabled with a token budget.
    Enabled {
        /// Number of tokens budgeted for thinking.
        budget_tokens: u32,
    },
    /// Adaptive thinking — the model decides how much to think.
    Adaptive,
}
21
22/// Effort level for adaptive thinking via `output_config`.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
24#[serde(rename_all = "lowercase")]
25pub enum Effort {
26    Low,
27    Medium,
28    High,
29    Max,
30}
31
32/// Configuration for extended thinking.
33///
34/// When enabled, the model will show its reasoning process before
35/// generating the final response.
36#[derive(Debug, Clone)]
37pub struct ThinkingConfig {
38    /// Which thinking mode to use.
39    pub mode: ThinkingMode,
40    /// Optional effort level (sent via `output_config`).
41    pub effort: Option<Effort>,
42}
43
44impl ThinkingConfig {
45    /// Default budget: 10,000 tokens.
46    ///
47    /// This provides enough capacity for meaningful reasoning on most tasks
48    /// while keeping costs reasonable. Increase for complex multi-step problems.
49    pub const DEFAULT_BUDGET_TOKENS: u32 = 10_000;
50
51    /// Minimum budget required by the Anthropic API.
52    pub const MIN_BUDGET_TOKENS: u32 = 1_024;
53
54    /// Create a config with an explicit token budget (Enabled mode).
55    #[must_use]
56    pub const fn new(budget_tokens: u32) -> Self {
57        Self {
58            mode: ThinkingMode::Enabled { budget_tokens },
59            effort: None,
60        }
61    }
62
63    /// Create an adaptive thinking config.
64    #[must_use]
65    pub const fn adaptive() -> Self {
66        Self {
67            mode: ThinkingMode::Adaptive,
68            effort: None,
69        }
70    }
71
72    /// Create an adaptive thinking config with an effort level.
73    #[must_use]
74    pub const fn adaptive_with_effort(effort: Effort) -> Self {
75        Self {
76            mode: ThinkingMode::Adaptive,
77            effort: Some(effort),
78        }
79    }
80
81    /// Set the effort level on an existing config.
82    #[must_use]
83    pub const fn with_effort(mut self, effort: Effort) -> Self {
84        self.effort = Some(effort);
85        self
86    }
87}
88
89impl Default for ThinkingConfig {
90    fn default() -> Self {
91        Self::new(Self::DEFAULT_BUDGET_TOKENS)
92    }
93}
94
95// ── Request / Response ────────────────────────────────────────────────
96
/// Controls whether the model must use a tool.
///
/// Implements `PartialEq`/`Eq` so a choice can be compared directly (for
/// example with `assert_eq!`) instead of being matched with `matches!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ToolChoice {
    /// Let the model decide whether to use tools (default when `None`).
    Auto,
    /// Force the model to call a specific tool by name.
    Tool(String),
}
105
106/// Requests that the model constrain its final answer to a JSON Schema.
107///
108/// This is the wire-level description of a structured-output request. The
109/// runtime maps it to each provider's native capability:
110///
111/// - **`OpenAI` / Gemini**: native JSON-mode / structured-outputs
112///   (`response_format` / `responseSchema`).
113/// - **Anthropic**: tool-forcing fallback — the runtime injects a single
114///   "respond" tool whose `input_schema` is [`schema`](Self::schema) and
115///   forces the model to call it.
116///
117/// The runtime validates the model's final output against [`schema`](Self::schema)
118/// and, on mismatch, bounded-re-prompts before failing with a typed error.
119#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
120pub struct ResponseFormat {
121    /// Stable identifier for the schema. Surfaced to providers that require a
122    /// name (`OpenAI` `json_schema.name`, the Anthropic fallback tool name).
123    pub name: String,
124    /// The JSON Schema the final assistant output must satisfy.
125    ///
126    /// This is a raw JSON Schema document (an object), not a Rust type. Callers
127    /// that derive schemas from Rust types can plug in `schemars` upstream and
128    /// pass the resulting document here.
129    pub schema: serde_json::Value,
130    /// Whether the provider should enforce strict schema adherence when it
131    /// supports a strict mode (`OpenAI` `strict: true`). Has no effect on
132    /// providers without a strict mode.
133    pub strict: bool,
134}
135
136impl ResponseFormat {
137    /// Create a response format from a schema name and a JSON Schema document.
138    ///
139    /// Defaults to `strict = true` so providers with a strict mode enforce the
140    /// schema rather than treating it as a hint.
141    #[must_use]
142    pub fn new(name: impl Into<String>, schema: serde_json::Value) -> Self {
143        Self {
144            name: name.into(),
145            schema,
146            strict: true,
147        }
148    }
149
150    /// Set whether strict schema adherence is requested.
151    #[must_use]
152    pub const fn with_strict(mut self, strict: bool) -> Self {
153        self.strict = strict;
154        self
155    }
156}
157
/// Retention window for a provider-side prompt-cache breakpoint.
///
/// The enum models only the values the Anthropic Messages API accepts, which
/// lets every variant map losslessly onto the wire `ttl` string. Providers
/// that have no equivalent control ignore it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheTtl {
    /// Ephemeral five-minute cache (the provider default).
    FiveMinutes,
    /// Ephemeral one-hour cache (extended retention).
    OneHour,
}

impl CacheTtl {
    /// Wire representation of this TTL: `"5m"` or `"1h"`.
    #[must_use]
    pub const fn as_wire_str(self) -> &'static str {
        match self {
            Self::OneHour => "1h",
            Self::FiveMinutes => "5m",
        }
    }
}
181
182/// Caller-facing control over provider-side prompt caching.
183///
184/// This is additive: a [`ChatRequest`] with `cache = None` preserves each
185/// provider's default caching behaviour. Set it to shape (or disable) caching:
186///
187/// - `enabled = false` opts the request out of caching entirely — providers
188///   send no `cache_control` breakpoints.
189/// - `ttl` selects the cache retention window (Anthropic ephemeral TTL).
190/// - `max_breakpoints` caps how many cache breakpoints the provider may emit,
191///   in decreasing order of prefix stability (tools, then system, then the
192///   conversation tail). `None` leaves the provider's default count.
193///
194/// Providers without a prompt-cache control ignore every field gracefully.
195#[derive(Debug, Clone)]
196pub struct CacheConfig {
197    /// Whether prompt caching is enabled for this request.
198    pub enabled: bool,
199    /// Optional cache retention window. `None` uses the provider default.
200    pub ttl: Option<CacheTtl>,
201    /// Optional cap on the number of cache breakpoints the provider emits.
202    pub max_breakpoints: Option<u8>,
203}
204
205impl Default for CacheConfig {
206    fn default() -> Self {
207        Self::enabled()
208    }
209}
210
211impl CacheConfig {
212    /// An enabled cache config with provider defaults (no TTL override, all
213    /// breakpoints).
214    #[must_use]
215    pub const fn enabled() -> Self {
216        Self {
217            enabled: true,
218            ttl: None,
219            max_breakpoints: None,
220        }
221    }
222
223    /// A config that opts the request out of provider-side caching.
224    #[must_use]
225    pub const fn disabled() -> Self {
226        Self {
227            enabled: false,
228            ttl: None,
229            max_breakpoints: None,
230        }
231    }
232
233    /// Set the cache retention window.
234    #[must_use]
235    pub const fn with_ttl(mut self, ttl: CacheTtl) -> Self {
236        self.ttl = Some(ttl);
237        self
238    }
239
240    /// Cap the number of cache breakpoints the provider may emit.
241    #[must_use]
242    pub const fn with_max_breakpoints(mut self, max_breakpoints: u8) -> Self {
243        self.max_breakpoints = Some(max_breakpoints);
244        self
245    }
246}
247
248#[derive(Debug, Clone)]
249pub struct ChatRequest {
250    pub system: String,
251    pub messages: Vec<Message>,
252    pub tools: Option<Vec<Tool>>,
253    pub max_tokens: u32,
254    /// Whether `max_tokens` was explicitly configured by the caller.
255    pub max_tokens_explicit: bool,
256    /// Optional session identifier for provider-side prompt caching or routing.
257    pub session_id: Option<String>,
258    /// Optional provider-managed cached content reference.
259    ///
260    /// This currently maps to Gemini / Vertex AI `cachedContent` handles.
261    pub cached_content: Option<String>,
262    /// Optional extended thinking configuration.
263    pub thinking: Option<ThinkingConfig>,
264    /// Optional constraint on tool usage.
265    ///
266    /// When `None` the provider's default behaviour applies (typically `auto`).
267    pub tool_choice: Option<ToolChoice>,
268    /// Optional request for the final answer to be constrained to a JSON
269    /// Schema.
270    ///
271    /// When `Some`, the provider maps this to its native JSON-mode /
272    /// structured-output capability (or a tool-forcing fallback) and the
273    /// runtime validates the final output against the schema. When `None`
274    /// (default) the model responds freely.
275    pub response_format: Option<ResponseFormat>,
276    /// Optional control over provider-side prompt caching.
277    ///
278    /// When `None` (default) each provider keeps its built-in caching
279    /// behaviour. When `Some`, providers that support prompt caching honour
280    /// the [`CacheConfig`] (TTL, opt-out, breakpoint cap); others ignore it.
281    pub cache: Option<CacheConfig>,
282}
283
284impl ChatRequest {
285    /// Default token budget used by [`ChatRequest::new`] when the caller does
286    /// not set one explicitly. Providers clamp this to their own ceiling.
287    pub const DEFAULT_MAX_TOKENS: u32 = 4096;
288
289    /// Build a request from a system prompt and a message list, leaving every
290    /// optional knob at its default.
291    ///
292    /// This is the ergonomic counterpart to the (still-public) struct literal:
293    /// the common case only needs `system` + `messages`, so callers no longer
294    /// have to spell out the eight `None`/default fields. Layer optional
295    /// settings on with the chainable `with_*` setters:
296    ///
297    /// ```
298    /// use agent_sdk_foundation::llm::{ChatRequest, Message, ToolChoice};
299    ///
300    /// let req = ChatRequest::new("You are helpful.", vec![Message::user("Hi")])
301    ///     .with_max_tokens(1024)
302    ///     .with_tool_choice(ToolChoice::Auto);
303    /// ```
304    #[must_use]
305    pub fn new(system: impl Into<String>, messages: Vec<Message>) -> Self {
306        Self {
307            system: system.into(),
308            messages,
309            tools: None,
310            max_tokens: Self::DEFAULT_MAX_TOKENS,
311            max_tokens_explicit: false,
312            session_id: None,
313            cached_content: None,
314            thinking: None,
315            tool_choice: None,
316            response_format: None,
317            cache: None,
318        }
319    }
320
321    /// Set the tool list the model may call.
322    #[must_use]
323    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
324        self.tools = Some(tools);
325        self
326    }
327
328    /// Set the maximum output-token budget (marks it as explicitly configured).
329    #[must_use]
330    pub const fn with_max_tokens(mut self, max_tokens: u32) -> Self {
331        self.max_tokens = max_tokens;
332        self.max_tokens_explicit = true;
333        self
334    }
335
336    /// Set the session identifier (provider-side prompt caching / routing).
337    #[must_use]
338    pub fn with_session_id(mut self, session_id: impl Into<String>) -> Self {
339        self.session_id = Some(session_id.into());
340        self
341    }
342
343    /// Set the extended-thinking configuration.
344    #[must_use]
345    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
346        self.thinking = Some(thinking);
347        self
348    }
349
350    /// Constrain tool usage (defaults to the provider's `auto` when unset).
351    #[must_use]
352    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
353        self.tool_choice = Some(tool_choice);
354        self
355    }
356
357    /// Request the final answer be constrained to the given JSON-Schema
358    /// [`ResponseFormat`] (structured output).
359    #[must_use]
360    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
361        self.response_format = Some(response_format);
362        self
363    }
364
365    /// Set the provider-side prompt-cache control ([`CacheConfig`]).
366    #[must_use]
367    pub const fn with_cache(mut self, cache: CacheConfig) -> Self {
368        self.cache = Some(cache);
369        self
370    }
371}
372
373#[derive(Debug, Clone, Serialize, Deserialize)]
374pub struct Message {
375    pub role: Role,
376    pub content: Content,
377}
378
379impl Message {
380    #[must_use]
381    pub fn user(text: impl Into<String>) -> Self {
382        Self {
383            role: Role::User,
384            content: Content::Text(text.into()),
385        }
386    }
387
388    #[must_use]
389    pub const fn user_with_content(blocks: Vec<ContentBlock>) -> Self {
390        Self {
391            role: Role::User,
392            content: Content::Blocks(blocks),
393        }
394    }
395
396    #[must_use]
397    pub fn assistant(text: impl Into<String>) -> Self {
398        Self {
399            role: Role::Assistant,
400            content: Content::Text(text.into()),
401        }
402    }
403
404    #[must_use]
405    pub const fn assistant_with_content(blocks: Vec<ContentBlock>) -> Self {
406        Self {
407            role: Role::Assistant,
408            content: Content::Blocks(blocks),
409        }
410    }
411
412    #[must_use]
413    pub fn assistant_with_tool_use(
414        text: Option<String>,
415        id: impl Into<String>,
416        name: impl Into<String>,
417        input: serde_json::Value,
418    ) -> Self {
419        let mut blocks = Vec::new();
420        if let Some(t) = text {
421            blocks.push(ContentBlock::Text { text: t });
422        }
423        blocks.push(ContentBlock::ToolUse {
424            id: id.into(),
425            name: name.into(),
426            input,
427            thought_signature: None,
428        });
429        Self {
430            role: Role::Assistant,
431            content: Content::Blocks(blocks),
432        }
433    }
434
435    #[must_use]
436    pub fn tool_result(
437        tool_use_id: impl Into<String>,
438        content: impl Into<String>,
439        is_error: bool,
440    ) -> Self {
441        Self {
442            role: Role::User,
443            content: Content::Blocks(vec![ContentBlock::ToolResult {
444                tool_use_id: tool_use_id.into(),
445                content: content.into(),
446                is_error: if is_error { Some(true) } else { None },
447            }]),
448        }
449    }
450}
451
452#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
453#[serde(rename_all = "lowercase")]
454pub enum Role {
455    User,
456    Assistant,
457}
458
459#[derive(Debug, Clone, Serialize, Deserialize)]
460#[serde(untagged)]
461pub enum Content {
462    Text(String),
463    Blocks(Vec<ContentBlock>),
464}
465
466impl Content {
467    #[must_use]
468    pub fn first_text(&self) -> Option<&str> {
469        match self {
470            Self::Text(s) => Some(s),
471            Self::Blocks(blocks) => blocks.iter().find_map(|b| match b {
472                ContentBlock::Text { text } => Some(text.as_str()),
473                _ => None,
474            }),
475        }
476    }
477}
478
479/// Source data for image and document content blocks.
480#[derive(Debug, Clone, Serialize, Deserialize)]
481pub struct ContentSource {
482    pub media_type: String,
483    pub data: String,
484}
485
486impl ContentSource {
487    #[must_use]
488    pub fn new(media_type: impl Into<String>, data: impl Into<String>) -> Self {
489        Self {
490            media_type: media_type.into(),
491            data: data.into(),
492        }
493    }
494}
495
496#[derive(Debug, Clone, Serialize, Deserialize)]
497#[serde(tag = "type")]
498#[non_exhaustive]
499pub enum ContentBlock {
500    #[serde(rename = "text")]
501    Text { text: String },
502
503    #[serde(rename = "thinking")]
504    Thinking {
505        thinking: String,
506        /// Opaque signature for round-tripping thinking blocks back to the API.
507        #[serde(skip_serializing_if = "Option::is_none")]
508        signature: Option<String>,
509    },
510
511    #[serde(rename = "redacted_thinking")]
512    RedactedThinking { data: String },
513
514    #[serde(rename = "tool_use")]
515    ToolUse {
516        id: String,
517        name: String,
518        input: serde_json::Value,
519        /// Gemini thought signature for preserving reasoning context.
520        /// Required for Gemini 3 models when sending function calls back.
521        #[serde(skip_serializing_if = "Option::is_none")]
522        thought_signature: Option<String>,
523    },
524
525    #[serde(rename = "tool_result")]
526    ToolResult {
527        tool_use_id: String,
528        content: String,
529        #[serde(skip_serializing_if = "Option::is_none")]
530        is_error: Option<bool>,
531    },
532
533    #[serde(rename = "image")]
534    Image { source: ContentSource },
535
536    #[serde(rename = "document")]
537    Document { source: ContentSource },
538}
539
540#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
541pub struct Tool {
542    pub name: String,
543    pub description: String,
544    pub input_schema: serde_json::Value,
545    /// Human-readable display name shown in UI and audit records.
546    pub display_name: String,
547    /// Permission tier for this tool.
548    pub tier: super::types::ToolTier,
549}
550
551#[derive(Debug, Clone)]
552pub struct ChatResponse {
553    pub id: String,
554    pub content: Vec<ContentBlock>,
555    pub model: String,
556    pub stop_reason: Option<StopReason>,
557    pub usage: Usage,
558}
559
560impl ChatResponse {
561    #[must_use]
562    pub fn first_text(&self) -> Option<&str> {
563        self.content.iter().find_map(|b| match b {
564            ContentBlock::Text { text } => Some(text.as_str()),
565            _ => None,
566        })
567    }
568
569    #[must_use]
570    pub fn first_thinking(&self) -> Option<&str> {
571        self.content.iter().find_map(|b| match b {
572            ContentBlock::Thinking { thinking, .. } => Some(thinking.as_str()),
573            _ => None,
574        })
575    }
576
577    pub fn tool_uses(&self) -> impl Iterator<Item = (&str, &str, &serde_json::Value)> {
578        self.content.iter().filter_map(|b| match b {
579            ContentBlock::ToolUse {
580                id, name, input, ..
581            } => Some((id.as_str(), name.as_str(), input)),
582            _ => None,
583        })
584    }
585
586    #[must_use]
587    pub fn has_tool_use(&self) -> bool {
588        self.content
589            .iter()
590            .any(|b| matches!(b, ContentBlock::ToolUse { .. }))
591    }
592}
593
594#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
595#[serde(rename_all = "snake_case")]
596#[non_exhaustive]
597pub enum StopReason {
598    EndTurn,
599    ToolUse,
600    MaxTokens,
601    StopSequence,
602    Refusal,
603    ModelContextWindowExceeded,
604    /// A stop reason this version of the SDK does not recognize.
605    ///
606    /// Providers may introduce new stop reasons at any time. Rather than
607    /// failing deserialization of an otherwise-valid response (or a
608    /// persisted/replayed audit row), unknown values map here via
609    /// `#[serde(other)]`. Consumers should treat it like
610    /// [`StopReason::EndTurn`] (turn finished, nothing actionable) unless
611    /// they have a more specific fallback.
612    #[serde(other)]
613    Unknown,
614}
615
616impl StopReason {
617    /// Stable discriminant string used for durable rows, metrics, and
618    /// dashboards.  Matches the serde representation.
619    #[must_use]
620    pub const fn as_str(&self) -> &'static str {
621        match self {
622            Self::EndTurn => "end_turn",
623            Self::ToolUse => "tool_use",
624            Self::MaxTokens => "max_tokens",
625            Self::StopSequence => "stop_sequence",
626            Self::Refusal => "refusal",
627            Self::ModelContextWindowExceeded => "model_context_window_exceeded",
628            Self::Unknown => "unknown",
629        }
630    }
631}
632
633#[derive(Debug, Clone, Serialize, Deserialize)]
634pub struct Usage {
635    /// Total input tokens reported by the provider.
636    pub input_tokens: u32,
637    pub output_tokens: u32,
638    /// Portion of `input_tokens` billed at a cached-input rate, when reported.
639    #[serde(default)]
640    pub cached_input_tokens: u32,
641    /// Portion of `input_tokens` spent creating provider-side prompt cache entries.
642    #[serde(default)]
643    pub cache_creation_input_tokens: u32,
644}
645
646#[derive(Debug, Clone)]
647#[non_exhaustive]
648pub enum ChatOutcome {
649    Success(ChatResponse),
650    /// The provider rate-limited the request (HTTP 429).
651    ///
652    /// Carries the retry delay parsed from the response's `Retry-After`
653    /// header when the provider supplied one (see [`parse_retry_after`]), so
654    /// the caller can honour the server's hint instead of guessing a backoff.
655    /// `None` when no usable `Retry-After` was present.
656    RateLimited(Option<Duration>),
657    InvalidRequest(String),
658    ServerError(String),
659}
660
661/// Parse the value of an HTTP `Retry-After` header into a [`Duration`].
662///
663/// Per [RFC 9110 §10.2.3], `Retry-After` is either a non-negative number of
664/// seconds (delta-seconds) or an IMF-fixdate HTTP timestamp
665/// (`Sun, 06 Nov 1994 08:49:37 GMT`). For the date form the delay is the
666/// difference between that instant and now; a timestamp at or before now (or
667/// any value that cannot be parsed) yields `None`.
668///
669/// [RFC 9110 §10.2.3]: https://www.rfc-editor.org/rfc/rfc9110#section-10.2.3
670#[must_use]
671pub fn parse_retry_after(value: &str) -> Option<Duration> {
672    let trimmed = value.trim();
673    if trimmed.is_empty() {
674        return None;
675    }
676
677    // delta-seconds: a bare non-negative integer number of seconds.
678    if let Ok(seconds) = trimmed.parse::<u64>() {
679        return Some(Duration::from_secs(seconds));
680    }
681
682    // IMF-fixdate: compute the remaining delay from now, dropping past dates.
683    let target = parse_imf_fixdate(trimmed)?;
684    let now = time::OffsetDateTime::now_utc();
685    if target <= now {
686        return None;
687    }
688    (target - now).try_into().ok()
689}
690
691/// Parse an IMF-fixdate (`Sun, 06 Nov 1994 08:49:37 GMT`) as a UTC instant.
692fn parse_imf_fixdate(value: &str) -> Option<time::OffsetDateTime> {
693    // IMF-fixdate is always UTC ("GMT"); parse the civil datetime and assume
694    // UTC. A custom description avoids depending on the `macros` feature.
695    let format = time::format_description::parse_borrowed::<1>(
696        "[weekday repr:short], [day] [month repr:short] [year] \
697         [hour]:[minute]:[second] GMT",
698    )
699    .ok()?;
700    time::PrimitiveDateTime::parse(value, &format)
701        .ok()
702        .map(time::PrimitiveDateTime::assume_utc)
703}
704
705#[cfg(test)]
706mod tests {
707    use super::*;
708
709    #[test]
710    fn chat_request_new_defaults_then_setters() {
711        let req = ChatRequest::new("sys", vec![Message::user("hi")]);
712        assert_eq!(req.system, "sys");
713        assert_eq!(req.messages.len(), 1);
714        assert_eq!(req.max_tokens, ChatRequest::DEFAULT_MAX_TOKENS);
715        assert!(!req.max_tokens_explicit);
716        assert!(req.tools.is_none());
717        assert!(req.tool_choice.is_none());
718        assert!(req.response_format.is_none());
719
720        let req = req
721            .with_max_tokens(1234)
722            .with_tool_choice(ToolChoice::Auto)
723            .with_response_format(ResponseFormat::new(
724                "r",
725                serde_json::json!({"type": "object"}),
726            ))
727            .with_session_id("s-1");
728        assert_eq!(req.max_tokens, 1234);
729        assert!(req.max_tokens_explicit);
730        assert!(matches!(req.tool_choice, Some(ToolChoice::Auto)));
731        assert!(req.response_format.is_some());
732        assert_eq!(req.session_id.as_deref(), Some("s-1"));
733    }
734
735    #[test]
736    fn stop_reason_known_values_round_trip() -> Result<(), serde_json::Error> {
737        for (json, expected) in [
738            ("\"end_turn\"", StopReason::EndTurn),
739            ("\"tool_use\"", StopReason::ToolUse),
740            ("\"max_tokens\"", StopReason::MaxTokens),
741            ("\"stop_sequence\"", StopReason::StopSequence),
742            ("\"refusal\"", StopReason::Refusal),
743            (
744                "\"model_context_window_exceeded\"",
745                StopReason::ModelContextWindowExceeded,
746            ),
747        ] {
748            let parsed: StopReason = serde_json::from_str(json)?;
749            assert_eq!(parsed, expected);
750            assert_eq!(serde_json::to_string(&parsed)?, json);
751        }
752        Ok(())
753    }
754
755    #[test]
756    fn stop_reason_unknown_value_deserializes_to_unknown() -> Result<(), serde_json::Error> {
757        // An unrecognized provider stop reason must not fail deserialization;
758        // `#[serde(other)]` routes it to `StopReason::Unknown`.
759        let parsed: StopReason = serde_json::from_str("\"some_future_reason\"")?;
760        assert_eq!(parsed, StopReason::Unknown);
761        assert_eq!(parsed.as_str(), "unknown");
762        Ok(())
763    }
764
765    #[test]
766    fn stop_reason_unknown_serializes_to_unknown() -> Result<(), serde_json::Error> {
767        assert_eq!(serde_json::to_string(&StopReason::Unknown)?, "\"unknown\"");
768        Ok(())
769    }
770
771    // ── ContentBlock wire format ────────────────────────────────
772    //
773    // `ContentBlock` is persisted durably (AgentContinuation.response_content,
774    // AgentEvent::UserInput), so its tag strings and optional-field omission
775    // are part of the wire contract. A tag rename or variant reorder must fail
776    // a test here, not silently corrupt persisted threads.
777
778    #[test]
779    fn content_block_text_wire_format() -> Result<(), serde_json::Error> {
780        let json = serde_json::to_value(ContentBlock::Text { text: "hi".into() })?;
781        assert_eq!(json, serde_json::json!({"type": "text", "text": "hi"}));
782        Ok(())
783    }
784
785    #[test]
786    fn content_block_thinking_omits_none_signature() -> Result<(), serde_json::Error> {
787        let none = serde_json::to_value(ContentBlock::Thinking {
788            thinking: "t".into(),
789            signature: None,
790        })?;
791        assert_eq!(
792            none,
793            serde_json::json!({"type": "thinking", "thinking": "t"})
794        );
795
796        let some = serde_json::to_value(ContentBlock::Thinking {
797            thinking: "t".into(),
798            signature: Some("sig".into()),
799        })?;
800        assert_eq!(
801            some,
802            serde_json::json!({"type": "thinking", "thinking": "t", "signature": "sig"})
803        );
804        Ok(())
805    }
806
807    #[test]
808    fn content_block_tool_use_omits_none_thought_signature() -> Result<(), serde_json::Error> {
809        let none = serde_json::to_value(ContentBlock::ToolUse {
810            id: "i".into(),
811            name: "n".into(),
812            input: serde_json::json!({"a": 1}),
813            thought_signature: None,
814        })?;
815        assert_eq!(
816            none,
817            serde_json::json!({"type": "tool_use", "id": "i", "name": "n", "input": {"a": 1}})
818        );
819
820        let some = serde_json::to_value(ContentBlock::ToolUse {
821            id: "i".into(),
822            name: "n".into(),
823            input: serde_json::json!({}),
824            thought_signature: Some("ts".into()),
825        })?;
826        assert_eq!(
827            some.get("thought_signature").and_then(|v| v.as_str()),
828            Some("ts")
829        );
830        Ok(())
831    }
832
833    #[test]
834    fn content_block_tool_result_omits_none_is_error() -> Result<(), serde_json::Error> {
835        let none = serde_json::to_value(ContentBlock::ToolResult {
836            tool_use_id: "t".into(),
837            content: "out".into(),
838            is_error: None,
839        })?;
840        assert_eq!(
841            none,
842            serde_json::json!({"type": "tool_result", "tool_use_id": "t", "content": "out"})
843        );
844
845        let some = serde_json::to_value(ContentBlock::ToolResult {
846            tool_use_id: "t".into(),
847            content: "out".into(),
848            is_error: Some(true),
849        })?;
850        assert_eq!(
851            some.get("is_error").and_then(serde_json::Value::as_bool),
852            Some(true)
853        );
854        Ok(())
855    }
856
857    #[test]
858    fn content_block_remaining_variant_tags() -> Result<(), serde_json::Error> {
859        assert_eq!(
860            serde_json::to_value(ContentBlock::RedactedThinking { data: "d".into() })?,
861            serde_json::json!({"type": "redacted_thinking", "data": "d"})
862        );
863        assert_eq!(
864            serde_json::to_value(ContentBlock::Image {
865                source: ContentSource::new("image/png", "b64"),
866            })?,
867            serde_json::json!({"type": "image", "source": {"media_type": "image/png", "data": "b64"}})
868        );
869        assert_eq!(
870            serde_json::to_value(ContentBlock::Document {
871                source: ContentSource::new("application/pdf", "b64"),
872            })?,
873            serde_json::json!({"type": "document", "source": {"media_type": "application/pdf", "data": "b64"}})
874        );
875        Ok(())
876    }
877
878    #[test]
879    fn content_block_every_tag_round_trips() -> Result<(), serde_json::Error> {
880        let blocks = vec![
881            ContentBlock::Text { text: "t".into() },
882            ContentBlock::Thinking {
883                thinking: "th".into(),
884                signature: Some("s".into()),
885            },
886            ContentBlock::RedactedThinking { data: "d".into() },
887            ContentBlock::ToolUse {
888                id: "i".into(),
889                name: "n".into(),
890                input: serde_json::json!({"x": 1}),
891                thought_signature: None,
892            },
893            ContentBlock::ToolResult {
894                tool_use_id: "t".into(),
895                content: "c".into(),
896                is_error: Some(true),
897            },
898            ContentBlock::Image {
899                source: ContentSource::new("image/png", "b"),
900            },
901            ContentBlock::Document {
902                source: ContentSource::new("application/pdf", "b"),
903            },
904        ];
905        for block in blocks {
906            let json = serde_json::to_value(&block)?;
907            let back: ContentBlock = serde_json::from_value(json.clone())?;
908            assert_eq!(serde_json::to_value(&back)?, json);
909        }
910        Ok(())
911    }
912
913    // ── Content (untagged) wire format ──────────────────────────
914
915    #[test]
916    fn content_text_serializes_as_bare_string() -> Result<(), serde_json::Error> {
917        let json = serde_json::to_value(Content::Text("hello".into()))?;
918        assert_eq!(json, serde_json::json!("hello"));
919        let back: Content = serde_json::from_value(serde_json::json!("hello"))?;
920        assert!(matches!(back, Content::Text(s) if s == "hello"));
921        Ok(())
922    }
923
924    #[test]
925    fn content_blocks_serialize_as_array_including_empty() -> Result<(), serde_json::Error> {
926        let json = serde_json::to_value(Content::Blocks(vec![ContentBlock::Text {
927            text: "x".into(),
928        }]))?;
929        assert_eq!(json, serde_json::json!([{"type": "text", "text": "x"}]));
930
931        // Empty blocks → `[]` and must round-trip back to `Blocks`, not `Text`,
932        // even though `Text` is the first untagged variant.
933        let empty = serde_json::to_value(Content::Blocks(vec![]))?;
934        assert_eq!(empty, serde_json::json!([]));
935        let back: Content = serde_json::from_value(empty)?;
936        assert!(matches!(back, Content::Blocks(b) if b.is_empty()));
937        Ok(())
938    }
939
940    // ── Message wire format ─────────────────────────────────────
941
942    #[test]
943    fn message_wire_format_text_and_blocks() -> Result<(), serde_json::Error> {
944        let user = serde_json::to_value(Message::user("hi"))?;
945        assert_eq!(user, serde_json::json!({"role": "user", "content": "hi"}));
946
947        let assistant =
948            serde_json::to_value(Message::assistant_with_content(vec![ContentBlock::Text {
949                text: "yo".into(),
950            }]))?;
951        assert_eq!(
952            assistant,
953            serde_json::json!({"role": "assistant", "content": [{"type": "text", "text": "yo"}]})
954        );
955
956        let back: Message =
957            serde_json::from_value(serde_json::json!({"role": "user", "content": "hi"}))?;
958        assert_eq!(back.role, Role::User);
959        assert!(matches!(back.content, Content::Text(s) if s == "hi"));
960        Ok(())
961    }
962
963    // ── Retry-After parsing ─────────────────────────────────────
964
965    #[test]
966    fn parse_retry_after_delta_seconds() {
967        assert_eq!(parse_retry_after("125"), Some(Duration::from_secs(125)));
968        assert_eq!(parse_retry_after("0"), Some(Duration::from_secs(0)));
969        // Surrounding whitespace is tolerated.
970        assert_eq!(parse_retry_after("  30 "), Some(Duration::from_secs(30)));
971    }
972
973    #[test]
974    fn parse_retry_after_rejects_garbage_and_empty() {
975        assert_eq!(parse_retry_after(""), None);
976        assert_eq!(parse_retry_after("   "), None);
977        assert_eq!(parse_retry_after("soon"), None);
978        // Negative deltas are not valid delta-seconds.
979        assert_eq!(parse_retry_after("-5"), None);
980    }
981
982    #[test]
983    fn parse_retry_after_past_imf_date_is_none() {
984        // A date well in the past must not produce a (would-be negative) delay.
985        assert_eq!(parse_retry_after("Sun, 06 Nov 1994 08:49:37 GMT"), None);
986    }
987
988    #[test]
989    fn parse_retry_after_future_imf_date_is_some() {
990        // Far-future date: must parse and yield a positive, large delay (the
991        // 1_000_000s ≈ 11.6-day lower bound is trivially exceeded by a year-9999
992        // target and avoids a round-unit literal).
993        let parsed = parse_retry_after("Fri, 31 Dec 9999 23:59:59 GMT");
994        assert!(parsed.is_some_and(|d| d > Duration::from_secs(1_000_000)));
995    }
996
997    // ── CacheConfig ─────────────────────────────────────────────
998
999    #[test]
1000    fn cache_ttl_wire_strings() {
1001        assert_eq!(CacheTtl::FiveMinutes.as_wire_str(), "5m");
1002        assert_eq!(CacheTtl::OneHour.as_wire_str(), "1h");
1003    }
1004
1005    #[test]
1006    fn cache_config_builders_and_default_request_cache_is_none() {
1007        let req = ChatRequest::new("sys", vec![Message::user("hi")]);
1008        assert!(
1009            req.cache.is_none(),
1010            "default request must not set a cache config"
1011        );
1012
1013        let enabled = CacheConfig::enabled().with_ttl(CacheTtl::OneHour);
1014        assert!(enabled.enabled);
1015        assert_eq!(enabled.ttl, Some(CacheTtl::OneHour));
1016        assert_eq!(enabled.max_breakpoints, None);
1017
1018        let disabled = CacheConfig::disabled();
1019        assert!(!disabled.enabled);
1020
1021        let capped = CacheConfig::enabled().with_max_breakpoints(2);
1022        assert_eq!(capped.max_breakpoints, Some(2));
1023
1024        let req = ChatRequest::new("s", vec![]).with_cache(CacheConfig::disabled());
1025        assert!(req.cache.is_some_and(|c| !c.enabled));
1026    }
1027}