Skip to main content

agent_sdk_foundation/
llm.rs

1//! LLM message and chat data types.
2//!
3//! These are the wire-format types shared between the runtime, providers,
4//! and the server.  The module intentionally contains **no** async traits
5//! or runtime-specific logic so it can be depended on from thin crates.
6
7use serde::{Deserialize, Serialize};
8
9// ── Thinking ──────────────────────────────────────────────────────────
10
/// How extended thinking is requested from the model.
#[derive(Debug, Clone)]
pub enum ThinkingMode {
    /// Thinking is explicitly switched on with a token budget.
    Enabled {
        /// Token budget for thinking.
        budget_tokens: u32,
    },
    /// Adaptive thinking: the model itself decides how much to think.
    Adaptive,
}
19
20/// Effort level for adaptive thinking via `output_config`.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22#[serde(rename_all = "lowercase")]
23pub enum Effort {
24    Low,
25    Medium,
26    High,
27    Max,
28}
29
30/// Configuration for extended thinking.
31///
32/// When enabled, the model will show its reasoning process before
33/// generating the final response.
34#[derive(Debug, Clone)]
35pub struct ThinkingConfig {
36    /// Which thinking mode to use.
37    pub mode: ThinkingMode,
38    /// Optional effort level (sent via `output_config`).
39    pub effort: Option<Effort>,
40}
41
42impl ThinkingConfig {
43    /// Default budget: 10,000 tokens.
44    ///
45    /// This provides enough capacity for meaningful reasoning on most tasks
46    /// while keeping costs reasonable. Increase for complex multi-step problems.
47    pub const DEFAULT_BUDGET_TOKENS: u32 = 10_000;
48
49    /// Minimum budget required by the Anthropic API.
50    pub const MIN_BUDGET_TOKENS: u32 = 1_024;
51
52    /// Create a config with an explicit token budget (Enabled mode).
53    #[must_use]
54    pub const fn new(budget_tokens: u32) -> Self {
55        Self {
56            mode: ThinkingMode::Enabled { budget_tokens },
57            effort: None,
58        }
59    }
60
61    /// Create an adaptive thinking config.
62    #[must_use]
63    pub const fn adaptive() -> Self {
64        Self {
65            mode: ThinkingMode::Adaptive,
66            effort: None,
67        }
68    }
69
70    /// Create an adaptive thinking config with an effort level.
71    #[must_use]
72    pub const fn adaptive_with_effort(effort: Effort) -> Self {
73        Self {
74            mode: ThinkingMode::Adaptive,
75            effort: Some(effort),
76        }
77    }
78
79    /// Set the effort level on an existing config.
80    #[must_use]
81    pub const fn with_effort(mut self, effort: Effort) -> Self {
82        self.effort = Some(effort);
83        self
84    }
85}
86
87impl Default for ThinkingConfig {
88    fn default() -> Self {
89        Self::new(Self::DEFAULT_BUDGET_TOKENS)
90    }
91}
92
93// ── Request / Response ────────────────────────────────────────────────
94
/// Constraint on whether, and which, tool the model must call.
#[derive(Debug, Clone)]
pub enum ToolChoice {
    /// The model decides for itself whether to use tools. This is also the
    /// behaviour when no choice is supplied (`None`).
    Auto,
    /// The model is forced to call the tool with this name.
    Tool(String),
}
103
104/// Requests that the model constrain its final answer to a JSON Schema.
105///
106/// This is the wire-level description of a structured-output request. The
107/// runtime maps it to each provider's native capability:
108///
109/// - **`OpenAI` / Gemini**: native JSON-mode / structured-outputs
110///   (`response_format` / `responseSchema`).
111/// - **Anthropic**: tool-forcing fallback — the runtime injects a single
112///   "respond" tool whose `input_schema` is [`schema`](Self::schema) and
113///   forces the model to call it.
114///
115/// The runtime validates the model's final output against [`schema`](Self::schema)
116/// and, on mismatch, bounded-re-prompts before failing with a typed error.
117#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
118pub struct ResponseFormat {
119    /// Stable identifier for the schema. Surfaced to providers that require a
120    /// name (`OpenAI` `json_schema.name`, the Anthropic fallback tool name).
121    pub name: String,
122    /// The JSON Schema the final assistant output must satisfy.
123    ///
124    /// This is a raw JSON Schema document (an object), not a Rust type. Callers
125    /// that derive schemas from Rust types can plug in `schemars` upstream and
126    /// pass the resulting document here.
127    pub schema: serde_json::Value,
128    /// Whether the provider should enforce strict schema adherence when it
129    /// supports a strict mode (`OpenAI` `strict: true`). Has no effect on
130    /// providers without a strict mode.
131    pub strict: bool,
132}
133
134impl ResponseFormat {
135    /// Create a response format from a schema name and a JSON Schema document.
136    ///
137    /// Defaults to `strict = true` so providers with a strict mode enforce the
138    /// schema rather than treating it as a hint.
139    #[must_use]
140    pub fn new(name: impl Into<String>, schema: serde_json::Value) -> Self {
141        Self {
142            name: name.into(),
143            schema,
144            strict: true,
145        }
146    }
147
148    /// Set whether strict schema adherence is requested.
149    #[must_use]
150    pub const fn with_strict(mut self, strict: bool) -> Self {
151        self.strict = strict;
152        self
153    }
154}
155
156#[derive(Debug, Clone)]
157pub struct ChatRequest {
158    pub system: String,
159    pub messages: Vec<Message>,
160    pub tools: Option<Vec<Tool>>,
161    pub max_tokens: u32,
162    /// Whether `max_tokens` was explicitly configured by the caller.
163    pub max_tokens_explicit: bool,
164    /// Optional session identifier for provider-side prompt caching or routing.
165    pub session_id: Option<String>,
166    /// Optional provider-managed cached content reference.
167    ///
168    /// This currently maps to Gemini / Vertex AI `cachedContent` handles.
169    pub cached_content: Option<String>,
170    /// Optional extended thinking configuration.
171    pub thinking: Option<ThinkingConfig>,
172    /// Optional constraint on tool usage.
173    ///
174    /// When `None` the provider's default behaviour applies (typically `auto`).
175    pub tool_choice: Option<ToolChoice>,
176    /// Optional request for the final answer to be constrained to a JSON
177    /// Schema.
178    ///
179    /// When `Some`, the provider maps this to its native JSON-mode /
180    /// structured-output capability (or a tool-forcing fallback) and the
181    /// runtime validates the final output against the schema. When `None`
182    /// (default) the model responds freely.
183    pub response_format: Option<ResponseFormat>,
184}
185
186impl ChatRequest {
187    /// Default token budget used by [`ChatRequest::new`] when the caller does
188    /// not set one explicitly. Providers clamp this to their own ceiling.
189    pub const DEFAULT_MAX_TOKENS: u32 = 4096;
190
191    /// Build a request from a system prompt and a message list, leaving every
192    /// optional knob at its default.
193    ///
194    /// This is the ergonomic counterpart to the (still-public) struct literal:
195    /// the common case only needs `system` + `messages`, so callers no longer
196    /// have to spell out the eight `None`/default fields. Layer optional
197    /// settings on with the chainable `with_*` setters:
198    ///
199    /// ```
200    /// use agent_sdk_foundation::llm::{ChatRequest, Message, ToolChoice};
201    ///
202    /// let req = ChatRequest::new("You are helpful.", vec![Message::user("Hi")])
203    ///     .with_max_tokens(1024)
204    ///     .with_tool_choice(ToolChoice::Auto);
205    /// ```
206    #[must_use]
207    pub fn new(system: impl Into<String>, messages: Vec<Message>) -> Self {
208        Self {
209            system: system.into(),
210            messages,
211            tools: None,
212            max_tokens: Self::DEFAULT_MAX_TOKENS,
213            max_tokens_explicit: false,
214            session_id: None,
215            cached_content: None,
216            thinking: None,
217            tool_choice: None,
218            response_format: None,
219        }
220    }
221
222    /// Set the tool list the model may call.
223    #[must_use]
224    pub fn with_tools(mut self, tools: Vec<Tool>) -> Self {
225        self.tools = Some(tools);
226        self
227    }
228
229    /// Set the maximum output-token budget (marks it as explicitly configured).
230    #[must_use]
231    pub const fn with_max_tokens(mut self, max_tokens: u32) -> Self {
232        self.max_tokens = max_tokens;
233        self.max_tokens_explicit = true;
234        self
235    }
236
237    /// Set the session identifier (provider-side prompt caching / routing).
238    #[must_use]
239    pub fn with_session_id(mut self, session_id: impl Into<String>) -> Self {
240        self.session_id = Some(session_id.into());
241        self
242    }
243
244    /// Set the extended-thinking configuration.
245    #[must_use]
246    pub const fn with_thinking(mut self, thinking: ThinkingConfig) -> Self {
247        self.thinking = Some(thinking);
248        self
249    }
250
251    /// Constrain tool usage (defaults to the provider's `auto` when unset).
252    #[must_use]
253    pub fn with_tool_choice(mut self, tool_choice: ToolChoice) -> Self {
254        self.tool_choice = Some(tool_choice);
255        self
256    }
257
258    /// Request the final answer be constrained to the given JSON-Schema
259    /// [`ResponseFormat`] (structured output).
260    #[must_use]
261    pub fn with_response_format(mut self, response_format: ResponseFormat) -> Self {
262        self.response_format = Some(response_format);
263        self
264    }
265}
266
267#[derive(Debug, Clone, Serialize, Deserialize)]
268pub struct Message {
269    pub role: Role,
270    pub content: Content,
271}
272
273impl Message {
274    #[must_use]
275    pub fn user(text: impl Into<String>) -> Self {
276        Self {
277            role: Role::User,
278            content: Content::Text(text.into()),
279        }
280    }
281
282    #[must_use]
283    pub const fn user_with_content(blocks: Vec<ContentBlock>) -> Self {
284        Self {
285            role: Role::User,
286            content: Content::Blocks(blocks),
287        }
288    }
289
290    #[must_use]
291    pub fn assistant(text: impl Into<String>) -> Self {
292        Self {
293            role: Role::Assistant,
294            content: Content::Text(text.into()),
295        }
296    }
297
298    #[must_use]
299    pub const fn assistant_with_content(blocks: Vec<ContentBlock>) -> Self {
300        Self {
301            role: Role::Assistant,
302            content: Content::Blocks(blocks),
303        }
304    }
305
306    #[must_use]
307    pub fn assistant_with_tool_use(
308        text: Option<String>,
309        id: impl Into<String>,
310        name: impl Into<String>,
311        input: serde_json::Value,
312    ) -> Self {
313        let mut blocks = Vec::new();
314        if let Some(t) = text {
315            blocks.push(ContentBlock::Text { text: t });
316        }
317        blocks.push(ContentBlock::ToolUse {
318            id: id.into(),
319            name: name.into(),
320            input,
321            thought_signature: None,
322        });
323        Self {
324            role: Role::Assistant,
325            content: Content::Blocks(blocks),
326        }
327    }
328
329    #[must_use]
330    pub fn tool_result(
331        tool_use_id: impl Into<String>,
332        content: impl Into<String>,
333        is_error: bool,
334    ) -> Self {
335        Self {
336            role: Role::User,
337            content: Content::Blocks(vec![ContentBlock::ToolResult {
338                tool_use_id: tool_use_id.into(),
339                content: content.into(),
340                is_error: if is_error { Some(true) } else { None },
341            }]),
342        }
343    }
344}
345
346#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
347#[serde(rename_all = "lowercase")]
348pub enum Role {
349    User,
350    Assistant,
351}
352
353#[derive(Debug, Clone, Serialize, Deserialize)]
354#[serde(untagged)]
355pub enum Content {
356    Text(String),
357    Blocks(Vec<ContentBlock>),
358}
359
360impl Content {
361    #[must_use]
362    pub fn first_text(&self) -> Option<&str> {
363        match self {
364            Self::Text(s) => Some(s),
365            Self::Blocks(blocks) => blocks.iter().find_map(|b| match b {
366                ContentBlock::Text { text } => Some(text.as_str()),
367                _ => None,
368            }),
369        }
370    }
371}
372
373/// Source data for image and document content blocks.
374#[derive(Debug, Clone, Serialize, Deserialize)]
375pub struct ContentSource {
376    pub media_type: String,
377    pub data: String,
378}
379
380impl ContentSource {
381    #[must_use]
382    pub fn new(media_type: impl Into<String>, data: impl Into<String>) -> Self {
383        Self {
384            media_type: media_type.into(),
385            data: data.into(),
386        }
387    }
388}
389
390#[derive(Debug, Clone, Serialize, Deserialize)]
391#[serde(tag = "type")]
392#[non_exhaustive]
393pub enum ContentBlock {
394    #[serde(rename = "text")]
395    Text { text: String },
396
397    #[serde(rename = "thinking")]
398    Thinking {
399        thinking: String,
400        /// Opaque signature for round-tripping thinking blocks back to the API.
401        #[serde(skip_serializing_if = "Option::is_none")]
402        signature: Option<String>,
403    },
404
405    #[serde(rename = "redacted_thinking")]
406    RedactedThinking { data: String },
407
408    #[serde(rename = "tool_use")]
409    ToolUse {
410        id: String,
411        name: String,
412        input: serde_json::Value,
413        /// Gemini thought signature for preserving reasoning context.
414        /// Required for Gemini 3 models when sending function calls back.
415        #[serde(skip_serializing_if = "Option::is_none")]
416        thought_signature: Option<String>,
417    },
418
419    #[serde(rename = "tool_result")]
420    ToolResult {
421        tool_use_id: String,
422        content: String,
423        #[serde(skip_serializing_if = "Option::is_none")]
424        is_error: Option<bool>,
425    },
426
427    #[serde(rename = "image")]
428    Image { source: ContentSource },
429
430    #[serde(rename = "document")]
431    Document { source: ContentSource },
432}
433
434#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
435pub struct Tool {
436    pub name: String,
437    pub description: String,
438    pub input_schema: serde_json::Value,
439    /// Human-readable display name shown in UI and audit records.
440    pub display_name: String,
441    /// Permission tier for this tool.
442    pub tier: super::types::ToolTier,
443}
444
445#[derive(Debug, Clone)]
446pub struct ChatResponse {
447    pub id: String,
448    pub content: Vec<ContentBlock>,
449    pub model: String,
450    pub stop_reason: Option<StopReason>,
451    pub usage: Usage,
452}
453
454impl ChatResponse {
455    #[must_use]
456    pub fn first_text(&self) -> Option<&str> {
457        self.content.iter().find_map(|b| match b {
458            ContentBlock::Text { text } => Some(text.as_str()),
459            _ => None,
460        })
461    }
462
463    #[must_use]
464    pub fn first_thinking(&self) -> Option<&str> {
465        self.content.iter().find_map(|b| match b {
466            ContentBlock::Thinking { thinking, .. } => Some(thinking.as_str()),
467            _ => None,
468        })
469    }
470
471    pub fn tool_uses(&self) -> impl Iterator<Item = (&str, &str, &serde_json::Value)> {
472        self.content.iter().filter_map(|b| match b {
473            ContentBlock::ToolUse {
474                id, name, input, ..
475            } => Some((id.as_str(), name.as_str(), input)),
476            _ => None,
477        })
478    }
479
480    #[must_use]
481    pub fn has_tool_use(&self) -> bool {
482        self.content
483            .iter()
484            .any(|b| matches!(b, ContentBlock::ToolUse { .. }))
485    }
486}
487
488#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
489#[serde(rename_all = "snake_case")]
490#[non_exhaustive]
491pub enum StopReason {
492    EndTurn,
493    ToolUse,
494    MaxTokens,
495    StopSequence,
496    Refusal,
497    ModelContextWindowExceeded,
498    /// A stop reason this version of the SDK does not recognize.
499    ///
500    /// Providers may introduce new stop reasons at any time. Rather than
501    /// failing deserialization of an otherwise-valid response (or a
502    /// persisted/replayed audit row), unknown values map here via
503    /// `#[serde(other)]`. Consumers should treat it like
504    /// [`StopReason::EndTurn`] (turn finished, nothing actionable) unless
505    /// they have a more specific fallback.
506    #[serde(other)]
507    Unknown,
508}
509
510impl StopReason {
511    /// Stable discriminant string used for durable rows, metrics, and
512    /// dashboards.  Matches the serde representation.
513    #[must_use]
514    pub const fn as_str(&self) -> &'static str {
515        match self {
516            Self::EndTurn => "end_turn",
517            Self::ToolUse => "tool_use",
518            Self::MaxTokens => "max_tokens",
519            Self::StopSequence => "stop_sequence",
520            Self::Refusal => "refusal",
521            Self::ModelContextWindowExceeded => "model_context_window_exceeded",
522            Self::Unknown => "unknown",
523        }
524    }
525}
526
527#[derive(Debug, Clone, Deserialize)]
528pub struct Usage {
529    /// Total input tokens reported by the provider.
530    pub input_tokens: u32,
531    pub output_tokens: u32,
532    /// Portion of `input_tokens` billed at a cached-input rate, when reported.
533    #[serde(default)]
534    pub cached_input_tokens: u32,
535    /// Portion of `input_tokens` spent creating provider-side prompt cache entries.
536    #[serde(default)]
537    pub cache_creation_input_tokens: u32,
538}
539
540#[derive(Debug, Clone)]
541#[non_exhaustive]
542pub enum ChatOutcome {
543    Success(ChatResponse),
544    RateLimited,
545    InvalidRequest(String),
546    ServerError(String),
547}
548
#[cfg(test)]
mod tests {
    use super::*;

    /// `ChatRequest::new` leaves the optional knobs unset, and the `with_*`
    /// setters fill them in.
    #[test]
    fn chat_request_new_defaults_then_setters() {
        let defaults = ChatRequest::new("sys", vec![Message::user("hi")]);
        assert_eq!(defaults.system, "sys");
        assert_eq!(defaults.messages.len(), 1);
        assert_eq!(defaults.max_tokens, ChatRequest::DEFAULT_MAX_TOKENS);
        assert!(!defaults.max_tokens_explicit);
        assert!(defaults.tools.is_none());
        assert!(defaults.tool_choice.is_none());
        assert!(defaults.response_format.is_none());

        let format = ResponseFormat::new("r", serde_json::json!({"type": "object"}));
        let configured = defaults
            .with_max_tokens(1234)
            .with_tool_choice(ToolChoice::Auto)
            .with_response_format(format)
            .with_session_id("s-1");
        assert_eq!(configured.max_tokens, 1234);
        assert!(configured.max_tokens_explicit);
        assert!(matches!(configured.tool_choice, Some(ToolChoice::Auto)));
        assert!(configured.response_format.is_some());
        assert_eq!(configured.session_id.as_deref(), Some("s-1"));
    }

    /// Every known stop reason parses from, and serializes back to, its
    /// `snake_case` JSON string.
    #[test]
    fn stop_reason_known_values_round_trip() -> Result<(), serde_json::Error> {
        let known = [
            (r#""end_turn""#, StopReason::EndTurn),
            (r#""tool_use""#, StopReason::ToolUse),
            (r#""max_tokens""#, StopReason::MaxTokens),
            (r#""stop_sequence""#, StopReason::StopSequence),
            (r#""refusal""#, StopReason::Refusal),
            (
                r#""model_context_window_exceeded""#,
                StopReason::ModelContextWindowExceeded,
            ),
        ];
        for (json, expected) in known {
            let parsed = serde_json::from_str::<StopReason>(json)?;
            assert_eq!(parsed, expected);
            let reserialized = serde_json::to_string(&parsed)?;
            assert_eq!(reserialized, json);
        }
        Ok(())
    }

    /// An unrecognized provider stop reason must not fail deserialization;
    /// `#[serde(other)]` routes it to `StopReason::Unknown`.
    #[test]
    fn stop_reason_unknown_value_deserializes_to_unknown() -> Result<(), serde_json::Error> {
        let parsed = serde_json::from_str::<StopReason>(r#""some_future_reason""#)?;
        assert_eq!(parsed, StopReason::Unknown);
        assert_eq!(parsed.as_str(), "unknown");
        Ok(())
    }

    /// `StopReason::Unknown` serializes to the string `"unknown"`.
    #[test]
    fn stop_reason_unknown_serializes_to_unknown() -> Result<(), serde_json::Error> {
        let json = serde_json::to_string(&StopReason::Unknown)?;
        assert_eq!(json, r#""unknown""#);
        Ok(())
    }
}