Skip to main content

caliban_provider/
request.rs

1//! Completion request, builder, and validation.
2
3use serde::{Deserialize, Serialize};
4
5use crate::effort::Effort;
6use crate::error::{Error, Result};
7use crate::message::{ContentBlock, Message, Role};
8use crate::thinking::ThinkingSetting;
9use crate::tool::{Tool, ToolChoice};
10
/// A provider-neutral request to generate a completion.
///
/// Usually assembled through [`CompletionRequest::builder`], whose `build()`
/// runs [`CompletionRequest::validate`]. All fields are public, so a request
/// can also be constructed or deserialized directly; neither of those paths
/// validates on its own, so call `validate()` explicitly in that case.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CompletionRequest {
    /// The model identifier. Must be non-empty to pass validation.
    pub model: String,
    /// Ordered list of conversation messages. Validation requires every
    /// System message to precede the first User/Assistant message, System
    /// messages to contain only text blocks, and at least one User or
    /// Assistant message to be present.
    pub messages: Vec<Message>,
    /// Tools available to the model. Omitted from serialized output when
    /// empty; deserializes to an empty list when absent.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// How the model should select tools. Deserializes to
    /// `ToolChoice::default()` when absent.
    #[serde(default)]
    pub tool_choice: ToolChoice,
    /// Maximum number of tokens to generate. Must be greater than zero to
    /// pass validation. Has no serde default, so it is required on input.
    pub max_tokens: u32,
    /// Sampling temperature. `None` is omitted from serialized output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Nucleus sampling probability. `None` is omitted from serialized output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
    /// Top-k sampling cutoff. `None` is omitted from serialized output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Sequences that stop generation when produced. Omitted from serialized
    /// output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    /// Extended-thinking control, decoupled from `effort` (ticket #100).
    /// Defaults to [`ThinkingSetting::Auto`], which derives thinking from
    /// `effort` (legacy behavior). `Off`/`On` force it independently.
    #[serde(default)]
    pub thinking: ThinkingSetting,
    /// Reasoning-effort level. `None` (or `Some(Effort::Auto)`) means the
    /// provider's default behavior; adapters skip writing the field.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub effort: Option<Effort>,
    /// Optional per-request metadata. Deserializes to
    /// `RequestMetadata::default()` when absent.
    #[serde(default)]
    pub metadata: RequestMetadata,
}
51
52/// Category of a request, used by the model router (when present) to pick a
53/// provider/model pair. `None` (the default) falls back to whichever route is
54/// declared as the default. Round-trips through serde; non-router providers
55/// simply ignore the field.
56#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
57#[serde(rename_all = "snake_case")]
58pub enum RequestPurpose {
59    /// Main conversational agent loop.
60    MainLoop,
61    /// Summarization / compaction.
62    Summarization,
63    /// Small fast-classifier calls (intent detection, routing).
64    FastClassifier,
65    /// Sub-agent loop (a child agent spawned by `AgentTool`).
66    SubAgent,
67    /// Embeddings.
68    Embedding,
69    /// Anything else; matches a generic "Other" route if declared.
70    Other,
71}
72
/// Optional per-request metadata passed to providers.
///
/// Every field is optional: `Default` yields a value with all fields `None`,
/// and `None` fields are omitted from serialized output.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct RequestMetadata {
    /// An opaque user identifier forwarded to the provider.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub user_id: Option<String>,
    /// Logical category of this request. Consumed by the model router; other
    /// providers ignore it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub purpose: Option<RequestPurpose>,
}
84
85impl CompletionRequest {
86    /// Create a builder for a new request targeting `model`.
87    pub fn builder(model: impl Into<String>) -> CompletionRequestBuilder {
88        CompletionRequestBuilder {
89            model: model.into(),
90            messages: Vec::new(),
91            tools: Vec::new(),
92            tool_choice: ToolChoice::default(),
93            max_tokens: 1024,
94            temperature: None,
95            top_p: None,
96            top_k: None,
97            stop_sequences: Vec::new(),
98            thinking: ThinkingSetting::Auto,
99            effort: None,
100            metadata: RequestMetadata::default(),
101        }
102    }
103
104    /// Validate the request structure.
105    ///
106    /// # Errors
107    ///
108    /// Returns `Err(Error::InvalidRequest)` if the model is empty, `max_tokens` is zero,
109    /// a System message appears after a User/Assistant message, a System message contains
110    /// a non-text block, or there are no User or Assistant messages.
111    pub fn validate(&self) -> Result<()> {
112        if self.model.is_empty() {
113            return Err(Error::InvalidRequest("model is empty".into()));
114        }
115        if self.max_tokens == 0 {
116            return Err(Error::InvalidRequest("max_tokens must be > 0".into()));
117        }
118        validate_messages(&self.messages)
119    }
120}
121
122fn validate_messages(messages: &[Message]) -> Result<()> {
123    let mut seen_non_system = false;
124    let mut has_user_or_assistant = false;
125    for (i, msg) in messages.iter().enumerate() {
126        match msg.role {
127            Role::System => {
128                if seen_non_system {
129                    return Err(Error::InvalidRequest(format!(
130                        "Role::System message at index {i} appears after a User/Assistant \
131                         message; System must lead"
132                    )));
133                }
134                for block in &msg.content {
135                    if !matches!(block, ContentBlock::Text(_)) {
136                        return Err(Error::InvalidRequest(format!(
137                            "Role::System message at index {i} contains a non-text block"
138                        )));
139                    }
140                }
141            }
142            Role::User | Role::Assistant => {
143                seen_non_system = true;
144                has_user_or_assistant = true;
145            }
146        }
147    }
148    if !has_user_or_assistant {
149        return Err(Error::InvalidRequest(
150            "request has no User or Assistant messages".into(),
151        ));
152    }
153    Ok(())
154}
155
156/// Builder for [`CompletionRequest`].
157#[must_use = "builder has no effect until .build() is called"]
158pub struct CompletionRequestBuilder {
159    model: String,
160    messages: Vec<Message>,
161    tools: Vec<Tool>,
162    tool_choice: ToolChoice,
163    max_tokens: u32,
164    temperature: Option<f32>,
165    top_p: Option<f32>,
166    top_k: Option<u32>,
167    stop_sequences: Vec<String>,
168    thinking: ThinkingSetting,
169    effort: Option<Effort>,
170    metadata: RequestMetadata,
171}
172
173impl CompletionRequestBuilder {
174    /// Append a system message after any existing leading System messages.
175    ///
176    /// Multiple calls to `.system()` preserve call order: the second call
177    /// inserts after the first, not before it.
178    pub fn system(mut self, text: impl Into<String>) -> Self {
179        // Insert after any existing leading System messages, before the first non-System.
180        let insertion_index = self
181            .messages
182            .iter()
183            .position(|m| m.role != Role::System)
184            .unwrap_or(self.messages.len());
185        self.messages
186            .insert(insertion_index, Message::system_text(text));
187        self
188    }
189
190    /// Append a user text message.
191    pub fn user_text(mut self, text: impl Into<String>) -> Self {
192        self.messages.push(Message::user_text(text));
193        self
194    }
195
196    /// Append an assistant text message.
197    pub fn assistant_text(mut self, text: impl Into<String>) -> Self {
198        self.messages.push(Message::assistant_text(text));
199        self
200    }
201
202    /// Append an arbitrary message.
203    pub fn message(mut self, m: Message) -> Self {
204        self.messages.push(m);
205        self
206    }
207
208    /// Add a tool declaration.
209    pub fn tool(mut self, t: Tool) -> Self {
210        self.tools.push(t);
211        self
212    }
213
214    /// Set the tool-choice policy.
215    pub fn tool_choice(mut self, choice: ToolChoice) -> Self {
216        self.tool_choice = choice;
217        self
218    }
219
220    /// Set the maximum number of output tokens.
221    pub fn max_tokens(mut self, n: u32) -> Self {
222        self.max_tokens = n;
223        self
224    }
225
226    /// Set the sampling temperature.
227    pub fn temperature(mut self, t: f32) -> Self {
228        self.temperature = Some(t);
229        self
230    }
231
232    /// Set the nucleus-sampling probability.
233    pub fn top_p(mut self, p: f32) -> Self {
234        self.top_p = Some(p);
235        self
236    }
237
238    /// Set the top-k sampling cutoff.
239    pub fn top_k(mut self, k: u32) -> Self {
240        self.top_k = Some(k);
241        self
242    }
243
244    /// Add a stop sequence.
245    pub fn stop_sequence(mut self, s: impl Into<String>) -> Self {
246        self.stop_sequences.push(s.into());
247        self
248    }
249
250    /// Set the extended-thinking control (`Auto`/`Off`/`On`).
251    pub fn thinking(mut self, setting: ThinkingSetting) -> Self {
252        self.thinking = setting;
253        self
254    }
255
256    /// Set the reasoning-effort level. Passing `Effort::Auto` keeps the
257    /// field non-`None`; adapters still treat `Auto` as "omit from the
258    /// wire request".
259    pub fn effort(mut self, e: Effort) -> Self {
260        self.effort = Some(e);
261        self
262    }
263
264    /// Attach an opaque user identifier.
265    pub fn user_id(mut self, id: impl Into<String>) -> Self {
266        self.metadata.user_id = Some(id.into());
267        self
268    }
269
270    /// Validate and build the [`CompletionRequest`].
271    ///
272    /// # Errors
273    ///
274    /// Returns `Err(Error::InvalidRequest)` if any validation rule is violated.
275    #[must_use = "discarding the Result silently ignores validation errors"]
276    pub fn build(self) -> Result<CompletionRequest> {
277        let req = CompletionRequest {
278            model: self.model,
279            messages: self.messages,
280            tools: self.tools,
281            tool_choice: self.tool_choice,
282            max_tokens: self.max_tokens,
283            temperature: self.temperature,
284            top_p: self.top_p,
285            top_k: self.top_k,
286            stop_sequences: self.stop_sequences,
287            thinking: self.thinking,
288            effort: self.effort,
289            metadata: self.metadata,
290        };
291        req.validate()?;
292        Ok(req)
293    }
294}