entelix_core/ir/request.rs
1//! `ModelRequest` — the provider-neutral request shape (invariant 4).
2//!
3//! Every model call must pass through this type before reaching a `Codec`.
4
5use std::sync::Arc;
6
7use serde::{Deserialize, Serialize};
8
9use crate::ir::message::Message;
10use crate::ir::provider_echo::ProviderEchoSnapshot;
11use crate::ir::provider_extensions::ProviderExtensions;
12use crate::ir::reasoning::ReasoningEffort;
13use crate::ir::structured::ResponseFormat;
14use crate::ir::system::SystemPrompt;
15use crate::ir::tool_spec::{ToolChoice, ToolSpec};
16
/// One model invocation, before encoding to vendor wire format.
///
/// Built by users (or higher-level recipes) and handed to `Codec::encode`.
/// Codecs produce vendor-shaped JSON; the IR is the canonical surface and
/// never carries vendor-specific fields directly.
///
/// # Serialization
///
/// Fields annotated `#[serde(default)]` fall back to their `Default`
/// value when absent from the input. Fields that additionally carry
/// `skip_serializing_if` are omitted from the serialized form when they
/// are `None` / empty, so a request that does not use an optional knob
/// serializes without it.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct ModelRequest {
    /// Vendor model identifier (e.g. `claude-opus-4-7`, `gpt-4.1`).
    pub model: String,
    /// Conversation up to this turn. Must contain at least one user message
    /// for most providers; codecs reject empty lists at encode time.
    pub messages: Vec<Message>,
    /// Ordered system-prompt blocks. Empty = "no system prompt"
    /// (codecs treat it as if the field were absent). Per-block
    /// [`crate::ir::CacheControl`] is honored natively by codecs
    /// that support it (Anthropic, Bedrock Converse for Claude);
    /// other codecs concatenate block text and emit
    /// `LossyEncode` warnings when any block is cached.
    #[serde(default)]
    pub system: SystemPrompt,
    /// Hard cap on output tokens. `None` = vendor default.
    pub max_tokens: Option<u32>,
    /// Sampling temperature `[0.0, 2.0]`. Codecs clamp to vendor range.
    /// `None` = vendor default.
    pub temperature: Option<f32>,
    /// Nucleus sampling parameter. `None` = vendor default.
    pub top_p: Option<f32>,
    /// Top-k sampling parameter — restrict candidate-token sampling
    /// to the `k` most-likely tokens. `None` defers to the vendor
    /// default.
    ///
    /// Codec mapping (CLAUDE.md §"Provider IR promotion"; native on
    /// Anthropic, Gemini, Bedrock Converse on Claude — three
    /// vendors, criterion satisfied):
    /// - **Anthropic**, **Bedrock Converse on Claude** — pass-through
    ///   to the Messages API `top_k` field.
    /// - **Gemini** — pass-through to `generationConfig.topK`.
    /// - **OpenAI Chat** / **OpenAI Responses** — `LossyEncode` (no
    ///   native parameter).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Sequences that, when produced, halt generation. Empty = no
    /// caller-supplied stop sequences.
    #[serde(default)]
    pub stop_sequences: Vec<String>,
    /// Tools advertised to the model. Empty = no tool calls permitted.
    ///
    /// Held as `Arc<[ToolSpec]>` so per-dispatch cloning of the
    /// request shape is an atomic refcount bump rather than a deep
    /// walk of every tool's JSON schema. Codecs read through the
    /// `Deref<Target = [ToolSpec]>` coercion — every `&request.tools`
    /// site continues to see `&[ToolSpec]` unchanged.
    #[serde(default)]
    pub tools: Arc<[ToolSpec]>,
    /// Constraint on tool selection. Defaults to [`ToolChoice::Auto`].
    #[serde(default)]
    pub tool_choice: ToolChoice,
    /// Allow the model to emit more than one tool call in a single
    /// turn. `Some(true)` opts in to parallel tool use, `Some(false)`
    /// forces serial dispatch, `None` defers to the vendor default.
    ///
    /// Codec mapping:
    /// - **Anthropic**, **Bedrock Converse on Claude** — translate to
    ///   `tool_choice.disable_parallel_tool_use` (inverted polarity);
    ///   the codec only emits when a `tool_choice` block is present.
    /// - **OpenAI Chat** / **OpenAI Responses** — pass-through to the
    ///   `parallel_tool_calls` field.
    /// - **Gemini** — `LossyEncode` (no native parallel-tool toggle).
    ///
    /// Promoted to IR per the rule "≥ 2 first-party vendors carry
    /// the concept natively → IR field" (CLAUDE.md §"Provider IR
    /// promotion").
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallel_tool_calls: Option<bool>,
    /// Optional structured-output constraint. Codecs route to
    /// vendor-canonical channels (Anthropic `output_config.format`,
    /// OpenAI `response_format` / `text.format`, Gemini
    /// `responseJsonSchema`). `None` = unconstrained output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,
    /// Pseudonymous end-user identifier — abuse-monitoring,
    /// per-user rate-limit attribution, and audit trail. Vendor
    /// pseudonym, never PII (no email / IP / real name).
    ///
    /// Codec mapping (native on Anthropic + OpenAI Chat + OpenAI
    /// Responses — two distinct vendors, criterion satisfied):
    /// - **Anthropic** — `metadata.user_id`.
    /// - **OpenAI Chat** / **OpenAI Responses** — top-level `user`.
    /// - **Gemini**, **Bedrock Converse** — `LossyEncode` (no
    ///   native end-user attribution channel).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub end_user_id: Option<String>,
    /// Deterministic-generation seed. Same seed + same request →
    /// same output, best-effort (vendors document this as not
    /// strictly guaranteed across model versions).
    ///
    /// Codec mapping (native on OpenAI Chat + OpenAI Responses +
    /// Gemini — two distinct vendors, criterion satisfied):
    /// - **OpenAI Chat** / **OpenAI Responses** — top-level `seed`.
    /// - **Gemini** — `generationConfig.seed`.
    /// - **Anthropic**, **Bedrock Converse** — `LossyEncode` (no
    ///   native deterministic-sampling knob).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub seed: Option<i64>,
    /// Cross-vendor reasoning-effort knob. When `Some`, codecs
    /// translate onto their native wire shape per the mapping in
    /// [`ReasoningEffort`]'s module doc — `Off`/`Minimal`/`Low`/
    /// `Medium`/`High`/`Auto` snap to vendor buckets, lossy
    /// approximations emit `ModelWarning::LossyEncode`, and
    /// `VendorSpecific(s)` passes through the literal vendor wire
    /// value. `None` ⇒ vendor default (codec emits no thinking /
    /// reasoning field).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
    /// Per-vendor typed knobs that don't generalise to a
    /// cross-provider IR field — e.g. Anthropic
    /// `disable_parallel_tool_use`, Gemini `safetySettings`,
    /// Bedrock guardrails. Codecs read their own ext when encoding
    /// and emit `ModelWarning::ProviderExtensionIgnored` when
    /// another vendor's ext is present (the operator intended a
    /// knob this wire format cannot honour).
    ///
    /// NOTE(review): `parallel_tool_calls` on this struct is documented
    /// as the promoted IR form of Anthropic `disable_parallel_tool_use`,
    /// so that example may be stale here — confirm against
    /// `ProviderExtensions`.
    #[serde(default, skip_serializing_if = "ProviderExtensions::is_empty")]
    pub provider_extensions: ProviderExtensions,
    /// Vendor-keyed opaque round-trip tokens carrying state from a
    /// prior turn — OpenAI Responses `previous_response_id` is the
    /// canonical example. Codecs read entries matching their own
    /// `Codec::name` and translate to the vendor's chain-pointer
    /// wire field; non-matching entries are ignored. Empty when the
    /// request does not chain from a prior turn.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub continued_from: Vec<ProviderEchoSnapshot>,
}
146
147impl ModelRequest {
148 /// Advance this request to the next conversational turn — append
149 /// the prior assistant turn, chain the vendor's opaque echoes,
150 /// and add the next message (whatever its role).
151 ///
152 /// The transformation is:
153 ///
154 /// 1. The model's prior reply (`prior_response.content`) is wrapped
155 /// in [`Message::new(Role::Assistant, ...)`](crate::ir::Message::new)
156 /// and pushed to `self.messages`.
157 /// 2. `self.continued_from` is replaced with
158 /// `prior_response.provider_echoes` so vendor-specific
159 /// continuation pointers (OpenAI Responses `previous_response_id`,
160 /// Anthropic extended-thinking signatures, Gemini thought
161 /// signatures) ride the next wire encoding. Codecs whose wire
162 /// format represents the prior turn through a server-side
163 /// chain pointer (OpenAI Responses) deduplicate the appended
164 /// assistant turn against that pointer at encode time so the
165 /// transcript is not double-carried.
166 /// 3. `next_message` is pushed onto `self.messages`. The role is
167 /// free — `Role::User` is the common case, `Role::Tool` is the
168 /// canonical shape when chaining a tool-round-trip result into
169 /// the next turn, and operator-driven flows that splice a
170 /// `Role::System` correction message are also valid.
171 ///
172 /// Model / system prompt / tools / response format / sampling
173 /// knobs survive unchanged. Callers needing per-turn adjustments
174 /// chain further builder methods on the returned value.
175 ///
176 /// ## Tool round-trip
177 ///
178 /// `Role::Tool` results for pending `ToolUse` blocks in
179 /// `prior_response.content` are passed in via `next_message` —
180 /// either directly (single tool round-trip) or by composing
181 /// additional pushes on the returned request before the next
182 /// dispatch (multi-tool fan-out). The helper does not
183 /// auto-synthesise tool results; agent loops dispatch tools and
184 /// produce the matching `Message::Tool` themselves.
185 ///
186 /// ## Why a self-consuming method
187 ///
188 /// `ModelRequest` is cheap to clone but the chain shape is
189 /// fundamentally "previous turn → next turn"; consuming `self`
190 /// makes accidental mutation of the old request impossible and
191 /// reads naturally at the call site:
192 ///
193 /// ```ignore
194 /// let next = prior_request.continue_turn(&prior_response, Message::user("more"));
195 /// ```
196 #[must_use]
197 pub fn continue_turn(
198 mut self,
199 prior_response: &crate::ir::response::ModelResponse,
200 next_message: crate::ir::message::Message,
201 ) -> Self {
202 self.messages.push(crate::ir::message::Message::new(
203 crate::ir::message::Role::Assistant,
204 prior_response.content.clone(),
205 ));
206 self.continued_from
207 .clone_from(&prior_response.provider_echoes);
208 self.messages.push(next_message);
209 self
210 }
211}