// entelix_core/ir/request.rs

//! `ModelRequest` — the provider-neutral request shape (invariant 4).
//!
//! Every model call must pass through this type before reaching a `Codec`.

use std::sync::Arc;

use serde::{Deserialize, Serialize};

use crate::ir::message::Message;
use crate::ir::provider_echo::ProviderEchoSnapshot;
use crate::ir::provider_extensions::ProviderExtensions;
use crate::ir::reasoning::ReasoningEffort;
use crate::ir::structured::ResponseFormat;
use crate::ir::system::SystemPrompt;
use crate::ir::tool_spec::{ToolChoice, ToolSpec};

17/// One model invocation, before encoding to vendor wire format.
18///
19/// Built by users (or higher-level recipes) and handed to `Codec::encode`.
20/// Codecs produce vendor-shaped JSON; the IR is the canonical surface and
21/// never carries vendor-specific fields directly.
22#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
23pub struct ModelRequest {
24    /// Vendor model identifier (e.g. `claude-opus-4-7`, `gpt-4.1`).
25    pub model: String,
26    /// Conversation up to this turn. Must contain at least one user message
27    /// for most providers; codecs reject empty lists at encode time.
28    pub messages: Vec<Message>,
29    /// Ordered system-prompt blocks. Empty = "no system prompt"
30    /// (codecs treat as if the field were absent). Per-block
31    /// [`crate::ir::CacheControl`] is honored natively by codecs
32    /// that support it (Anthropic, Bedrock Converse for Claude);
33    /// other codecs concatenate block text and emit
34    /// `LossyEncode` warnings when any block is cached.
35    #[serde(default)]
36    pub system: SystemPrompt,
37    /// Hard cap on output tokens. `None` = vendor default.
38    pub max_tokens: Option<u32>,
39    /// Sampling temperature `[0.0, 2.0]`. Codecs clamp to vendor range.
40    pub temperature: Option<f32>,
41    /// Nucleus sampling parameter.
42    pub top_p: Option<f32>,
43    /// Top-k sampling parameter — restrict candidate-token sampling
44    /// to the `k` most-likely tokens. `None` defers to the vendor
45    /// default.
46    ///
47    /// Codec mapping (CLAUDE.md §"Provider IR promotion"; native on
48    /// Anthropic, Gemini, Bedrock Converse on Claude — three
49    /// vendors, criterion satisfied):
50    /// - **Anthropic**, **Bedrock Converse on Claude** — pass-through
51    ///   to the Messages API `top_k` field.
52    /// - **Gemini** — pass-through to `generationConfig.topK`.
53    /// - **OpenAI Chat** / **OpenAI Responses** — `LossyEncode` (no
54    ///   native parameter).
55    #[serde(default, skip_serializing_if = "Option::is_none")]
56    pub top_k: Option<u32>,
57    /// Sequences that, when produced, halt generation.
58    #[serde(default)]
59    pub stop_sequences: Vec<String>,
60    /// Tools advertised to the model. Empty = no tool calls permitted.
61    /// Held as `Arc<[ToolSpec]>` so per-dispatch cloning of the
62    /// request shape is an atomic refcount bump rather than a deep
63    /// walk of every tool's JSON schema. Codecs read through the
64    /// `Deref<Target = [ToolSpec]>` coercion — every `&request.tools`
65    /// site continues to see `&[ToolSpec]` unchanged.
66    #[serde(default)]
67    pub tools: Arc<[ToolSpec]>,
68    /// Constraint on tool selection. Defaults to [`ToolChoice::Auto`].
69    #[serde(default)]
70    pub tool_choice: ToolChoice,
71    /// Allow the model to emit more than one tool call in a single
72    /// turn. `Some(true)` opts in to parallel tool use, `Some(false)`
73    /// forces serial dispatch, `None` defers to the vendor default.
74    ///
75    /// Codec mapping:
76    /// - **Anthropic**, **Bedrock Converse on Claude** — translate to
77    ///   `tool_choice.disable_parallel_tool_use` (inverted polarity);
78    ///   the codec only emits when a `tool_choice` block is present.
79    /// - **OpenAI Chat** / **OpenAI Responses** — pass-through to the
80    ///   `parallel_tool_calls` field.
81    /// - **Gemini** — `LossyEncode` (no native parallel-tool toggle).
82    ///
83    /// Promoted to IR per the rule "≥ 2 first-party vendors carry
84    /// the concept natively → IR field" (CLAUDE.md §"Provider IR
85    /// promotion").
86    #[serde(default, skip_serializing_if = "Option::is_none")]
87    pub parallel_tool_calls: Option<bool>,
88    /// Optional structured-output constraint. Codecs route to
89    /// vendor-canonical channels (Anthropic `output_config.format`,
90    /// OpenAI `response_format` / `text.format`, Gemini
91    /// `responseJsonSchema`).
92    #[serde(default, skip_serializing_if = "Option::is_none")]
93    pub response_format: Option<ResponseFormat>,
94    /// Pseudonymous end-user identifier — abuse-monitoring,
95    /// per-user rate-limit attribution, and audit trail. Vendor
96    /// pseudonym, never PII (no email / IP / real name).
97    ///
98    /// Codec mapping (native on Anthropic + OpenAI Chat + OpenAI
99    /// Responses — two distinct vendors, criterion satisfied):
100    /// - **Anthropic** — `metadata.user_id`.
101    /// - **OpenAI Chat** / **OpenAI Responses** — top-level `user`.
102    /// - **Gemini**, **Bedrock Converse** — `LossyEncode` (no
103    ///   native end-user attribution channel).
104    #[serde(default, skip_serializing_if = "Option::is_none")]
105    pub end_user_id: Option<String>,
106    /// Deterministic-generation seed. Same seed + same request →
107    /// same output, best-effort (vendors document this as not
108    /// strictly guaranteed across model versions).
109    ///
110    /// Codec mapping (native on OpenAI Chat + OpenAI Responses +
111    /// Gemini — two distinct vendors, criterion satisfied):
112    /// - **OpenAI Chat** / **OpenAI Responses** — top-level `seed`.
113    /// - **Gemini** — `generationConfig.seed`.
114    /// - **Anthropic**, **Bedrock Converse** — `LossyEncode` (no
115    ///   native deterministic-sampling knob).
116    #[serde(default, skip_serializing_if = "Option::is_none")]
117    pub seed: Option<i64>,
118    /// Cross-vendor reasoning-effort knob. When `Some`, codecs
119    /// translate onto their native wire shape per the mapping in
120    /// [`ReasoningEffort`]'s module doc — `Off`/`Minimal`/`Low`/
121    /// `Medium`/`High`/`Auto` snap to vendor buckets, lossy
122    /// approximations emit `ModelWarning::LossyEncode`, and
123    /// `VendorSpecific(s)` passes through the literal vendor wire
124    /// value. `None` ⇒ vendor default (codec emits no thinking /
125    /// reasoning field).
126    #[serde(default, skip_serializing_if = "Option::is_none")]
127    pub reasoning_effort: Option<ReasoningEffort>,
128    /// Per-vendor typed knobs that don't generalise to a
129    /// cross-provider IR field — e.g. Anthropic
130    /// `disable_parallel_tool_use`, Gemini `safetySettings`,
131    /// Bedrock guardrails. Codecs read their own ext when encoding
132    /// and emit `ModelWarning::ProviderExtensionIgnored` when
133    /// another vendor's ext is present (the operator intended a
134    /// knob this wire format cannot honour).
135    #[serde(default, skip_serializing_if = "ProviderExtensions::is_empty")]
136    pub provider_extensions: ProviderExtensions,
137    /// Vendor-keyed opaque round-trip tokens carrying state from a
138    /// prior turn — OpenAI Responses `previous_response_id` is the
139    /// canonical example. Codecs read entries matching their own
140    /// `Codec::name` and translate to the vendor's chain-pointer
141    /// wire field; non-matching entries are ignored. Empty when the
142    /// request does not chain from a prior turn.
143    #[serde(default, skip_serializing_if = "Vec::is_empty")]
144    pub continued_from: Vec<ProviderEchoSnapshot>,
145}
146
147impl ModelRequest {
148    /// Advance this request to the next conversational turn — append
149    /// the prior assistant turn, chain the vendor's opaque echoes,
150    /// and add the next message (whatever its role).
151    ///
152    /// The transformation is:
153    ///
154    /// 1. The model's prior reply (`prior_response.content`) is wrapped
155    ///    in [`Message::new(Role::Assistant, ...)`](crate::ir::Message::new)
156    ///    and pushed to `self.messages`.
157    /// 2. `self.continued_from` is replaced with
158    ///    `prior_response.provider_echoes` so vendor-specific
159    ///    continuation pointers (OpenAI Responses `previous_response_id`,
160    ///    Anthropic extended-thinking signatures, Gemini thought
161    ///    signatures) ride the next wire encoding. Codecs whose wire
162    ///    format represents the prior turn through a server-side
163    ///    chain pointer (OpenAI Responses) deduplicate the appended
164    ///    assistant turn against that pointer at encode time so the
165    ///    transcript is not double-carried.
166    /// 3. `next_message` is pushed onto `self.messages`. The role is
167    ///    free — `Role::User` is the common case, `Role::Tool` is the
168    ///    canonical shape when chaining a tool-round-trip result into
169    ///    the next turn, and operator-driven flows that splice a
170    ///    `Role::System` correction message are also valid.
171    ///
172    /// Model / system prompt / tools / response format / sampling
173    /// knobs survive unchanged. Callers needing per-turn adjustments
174    /// chain further builder methods on the returned value.
175    ///
176    /// ## Tool round-trip
177    ///
178    /// `Role::Tool` results for pending `ToolUse` blocks in
179    /// `prior_response.content` are passed in via `next_message` —
180    /// either directly (single tool round-trip) or by composing
181    /// additional pushes on the returned request before the next
182    /// dispatch (multi-tool fan-out). The helper does not
183    /// auto-synthesise tool results; agent loops dispatch tools and
184    /// produce the matching `Message::Tool` themselves.
185    ///
186    /// ## Why a self-consuming method
187    ///
188    /// `ModelRequest` is cheap to clone but the chain shape is
189    /// fundamentally "previous turn → next turn"; consuming `self`
190    /// makes accidental mutation of the old request impossible and
191    /// reads naturally at the call site:
192    ///
193    /// ```ignore
194    /// let next = prior_request.continue_turn(&prior_response, Message::user("more"));
195    /// ```
196    #[must_use]
197    pub fn continue_turn(
198        mut self,
199        prior_response: &crate::ir::response::ModelResponse,
200        next_message: crate::ir::message::Message,
201    ) -> Self {
202        self.messages.push(crate::ir::message::Message::new(
203            crate::ir::message::Role::Assistant,
204            prior_response.content.clone(),
205        ));
206        self.continued_from
207            .clone_from(&prior_response.provider_echoes);
208        self.messages.push(next_message);
209        self
210    }
211}