1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
//! `ModelRequest` — the provider-neutral request shape (invariant 4).
//!
//! Every model call must pass through this type before reaching a `Codec`.
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use crate::ir::message::Message;
use crate::ir::provider_echo::ProviderEchoSnapshot;
use crate::ir::provider_extensions::ProviderExtensions;
use crate::ir::reasoning::ReasoningEffort;
use crate::ir::structured::ResponseFormat;
use crate::ir::system::SystemPrompt;
use crate::ir::tool_spec::{ToolChoice, ToolSpec};
/// One model invocation, before encoding to vendor wire format.
///
/// Built by users (or higher-level recipes) and handed to `Codec::encode`.
/// Codecs produce vendor-shaped JSON; the IR is the canonical surface and
/// never carries vendor-specific fields directly.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct ModelRequest {
/// Vendor model identifier (e.g. `claude-opus-4-7`, `gpt-4.1`).
pub model: String,
/// Conversation up to this turn. Must contain at least one user message
/// for most providers; codecs reject empty lists at encode time.
pub messages: Vec<Message>,
/// Ordered system-prompt blocks. Empty = "no system prompt"
/// (codecs treat as if the field were absent). Per-block
/// [`crate::ir::CacheControl`] is honored natively by codecs
/// that support it (Anthropic, Bedrock Converse for Claude);
/// other codecs concatenate block text and emit
/// `LossyEncode` warnings when any block is cached.
#[serde(default)]
pub system: SystemPrompt,
/// Hard cap on output tokens. `None` = vendor default.
pub max_tokens: Option<u32>,
/// Sampling temperature `[0.0, 2.0]`. Codecs clamp to vendor range.
pub temperature: Option<f32>,
/// Nucleus sampling parameter.
pub top_p: Option<f32>,
/// Top-k sampling parameter — restrict candidate-token sampling
/// to the `k` most-likely tokens. `None` defers to the vendor
/// default.
///
/// Codec mapping (CLAUDE.md §"Provider IR promotion"; native on
/// Anthropic, Gemini, Bedrock Converse on Claude — three
/// vendors, criterion satisfied):
/// - **Anthropic**, **Bedrock Converse on Claude** — pass-through
/// to the Messages API `top_k` field.
/// - **Gemini** — pass-through to `generationConfig.topK`.
/// - **OpenAI Chat** / **OpenAI Responses** — `LossyEncode` (no
/// native parameter).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub top_k: Option<u32>,
/// Sequences that, when produced, halt generation.
#[serde(default)]
pub stop_sequences: Vec<String>,
/// Tools advertised to the model. Empty = no tool calls permitted.
/// Held as `Arc<[ToolSpec]>` so per-dispatch cloning of the
/// request shape is an atomic refcount bump rather than a deep
/// walk of every tool's JSON schema. Codecs read through the
/// `Deref<Target = [ToolSpec]>` coercion — every `&request.tools`
/// site continues to see `&[ToolSpec]` unchanged.
#[serde(default)]
pub tools: Arc<[ToolSpec]>,
/// Constraint on tool selection. Defaults to [`ToolChoice::Auto`].
#[serde(default)]
pub tool_choice: ToolChoice,
/// Allow the model to emit more than one tool call in a single
/// turn. `Some(true)` opts in to parallel tool use, `Some(false)`
/// forces serial dispatch, `None` defers to the vendor default.
///
/// Codec mapping:
/// - **Anthropic**, **Bedrock Converse on Claude** — translate to
/// `tool_choice.disable_parallel_tool_use` (inverted polarity);
/// the codec only emits when a `tool_choice` block is present.
/// - **OpenAI Chat** / **OpenAI Responses** — pass-through to the
/// `parallel_tool_calls` field.
/// - **Gemini** — `LossyEncode` (no native parallel-tool toggle).
///
/// Promoted to IR per the rule "≥ 2 first-party vendors carry
/// the concept natively → IR field" (CLAUDE.md §"Provider IR
/// promotion").
#[serde(default, skip_serializing_if = "Option::is_none")]
pub parallel_tool_calls: Option<bool>,
/// Optional structured-output constraint. Codecs route to
/// vendor-canonical channels (Anthropic `output_config.format`,
/// OpenAI `response_format` / `text.format`, Gemini
/// `responseJsonSchema`).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub response_format: Option<ResponseFormat>,
/// Pseudonymous end-user identifier — abuse-monitoring,
/// per-user rate-limit attribution, and audit trail. Vendor
/// pseudonym, never PII (no email / IP / real name).
///
/// Codec mapping (native on Anthropic + OpenAI Chat + OpenAI
/// Responses — two distinct vendors, criterion satisfied):
/// - **Anthropic** — `metadata.user_id`.
/// - **OpenAI Chat** / **OpenAI Responses** — top-level `user`.
/// - **Gemini**, **Bedrock Converse** — `LossyEncode` (no
/// native end-user attribution channel).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub end_user_id: Option<String>,
/// Deterministic-generation seed. Same seed + same request →
/// same output, best-effort (vendors document this as not
/// strictly guaranteed across model versions).
///
/// Codec mapping (native on OpenAI Chat + OpenAI Responses +
/// Gemini — two distinct vendors, criterion satisfied):
/// - **OpenAI Chat** / **OpenAI Responses** — top-level `seed`.
/// - **Gemini** — `generationConfig.seed`.
/// - **Anthropic**, **Bedrock Converse** — `LossyEncode` (no
/// native deterministic-sampling knob).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub seed: Option<i64>,
/// Cross-vendor reasoning-effort knob. When `Some`, codecs
/// translate onto their native wire shape per the mapping in
/// [`ReasoningEffort`]'s module doc — `Off`/`Minimal`/`Low`/
/// `Medium`/`High`/`Auto` snap to vendor buckets, lossy
/// approximations emit `ModelWarning::LossyEncode`, and
/// `VendorSpecific(s)` passes through the literal vendor wire
/// value. `None` ⇒ vendor default (codec emits no thinking /
/// reasoning field).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<ReasoningEffort>,
/// Per-vendor typed knobs that don't generalise to a
/// cross-provider IR field — e.g. Anthropic
/// `disable_parallel_tool_use`, Gemini `safetySettings`,
/// Bedrock guardrails. Codecs read their own ext when encoding
/// and emit `ModelWarning::ProviderExtensionIgnored` when
/// another vendor's ext is present (the operator intended a
/// knob this wire format cannot honour).
#[serde(default, skip_serializing_if = "ProviderExtensions::is_empty")]
pub provider_extensions: ProviderExtensions,
/// Vendor-keyed opaque round-trip tokens carrying state from a
/// prior turn — OpenAI Responses `previous_response_id` is the
/// canonical example. Codecs read entries matching their own
/// `Codec::name` and translate to the vendor's chain-pointer
/// wire field; non-matching entries are ignored. Empty when the
/// request does not chain from a prior turn.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub continued_from: Vec<ProviderEchoSnapshot>,
}
impl ModelRequest {
/// Advance this request to the next conversational turn — append
/// the prior assistant turn, chain the vendor's opaque echoes,
/// and add the next message (whatever its role).
///
/// The transformation is:
///
/// 1. The model's prior reply (`prior_response.content`) is wrapped
/// in [`Message::new(Role::Assistant, ...)`](crate::ir::Message::new)
/// and pushed to `self.messages`.
/// 2. `self.continued_from` is replaced with
/// `prior_response.provider_echoes` so vendor-specific
/// continuation pointers (OpenAI Responses `previous_response_id`,
/// Anthropic extended-thinking signatures, Gemini thought
/// signatures) ride the next wire encoding. Codecs whose wire
/// format represents the prior turn through a server-side
/// chain pointer (OpenAI Responses) deduplicate the appended
/// assistant turn against that pointer at encode time so the
/// transcript is not double-carried.
/// 3. `next_message` is pushed onto `self.messages`. The role is
/// free — `Role::User` is the common case, `Role::Tool` is the
/// canonical shape when chaining a tool-round-trip result into
/// the next turn, and operator-driven flows that splice a
/// `Role::System` correction message are also valid.
///
/// Model / system prompt / tools / response format / sampling
/// knobs survive unchanged. Callers needing per-turn adjustments
/// chain further builder methods on the returned value.
///
/// ## Tool round-trip
///
/// `Role::Tool` results for pending `ToolUse` blocks in
/// `prior_response.content` are passed in via `next_message` —
/// either directly (single tool round-trip) or by composing
/// additional pushes on the returned request before the next
/// dispatch (multi-tool fan-out). The helper does not
/// auto-synthesise tool results; agent loops dispatch tools and
/// produce the matching `Message::Tool` themselves.
///
/// ## Why a self-consuming method
///
/// `ModelRequest` is cheap to clone but the chain shape is
/// fundamentally "previous turn → next turn"; consuming `self`
/// makes accidental mutation of the old request impossible and
/// reads naturally at the call site:
///
/// ```ignore
/// let next = prior_request.continue_turn(&prior_response, Message::user("more"));
/// ```
#[must_use]
pub fn continue_turn(
mut self,
prior_response: &crate::ir::response::ModelResponse,
next_message: crate::ir::message::Message,
) -> Self {
self.messages.push(crate::ir::message::Message::new(
crate::ir::message::Role::Assistant,
prior_response.content.clone(),
));
self.continued_from
.clone_from(&prior_response.provider_echoes);
self.messages.push(next_message);
self
}
}