inferd_proto/v2/request.rs
1//! v2 request envelope, message + content-block types, and validation.
2//!
3//! Per ADR 0015 §"v2 Request" + §"v2 ContentBlock variants". The
4//! shape mirrors Anthropic's `/v1/messages` envelope (typed content
5//! blocks, top-level attachments[] table, top-level tools[] table)
6//! with HTTP stripped and inferd-specific fields (`id`) added.
7
8use crate::error::ProtoError;
9use crate::v2::attachment::Attachment;
10use crate::v2::tool::{Tool, ToolCallId, ToolUseInput};
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13
14/// Conversation role on a v2 message.
15///
16/// Same set as v1's `Role` (system / user / assistant) but defined
17/// independently so v1 and v2 can evolve their role enums without
18/// affecting each other. Tool roles are *not* a separate
19/// conversation-role variant in v2: a tool invocation is an
20/// `assistant`-role message containing a `tool_use` content block,
21/// and the result is a `user`-role message containing a
22/// `tool_result` content block. This matches Anthropic's shape.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
24#[serde(rename_all = "lowercase")]
25pub enum RoleV2 {
26 /// System prompt setting overall instructions.
27 System,
28 /// End-user input (or tool results, dressed as user-role).
29 User,
30 /// Prior model output, including tool-use requests.
31 Assistant,
32}
33
34/// One element of a `MessageV2::content` array.
35///
36/// Forward-compatibility: unknown content-block types deserialise as
37/// the `Unknown` variant so v2.0 daemons / clients ignore content
38/// shapes added in later v2.x revisions gracefully. The daemon emits
39/// `invalid_request` only if the model needs the unknown content to
40/// proceed (per ADR 0015 §"v2 ContentBlock variants").
41#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
42#[serde(tag = "type", rename_all = "snake_case")]
43pub enum ContentBlock {
44 /// Plain text segment within a message.
45 Text {
46 /// Inline text. May be empty (rare but legal).
47 text: String,
48 },
49 /// Reference to an `image`-kind attachment in the request's top-level
50 /// `attachments[]` table.
51 Image {
52 /// Must match exactly one `Attachment::id` of kind `Image`.
53 attachment_id: String,
54 },
55 /// Reference to an `audio`-kind attachment.
56 Audio {
57 /// Must match exactly one `Attachment::id` of kind `Audio`.
58 attachment_id: String,
59 },
60 /// Reference to a `video`-kind attachment. Backends that don't
61 /// support video reject the request with `attachment_unsupported`.
62 Video {
63 /// Must match exactly one `Attachment::id` of kind `Video`.
64 attachment_id: String,
65 },
66 /// Assistant-emitted invocation. Consumers don't typically construct
67 /// these on the request side — the daemon emits them as response
68 /// frames; consumers then send a follow-up request with a matching
69 /// `ToolResult` block. Allowed in request `messages[]` only when
70 /// replaying prior assistant turns for context.
71 ToolUse {
72 /// Pairs this invocation with the corresponding `ToolResult`.
73 tool_call_id: ToolCallId,
74 /// Tool name, must match a `Tool::name` from the request's
75 /// `tools[]` table (or a tool the model knows from training).
76 name: String,
77 /// JSON arguments emitted by the model.
78 input: ToolUseInput,
79 },
80 /// Consumer-constructed result of executing a tool. Routed back into
81 /// the model's context by the daemon's chat-templating layer.
82 ToolResult {
83 /// Must match the `tool_call_id` of the assistant-emitted
84 /// `ToolUse` block this is responding to.
85 tool_call_id: ToolCallId,
86 /// Result content; typically a single `Text` block.
87 content: Vec<ContentBlock>,
88 },
89 /// Forward-compatible escape hatch — any `type` value the local
90 /// build doesn't recognise lands here so older clients/daemons
91 /// don't reject newer payloads at parse time.
92 #[serde(other)]
93 Unknown,
94}
95
96/// One message in the v2 conversation history.
97#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
98pub struct MessageV2 {
99 /// Speaker.
100 pub role: RoleV2,
101 /// Typed content blocks; must be non-empty.
102 pub content: Vec<ContentBlock>,
103}
104
105/// The v2 request envelope sent by clients.
106///
107/// `Default` is intentionally available for `..Default::default()`
108/// shorthand; callers must populate `id` and `messages` before
109/// sending.
110#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
111pub struct RequestV2 {
112 /// Caller-assigned correlation id; echoed on every response frame.
113 #[serde(default, skip_serializing_if = "String::is_empty")]
114 pub id: String,
115
116 /// Conversation history in chronological order. Must be non-empty.
117 pub messages: Vec<MessageV2>,
118
119 /// Binary attachments referenced by `attachment_id` from content
120 /// blocks. Empty when the request is text-only.
121 #[serde(default, skip_serializing_if = "Vec::is_empty")]
122 pub attachments: Vec<Attachment>,
123
124 /// Tool definitions the model may call. Empty when no tools are
125 /// in scope for this request.
126 #[serde(default, skip_serializing_if = "Vec::is_empty")]
127 pub tools: Vec<Tool>,
128
129 /// Sampling temperature; daemon applies engine default if absent.
130 #[serde(default, skip_serializing_if = "Option::is_none")]
131 pub temperature: Option<f64>,
132
133 /// Nucleus sampling probability; daemon applies engine default if absent.
134 #[serde(default, skip_serializing_if = "Option::is_none")]
135 pub top_p: Option<f64>,
136
137 /// Top-k sampling cutoff; daemon applies engine default if absent.
138 #[serde(default, skip_serializing_if = "Option::is_none")]
139 pub top_k: Option<u32>,
140
141 /// Maximum tokens to generate; daemon applies engine default if absent.
142 #[serde(default, skip_serializing_if = "Option::is_none")]
143 pub max_tokens: Option<u32>,
144
145 /// Stream tokens vs return one final `done`; daemon defaults to streaming.
146 #[serde(default, skip_serializing_if = "Option::is_none")]
147 pub stream: Option<bool>,
148}
149
150/// `RequestV2` with semantic validation completed.
151///
152/// Differences from `RequestV2`: attachment ids referenced from
153/// content blocks are guaranteed to resolve; tool names referenced
154/// from `ToolUse` blocks are guaranteed to be unique within the
155/// `tools[]` table; sampling fields stay `Option` here (engine
156/// defaults are applied at the backend layer, not the proto layer,
157/// because they vary per backend in v2 — unlike v1 where Gemma 4
158/// defaults could be hard-coded).
159#[derive(Debug, Clone, PartialEq)]
160pub struct ResolvedV2 {
161 /// Caller-assigned correlation id.
162 pub id: String,
163 /// Validated conversation history.
164 pub messages: Vec<MessageV2>,
165 /// Validated attachment table.
166 pub attachments: Vec<Attachment>,
167 /// Validated tool definitions.
168 pub tools: Vec<Tool>,
169 /// Sampling temperature, if set.
170 pub temperature: Option<f64>,
171 /// Nucleus sampling probability, if set.
172 pub top_p: Option<f64>,
173 /// Top-k cutoff, if set.
174 pub top_k: Option<u32>,
175 /// Max tokens, if set.
176 pub max_tokens: Option<u32>,
177 /// Streaming flag, if set.
178 pub stream: Option<bool>,
179}
180
181impl RequestV2 {
182 /// Validate the request envelope. Resolves attachment-id references,
183 /// checks uniqueness of attachment ids and tool names, and
184 /// rejects empty `messages` / empty `content` arrays.
185 ///
186 /// Does NOT apply sampling defaults — those are backend-specific
187 /// in v2 (cloud backends and llamacpp pick different sensible
188 /// defaults). Backends fill them in at `generate_v2` time.
189 pub fn resolve(self) -> Result<ResolvedV2, ProtoError> {
190 if self.messages.is_empty() {
191 return Err(ProtoError::InvalidRequest(
192 "messages must not be empty".into(),
193 ));
194 }
195
196 let mut attachments_by_id: HashMap<&str, &Attachment> = HashMap::new();
197 for att in &self.attachments {
198 if matches!(att, Attachment::Unknown) {
199 return Err(ProtoError::InvalidRequest(
200 "attachments contain an unknown kind".into(),
201 ));
202 }
203 let id = att.id();
204 if id.is_empty() {
205 return Err(ProtoError::InvalidRequest(
206 "attachments must have non-empty id".into(),
207 ));
208 }
209 if attachments_by_id.insert(id, att).is_some() {
210 return Err(ProtoError::InvalidRequest(format!(
211 "duplicate attachment id: {id}"
212 )));
213 }
214 }
215
216 let mut tool_names: HashSet<&str> = HashSet::new();
217 for tool in &self.tools {
218 if !tool_names.insert(tool.name.as_str()) {
219 return Err(ProtoError::InvalidRequest(format!(
220 "duplicate tool name: {}",
221 tool.name
222 )));
223 }
224 }
225
226 for (mi, msg) in self.messages.iter().enumerate() {
227 if msg.content.is_empty() {
228 return Err(ProtoError::InvalidRequest(format!(
229 "messages[{mi}].content must not be empty"
230 )));
231 }
232 validate_content_blocks(&msg.content, mi, &attachments_by_id, &tool_names)?;
233 }
234
235 Ok(ResolvedV2 {
236 id: self.id,
237 messages: self.messages,
238 attachments: self.attachments,
239 tools: self.tools,
240 temperature: self.temperature,
241 top_p: self.top_p,
242 top_k: self.top_k,
243 max_tokens: self.max_tokens,
244 stream: self.stream,
245 })
246 }
247}
248
249fn validate_content_blocks(
250 blocks: &[ContentBlock],
251 msg_index: usize,
252 attachments_by_id: &HashMap<&str, &Attachment>,
253 tool_names: &HashSet<&str>,
254) -> Result<(), ProtoError> {
255 for (bi, block) in blocks.iter().enumerate() {
256 match block {
257 ContentBlock::Text { .. } => {}
258 ContentBlock::Image { attachment_id } => check_kind(
259 msg_index,
260 bi,
261 attachment_id,
262 attachments_by_id,
263 Attachment::is_image,
264 "image",
265 )?,
266 ContentBlock::Audio { attachment_id } => check_kind(
267 msg_index,
268 bi,
269 attachment_id,
270 attachments_by_id,
271 Attachment::is_audio,
272 "audio",
273 )?,
274 ContentBlock::Video { attachment_id } => check_kind(
275 msg_index,
276 bi,
277 attachment_id,
278 attachments_by_id,
279 Attachment::is_video,
280 "video",
281 )?,
282 ContentBlock::ToolUse { name, .. } => {
283 // tool_names may be empty if the request replays an
284 // assistant message that references a tool the model
285 // knew from training but the consumer didn't redeclare.
286 // We do not reject here.
287 let _ = (name, tool_names);
288 }
289 ContentBlock::ToolResult { content, .. } => {
290 // Recurse — tool_result wraps further content blocks.
291 validate_content_blocks(content, msg_index, attachments_by_id, tool_names)?;
292 }
293 ContentBlock::Unknown => {
294 return Err(ProtoError::InvalidRequest(format!(
295 "messages[{msg_index}].content[{bi}] uses unknown content-block type"
296 )));
297 }
298 }
299 }
300 Ok(())
301}
302
303fn check_kind(
304 msg_index: usize,
305 block_index: usize,
306 attachment_id: &str,
307 attachments_by_id: &HashMap<&str, &Attachment>,
308 pred: fn(&Attachment) -> bool,
309 expected: &str,
310) -> Result<(), ProtoError> {
311 let att = attachments_by_id.get(attachment_id).ok_or_else(|| {
312 ProtoError::InvalidRequest(format!(
313 "messages[{msg_index}].content[{block_index}] references unknown attachment_id {attachment_id:?}"
314 ))
315 })?;
316 if !pred(att) {
317 return Err(ProtoError::InvalidRequest(format!(
318 "messages[{msg_index}].content[{block_index}] block expects {expected} attachment but {attachment_id:?} is a different kind"
319 )));
320 }
321 Ok(())
322}