// inferd_proto/v2/request.rs

//! v2 request envelope, message + content-block types, and validation.
//!
//! Per ADR 0015 §"v2 Request" + §"v2 ContentBlock variants". The
//! shape mirrors Anthropic's `/v1/messages` envelope (typed content
//! blocks, top-level attachments[] table, top-level tools[] table)
//! with HTTP stripped and inferd-specific fields (`id`) added.
7
8use crate::error::ProtoError;
9use crate::v2::attachment::Attachment;
10use crate::v2::tool::{Tool, ToolCallId, ToolUseInput};
11use serde::{Deserialize, Serialize};
12use std::collections::{HashMap, HashSet};
13
14/// Conversation role on a v2 message.
15///
16/// Same set as v1's `Role` (system / user / assistant) but defined
17/// independently so v1 and v2 can evolve their role enums without
18/// affecting each other. Tool roles are *not* a separate
19/// conversation-role variant in v2: a tool invocation is an
20/// `assistant`-role message containing a `tool_use` content block,
21/// and the result is a `user`-role message containing a
22/// `tool_result` content block. This matches Anthropic's shape.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
24#[serde(rename_all = "lowercase")]
25pub enum RoleV2 {
26    /// System prompt setting overall instructions.
27    System,
28    /// End-user input (or tool results, dressed as user-role).
29    User,
30    /// Prior model output, including tool-use requests.
31    Assistant,
32}
33
34/// One element of a `MessageV2::content` array.
35///
36/// Forward-compatibility: unknown content-block types deserialise as
37/// the `Unknown` variant so v2.0 daemons / clients ignore content
38/// shapes added in later v2.x revisions gracefully. The daemon emits
39/// `invalid_request` only if the model needs the unknown content to
40/// proceed (per ADR 0015 §"v2 ContentBlock variants").
41#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
42#[serde(tag = "type", rename_all = "snake_case")]
43pub enum ContentBlock {
44    /// Plain text segment within a message.
45    Text {
46        /// Inline text. May be empty (rare but legal).
47        text: String,
48    },
49    /// Reference to an `image`-kind attachment in the request's top-level
50    /// `attachments[]` table.
51    Image {
52        /// Must match exactly one `Attachment::id` of kind `Image`.
53        attachment_id: String,
54    },
55    /// Reference to an `audio`-kind attachment.
56    Audio {
57        /// Must match exactly one `Attachment::id` of kind `Audio`.
58        attachment_id: String,
59    },
60    /// Reference to a `video`-kind attachment. Backends that don't
61    /// support video reject the request with `attachment_unsupported`.
62    Video {
63        /// Must match exactly one `Attachment::id` of kind `Video`.
64        attachment_id: String,
65    },
66    /// Assistant-emitted invocation. Consumers don't typically construct
67    /// these on the request side — the daemon emits them as response
68    /// frames; consumers then send a follow-up request with a matching
69    /// `ToolResult` block. Allowed in request `messages[]` only when
70    /// replaying prior assistant turns for context.
71    ToolUse {
72        /// Pairs this invocation with the corresponding `ToolResult`.
73        tool_call_id: ToolCallId,
74        /// Tool name, must match a `Tool::name` from the request's
75        /// `tools[]` table (or a tool the model knows from training).
76        name: String,
77        /// JSON arguments emitted by the model.
78        input: ToolUseInput,
79    },
80    /// Consumer-constructed result of executing a tool. Routed back into
81    /// the model's context by the daemon's chat-templating layer.
82    ToolResult {
83        /// Must match the `tool_call_id` of the assistant-emitted
84        /// `ToolUse` block this is responding to.
85        tool_call_id: ToolCallId,
86        /// Result content; typically a single `Text` block.
87        content: Vec<ContentBlock>,
88    },
89    /// Forward-compatible escape hatch — any `type` value the local
90    /// build doesn't recognise lands here so older clients/daemons
91    /// don't reject newer payloads at parse time.
92    #[serde(other)]
93    Unknown,
94}
95
96/// One message in the v2 conversation history.
97#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
98pub struct MessageV2 {
99    /// Speaker.
100    pub role: RoleV2,
101    /// Typed content blocks; must be non-empty.
102    pub content: Vec<ContentBlock>,
103}
104
105/// The v2 request envelope sent by clients.
106///
107/// `Default` is intentionally available for `..Default::default()`
108/// shorthand; callers must populate `id` and `messages` before
109/// sending.
110#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
111pub struct RequestV2 {
112    /// Caller-assigned correlation id; echoed on every response frame.
113    #[serde(default, skip_serializing_if = "String::is_empty")]
114    pub id: String,
115
116    /// Conversation history in chronological order. Must be non-empty.
117    pub messages: Vec<MessageV2>,
118
119    /// Binary attachments referenced by `attachment_id` from content
120    /// blocks. Empty when the request is text-only.
121    #[serde(default, skip_serializing_if = "Vec::is_empty")]
122    pub attachments: Vec<Attachment>,
123
124    /// Tool definitions the model may call. Empty when no tools are
125    /// in scope for this request.
126    #[serde(default, skip_serializing_if = "Vec::is_empty")]
127    pub tools: Vec<Tool>,
128
129    /// Sampling temperature; daemon applies engine default if absent.
130    #[serde(default, skip_serializing_if = "Option::is_none")]
131    pub temperature: Option<f64>,
132
133    /// Nucleus sampling probability; daemon applies engine default if absent.
134    #[serde(default, skip_serializing_if = "Option::is_none")]
135    pub top_p: Option<f64>,
136
137    /// Top-k sampling cutoff; daemon applies engine default if absent.
138    #[serde(default, skip_serializing_if = "Option::is_none")]
139    pub top_k: Option<u32>,
140
141    /// Maximum tokens to generate; daemon applies engine default if absent.
142    #[serde(default, skip_serializing_if = "Option::is_none")]
143    pub max_tokens: Option<u32>,
144
145    /// Stream tokens vs return one final `done`; daemon defaults to streaming.
146    #[serde(default, skip_serializing_if = "Option::is_none")]
147    pub stream: Option<bool>,
148}
149
150/// `RequestV2` with semantic validation completed.
151///
152/// Differences from `RequestV2`: attachment ids referenced from
153/// content blocks are guaranteed to resolve; tool names referenced
154/// from `ToolUse` blocks are guaranteed to be unique within the
155/// `tools[]` table; sampling fields stay `Option` here (engine
156/// defaults are applied at the backend layer, not the proto layer,
157/// because they vary per backend in v2 — unlike v1 where Gemma 4
158/// defaults could be hard-coded).
159#[derive(Debug, Clone, PartialEq)]
160pub struct ResolvedV2 {
161    /// Caller-assigned correlation id.
162    pub id: String,
163    /// Validated conversation history.
164    pub messages: Vec<MessageV2>,
165    /// Validated attachment table.
166    pub attachments: Vec<Attachment>,
167    /// Validated tool definitions.
168    pub tools: Vec<Tool>,
169    /// Sampling temperature, if set.
170    pub temperature: Option<f64>,
171    /// Nucleus sampling probability, if set.
172    pub top_p: Option<f64>,
173    /// Top-k cutoff, if set.
174    pub top_k: Option<u32>,
175    /// Max tokens, if set.
176    pub max_tokens: Option<u32>,
177    /// Streaming flag, if set.
178    pub stream: Option<bool>,
179}
180
181impl RequestV2 {
182    /// Validate the request envelope. Resolves attachment-id references,
183    /// checks uniqueness of attachment ids and tool names, and
184    /// rejects empty `messages` / empty `content` arrays.
185    ///
186    /// Does NOT apply sampling defaults — those are backend-specific
187    /// in v2 (cloud backends and llamacpp pick different sensible
188    /// defaults). Backends fill them in at `generate_v2` time.
189    pub fn resolve(self) -> Result<ResolvedV2, ProtoError> {
190        if self.messages.is_empty() {
191            return Err(ProtoError::InvalidRequest(
192                "messages must not be empty".into(),
193            ));
194        }
195
196        let mut attachments_by_id: HashMap<&str, &Attachment> = HashMap::new();
197        for att in &self.attachments {
198            if matches!(att, Attachment::Unknown) {
199                return Err(ProtoError::InvalidRequest(
200                    "attachments contain an unknown kind".into(),
201                ));
202            }
203            let id = att.id();
204            if id.is_empty() {
205                return Err(ProtoError::InvalidRequest(
206                    "attachments must have non-empty id".into(),
207                ));
208            }
209            if attachments_by_id.insert(id, att).is_some() {
210                return Err(ProtoError::InvalidRequest(format!(
211                    "duplicate attachment id: {id}"
212                )));
213            }
214        }
215
216        let mut tool_names: HashSet<&str> = HashSet::new();
217        for tool in &self.tools {
218            if !tool_names.insert(tool.name.as_str()) {
219                return Err(ProtoError::InvalidRequest(format!(
220                    "duplicate tool name: {}",
221                    tool.name
222                )));
223            }
224        }
225
226        for (mi, msg) in self.messages.iter().enumerate() {
227            if msg.content.is_empty() {
228                return Err(ProtoError::InvalidRequest(format!(
229                    "messages[{mi}].content must not be empty"
230                )));
231            }
232            validate_content_blocks(&msg.content, mi, &attachments_by_id, &tool_names)?;
233        }
234
235        Ok(ResolvedV2 {
236            id: self.id,
237            messages: self.messages,
238            attachments: self.attachments,
239            tools: self.tools,
240            temperature: self.temperature,
241            top_p: self.top_p,
242            top_k: self.top_k,
243            max_tokens: self.max_tokens,
244            stream: self.stream,
245        })
246    }
247}
248
249fn validate_content_blocks(
250    blocks: &[ContentBlock],
251    msg_index: usize,
252    attachments_by_id: &HashMap<&str, &Attachment>,
253    tool_names: &HashSet<&str>,
254) -> Result<(), ProtoError> {
255    for (bi, block) in blocks.iter().enumerate() {
256        match block {
257            ContentBlock::Text { .. } => {}
258            ContentBlock::Image { attachment_id } => check_kind(
259                msg_index,
260                bi,
261                attachment_id,
262                attachments_by_id,
263                Attachment::is_image,
264                "image",
265            )?,
266            ContentBlock::Audio { attachment_id } => check_kind(
267                msg_index,
268                bi,
269                attachment_id,
270                attachments_by_id,
271                Attachment::is_audio,
272                "audio",
273            )?,
274            ContentBlock::Video { attachment_id } => check_kind(
275                msg_index,
276                bi,
277                attachment_id,
278                attachments_by_id,
279                Attachment::is_video,
280                "video",
281            )?,
282            ContentBlock::ToolUse { name, .. } => {
283                // tool_names may be empty if the request replays an
284                // assistant message that references a tool the model
285                // knew from training but the consumer didn't redeclare.
286                // We do not reject here.
287                let _ = (name, tool_names);
288            }
289            ContentBlock::ToolResult { content, .. } => {
290                // Recurse — tool_result wraps further content blocks.
291                validate_content_blocks(content, msg_index, attachments_by_id, tool_names)?;
292            }
293            ContentBlock::Unknown => {
294                return Err(ProtoError::InvalidRequest(format!(
295                    "messages[{msg_index}].content[{bi}] uses unknown content-block type"
296                )));
297            }
298        }
299    }
300    Ok(())
301}
302
303fn check_kind(
304    msg_index: usize,
305    block_index: usize,
306    attachment_id: &str,
307    attachments_by_id: &HashMap<&str, &Attachment>,
308    pred: fn(&Attachment) -> bool,
309    expected: &str,
310) -> Result<(), ProtoError> {
311    let att = attachments_by_id.get(attachment_id).ok_or_else(|| {
312        ProtoError::InvalidRequest(format!(
313            "messages[{msg_index}].content[{block_index}] references unknown attachment_id {attachment_id:?}"
314        ))
315    })?;
316    if !pred(att) {
317        return Err(ProtoError::InvalidRequest(format!(
318            "messages[{msg_index}].content[{block_index}] block expects {expected} attachment but {attachment_id:?} is a different kind"
319        )));
320    }
321    Ok(())
322}