Skip to main content

inferd_proto/v2/
response.rs

1//! v2 response frame schema.
2//!
3//! Per ADR 0015 §"v2 Response frames". Three frame variants:
4//!
5//! - `frame` — streaming partial output. Carries one `ResponseBlock`,
6//!   which is either a `text` delta (incremental token text), a
7//!   `thinking` delta (reasoning trace, separated from user-visible
8//!   output per `docs/thinking.mode.in.gemma.md`), or a complete
9//!   `tool_use` block (the daemon parses Gemma 4's
10//!   `<|tool_call>...<tool_call|>` sequence in full before emitting).
11//!
//! - `done` — terminal success frame. Carries usage and the v2
//!   stop-reason taxonomy (`end_turn`, `max_tokens`, `tool_use`,
//!   `stop_sequence`, plus `cancelled` and `error`; see `StopReasonV2`).
15//!
16//! - `error` — terminal failure frame. Carries the v2 error-code
17//!   taxonomy (v1 codes plus `attachment_unsupported` and
18//!   `tool_call_malformed`).
19
20use crate::v2::tool::{ToolCallId, ToolUseInput};
21use serde::{Deserialize, Serialize};
22
23/// Why a v2 generation ended. Carried on `done` frames.
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
25#[serde(rename_all = "snake_case")]
26pub enum StopReasonV2 {
27    /// Model emitted the end-of-turn token cleanly. Equivalent to v1's `end`.
28    EndTurn,
29    /// `max_tokens` reached.
30    MaxTokens,
31    /// Model emitted a complete `tool_use` block; consumer must
32    /// execute the tool and send a follow-up request with a
33    /// `tool_result` content block.
34    ToolUse,
35    /// Generation hit a configured stop sequence (reserved for v2.x;
36    /// v2.0 daemons may not emit this).
37    StopSequence,
38    /// Caller disconnected or otherwise cancelled.
39    Cancelled,
40    /// Generation aborted; partial output was already emitted.
41    Error,
42}
43
44/// Token-count usage report carried on v2 `done` frames.
45///
46/// Field names (`input_tokens` / `output_tokens`) match Anthropic's
47/// shape; v1's `prompt_tokens` / `completion_tokens` matched
48/// llama.cpp's terminology. Both refer to the same underlying counts.
49#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
50pub struct UsageV2 {
51    /// Tokens consumed by the prompt (including templated chat structure).
52    pub input_tokens: u32,
53    /// Tokens generated in the response.
54    pub output_tokens: u32,
55}
56
57/// One streaming-output payload carried inside a `frame` response.
58///
59/// Variants:
60/// - `Text { delta }` — incremental token text. Multiple `Text` frames
61///   per request are normal; concatenating their `delta`s yields the
62///   full response text.
63/// - `Thinking { delta }` — incremental reasoning-trace text. The
64///   daemon separates content emitted between Gemma 4's `<|think|>`
65///   and `<|/think|>` tokens into this variant so middleware can
66///   choose to display, hide, or log it independently.
67/// - `ToolUse { ... }` — a complete tool-call request from the model.
68///   Arrives whole, not streamed. Following a `ToolUse` block, the
69///   stream typically terminates with `Done { stop_reason: ToolUse }`.
70#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
71#[serde(tag = "type", rename_all = "snake_case")]
72pub enum ResponseBlock {
73    /// Incremental user-visible text.
74    Text {
75        /// Newly-generated text since the last `Text` frame.
76        delta: String,
77    },
78    /// Incremental reasoning-trace text.
79    Thinking {
80        /// Newly-generated reasoning trace since the last `Thinking` frame.
81        delta: String,
82    },
83    /// Complete model-emitted tool-call request.
84    ToolUse {
85        /// Pairs this invocation with the consumer's eventual
86        /// `ToolResult` block in the next request.
87        tool_call_id: ToolCallId,
88        /// Tool name; matches a `Tool::name` from the request's
89        /// `tools[]` table.
90        name: String,
91        /// JSON arguments emitted by the model.
92        input: ToolUseInput,
93    },
94}
95
96/// v2 error-code taxonomy. Superset of v1's `ErrorCode` (kept
97/// independent so the v1 enum stays frozen per ADR 0008).
98#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
99#[serde(rename_all = "snake_case")]
100pub enum ErrorCodeV2 {
101    /// Admission queue full at submit time.
102    QueueFull,
103    /// Selected backend errored before or during generation.
104    BackendUnavailable,
105    /// Request failed validation (bad shape, dangling attachment id, etc.).
106    InvalidRequest,
107    /// Frame exceeded the 64 MiB cap.
108    FrameTooLarge,
109    /// Daemon-side bug or unexpected condition.
110    Internal,
111    /// Backend cannot handle the requested attachment kind / MIME.
112    AttachmentUnsupported,
113    /// Model emitted a tool-call sequence the daemon couldn't parse.
114    ToolCallMalformed,
115}
116
117/// One frame on the v2 response NDJSON stream.
118#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
119#[serde(tag = "type", rename_all = "snake_case")]
120pub enum ResponseV2 {
121    /// One streaming-output payload.
122    Frame {
123        /// Request id.
124        id: String,
125        /// The payload — text delta, thinking delta, or complete tool_use.
126        block: ResponseBlock,
127    },
128    /// Terminal frame for a successful generation.
129    Done {
130        /// Request id.
131        id: String,
132        /// Token-count usage.
133        usage: UsageV2,
134        /// Why generation stopped.
135        stop_reason: StopReasonV2,
136        /// `Backend::name()` of the adapter that served this request.
137        ///
138        /// Diagnostic only — apps must not branch on this (ADR 0007).
139        backend: String,
140    },
141    /// Terminal frame for a failed generation.
142    Error {
143        /// Request id.
144        id: String,
145        /// Machine-readable classification.
146        code: ErrorCodeV2,
147        /// Human-readable description.
148        message: String,
149    },
150}
151
152impl ResponseV2 {
153    /// Correlation id of the frame regardless of variant.
154    pub fn id(&self) -> &str {
155        match self {
156            ResponseV2::Frame { id, .. }
157            | ResponseV2::Done { id, .. }
158            | ResponseV2::Error { id, .. } => id,
159        }
160    }
161
162    /// `true` if this frame ends a request stream.
163    pub fn is_terminal(&self) -> bool {
164        matches!(self, ResponseV2::Done { .. } | ResponseV2::Error { .. })
165    }
166}