Skip to main content

inferd_proto/
response.rs

1//! Response frame schema.
2
3use crate::error::ErrorCode;
4use serde::{Deserialize, Serialize};
5
6/// Why a generation ended. Carried on `done` frames.
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
8#[serde(rename_all = "lowercase")]
9pub enum StopReason {
10    /// Model emitted the end-of-turn token cleanly.
11    End,
12    /// `max_tokens` reached.
13    Length,
14    /// Caller disconnected or otherwise cancelled.
15    Cancelled,
16    /// Generation aborted; partial output may be in `Response::content`.
17    Error,
18}
19
20/// Token-count usage report carried on `done` frames.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22pub struct Usage {
23    /// Tokens consumed by the prompt.
24    pub prompt_tokens: u32,
25    /// Tokens generated in the response.
26    pub completion_tokens: u32,
27}
28
29/// One frame on the response NDJSON stream.
30///
31/// Variant fields map directly to the wire shape documented in
32/// `docs/protocol-v1.md` §"Response stream".
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(tag = "type", rename_all = "lowercase")]
35pub enum Response {
36    /// Lifecycle transition not tied to token output. `id` is `"admin"` for
37    /// status broadcast on the admin socket; otherwise the request id.
38    Status {
39        /// Correlation id; `"admin"` for non-request status.
40        id: String,
41        /// Lifecycle state name (`loading_model`, `ready`, `restarting`, `draining`).
42        status: String,
43    },
44    /// One incremental generated token.
45    Token {
46        /// Request id.
47        id: String,
48        /// Token text.
49        content: String,
50    },
51    /// Terminal frame for a successful generation.
52    Done {
53        /// Request id.
54        id: String,
55        /// Full generated text.
56        content: String,
57        /// Token-count usage.
58        usage: Usage,
59        /// Why generation stopped.
60        stop_reason: StopReason,
61        /// `Backend::name()` of the adapter that served this request.
62        ///
63        /// Diagnostic only — apps must not branch on this (ADR 0007).
64        backend: String,
65    },
66    /// Terminal frame for a failed generation.
67    Error {
68        /// Request id.
69        id: String,
70        /// Machine-readable classification.
71        code: ErrorCode,
72        /// Human-readable description.
73        message: String,
74    },
75}
76
77impl Response {
78    /// Correlation id of the frame regardless of variant.
79    pub fn id(&self) -> &str {
80        match self {
81            Response::Status { id, .. }
82            | Response::Token { id, .. }
83            | Response::Done { id, .. }
84            | Response::Error { id, .. } => id,
85        }
86    }
87
88    /// `true` if this frame ends a request stream (`done` or `error`).
89    pub fn is_terminal(&self) -> bool {
90        matches!(self, Response::Done { .. } | Response::Error { .. })
91    }
92}