inferd_proto/response.rs
1//! Response frame schema.
2
3use crate::error::ErrorCode;
4use serde::{Deserialize, Serialize};
5
6/// Why a generation ended. Carried on `done` frames.
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
8#[serde(rename_all = "lowercase")]
9pub enum StopReason {
10 /// Model emitted the end-of-turn token cleanly.
11 End,
12 /// `max_tokens` reached.
13 Length,
14 /// Caller disconnected or otherwise cancelled.
15 Cancelled,
16 /// Generation aborted; partial output may be in `Response::content`.
17 Error,
18}
19
20/// Token-count usage report carried on `done` frames.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22pub struct Usage {
23 /// Tokens consumed by the prompt.
24 pub prompt_tokens: u32,
25 /// Tokens generated in the response.
26 pub completion_tokens: u32,
27}
28
29/// One frame on the response NDJSON stream.
30///
31/// Variant fields map directly to the wire shape documented in
32/// `docs/protocol-v1.md` §"Response stream".
33#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
34#[serde(tag = "type", rename_all = "lowercase")]
35pub enum Response {
36 /// Lifecycle transition not tied to token output. `id` is `"admin"` for
37 /// status broadcast on the admin socket; otherwise the request id.
38 Status {
39 /// Correlation id; `"admin"` for non-request status.
40 id: String,
41 /// Lifecycle state name (`loading_model`, `ready`, `restarting`, `draining`).
42 status: String,
43 },
44 /// One incremental generated token.
45 Token {
46 /// Request id.
47 id: String,
48 /// Token text.
49 content: String,
50 },
51 /// Terminal frame for a successful generation.
52 Done {
53 /// Request id.
54 id: String,
55 /// Full generated text.
56 content: String,
57 /// Token-count usage.
58 usage: Usage,
59 /// Why generation stopped.
60 stop_reason: StopReason,
61 /// `Backend::name()` of the adapter that served this request.
62 ///
63 /// Diagnostic only — apps must not branch on this (ADR 0007).
64 backend: String,
65 },
66 /// Terminal frame for a failed generation.
67 Error {
68 /// Request id.
69 id: String,
70 /// Machine-readable classification.
71 code: ErrorCode,
72 /// Human-readable description.
73 message: String,
74 },
75}
76
77impl Response {
78 /// Correlation id of the frame regardless of variant.
79 pub fn id(&self) -> &str {
80 match self {
81 Response::Status { id, .. }
82 | Response::Token { id, .. }
83 | Response::Done { id, .. }
84 | Response::Error { id, .. } => id,
85 }
86 }
87
88 /// `true` if this frame ends a request stream (`done` or `error`).
89 pub fn is_terminal(&self) -> bool {
90 matches!(self, Response::Done { .. } | Response::Error { .. })
91 }
92}