inferd_proto/request.rs
1//! Request envelope and validation.
2
3use crate::error::ProtoError;
4use serde::{Deserialize, Serialize};
5
/// Fallback sampling parameters (the Gemma 4 defaults) that stand in for any
/// field a `Request` leaves unset.
///
/// Kept in step with the §Request section of `docs/protocol-v1.md`.
pub mod defaults {
    /// Temperature used when the request does not carry one.
    pub const TEMPERATURE: f64 = 1.0;

    /// Nucleus-sampling (`top_p`) value used when the request does not carry one.
    pub const TOP_P: f64 = 0.95;

    /// Top-k cutoff used when the request does not carry one.
    pub const TOP_K: u32 = 64;

    /// Generation cap (`max_tokens`) used when the request does not carry one.
    pub const MAX_TOKENS: u32 = 1000;

    /// Streaming flag used when the request does not carry one.
    pub const STREAM: bool = true;
}
21
/// Every `image_token_budget` value the daemon accepts, listed in ascending
/// order.
///
/// A request carrying any other value is rejected with
/// `ErrorCode::InvalidRequest`.
pub const VALID_IMAGE_TOKEN_BUDGETS: [u32; 5] = [70, 140, 280, 560, 1120];
25
26/// Conversation role attached to each message.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
28#[serde(rename_all = "lowercase")]
29pub enum Role {
30 /// System prompt setting overall instructions.
31 System,
32 /// End-user input.
33 User,
34 /// Prior model output replayed for context.
35 Assistant,
36}
37
38/// One conversation turn carried in `Request::messages`.
39#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
40pub struct Message {
41 /// Speaker.
42 pub role: Role,
43 /// Plain UTF-8 content for this turn. v1 does not support multimodal
44 /// content arrays; image inputs are signalled via `Request::image_token_budget`.
45 pub content: String,
46}
47
/// A checked image-token budget: a `u32` that is a member of
/// `VALID_IMAGE_TOKEN_BUDGETS`.
///
/// The wrapped field is private, so code outside this module cannot build a
/// value without going through a constructor that enforces the allowed set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ImageTokenBudget(u32);
52
53impl ImageTokenBudget {
54 /// Construct an `ImageTokenBudget` if the value is one of the accepted constants.
55 pub fn new(value: u32) -> Option<Self> {
56 if VALID_IMAGE_TOKEN_BUDGETS.contains(&value) {
57 Some(Self(value))
58 } else {
59 None
60 }
61 }
62
63 /// Inner numeric value.
64 pub fn get(self) -> u32 {
65 self.0
66 }
67}
68
69/// The inference request envelope sent by clients.
70///
71/// `Default` produces an empty request: empty `messages`, all
72/// sampling fields `None` (server applies Gemma 4 defaults), no
73/// grammar. Useful for the `..Default::default()` shorthand in
74/// callers; remember to fill in `messages` before sending.
75#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
76pub struct Request {
77 /// Caller-assigned correlation id; echoed on every response frame.
78 #[serde(default, skip_serializing_if = "String::is_empty")]
79 pub id: String,
80
81 /// Conversation history in chronological order. Must be non-empty.
82 pub messages: Vec<Message>,
83
84 /// Sampling temperature; default `defaults::TEMPERATURE`.
85 #[serde(default, skip_serializing_if = "Option::is_none")]
86 pub temperature: Option<f64>,
87
88 /// Nucleus sampling probability; default `defaults::TOP_P`.
89 #[serde(default, skip_serializing_if = "Option::is_none")]
90 pub top_p: Option<f64>,
91
92 /// Top-k sampling cutoff; default `defaults::TOP_K`.
93 #[serde(default, skip_serializing_if = "Option::is_none")]
94 pub top_k: Option<u32>,
95
96 /// Maximum tokens to generate; default `defaults::MAX_TOKENS`.
97 #[serde(default, skip_serializing_if = "Option::is_none")]
98 pub max_tokens: Option<u32>,
99
100 /// Stream tokens vs return one final `done` frame; default `defaults::STREAM`.
101 #[serde(default, skip_serializing_if = "Option::is_none")]
102 pub stream: Option<bool>,
103
104 /// Image token budget; if present, must be in `VALID_IMAGE_TOKEN_BUDGETS`.
105 #[serde(default, skip_serializing_if = "Option::is_none")]
106 pub image_token_budget: Option<u32>,
107
108 /// GBNF grammar to constrain generation; empty means unconstrained.
109 #[serde(default, skip_serializing_if = "String::is_empty")]
110 pub grammar: String,
111}
112
113/// `Request` with all defaults applied and validation completed. Backends
114/// receive this; they never see the optional-shaped wire form.
115#[derive(Debug, Clone, PartialEq)]
116pub struct Resolved {
117 /// Caller-assigned correlation id; echoed on every response frame.
118 pub id: String,
119 /// Conversation history.
120 pub messages: Vec<Message>,
121 /// Effective sampling temperature.
122 pub temperature: f64,
123 /// Effective nucleus sampling probability.
124 pub top_p: f64,
125 /// Effective top-k cutoff.
126 pub top_k: u32,
127 /// Effective max-tokens cap.
128 pub max_tokens: u32,
129 /// Effective streaming flag.
130 pub stream: bool,
131 /// Image token budget if the request declared one; `None` for text-only.
132 pub image_token_budget: Option<ImageTokenBudget>,
133 /// GBNF grammar; empty means unconstrained.
134 pub grammar: String,
135}
136
137impl Request {
138 /// Validate the request and apply Gemma 4 defaults to omitted fields.
139 pub fn resolve(self) -> Result<Resolved, ProtoError> {
140 if self.messages.is_empty() {
141 return Err(ProtoError::InvalidRequest(
142 "messages must not be empty".into(),
143 ));
144 }
145
146 let image_token_budget = match self.image_token_budget {
147 Some(v) => Some(ImageTokenBudget::new(v).ok_or_else(|| {
148 ProtoError::InvalidRequest(format!(
149 "image_token_budget {v} not in {VALID_IMAGE_TOKEN_BUDGETS:?}"
150 ))
151 })?),
152 None => None,
153 };
154
155 Ok(Resolved {
156 id: self.id,
157 messages: self.messages,
158 temperature: self.temperature.unwrap_or(defaults::TEMPERATURE),
159 top_p: self.top_p.unwrap_or(defaults::TOP_P),
160 top_k: self.top_k.unwrap_or(defaults::TOP_K),
161 max_tokens: self.max_tokens.unwrap_or(defaults::MAX_TOKENS),
162 stream: self.stream.unwrap_or(defaults::STREAM),
163 image_token_budget,
164 grammar: self.grammar,
165 })
166 }
167}