Skip to main content

inferd_proto/
request.rs

1//! Request envelope and validation.
2
3use crate::error::ProtoError;
4use serde::{Deserialize, Serialize};
5
/// Gemma 4 sampling values substituted for any field a `Request` leaves out.
///
/// Mirrors `docs/protocol-v1.md` §Request.
pub mod defaults {
    /// Sampling temperature used when the request carries none.
    pub const TEMPERATURE: f64 = 1.0;
    /// Nucleus (top-p) probability used when the request carries none.
    pub const TOP_P: f64 = 0.95;
    /// Top-k cutoff used when the request carries none.
    pub const TOP_K: u32 = 64;
    /// Cap on generated tokens used when the request carries none.
    pub const MAX_TOKENS: u32 = 1_000;
    /// Streaming behaviour used when the request does not choose one.
    pub const STREAM: bool = true;
}
21
/// Every `image_token_budget` value the daemon accepts, in ascending order.
///
/// Any other value is rejected with `ErrorCode::InvalidRequest`; in this
/// module the check is performed by `Request::resolve`, which returns
/// `ProtoError::InvalidRequest`.
pub const VALID_IMAGE_TOKEN_BUDGETS: [u32; 5] = [70, 140, 280, 560, 1120];
25
26/// Conversation role attached to each message.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
28#[serde(rename_all = "lowercase")]
29pub enum Role {
30    /// System prompt setting overall instructions.
31    System,
32    /// End-user input.
33    User,
34    /// Prior model output replayed for context.
35    Assistant,
36}
37
38/// One conversation turn carried in `Request::messages`.
39#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
40pub struct Message {
41    /// Speaker.
42    pub role: Role,
43    /// Plain UTF-8 content for this turn. v1 does not support multimodal
44    /// content arrays; image inputs are signalled via `Request::image_token_budget`.
45    pub content: String,
46}
47
48/// Image-token budget; one of `VALID_IMAGE_TOKEN_BUDGETS`. Wraps a `u32` so
49/// constructors can enforce the enum at the type level.
50#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
51pub struct ImageTokenBudget(u32);
52
53impl ImageTokenBudget {
54    /// Construct an `ImageTokenBudget` if the value is one of the accepted constants.
55    pub fn new(value: u32) -> Option<Self> {
56        if VALID_IMAGE_TOKEN_BUDGETS.contains(&value) {
57            Some(Self(value))
58        } else {
59            None
60        }
61    }
62
63    /// Inner numeric value.
64    pub fn get(self) -> u32 {
65        self.0
66    }
67}
68
69/// The inference request envelope sent by clients.
70///
71/// `Default` produces an empty request: empty `messages`, all
72/// sampling fields `None` (server applies Gemma 4 defaults), no
73/// grammar. Useful for the `..Default::default()` shorthand in
74/// callers; remember to fill in `messages` before sending.
75#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
76pub struct Request {
77    /// Caller-assigned correlation id; echoed on every response frame.
78    #[serde(default, skip_serializing_if = "String::is_empty")]
79    pub id: String,
80
81    /// Conversation history in chronological order. Must be non-empty.
82    pub messages: Vec<Message>,
83
84    /// Sampling temperature; default `defaults::TEMPERATURE`.
85    #[serde(default, skip_serializing_if = "Option::is_none")]
86    pub temperature: Option<f64>,
87
88    /// Nucleus sampling probability; default `defaults::TOP_P`.
89    #[serde(default, skip_serializing_if = "Option::is_none")]
90    pub top_p: Option<f64>,
91
92    /// Top-k sampling cutoff; default `defaults::TOP_K`.
93    #[serde(default, skip_serializing_if = "Option::is_none")]
94    pub top_k: Option<u32>,
95
96    /// Maximum tokens to generate; default `defaults::MAX_TOKENS`.
97    #[serde(default, skip_serializing_if = "Option::is_none")]
98    pub max_tokens: Option<u32>,
99
100    /// Stream tokens vs return one final `done` frame; default `defaults::STREAM`.
101    #[serde(default, skip_serializing_if = "Option::is_none")]
102    pub stream: Option<bool>,
103
104    /// Image token budget; if present, must be in `VALID_IMAGE_TOKEN_BUDGETS`.
105    #[serde(default, skip_serializing_if = "Option::is_none")]
106    pub image_token_budget: Option<u32>,
107
108    /// GBNF grammar to constrain generation; empty means unconstrained.
109    #[serde(default, skip_serializing_if = "String::is_empty")]
110    pub grammar: String,
111}
112
113/// `Request` with all defaults applied and validation completed. Backends
114/// receive this; they never see the optional-shaped wire form.
115#[derive(Debug, Clone, PartialEq)]
116pub struct Resolved {
117    /// Caller-assigned correlation id; echoed on every response frame.
118    pub id: String,
119    /// Conversation history.
120    pub messages: Vec<Message>,
121    /// Effective sampling temperature.
122    pub temperature: f64,
123    /// Effective nucleus sampling probability.
124    pub top_p: f64,
125    /// Effective top-k cutoff.
126    pub top_k: u32,
127    /// Effective max-tokens cap.
128    pub max_tokens: u32,
129    /// Effective streaming flag.
130    pub stream: bool,
131    /// Image token budget if the request declared one; `None` for text-only.
132    pub image_token_budget: Option<ImageTokenBudget>,
133    /// GBNF grammar; empty means unconstrained.
134    pub grammar: String,
135}
136
137impl Request {
138    /// Validate the request and apply Gemma 4 defaults to omitted fields.
139    pub fn resolve(self) -> Result<Resolved, ProtoError> {
140        if self.messages.is_empty() {
141            return Err(ProtoError::InvalidRequest(
142                "messages must not be empty".into(),
143            ));
144        }
145
146        let image_token_budget = match self.image_token_budget {
147            Some(v) => Some(ImageTokenBudget::new(v).ok_or_else(|| {
148                ProtoError::InvalidRequest(format!(
149                    "image_token_budget {v} not in {VALID_IMAGE_TOKEN_BUDGETS:?}"
150                ))
151            })?),
152            None => None,
153        };
154
155        Ok(Resolved {
156            id: self.id,
157            messages: self.messages,
158            temperature: self.temperature.unwrap_or(defaults::TEMPERATURE),
159            top_p: self.top_p.unwrap_or(defaults::TOP_P),
160            top_k: self.top_k.unwrap_or(defaults::TOP_K),
161            max_tokens: self.max_tokens.unwrap_or(defaults::MAX_TOKENS),
162            stream: self.stream.unwrap_or(defaults::STREAM),
163            image_token_budget,
164            grammar: self.grammar,
165        })
166    }
167}