blazen_local_llm/lib.rs
1//! Shared option + request types for Blazen's local-LLM backends.
2//!
3//! Sits below `blazen-llm-candle`, `blazen-llm-mistralrs`, `blazen-llm-llamacpp`,
4//! `blazen-llm` (orchestrator), and `blazen-controlplane` so that all five can
5//! agree on a single strongly-typed shape for "options that apply to any local
6//! LLM" (the `LocalLlmOptions` base) and "a typed request to materialise a
7//! local model" (the `LocalModelRequest` carried through the factory seam in
8//! `blazen-llm::providers::factory`).
9//!
10//! Backend-specific knobs (e.g. candle's `force_safetensors`, llama.cpp's
11//! `n_gpu_layers`, mistral.rs's `vision`) stay on each backend's own Options
12//! struct, which embeds [`LocalLlmOptions`] via a `pub base: LocalLlmOptions`
//! field. Use the chainable `with_*` methods on [`LocalLlmOptions`] for
//! ergonomic construction.
15
16use std::path::PathBuf;
17
18use serde::{Deserialize, Serialize};
19
20/// Options shared by every local-LLM backend (`candle`, `mistralrs`,
21/// `llama.cpp`). Embedded as the `base` field on each backend's own
22/// `*Options` struct.
23///
24/// All fields default to `None` / empty. Backend-specific knobs live on the
25/// outer struct; this struct holds only the configuration that is meaningful
26/// regardless of which engine is selected.
27#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
28pub struct LocalLlmOptions {
29 /// Hugging Face model repo ID (e.g. `"meta-llama/Llama-3.2-1B-Instruct"`)
30 /// or a local filesystem path to the weights. Backends that require an
31 /// HF repo (mistralrs `TextModelBuilder`) error at load time when this is
32 /// a non-repo path; backends that accept either (candle, llama.cpp) treat
33 /// the string as a path when it points at an existing file.
34 pub model_id: Option<String>,
35
36 /// Optional **separate** Hugging Face repo to fetch `tokenizer.json` from.
37 /// Use this when [`Self::model_id`] points at a quantization-only repo
38 /// (the common pattern in `TheBloke/*-GGUF`, `bartowski/*-GGUF`, etc.)
39 /// whose owners don't redistribute the tokenizer. When `None`, the
40 /// tokenizer is fetched from [`Self::model_id`].
41 ///
42 /// Example: `model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"` +
43 /// `tokenizer_repo = Some("TinyLlama/TinyLlama-1.1B-Chat-v1.0")`.
44 pub tokenizer_repo: Option<String>,
45
46 /// Revision / branch / tag / commit on the Hugging Face repo
47 /// (e.g. `"main"`, `"refs/pr/42"`, a commit SHA). Applies to both
48 /// [`Self::model_id`] and [`Self::tokenizer_repo`].
49 pub revision: Option<String>,
50
51 /// Hardware device specifier — `"cpu"`, `"cuda"`, `"cuda:N"`, `"metal"`.
52 /// Each backend parses this with its own device resolver. `None` ⇒ each
53 /// backend's CPU default.
54 pub device: Option<String>,
55
56 /// Quantization format string — `"f32"`, `"f16"`, `"bf16"`, `"q8_0"`,
57 /// `"q6_k"`, `"q5_k_m"`, `"q4_k_m"`, `"q4_k_s"`, `"q3_k_m"`, `"q2_k"`.
58 /// Backend treats `None` as "use native precision".
59 pub quantization: Option<String>,
60
61 /// Maximum context length in tokens. `None` ⇒ the model's built-in cap.
62 pub context_length: Option<usize>,
63
64 /// Per-call override for the model-file cache directory.
65 /// `None` ⇒ `blazen-model-cache`'s default
66 /// (`$BLAZEN_CACHE_DIR` or `~/.cache/blazen/models`).
67 pub cache_dir: Option<PathBuf>,
68
69 /// PEFT/LoRA adapter directories to mount immediately after the base
70 /// model loads. Each directory must contain `adapter_config.json` and
71 /// `adapter_model.safetensors`. The adapter id defaults to the
72 /// directory's last path component; callers needing custom ids should
73 /// mount via the backend's `load_adapter` method after construction.
74 #[serde(default)]
75 pub initial_adapters: Vec<PathBuf>,
76}
77
78impl LocalLlmOptions {
79 /// Construct an empty options struct (all fields `None` / empty).
80 #[must_use]
81 pub fn new() -> Self {
82 Self::default()
83 }
84
85 /// Builder shorthand — sugar over field assignment.
86 #[must_use]
87 pub fn with_model_id(mut self, model_id: impl Into<String>) -> Self {
88 self.model_id = Some(model_id.into());
89 self
90 }
91
92 /// Builder shorthand for [`Self::tokenizer_repo`].
93 #[must_use]
94 pub fn with_tokenizer_repo(mut self, repo: impl Into<String>) -> Self {
95 self.tokenizer_repo = Some(repo.into());
96 self
97 }
98
99 /// Builder shorthand for [`Self::revision`].
100 #[must_use]
101 pub fn with_revision(mut self, revision: impl Into<String>) -> Self {
102 self.revision = Some(revision.into());
103 self
104 }
105
106 /// Builder shorthand for [`Self::device`].
107 #[must_use]
108 pub fn with_device(mut self, device: impl Into<String>) -> Self {
109 self.device = Some(device.into());
110 self
111 }
112
113 /// Builder shorthand for [`Self::quantization`].
114 #[must_use]
115 pub fn with_quantization(mut self, quant: impl Into<String>) -> Self {
116 self.quantization = Some(quant.into());
117 self
118 }
119
120 /// Builder shorthand for [`Self::context_length`].
121 #[must_use]
122 pub fn with_context_length(mut self, n: usize) -> Self {
123 self.context_length = Some(n);
124 self
125 }
126
127 /// Builder shorthand for [`Self::cache_dir`].
128 #[must_use]
129 pub fn with_cache_dir(mut self, path: impl Into<PathBuf>) -> Self {
130 self.cache_dir = Some(path.into());
131 self
132 }
133
134 /// Returns the effective tokenizer repo: [`Self::tokenizer_repo`] when
135 /// set, otherwise [`Self::model_id`]. Returns `None` when neither is set.
136 #[must_use]
137 pub fn effective_tokenizer_repo(&self) -> Option<&str> {
138 self.tokenizer_repo.as_deref().or(self.model_id.as_deref())
139 }
140}
141
142/// A typed request to materialise a local model, carried through the
143/// `LocalModelFactory` seam in `blazen-llm::providers::factory`. Replaces the
144/// previous (`provider: &str`, `model: &str`) two-string signature so that
145/// downstream code (controlplane `ManagerHandle`, the per-binding facades)
146/// can plumb a full `LocalLlmOptions` payload through end-to-end.
147#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
148pub struct LocalModelRequest {
149 /// Provider identifier (e.g. `"candle"`, `"mistralrs"`, `"llamacpp"`).
150 /// Routes the request to the matching backend.
151 pub provider: String,
152
153 /// Logical model name. Takes precedence over `options.model_id` when
154 /// non-empty; the implementation should overwrite
155 /// `options.model_id = Some(model)` before delegating to the backend so
156 /// the request is self-consistent.
157 pub model: String,
158
159 /// Full options payload. Includes the shared base fields plus any
160 /// per-backend-specific fields serialised via `serde_json::Value` in
161 /// [`Self::backend_extras`] — see the per-binding wrappers for typed
162 /// access on each backend.
163 pub options: LocalLlmOptions,
164
165 /// Opaque per-backend extra fields (e.g. candle's `force_safetensors`,
166 /// mistral.rs's `vision`, llama.cpp's `n_gpu_layers`). Encoded as
167 /// `serde_json::Value` so this struct stays cheap to transport across
168 /// the factory seam without a hard dependency on every backend's typed
169 /// option struct. Each backend's `LocalModelFactory` implementation
170 /// deserialises this into its own backend-specific options struct.
171 #[serde(default)]
172 pub backend_extras: serde_json::Value,
173}
174
175impl LocalModelRequest {
176 /// Construct a request from the four canonical fields.
177 #[must_use]
178 pub fn new(
179 provider: impl Into<String>,
180 model: impl Into<String>,
181 options: LocalLlmOptions,
182 ) -> Self {
183 Self {
184 provider: provider.into(),
185 model: model.into(),
186 options,
187 backend_extras: serde_json::Value::Null,
188 }
189 }
190
191 /// Attach typed per-backend extras by serialising `extras` into the
192 /// [`Self::backend_extras`] field.
193 ///
194 /// # Errors
195 ///
196 /// Returns a `serde_json` error if `extras` cannot be serialised. In
197 /// practice every backend's options struct derives `Serialize` so this
198 /// is infallible at the type level — the `Result` is a future-proof
199 /// safety net.
200 pub fn with_backend_extras<T: serde::Serialize>(
201 mut self,
202 extras: &T,
203 ) -> Result<Self, serde_json::Error> {
204 self.backend_extras = serde_json::to_value(extras)?;
205 Ok(self)
206 }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// A default-constructed value has every field unset and matches `new()`.
    #[test]
    fn default_options_are_all_none() {
        let opts = LocalLlmOptions::default();
        assert!(opts.model_id.is_none());
        assert!(opts.tokenizer_repo.is_none());
        assert!(opts.revision.is_none());
        assert!(opts.device.is_none());
        assert!(opts.quantization.is_none());
        assert!(opts.context_length.is_none());
        assert!(opts.cache_dir.is_none());
        assert!(opts.initial_adapters.is_empty());
        assert_eq!(opts, LocalLlmOptions::new());
    }

    /// Every `with_*` setter stores its argument in the matching field.
    #[test]
    fn builders_compose() {
        let opts = LocalLlmOptions::new()
            .with_model_id("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
            .with_tokenizer_repo("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            .with_revision("main")
            .with_device("cuda:0")
            .with_quantization("q4_k_m")
            .with_context_length(2048)
            .with_cache_dir("/var/blazen/models");
        assert_eq!(
            opts.model_id.as_deref(),
            Some("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
        );
        assert_eq!(
            opts.tokenizer_repo.as_deref(),
            Some("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        );
        assert_eq!(opts.revision.as_deref(), Some("main"));
        assert_eq!(opts.device.as_deref(), Some("cuda:0"));
        assert_eq!(opts.quantization.as_deref(), Some("q4_k_m"));
        assert_eq!(opts.context_length, Some(2048));
        assert_eq!(opts.cache_dir, Some(PathBuf::from("/var/blazen/models")));
        assert!(opts.initial_adapters.is_empty());
        assert_eq!(
            opts.effective_tokenizer_repo(),
            Some("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        );
    }

    /// Without an explicit tokenizer repo, the model id is used.
    #[test]
    fn effective_tokenizer_repo_falls_back_to_model_id() {
        let opts = LocalLlmOptions::new().with_model_id("unsloth/Qwen2.5-0.5B-Instruct-GGUF");
        assert_eq!(
            opts.effective_tokenizer_repo(),
            Some("unsloth/Qwen2.5-0.5B-Instruct-GGUF")
        );
    }

    /// With neither a tokenizer repo nor a model id there is nothing to return.
    #[test]
    fn effective_tokenizer_repo_is_none_when_nothing_is_set() {
        assert_eq!(LocalLlmOptions::new().effective_tokenizer_repo(), None);
    }

    /// A fully populated value survives a JSON round trip unchanged.
    #[test]
    fn serde_roundtrip() {
        let opts = LocalLlmOptions {
            model_id: Some("a/b".into()),
            tokenizer_repo: Some("c/d".into()),
            revision: Some("main".into()),
            device: Some("cpu".into()),
            quantization: Some("q4_k_m".into()),
            context_length: Some(4096),
            cache_dir: Some(PathBuf::from("/var/cache")),
            initial_adapters: vec![PathBuf::from("/var/adapter")],
        };
        let json = serde_json::to_string(&opts).expect("ser");
        let parsed: LocalLlmOptions = serde_json::from_str(&json).expect("de");
        assert_eq!(opts, parsed);
    }

    /// Omitted fields deserialise to the defaults, including the
    /// `#[serde(default)]` adapter list.
    #[test]
    fn options_deserialize_from_empty_object() {
        let parsed: LocalLlmOptions = serde_json::from_str("{}").expect("de");
        assert_eq!(parsed, LocalLlmOptions::default());
    }

    /// Stand-in for a backend-specific options struct.
    #[derive(Serialize, Deserialize, Debug, PartialEq)]
    struct CandleExtras {
        force_safetensors: bool,
    }

    /// Provider, model, options, and typed extras all reach the request.
    #[test]
    fn local_model_request_carries_options_and_extras() {
        let opts = LocalLlmOptions::new().with_model_id("a/b");
        let req = LocalModelRequest::new("candle", "tinyllama", opts)
            .with_backend_extras(&CandleExtras {
                force_safetensors: true,
            })
            .expect("ser extras");
        assert_eq!(req.provider, "candle");
        assert_eq!(req.model, "tinyllama");
        assert_eq!(req.options.model_id.as_deref(), Some("a/b"));
        let parsed: CandleExtras = serde_json::from_value(req.backend_extras).expect("de");
        assert_eq!(
            parsed,
            CandleExtras {
                force_safetensors: true
            }
        );
    }

    /// `new` leaves the extras as JSON null, and a payload that omits
    /// `backend_extras` deserialises to the same request.
    #[test]
    fn local_model_request_defaults_extras_to_null() {
        let req = LocalModelRequest::new("llamacpp", "tinyllama", LocalLlmOptions::new());
        assert!(req.backend_extras.is_null());
        let parsed: LocalModelRequest =
            serde_json::from_str(r#"{"provider":"llamacpp","model":"tinyllama","options":{}}"#)
                .expect("de");
        assert_eq!(parsed, req);
    }
}