Skip to main content

blazen_local_llm/
lib.rs

//! Shared option + request types for Blazen's local-LLM backends.
//!
//! Sits below `blazen-llm-candle`, `blazen-llm-mistralrs`, `blazen-llm-llamacpp`,
//! `blazen-llm` (orchestrator), and `blazen-controlplane` so that all five can
//! agree on a single strongly-typed shape for "options that apply to any local
//! LLM" (the `LocalLlmOptions` base) and "a typed request to materialise a
//! local model" (the `LocalModelRequest` carried through the factory seam in
//! `blazen-llm::providers::factory`).
//!
//! Backend-specific knobs (e.g. candle's `force_safetensors`, llama.cpp's
//! `n_gpu_layers`, mistral.rs's `vision`) stay on each backend's own Options
//! struct, which embeds [`LocalLlmOptions`] via a `pub base: LocalLlmOptions`
//! field. Use the chainable `with_*` methods on [`LocalLlmOptions`] (starting
//! from [`LocalLlmOptions::new`]) for ergonomic construction.
15
16use std::path::PathBuf;
17
18use serde::{Deserialize, Serialize};
19
20/// Options shared by every local-LLM backend (`candle`, `mistralrs`,
21/// `llama.cpp`). Embedded as the `base` field on each backend's own
22/// `*Options` struct.
23///
24/// All fields default to `None` / empty. Backend-specific knobs live on the
25/// outer struct; this struct holds only the configuration that is meaningful
26/// regardless of which engine is selected.
27#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
28pub struct LocalLlmOptions {
29    /// Hugging Face model repo ID (e.g. `"meta-llama/Llama-3.2-1B-Instruct"`)
30    /// or a local filesystem path to the weights. Backends that require an
31    /// HF repo (mistralrs `TextModelBuilder`) error at load time when this is
32    /// a non-repo path; backends that accept either (candle, llama.cpp) treat
33    /// the string as a path when it points at an existing file.
34    pub model_id: Option<String>,
35
36    /// Optional **separate** Hugging Face repo to fetch `tokenizer.json` from.
37    /// Use this when [`Self::model_id`] points at a quantization-only repo
38    /// (the common pattern in `TheBloke/*-GGUF`, `bartowski/*-GGUF`, etc.)
39    /// whose owners don't redistribute the tokenizer. When `None`, the
40    /// tokenizer is fetched from [`Self::model_id`].
41    ///
42    /// Example: `model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"` +
43    /// `tokenizer_repo = Some("TinyLlama/TinyLlama-1.1B-Chat-v1.0")`.
44    pub tokenizer_repo: Option<String>,
45
46    /// Revision / branch / tag / commit on the Hugging Face repo
47    /// (e.g. `"main"`, `"refs/pr/42"`, a commit SHA). Applies to both
48    /// [`Self::model_id`] and [`Self::tokenizer_repo`].
49    pub revision: Option<String>,
50
51    /// Hardware device specifier — `"cpu"`, `"cuda"`, `"cuda:N"`, `"metal"`.
52    /// Each backend parses this with its own device resolver. `None` ⇒ each
53    /// backend's CPU default.
54    pub device: Option<String>,
55
56    /// Quantization format string — `"f32"`, `"f16"`, `"bf16"`, `"q8_0"`,
57    /// `"q6_k"`, `"q5_k_m"`, `"q4_k_m"`, `"q4_k_s"`, `"q3_k_m"`, `"q2_k"`.
58    /// Backend treats `None` as "use native precision".
59    pub quantization: Option<String>,
60
61    /// Maximum context length in tokens. `None` ⇒ the model's built-in cap.
62    pub context_length: Option<usize>,
63
64    /// Per-call override for the model-file cache directory.
65    /// `None` ⇒ `blazen-model-cache`'s default
66    /// (`$BLAZEN_CACHE_DIR` or `~/.cache/blazen/models`).
67    pub cache_dir: Option<PathBuf>,
68
69    /// PEFT/LoRA adapter directories to mount immediately after the base
70    /// model loads. Each directory must contain `adapter_config.json` and
71    /// `adapter_model.safetensors`. The adapter id defaults to the
72    /// directory's last path component; callers needing custom ids should
73    /// mount via the backend's `load_adapter` method after construction.
74    #[serde(default)]
75    pub initial_adapters: Vec<PathBuf>,
76}
77
78impl LocalLlmOptions {
79    /// Construct an empty options struct (all fields `None` / empty).
80    #[must_use]
81    pub fn new() -> Self {
82        Self::default()
83    }
84
85    /// Builder shorthand — sugar over field assignment.
86    #[must_use]
87    pub fn with_model_id(mut self, model_id: impl Into<String>) -> Self {
88        self.model_id = Some(model_id.into());
89        self
90    }
91
92    /// Builder shorthand for [`Self::tokenizer_repo`].
93    #[must_use]
94    pub fn with_tokenizer_repo(mut self, repo: impl Into<String>) -> Self {
95        self.tokenizer_repo = Some(repo.into());
96        self
97    }
98
99    /// Builder shorthand for [`Self::revision`].
100    #[must_use]
101    pub fn with_revision(mut self, revision: impl Into<String>) -> Self {
102        self.revision = Some(revision.into());
103        self
104    }
105
106    /// Builder shorthand for [`Self::device`].
107    #[must_use]
108    pub fn with_device(mut self, device: impl Into<String>) -> Self {
109        self.device = Some(device.into());
110        self
111    }
112
113    /// Builder shorthand for [`Self::quantization`].
114    #[must_use]
115    pub fn with_quantization(mut self, quant: impl Into<String>) -> Self {
116        self.quantization = Some(quant.into());
117        self
118    }
119
120    /// Builder shorthand for [`Self::context_length`].
121    #[must_use]
122    pub fn with_context_length(mut self, n: usize) -> Self {
123        self.context_length = Some(n);
124        self
125    }
126
127    /// Builder shorthand for [`Self::cache_dir`].
128    #[must_use]
129    pub fn with_cache_dir(mut self, path: impl Into<PathBuf>) -> Self {
130        self.cache_dir = Some(path.into());
131        self
132    }
133
134    /// Returns the effective tokenizer repo: [`Self::tokenizer_repo`] when
135    /// set, otherwise [`Self::model_id`]. Returns `None` when neither is set.
136    #[must_use]
137    pub fn effective_tokenizer_repo(&self) -> Option<&str> {
138        self.tokenizer_repo.as_deref().or(self.model_id.as_deref())
139    }
140}
141
142/// A typed request to materialise a local model, carried through the
143/// `LocalModelFactory` seam in `blazen-llm::providers::factory`. Replaces the
144/// previous (`provider: &str`, `model: &str`) two-string signature so that
145/// downstream code (controlplane `ManagerHandle`, the per-binding facades)
146/// can plumb a full `LocalLlmOptions` payload through end-to-end.
147#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
148pub struct LocalModelRequest {
149    /// Provider identifier (e.g. `"candle"`, `"mistralrs"`, `"llamacpp"`).
150    /// Routes the request to the matching backend.
151    pub provider: String,
152
153    /// Logical model name. Takes precedence over `options.model_id` when
154    /// non-empty; the implementation should overwrite
155    /// `options.model_id = Some(model)` before delegating to the backend so
156    /// the request is self-consistent.
157    pub model: String,
158
159    /// Full options payload. Includes the shared base fields plus any
160    /// per-backend-specific fields serialised via `serde_json::Value` in
161    /// [`Self::backend_extras`] — see the per-binding wrappers for typed
162    /// access on each backend.
163    pub options: LocalLlmOptions,
164
165    /// Opaque per-backend extra fields (e.g. candle's `force_safetensors`,
166    /// mistral.rs's `vision`, llama.cpp's `n_gpu_layers`). Encoded as
167    /// `serde_json::Value` so this struct stays cheap to transport across
168    /// the factory seam without a hard dependency on every backend's typed
169    /// option struct. Each backend's `LocalModelFactory` implementation
170    /// deserialises this into its own backend-specific options struct.
171    #[serde(default)]
172    pub backend_extras: serde_json::Value,
173}
174
175impl LocalModelRequest {
176    /// Construct a request from the four canonical fields.
177    #[must_use]
178    pub fn new(
179        provider: impl Into<String>,
180        model: impl Into<String>,
181        options: LocalLlmOptions,
182    ) -> Self {
183        Self {
184            provider: provider.into(),
185            model: model.into(),
186            options,
187            backend_extras: serde_json::Value::Null,
188        }
189    }
190
191    /// Attach typed per-backend extras by serialising `extras` into the
192    /// [`Self::backend_extras`] field.
193    ///
194    /// # Errors
195    ///
196    /// Returns a `serde_json` error if `extras` cannot be serialised. In
197    /// practice every backend's options struct derives `Serialize` so this
198    /// is infallible at the type level — the `Result` is a future-proof
199    /// safety net.
200    pub fn with_backend_extras<T: serde::Serialize>(
201        mut self,
202        extras: &T,
203    ) -> Result<Self, serde_json::Error> {
204        self.backend_extras = serde_json::to_value(extras)?;
205        Ok(self)
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Stand-in for a backend-specific options struct, used to exercise the
    /// `backend_extras` round trip.
    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    struct CandleExtras {
        force_safetensors: bool,
    }

    #[test]
    fn default_options_are_all_none() {
        let opts = LocalLlmOptions::default();
        assert!(opts.model_id.is_none());
        assert!(opts.tokenizer_repo.is_none());
        assert!(opts.revision.is_none());
        assert!(opts.device.is_none());
        assert!(opts.quantization.is_none());
        assert!(opts.context_length.is_none());
        assert!(opts.cache_dir.is_none());
        assert!(opts.initial_adapters.is_empty());
    }

    #[test]
    fn builders_compose() {
        let opts = LocalLlmOptions::new()
            .with_model_id("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
            .with_tokenizer_repo("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
            .with_revision("main")
            .with_device("cuda:0")
            .with_quantization("q4_k_m")
            .with_context_length(2048)
            .with_cache_dir("/var/blazen/models");
        assert_eq!(
            opts.model_id.as_deref(),
            Some("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF")
        );
        assert_eq!(
            opts.effective_tokenizer_repo(),
            Some("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
        );
        assert_eq!(opts.context_length, Some(2048));
    }

    #[test]
    fn effective_tokenizer_repo_falls_back_to_model_id() {
        let opts = LocalLlmOptions::new().with_model_id("unsloth/Qwen2.5-0.5B-Instruct-GGUF");
        assert_eq!(
            opts.effective_tokenizer_repo(),
            Some("unsloth/Qwen2.5-0.5B-Instruct-GGUF")
        );
    }

    /// With neither `tokenizer_repo` nor `model_id` set there is nothing to
    /// fall back to.
    #[test]
    fn effective_tokenizer_repo_is_none_when_nothing_is_set() {
        assert_eq!(LocalLlmOptions::new().effective_tokenizer_repo(), None);
    }

    #[test]
    fn serde_roundtrip() {
        let opts = LocalLlmOptions {
            model_id: Some("a/b".into()),
            tokenizer_repo: Some("c/d".into()),
            revision: Some("main".into()),
            device: Some("cpu".into()),
            quantization: Some("q4_k_m".into()),
            context_length: Some(4096),
            cache_dir: Some(PathBuf::from("/var/cache")),
            initial_adapters: vec![PathBuf::from("/var/adapter")],
        };
        let json = serde_json::to_string(&opts).expect("ser");
        let parsed: LocalLlmOptions = serde_json::from_str(&json).expect("de");
        assert_eq!(opts, parsed);
    }

    /// An empty JSON object must deserialise to the default options: the
    /// `Option` fields fall back to `None` and `initial_adapters` relies on
    /// its `#[serde(default)]` attribute.
    #[test]
    fn missing_fields_deserialize_to_defaults() {
        let parsed: LocalLlmOptions = serde_json::from_str("{}").expect("de");
        assert_eq!(parsed, LocalLlmOptions::default());
    }

    #[test]
    fn local_model_request_carries_options_and_extras() {
        let opts = LocalLlmOptions::new().with_model_id("a/b");
        let req = LocalModelRequest::new("candle", "tinyllama", opts.clone())
            .with_backend_extras(&CandleExtras {
                force_safetensors: true,
            })
            .expect("ser extras");
        assert_eq!(req.provider, "candle");
        assert_eq!(req.model, "tinyllama");
        // The options payload must pass through the constructor untouched.
        assert_eq!(req.options, opts);
        let parsed: CandleExtras = serde_json::from_value(req.backend_extras).expect("de");
        assert_eq!(
            parsed,
            CandleExtras {
                force_safetensors: true
            }
        );
    }

    /// A payload that omits `backend_extras` is accepted and yields the same
    /// request `LocalModelRequest::new` builds (extras = `Value::Null`).
    #[test]
    fn local_model_request_without_extras_deserializes_to_null() {
        let json = r#"{"provider":"candle","model":"tinyllama","options":{}}"#;
        let parsed: LocalModelRequest = serde_json::from_str(json).expect("de");
        assert_eq!(
            parsed,
            LocalModelRequest::new("candle", "tinyllama", LocalLlmOptions::new())
        );
        assert!(parsed.backend_extras.is_null());
    }
}