// llama_cpp_bindings/model/params.rs
//! A safe wrapper around `llama_model_params`.

use std::ffi::{CStr, c_char};
use std::fmt::{Debug, Formatter};
use std::pin::Pin;
use std::ptr::null;

use crate::LlamaCppError;
use crate::error::ModelParamsError;
use crate::model::params::kv_overrides::KvOverrides;
use crate::model::split_mode::{LlamaSplitMode, LlamaSplitModeParseError};

pub mod kv_overrides;
pub mod param_override_value;
14
/// The maximum number of devices supported by this wrapper.
///
/// The effective maximum is the smaller of this value and the value returned by
/// `llama_cpp_bindings::max_devices()` (the limit compiled into llama.cpp itself).
pub const LLAMA_CPP_MAX_DEVICES: usize = 16;
20
/// A safe wrapper around `llama_model_params`.
pub struct LlamaModelParams {
    /// The underlying `llama_model_params` from the C API.
    pub params: llama_cpp_bindings_sys::llama_model_params,
    // Backing storage for `params.kv_overrides`. The list is always terminated by a
    // zeroed sentinel entry so the C side can detect the end of the overrides.
    kv_overrides: Vec<llama_cpp_bindings_sys::llama_model_kv_override>,
    // Backing storage for `params.tensor_buft_overrides`; terminated by an entry
    // whose `pattern` is null.
    buft_overrides: Vec<llama_cpp_bindings_sys::llama_model_tensor_buft_override>,
    // Backing storage for `params.devices`. Heap-allocated (and pinned) so the raw
    // pointer handed to the C API in `with_devices` stays valid when this struct is
    // moved by value through the builder methods. Unused slots are null.
    devices: Pin<Box<[llama_cpp_bindings_sys::ggml_backend_dev_t; LLAMA_CPP_MAX_DEVICES]>>,
}
29
30impl Debug for LlamaModelParams {
31    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
32        f.debug_struct("LlamaModelParams")
33            .field("n_gpu_layers", &self.params.n_gpu_layers)
34            .field("main_gpu", &self.params.main_gpu)
35            .field("vocab_only", &self.params.vocab_only)
36            .field("use_mmap", &self.params.use_mmap)
37            .field("use_mlock", &self.params.use_mlock)
38            .field("split_mode", &self.split_mode())
39            .field("devices", &self.devices)
40            .field("kv_overrides", &"vec of kv_overrides")
41            .finish_non_exhaustive()
42    }
43}
44
impl LlamaModelParams {
    /// See [`KvOverrides`]
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
    /// let params = Box::pin(LlamaModelParams::default());
    /// let kv_overrides = params.kv_overrides();
    /// let count = kv_overrides.into_iter().count();
    /// assert_eq!(count, 0);
    /// ```
    #[must_use]
    pub const fn kv_overrides(&self) -> KvOverrides<'_> {
        KvOverrides::new(self)
    }

    /// Appends a key-value override to the model parameters. It must be pinned as this creates a self-referential struct.
    ///
    /// Note: only slot 0 is ever written, so at most one override can be stored;
    /// a second call fails with [`ModelParamsError::SlotNotEmpty`].
    ///
    /// # Errors
    /// Returns [`ModelParamsError`] if the internal override vector has no available slot,
    /// the slot is not empty, or the key contains invalid characters.
    ///
    /// # Panics
    /// Panics if `key` (including its nul terminator) is longer than the 128-byte
    /// `key` buffer of `llama_model_kv_override`, because the copy loop below
    /// indexes the fixed-size array.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use std::ffi::{CStr, CString};
    /// use std::pin::pin;
    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
    /// # use llama_cpp_bindings::model::params::param_override_value::ParamOverrideValue;
    /// let mut params = pin!(LlamaModelParams::default());
    /// let key = CString::new("key").expect("CString::new failed");
    /// params.as_mut().append_kv_override(&key, ParamOverrideValue::Int(50)).unwrap();
    ///
    /// let kv_overrides = params.kv_overrides().into_iter().collect::<Vec<_>>();
    /// assert_eq!(kv_overrides.len(), 1);
    ///
    /// let (k, v) = &kv_overrides[0];
    /// assert_eq!(v, &ParamOverrideValue::Int(50));
    ///
    /// assert_eq!(k.to_bytes(), b"key", "expected key to be 'key', was {:?}", k);
    /// ```
    pub fn append_kv_override(
        mut self: Pin<&mut Self>,
        key: &CStr,
        value: param_override_value::ParamOverrideValue,
    ) -> Result<(), ModelParamsError> {
        // Slot 0 is the single writable slot; the remaining entries only serve as
        // zero terminators for the C-side iteration.
        let kv_override = self
            .kv_overrides
            .get_mut(0)
            .ok_or(ModelParamsError::NoAvailableSlot)?;

        // A non-zero first key byte means the slot was already filled.
        if kv_override.key[0] != 0 {
            return Err(ModelParamsError::SlotNotEmpty);
        }

        // Copy the key bytes (including the trailing nul) into the fixed-size C
        // buffer. On targets where `c_char` is signed, bytes > 0x7f fail the
        // conversion and surface as `InvalidCharacterInKey`.
        for (i, &byte) in key.to_bytes_with_nul().iter().enumerate() {
            kv_override.key[i] = c_char::try_from(byte).map_err(|convert_error| {
                ModelParamsError::InvalidCharacterInKey {
                    byte,
                    reason: convert_error.to_string(),
                }
            })?;
        }

        kv_override.tag = value.tag();
        kv_override.__bindgen_anon_1 = value.value();

        // set to null pointer for panic safety (as push may move the vector, invalidating the pointer)
        self.params.kv_overrides = null();

        // push the next one to ensure we maintain the iterator invariant of ending with a 0
        self.kv_overrides
            .push(llama_cpp_bindings_sys::llama_model_kv_override {
                key: [0; 128],
                tag: 0,
                __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
                    val_i64: 0,
                },
            });

        // set the pointer to the (potentially) new vector
        self.params.kv_overrides = self.kv_overrides.as_ptr();

        Ok(())
    }
}
132
impl LlamaModelParams {
    /// Adds buffer type overrides to move all mixture-of-experts layers to CPU.
    ///
    /// Uses a regex matching the expert feed-forward tensors
    /// (`ffn_up_exps`, `ffn_down_exps`, `ffn_gate_exps` and their `ch`-prefixed variants).
    ///
    /// # Errors
    /// Returns [`ModelParamsError`] if the internal override vector has no available slot,
    /// the slot is not empty, or the key contains invalid characters.
    pub fn add_cpu_moe_override(self: Pin<&mut Self>) -> Result<(), ModelParamsError> {
        // `'static` literal, so the raw pointer stored below stays valid forever.
        self.add_cpu_buft_override(c"\\.ffn_(up|down|gate)_(ch|)exps")
    }

    /// Appends a buffer type override to the model parameters, to move layers matching pattern to CPU.
    /// It must be pinned as this creates a self-referential struct.
    ///
    /// Note: only slot 0 is ever written, so at most one override can be stored;
    /// a second call fails with [`ModelParamsError::SlotNotEmpty`].
    ///
    /// # Errors
    /// Returns [`ModelParamsError`] if the internal override vector has no available slot,
    /// the slot is not empty, or the key contains invalid characters.
    pub fn add_cpu_buft_override(
        mut self: Pin<&mut Self>,
        key: &CStr,
    ) -> Result<(), ModelParamsError> {
        // Slot 0 is the single writable slot; later entries only act as the
        // null-pattern terminator for the C side.
        let buft_override = self
            .buft_overrides
            .get_mut(0)
            .ok_or(ModelParamsError::NoAvailableSlot)?;

        if !buft_override.pattern.is_null() {
            return Err(ModelParamsError::SlotNotEmpty);
        }

        // Validate that every byte fits a `c_char` (bytes > 0x7f fail where
        // `c_char` is signed); the bytes themselves are not copied.
        for &byte in key.to_bytes_with_nul() {
            c_char::try_from(byte).map_err(|convert_error| {
                ModelParamsError::InvalidCharacterInKey {
                    byte,
                    reason: convert_error.to_string(),
                }
            })?;
        }

        // NOTE(review): this stores a raw pointer into `key`, but the signature only
        // borrows `key` for the duration of the call. A caller passing a temporary
        // `CString` would leave a dangling `pattern` pointer by the time the model is
        // loaded — confirm all callers pass `&'static CStr` literals (as
        // `add_cpu_moe_override` does), or consider copying the pattern into owned
        // storage inside this struct.
        buft_override.pattern = key.as_ptr();
        buft_override.buft = unsafe { llama_cpp_bindings_sys::ggml_backend_cpu_buffer_type() };

        // set to null pointer for panic safety (as push may move the vector, invalidating the pointer)
        self.params.tensor_buft_overrides = null();

        // push the next one to ensure we maintain the iterator invariant of ending with a 0
        self.buft_overrides
            .push(llama_cpp_bindings_sys::llama_model_tensor_buft_override {
                pattern: null(),
                buft: std::ptr::null_mut(),
            });

        // set the pointer to the (potentially) new vector
        self.params.tensor_buft_overrides = self.buft_overrides.as_ptr();

        Ok(())
    }
}
190
impl LlamaModelParams {
    /// Get the number of layers to offload to the GPU.
    #[must_use]
    pub const fn n_gpu_layers(&self) -> i32 {
        self.params.n_gpu_layers
    }

    /// The GPU that is used for scratch and small tensors
    #[must_use]
    pub const fn main_gpu(&self) -> i32 {
        self.params.main_gpu
    }

    /// only load the vocabulary, no weights
    #[must_use]
    pub const fn vocab_only(&self) -> bool {
        self.params.vocab_only
    }

    /// use mmap if possible
    #[must_use]
    pub const fn use_mmap(&self) -> bool {
        self.params.use_mmap
    }

    /// force system to keep model in RAM
    #[must_use]
    pub const fn use_mlock(&self) -> bool {
        self.params.use_mlock
    }

    /// get the split mode
    ///
    /// # Errors
    /// Returns `LlamaSplitModeParseError` if the unknown split mode is encountered.
    pub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError> {
        LlamaSplitMode::try_from(self.params.split_mode)
    }

    /// get the devices
    ///
    /// Returns the ggml backend device indices corresponding to the device
    /// pointers currently stored (i.e. the inverse of [`Self::with_devices`]).
    /// Stored pointers that no longer match any backend device are silently skipped.
    #[must_use]
    pub fn devices(&self) -> Vec<usize> {
        // Snapshot every backend device pointer so stored pointers can be mapped
        // back to their indices.
        let mut backend_devices = Vec::new();
        for i in 0..unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
            let dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(i) };
            backend_devices.push(dev);
        }
        let mut devices = Vec::new();
        for &dev in self.devices.iter() {
            // The stored array is null-terminated; stop at the first empty slot.
            if dev.is_null() {
                break;
            }
            let matched_index = backend_devices
                .iter()
                .enumerate()
                .find(|&(_i, &d)| d == dev)
                .map(|(index, _)| index);

            if let Some(index) = matched_index {
                devices.push(index);
            }
        }
        devices
    }

    /// sets the number of gpu layers to offload to the GPU.
    /// ```
    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
    /// let params = LlamaModelParams::default();
    /// let params = params.with_n_gpu_layers(1);
    /// assert_eq!(params.n_gpu_layers(), 1);
    /// ```
    #[must_use]
    pub fn with_n_gpu_layers(mut self, n_gpu_layers: u32) -> Self {
        // The only way this conversion can fail is if u32 overflows the i32 - in which case we set
        // to MAX
        let n_gpu_layers = i32::try_from(n_gpu_layers).unwrap_or(i32::MAX);
        self.params.n_gpu_layers = n_gpu_layers;
        self
    }

    /// sets the main GPU
    ///
    /// To enable this option, you must set `split_mode` to `LlamaSplitMode::None` to enable single GPU mode.
    #[must_use]
    pub const fn with_main_gpu(mut self, main_gpu: i32) -> Self {
        self.params.main_gpu = main_gpu;
        self
    }

    /// sets `vocab_only`
    #[must_use]
    pub const fn with_vocab_only(mut self, vocab_only: bool) -> Self {
        self.params.vocab_only = vocab_only;
        self
    }

    /// sets `use_mmap`
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
    /// let params = LlamaModelParams::default().with_use_mmap(false);
    /// assert!(!params.use_mmap());
    /// ```
    #[must_use]
    pub const fn with_use_mmap(mut self, use_mmap: bool) -> Self {
        self.params.use_mmap = use_mmap;
        self
    }

    /// Get `no_alloc`
    #[must_use]
    pub const fn no_alloc(&self) -> bool {
        self.params.no_alloc
    }

    /// Set `no_alloc`. When enabled, tensor data is not allocated.
    /// Incompatible with `use_mmap`, so enabling this also disables mmap.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
    /// let params = LlamaModelParams::default().with_no_alloc(true);
    /// assert!(params.no_alloc());
    /// assert!(!params.use_mmap());
    /// ```
    #[must_use]
    pub const fn with_no_alloc(mut self, no_alloc: bool) -> Self {
        self.params.no_alloc = no_alloc;
        // Enforce the documented incompatibility: `no_alloc` implies no mmap.
        if no_alloc {
            self.params.use_mmap = false;
        }
        self
    }

    /// sets `use_mlock`
    #[must_use]
    pub const fn with_use_mlock(mut self, use_mlock: bool) -> Self {
        self.params.use_mlock = use_mlock;
        self
    }

    /// sets `split_mode`
    #[must_use]
    pub fn with_split_mode(mut self, split_mode: LlamaSplitMode) -> Self {
        self.params.split_mode = split_mode.into();
        self
    }

    /// sets `devices`
    ///
    /// The devices are specified as indices that correspond to the ggml backend device indices.
    ///
    /// The maximum number of devices is 16.
    ///
    /// You don't need to specify CPU or ACCEL devices.
    ///
    /// # Errors
    /// Returns `LlamaCppError::BackendDeviceNotFound` if any device index is invalid.
    pub fn with_devices(mut self, devices: &[usize]) -> Result<Self, LlamaCppError> {
        // Reset any previously stored devices so the array is null-terminated
        // right after the entries written below.
        for dev in self.devices.iter_mut() {
            *dev = std::ptr::null_mut();
        }
        // Check device count
        let max_devices = crate::max_devices().min(LLAMA_CPP_MAX_DEVICES);
        if devices.len() > max_devices {
            return Err(LlamaCppError::MaxDevicesExceeded(max_devices));
        }
        for (i, &dev) in devices.iter().enumerate() {
            if dev >= unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
                return Err(LlamaCppError::BackendDeviceNotFound(dev));
            }
            let backend_dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(dev) };
            self.devices[i] = backend_dev;
        }
        // Point the C struct at the pinned, heap-allocated array; the allocation
        // is stable even though `self` is moved by value out of this method.
        self.params.devices = self.devices.as_mut_ptr();

        Ok(self)
    }
}
374
375/// Default parameters for `LlamaModel`. (as defined in llama.cpp by `llama_model_default_params`)
376/// ```
377/// # use llama_cpp_bindings::model::params::LlamaModelParams;
378/// use llama_cpp_bindings::model::split_mode::LlamaSplitMode;
379/// let params = LlamaModelParams::default();
380/// assert_eq!(params.n_gpu_layers(), -1, "n_gpu_layers should be -1");
381/// assert_eq!(params.main_gpu(), 0, "main_gpu should be 0");
382/// assert_eq!(params.vocab_only(), false, "vocab_only should be false");
383/// assert_eq!(params.use_mmap(), true, "use_mmap should be true");
384/// assert_eq!(params.use_mlock(), false, "use_mlock should be false");
385/// assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer), "split_mode should be LAYER");
386/// assert_eq!(params.devices().len(), 0, "devices should be empty");
387/// ```
388impl Default for LlamaModelParams {
389    fn default() -> Self {
390        let default_params = unsafe { llama_cpp_bindings_sys::llama_model_default_params() };
391        Self {
392            params: default_params,
393            // push the next one to ensure we maintain the iterator invariant of ending with a 0
394            kv_overrides: vec![llama_cpp_bindings_sys::llama_model_kv_override {
395                key: [0; 128],
396                tag: 0,
397                __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
398                    val_i64: 0,
399                },
400            }],
401            buft_overrides: vec![llama_cpp_bindings_sys::llama_model_tensor_buft_override {
402                pattern: null(),
403                buft: std::ptr::null_mut(),
404            }],
405            devices: Box::pin([std::ptr::null_mut(); 16]),
406        }
407    }
408}
409
#[cfg(test)]
mod tests {
    use crate::model::split_mode::LlamaSplitMode;

    use super::LlamaModelParams;

    // --- Defaults -----------------------------------------------------------

    #[test]
    fn default_params_have_expected_values() {
        let params = LlamaModelParams::default();

        assert_eq!(params.n_gpu_layers(), -1);
        assert_eq!(params.main_gpu(), 0);
        assert!(!params.vocab_only());
        assert!(params.use_mmap());
        assert!(!params.use_mlock());
        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer));
        assert!(params.devices().is_empty());
    }

    // --- Builder setters ----------------------------------------------------

    // u32::MAX does not fit an i32, so the setter clamps to i32::MAX.
    #[test]
    fn n_gpu_layers_overflow_clamps_to_max() {
        let params = LlamaModelParams::default().with_n_gpu_layers(u32::MAX);

        assert_eq!(params.n_gpu_layers(), i32::MAX);
    }

    #[test]
    fn with_n_gpu_layers_sets_value() {
        let params = LlamaModelParams::default().with_n_gpu_layers(32);

        assert_eq!(params.n_gpu_layers(), 32);
    }

    #[test]
    fn with_main_gpu_sets_value() {
        let params = LlamaModelParams::default().with_main_gpu(2);

        assert_eq!(params.main_gpu(), 2);
    }

    #[test]
    fn with_split_mode_none() {
        let params = LlamaModelParams::default().with_split_mode(LlamaSplitMode::None);

        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::None));
    }

    #[test]
    fn with_split_mode_row() {
        let params = LlamaModelParams::default().with_split_mode(LlamaSplitMode::Row);

        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Row));
    }

    #[test]
    fn with_vocab_only_enables() {
        let params = LlamaModelParams::default().with_vocab_only(true);

        assert!(params.vocab_only());
    }

    #[test]
    fn with_vocab_only_disables() {
        let params = LlamaModelParams::default().with_vocab_only(false);

        assert!(!params.vocab_only());
    }

    #[test]
    fn with_use_mmap_enables() {
        let params = LlamaModelParams::default().with_use_mmap(true);

        assert!(params.use_mmap());
    }

    #[test]
    fn with_use_mmap_disables() {
        let params = LlamaModelParams::default().with_use_mmap(false);

        assert!(!params.use_mmap());
    }

    #[test]
    fn with_no_alloc_enables() {
        let params = LlamaModelParams::default().with_no_alloc(true);

        assert!(params.no_alloc());
    }

    #[test]
    fn with_no_alloc_disables() {
        let params = LlamaModelParams::default().with_no_alloc(false);

        assert!(!params.no_alloc());
    }

    // `no_alloc` is documented as incompatible with mmap: enabling it must
    // force `use_mmap` off even if it was explicitly enabled before.
    #[test]
    fn with_no_alloc_true_disables_mmap() {
        let params = LlamaModelParams::default()
            .with_use_mmap(true)
            .with_no_alloc(true);

        assert!(params.no_alloc());
        assert!(!params.use_mmap());
    }

    #[test]
    fn default_no_alloc_is_false() {
        let params = LlamaModelParams::default();

        assert!(!params.no_alloc());
    }

    #[test]
    fn with_use_mlock_enables() {
        let params = LlamaModelParams::default().with_use_mlock(true);

        assert!(params.use_mlock());
    }

    #[test]
    fn with_use_mlock_disables() {
        let params = LlamaModelParams::default().with_use_mlock(false);

        assert!(!params.use_mlock());
    }

    #[test]
    fn debug_format_contains_field_names() {
        let params = LlamaModelParams::default();
        let debug_output = format!("{params:?}");

        assert!(debug_output.contains("n_gpu_layers"));
        assert!(debug_output.contains("main_gpu"));
        assert!(debug_output.contains("vocab_only"));
        assert!(debug_output.contains("use_mmap"));
        assert!(debug_output.contains("use_mlock"));
        assert!(debug_output.contains("split_mode"));
    }

    #[test]
    fn builder_chaining_preserves_all_values() {
        let params = LlamaModelParams::default()
            .with_n_gpu_layers(10)
            .with_main_gpu(1)
            .with_split_mode(LlamaSplitMode::Row)
            .with_vocab_only(true)
            .with_use_mlock(true);

        assert_eq!(params.n_gpu_layers(), 10);
        assert_eq!(params.main_gpu(), 1);
        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Row));
        assert!(params.vocab_only());
        assert!(params.use_mlock());
    }

    // --- Devices ------------------------------------------------------------

    #[test]
    fn with_devices_empty_list_succeeds() {
        let params = LlamaModelParams::default().with_devices(&[]);

        assert!(params.is_ok());
        assert!(params.unwrap().devices().is_empty());
    }

    #[test]
    fn with_devices_invalid_index_returns_error() {
        let result = LlamaModelParams::default().with_devices(&[999_999]);

        assert_eq!(
            result.unwrap_err(),
            crate::LlamaCppError::BackendDeviceNotFound(999_999)
        );
    }

    // --- Buffer-type overrides ----------------------------------------------

    #[test]
    fn add_cpu_buft_override_succeeds() {
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params.as_mut().add_cpu_buft_override(c"test_pattern");

        assert!(result.is_ok());
    }

    // Only slot 0 of the override list is writable, so a second call must fail.
    #[test]
    fn add_cpu_buft_override_twice_fails_with_slot_not_empty() {
        let mut params = std::pin::pin!(LlamaModelParams::default());
        params
            .as_mut()
            .add_cpu_buft_override(c"first_pattern")
            .unwrap();
        let result = params.as_mut().add_cpu_buft_override(c"second_pattern");

        assert_eq!(
            result.unwrap_err(),
            crate::error::ModelParamsError::SlotNotEmpty
        );
    }

    #[test]
    fn add_cpu_moe_override_succeeds() {
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params.as_mut().add_cpu_moe_override();

        assert!(result.is_ok());
    }

    // --- KV overrides -------------------------------------------------------

    // Mirrors the buft case above: the single writable slot means a second
    // append must fail with SlotNotEmpty.
    #[test]
    fn append_kv_override_twice_fails_with_slot_not_empty() {
        use crate::model::params::param_override_value::ParamOverrideValue;
        use std::ffi::CString;

        let mut params = std::pin::pin!(LlamaModelParams::default());
        let key = CString::new("first_key").unwrap();
        params
            .as_mut()
            .append_kv_override(&key, ParamOverrideValue::Int(1))
            .unwrap();

        let key2 = CString::new("second_key").unwrap();
        let result = params
            .as_mut()
            .append_kv_override(&key2, ParamOverrideValue::Int(2));

        assert_eq!(
            result.unwrap_err(),
            crate::error::ModelParamsError::SlotNotEmpty
        );
    }

    // 17 devices exceeds LLAMA_CPP_MAX_DEVICES (16), regardless of how many
    // backend devices actually exist.
    #[test]
    fn with_devices_too_many_returns_max_exceeded() {
        let too_many: Vec<usize> = (0..17).collect();
        let result = LlamaModelParams::default().with_devices(&too_many);

        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("Max devices exceeded")
        );
    }

    #[test]
    fn with_devices_sets_devices_when_available() {
        let dev_count = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() };
        assert!(dev_count > 0, "Test requires at least one backend device");

        let params = LlamaModelParams::default().with_devices(&[0]).unwrap();

        assert_eq!(params.devices().len(), 1);
        assert_eq!(params.devices()[0], 0);
    }

    #[test]
    fn with_devices_invalid_index_returns_not_found() {
        let invalid_index = usize::MAX;
        let result = LlamaModelParams::default().with_devices(&[invalid_index]);

        assert!(result.unwrap_err().to_string().contains("Backend device"));
    }

    // Excluded on Windows: these assume `c_char` is signed on the test target,
    // so a 0xff byte fails the `c_char::try_from` conversion.
    #[test]
    #[cfg(not(target_os = "windows"))]
    fn append_kv_override_with_high_byte_returns_invalid_character_error() {
        use crate::model::params::param_override_value::ParamOverrideValue;

        let key_bytes: &[u8] = b"\xff\0";
        let key = std::ffi::CStr::from_bytes_with_nul(key_bytes).unwrap();
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params
            .as_mut()
            .append_kv_override(key, ParamOverrideValue::Int(1));

        assert!(matches!(
            result,
            Err(crate::error::ModelParamsError::InvalidCharacterInKey { byte: 0xff, .. })
        ));
    }

    #[test]
    #[cfg(not(target_os = "windows"))]
    fn add_cpu_buft_override_with_high_byte_returns_invalid_character_error() {
        let key_bytes: &[u8] = b"\xff\0";
        let key = std::ffi::CStr::from_bytes_with_nul(key_bytes).unwrap();
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params.as_mut().add_cpu_buft_override(key);

        assert!(matches!(
            result,
            Err(crate::error::ModelParamsError::InvalidCharacterInKey { byte: 0xff, .. })
        ));
    }
}