Skip to main content

llama_cpp_bindings/model/
params.rs

1//! A safe wrapper around `llama_model_params`.
2
3use crate::LlamaCppError;
4use crate::context::params::LlamaContextParams;
5use crate::error::{FitError, ModelParamsError};
6use crate::model::llama_split_mode_parse_error::LlamaSplitModeParseError;
7use crate::model::params::fit_result::FitResult;
8use crate::model::params::kv_overrides::KvOverrides;
9use crate::model::split_mode::LlamaSplitMode;
10use std::ffi::{CStr, c_char};
11use std::fmt::{Debug, Formatter};
12use std::pin::Pin;
13use std::ptr::null;
14
15pub mod fit_result;
16pub mod kv_override_value_iterator;
17pub mod kv_overrides;
18pub mod param_override_value;
19pub mod unknown_kv_override_tag;
20
/// The maximum number of devices supported.
///
/// The real maximum number of devices is the lesser one of this value and the value returned by
/// `llama_cpp_bindings::max_devices()`.
///
/// This constant is also the length of the fixed-size device array stored inside
/// [`LlamaModelParams`].
pub const LLAMA_CPP_MAX_DEVICES: usize = 16;
26
/// A safe wrapper around `llama_model_params`.
///
/// Besides the raw C struct, the wrapper owns the buffers that the pointer fields of
/// `params` are set to by the methods of this type.
pub struct LlamaModelParams {
    /// The underlying `llama_model_params` from the C API.
    pub params: llama_cpp_bindings_sys::llama_model_params,
    /// Backing storage for `params.kv_overrides`; the last element is a zeroed sentinel.
    kv_overrides: Vec<llama_cpp_bindings_sys::llama_model_kv_override>,
    /// Backing storage for `params.tensor_buft_overrides`; the last element has a null pattern.
    buft_overrides: Vec<llama_cpp_bindings_sys::llama_model_tensor_buft_override>,
    /// Backing storage for `params.devices`; unused slots are null.
    devices: Pin<Box<[llama_cpp_bindings_sys::ggml_backend_dev_t; LLAMA_CPP_MAX_DEVICES]>>,
    /// Backing storage for `params.tensor_split`; only filled in by `fit_params`.
    tensor_split: Vec<f32>,
}
36
37impl Debug for LlamaModelParams {
38    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
39        f.debug_struct("LlamaModelParams")
40            .field("n_gpu_layers", &self.params.n_gpu_layers)
41            .field("main_gpu", &self.params.main_gpu)
42            .field("vocab_only", &self.params.vocab_only)
43            .field("use_mmap", &self.params.use_mmap)
44            .field("use_mlock", &self.params.use_mlock)
45            .field("split_mode", &self.split_mode())
46            .field("devices", &self.devices)
47            .field("kv_overrides", &"vec of kv_overrides")
48            .finish_non_exhaustive()
49    }
50}
51
52impl LlamaModelParams {
53    /// See [`KvOverrides`]
54    ///
55    /// # Examples
56    ///
57    /// ```rust
58    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
59    /// let params = Box::pin(LlamaModelParams::default());
60    /// let kv_overrides = params.kv_overrides();
61    /// let count = kv_overrides.into_iter().count();
62    /// assert_eq!(count, 0);
63    /// ```
64    #[must_use]
65    pub const fn kv_overrides(&self) -> KvOverrides<'_> {
66        KvOverrides::new(self)
67    }
68
69    /// Appends a key-value override to the model parameters. It must be pinned as this creates a self-referential struct.
70    ///
71    /// # Errors
72    /// Returns [`ModelParamsError`] if the internal override vector has no available slot,
73    /// the slot is not empty, or the key contains invalid characters.
74    ///
75    /// # Examples
76    ///
77    /// ```rust
78    /// # use std::ffi::{CStr, CString};
79    /// use std::pin::pin;
80    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
81    /// # use llama_cpp_bindings::model::params::param_override_value::ParamOverrideValue;
82    /// let mut params = pin!(LlamaModelParams::default());
83    /// let key = CString::new("key").expect("CString::new failed");
84    /// params.as_mut().append_kv_override(&key, ParamOverrideValue::Int(50)).unwrap();
85    ///
86    /// let kv_overrides = params.kv_overrides().into_iter().collect::<Vec<_>>();
87    /// assert_eq!(kv_overrides.len(), 1);
88    ///
89    /// let (k, v) = &kv_overrides[0];
90    /// assert_eq!(v, &ParamOverrideValue::Int(50));
91    ///
92    /// assert_eq!(k.to_bytes(), b"key", "expected key to be 'key', was {:?}", k);
93    /// ```
94    pub fn append_kv_override(
95        mut self: Pin<&mut Self>,
96        key: &CStr,
97        value: param_override_value::ParamOverrideValue,
98    ) -> Result<(), ModelParamsError> {
99        let kv_override = self
100            .kv_overrides
101            .get_mut(0)
102            .ok_or(ModelParamsError::NoAvailableSlot)?;
103
104        if kv_override.key[0] != 0 {
105            return Err(ModelParamsError::SlotNotEmpty);
106        }
107
108        for (i, &byte) in key.to_bytes_with_nul().iter().enumerate() {
109            kv_override.key[i] = c_char::try_from(byte).map_err(|convert_error| {
110                ModelParamsError::InvalidCharacterInKey {
111                    byte,
112                    reason: convert_error.to_string(),
113                }
114            })?;
115        }
116
117        kv_override.tag = value.tag();
118        kv_override.__bindgen_anon_1 = value.value();
119
120        self.push_kv_override_terminator();
121
122        Ok(())
123    }
124
125    /// Pushes the trailing zero-tag sentinel onto `kv_overrides` and refreshes
126    /// `params.kv_overrides`. The cached pointer is nulled before [`Vec::push`]
127    /// so that a relocation-induced panic never leaves a dangling pointer in
128    /// `params`.
129    fn push_kv_override_terminator(mut self: Pin<&mut Self>) {
130        self.params.kv_overrides = null();
131
132        self.kv_overrides
133            .push(llama_cpp_bindings_sys::llama_model_kv_override {
134                key: [0; 128],
135                tag: 0,
136                __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
137                    val_i64: 0,
138                },
139            });
140
141        self.params.kv_overrides = self.kv_overrides.as_ptr();
142    }
143}
144
145impl LlamaModelParams {
146    /// Adds buffer type overrides to move all mixture-of-experts layers to CPU.
147    ///
148    /// # Errors
149    /// Returns [`ModelParamsError`] if the internal override vector has no available slot,
150    /// the slot is not empty, or the key contains invalid characters.
151    pub fn add_cpu_moe_override(self: Pin<&mut Self>) -> Result<(), ModelParamsError> {
152        self.add_cpu_buft_override(c"\\.ffn_(up|down|gate)_(ch|)exps")
153    }
154
155    /// Appends a buffer type override to the model parameters, to move layers matching pattern to CPU.
156    /// It must be pinned as this creates a self-referential struct.
157    ///
158    /// # Errors
159    /// Returns [`ModelParamsError`] if the internal override vector has no available slot,
160    /// the slot is not empty, or the key contains invalid characters.
161    pub fn add_cpu_buft_override(
162        mut self: Pin<&mut Self>,
163        key: &CStr,
164    ) -> Result<(), ModelParamsError> {
165        let buft_override = self
166            .buft_overrides
167            .get_mut(0)
168            .ok_or(ModelParamsError::NoAvailableSlot)?;
169
170        if !buft_override.pattern.is_null() {
171            return Err(ModelParamsError::SlotNotEmpty);
172        }
173
174        for &byte in key.to_bytes_with_nul() {
175            c_char::try_from(byte).map_err(|convert_error| {
176                ModelParamsError::InvalidCharacterInKey {
177                    byte,
178                    reason: convert_error.to_string(),
179                }
180            })?;
181        }
182
183        buft_override.pattern = key.as_ptr();
184        buft_override.buft = unsafe { llama_cpp_bindings_sys::ggml_backend_cpu_buffer_type() };
185
186        self.push_buft_override_terminator();
187
188        Ok(())
189    }
190
191    /// Pushes the trailing null-pattern sentinel onto `buft_overrides` and
192    /// refreshes `params.tensor_buft_overrides`. The cached pointer is nulled
193    /// before [`Vec::push`] so that a relocation-induced panic never leaves a
194    /// dangling pointer in `params`.
195    fn push_buft_override_terminator(mut self: Pin<&mut Self>) {
196        self.params.tensor_buft_overrides = null();
197
198        self.buft_overrides
199            .push(llama_cpp_bindings_sys::llama_model_tensor_buft_override {
200                pattern: null(),
201                buft: std::ptr::null_mut(),
202            });
203
204        self.params.tensor_buft_overrides = self.buft_overrides.as_ptr();
205    }
206}
207
208impl LlamaModelParams {
209    /// Get the number of layers to offload to the GPU.
210    #[must_use]
211    pub const fn n_gpu_layers(&self) -> i32 {
212        self.params.n_gpu_layers
213    }
214
215    /// The GPU that is used for scratch and small tensors
216    #[must_use]
217    pub const fn main_gpu(&self) -> i32 {
218        self.params.main_gpu
219    }
220
221    /// only load the vocabulary, no weights
222    #[must_use]
223    pub const fn vocab_only(&self) -> bool {
224        self.params.vocab_only
225    }
226
227    /// use mmap if possible
228    #[must_use]
229    pub const fn use_mmap(&self) -> bool {
230        self.params.use_mmap
231    }
232
233    /// force system to keep model in RAM
234    #[must_use]
235    pub const fn use_mlock(&self) -> bool {
236        self.params.use_mlock
237    }
238
239    /// get the split mode
240    ///
241    /// # Errors
242    /// Returns `LlamaSplitModeParseError` if the unknown split mode is encountered.
243    pub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError> {
244        LlamaSplitMode::try_from(self.params.split_mode)
245    }
246
247    /// get the devices
248    #[must_use]
249    pub fn devices(&self) -> Vec<usize> {
250        let mut backend_devices = Vec::new();
251        for i in 0..unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
252            let dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(i) };
253            backend_devices.push(dev);
254        }
255        let mut devices = Vec::new();
256        for &dev in self.devices.iter() {
257            if dev.is_null() {
258                break;
259            }
260            let matched_index = backend_devices
261                .iter()
262                .enumerate()
263                .find(|&(_i, &d)| d == dev)
264                .map(|(index, _)| index);
265
266            if let Some(index) = matched_index {
267                devices.push(index);
268            }
269        }
270        devices
271    }
272
273    /// sets the number of gpu layers to offload to the GPU.
274    /// ```
275    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
276    /// let params = LlamaModelParams::default();
277    /// let params = params.with_n_gpu_layers(1);
278    /// assert_eq!(params.n_gpu_layers(), 1);
279    /// ```
280    #[must_use]
281    pub fn with_n_gpu_layers(mut self, n_gpu_layers: u32) -> Self {
282        let n_gpu_layers = i32::try_from(n_gpu_layers).unwrap_or(i32::MAX);
283        self.params.n_gpu_layers = n_gpu_layers;
284        self
285    }
286
287    /// sets the main GPU
288    ///
289    /// To enable this option, you must set `split_mode` to `LlamaSplitMode::None` to enable single GPU mode.
290    #[must_use]
291    pub const fn with_main_gpu(mut self, main_gpu: i32) -> Self {
292        self.params.main_gpu = main_gpu;
293        self
294    }
295
296    /// sets `vocab_only`
297    #[must_use]
298    pub const fn with_vocab_only(mut self, vocab_only: bool) -> Self {
299        self.params.vocab_only = vocab_only;
300        self
301    }
302
303    /// sets `use_mmap`
304    ///
305    /// # Examples
306    ///
307    /// ```rust
308    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
309    /// let params = LlamaModelParams::default().with_use_mmap(false);
310    /// assert!(!params.use_mmap());
311    /// ```
312    #[must_use]
313    pub const fn with_use_mmap(mut self, use_mmap: bool) -> Self {
314        self.params.use_mmap = use_mmap;
315        self
316    }
317
318    /// Get `no_alloc`
319    #[must_use]
320    pub const fn no_alloc(&self) -> bool {
321        self.params.no_alloc
322    }
323
324    /// Set `no_alloc`. When enabled, tensor data is not allocated.
325    /// Incompatible with `use_mmap`, so enabling this also disables mmap.
326    ///
327    /// # Examples
328    ///
329    /// ```rust
330    /// # use llama_cpp_bindings::model::params::LlamaModelParams;
331    /// let params = LlamaModelParams::default().with_no_alloc(true);
332    /// assert!(params.no_alloc());
333    /// assert!(!params.use_mmap());
334    /// ```
335    #[must_use]
336    pub const fn with_no_alloc(mut self, no_alloc: bool) -> Self {
337        self.params.no_alloc = no_alloc;
338        if no_alloc {
339            self.params.use_mmap = false;
340        }
341        self
342    }
343
344    /// sets `use_mlock`
345    #[must_use]
346    pub const fn with_use_mlock(mut self, use_mlock: bool) -> Self {
347        self.params.use_mlock = use_mlock;
348        self
349    }
350
351    /// sets `split_mode`
352    #[must_use]
353    pub fn with_split_mode(mut self, split_mode: LlamaSplitMode) -> Self {
354        self.params.split_mode = split_mode.into();
355        self
356    }
357
358    /// sets `devices`
359    ///
360    /// The devices are specified as indices that correspond to the ggml backend device indices.
361    ///
362    /// The maximum number of devices is 16.
363    ///
364    /// You don't need to specify CPU or ACCEL devices.
365    ///
366    /// # Errors
367    /// Returns `LlamaCppError::BackendDeviceNotFound` if any device index is invalid.
368    pub fn with_devices(mut self, devices: &[usize]) -> Result<Self, LlamaCppError> {
369        for dev in self.devices.iter_mut() {
370            *dev = std::ptr::null_mut();
371        }
372        let max_devices = crate::max_devices().min(LLAMA_CPP_MAX_DEVICES);
373        if devices.len() > max_devices {
374            return Err(LlamaCppError::MaxDevicesExceeded(max_devices));
375        }
376        for (i, &dev) in devices.iter().enumerate() {
377            if dev >= unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
378                return Err(LlamaCppError::BackendDeviceNotFound(dev));
379            }
380            let backend_dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(dev) };
381            self.devices[i] = backend_dev;
382        }
383        self.params.devices = self.devices.as_mut_ptr();
384
385        Ok(self)
386    }
387}
388
389impl LlamaModelParams {
390    /// Automatically fit model and context parameters to available device memory.
391    ///
392    /// Wraps llama.cpp's `common_fit_params`. Given a model path, available per-device memory
393    /// margins, and a minimum context size, it fills in `n_gpu_layers`, `tensor_split`, and
394    /// `tensor_buft_overrides` to fit the model to the available VRAM, and may reduce
395    /// `cparams.n_ctx` if needed. On success the model and context params are updated in place.
396    ///
397    /// # Requirements
398    ///
399    /// Per the C API docstring, only parameters that still hold their default value are
400    /// modified. In practice this means:
401    /// - `n_gpu_layers` must be at its default (`-1`). Do not call
402    ///   [`with_n_gpu_layers`](Self::with_n_gpu_layers) before this.
403    /// - No `tensor_buft_overrides` may be set. Do not call
404    ///   [`add_cpu_buft_override`](Self::add_cpu_buft_override) or
405    ///   [`add_cpu_moe_override`](Self::add_cpu_moe_override) before this.
406    /// - `cparams.n_ctx` is only auto-selected if it is `0`; otherwise it is left alone.
407    ///
408    /// # Arguments
409    ///
410    /// - `model_path` — path to the GGUF model file as a C string.
411    /// - `context_params` — context parameters; `n_ctx` may be modified (see above).
412    /// - `margins` — memory margin per device in bytes. Must have at least
413    ///   `crate::max_devices()` elements.
414    /// - `n_ctx_min` — minimum context size to preserve when reducing memory usage.
415    /// - `log_level` — minimum log level for fitting output; lower levels go to the debug log.
416    ///
417    /// # Thread safety
418    ///
419    /// This function is **not** thread safe: the underlying C call mutates the global
420    /// llama logger state.
421    ///
422    /// # Errors
423    ///
424    /// Returns one of the [`FitError`] variants matching the vendored wrapper's status code.
425    pub fn fit_params(
426        mut self: Pin<&mut Self>,
427        model_path: &CStr,
428        context_params: &mut LlamaContextParams,
429        margins: &mut [usize],
430        n_ctx_min: u32,
431        log_level: llama_cpp_bindings_sys::ggml_log_level,
432    ) -> Result<FitResult, FitError> {
433        let max_devices = unsafe { llama_cpp_bindings_sys::llama_max_devices() };
434        let max_buft = unsafe { llama_cpp_bindings_sys::llama_max_tensor_buft_overrides() };
435
436        self.tensor_split.clear();
437        self.tensor_split.resize(max_devices, 0.0);
438
439        self.buft_overrides.clear();
440        self.buft_overrides.resize(
441            max_buft + 1,
442            llama_cpp_bindings_sys::llama_model_tensor_buft_override {
443                pattern: null(),
444                buft: std::ptr::null_mut(),
445            },
446        );
447
448        self.params.tensor_split = null::<f32>();
449        self.params.tensor_buft_overrides = null();
450
451        let mut out_unrecognized_status_code: i32 = 0;
452        let mut out_error: *mut c_char = std::ptr::null_mut();
453
454        let status = unsafe {
455            llama_cpp_bindings_sys::llama_rs_fit_params(
456                model_path.as_ptr(),
457                &raw mut self.params,
458                &raw mut context_params.context_params,
459                self.tensor_split.as_mut_ptr(),
460                self.buft_overrides.as_mut_ptr(),
461                margins.as_mut_ptr(),
462                n_ctx_min,
463                log_level,
464                &raw mut out_unrecognized_status_code,
465                &raw mut out_error,
466            )
467        };
468
469        match status {
470            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_OK => {}
471            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE => {
472                return Err(FitError::NoFittingMemoryLayout);
473            }
474            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR => {
475                return Err(FitError::Aborted);
476            }
477            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE => {
478                return Err(FitError::UnknownStatus {
479                    code: out_unrecognized_status_code,
480                });
481            }
482            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED => {
483                return Err(FitError::NotEnoughMemory);
484            }
485            llama_cpp_bindings_sys::LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION => {
486                let message =
487                    unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
488                return Err(FitError::Reported { message });
489            }
490            other => unreachable!("llama_rs_fit_params returned unrecognized wrapper status: {other}"),
491        }
492
493        self.params.tensor_split = self.tensor_split.as_ptr();
494        self.params.tensor_buft_overrides = self.buft_overrides.as_ptr();
495
496        Ok(FitResult {
497            n_ctx: context_params.context_params.n_ctx,
498        })
499    }
500}
501
502/// Default parameters for `LlamaModel`. (as defined in llama.cpp by `llama_model_default_params`)
503/// ```
504/// # use llama_cpp_bindings::model::params::LlamaModelParams;
505/// use llama_cpp_bindings::model::split_mode::LlamaSplitMode;
506/// let params = LlamaModelParams::default();
507/// assert_eq!(params.n_gpu_layers(), -1, "n_gpu_layers should be -1");
508/// assert_eq!(params.main_gpu(), 0, "main_gpu should be 0");
509/// assert_eq!(params.vocab_only(), false, "vocab_only should be false");
510/// assert_eq!(params.use_mmap(), true, "use_mmap should be true");
511/// assert_eq!(params.use_mlock(), false, "use_mlock should be false");
512/// assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer), "split_mode should be LAYER");
513/// assert_eq!(params.devices().len(), 0, "devices should be empty");
514/// ```
515impl Default for LlamaModelParams {
516    fn default() -> Self {
517        let default_params = unsafe { llama_cpp_bindings_sys::llama_model_default_params() };
518        Self {
519            params: default_params,
520            kv_overrides: vec![llama_cpp_bindings_sys::llama_model_kv_override {
521                key: [0; 128],
522                tag: 0,
523                __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
524                    val_i64: 0,
525                },
526            }],
527            buft_overrides: vec![llama_cpp_bindings_sys::llama_model_tensor_buft_override {
528                pattern: null(),
529                buft: std::ptr::null_mut(),
530            }],
531            devices: Box::pin([std::ptr::null_mut(); 16]),
532            tensor_split: Vec::new(),
533        }
534    }
535}
536
#[cfg(test)]
mod tests {
    use crate::error::ModelParamsError;
    use crate::model::params::param_override_value::ParamOverrideValue;
    use crate::model::split_mode::LlamaSplitMode;

    use super::LlamaModelParams;

    /// The getters of default parameters report the expected values.
    #[test]
    fn default_params_have_expected_values() {
        let defaults = LlamaModelParams::default();

        assert_eq!(defaults.n_gpu_layers(), -1);
        assert_eq!(defaults.main_gpu(), 0);
        assert!(!defaults.vocab_only());
        assert!(defaults.use_mmap());
        assert!(!defaults.use_mlock());
        assert_eq!(defaults.split_mode(), Ok(LlamaSplitMode::Layer));
        assert!(defaults.devices().is_empty());
    }

    /// A layer count beyond `i32::MAX` is clamped instead of wrapping.
    #[test]
    fn n_gpu_layers_overflow_clamps_to_max() {
        let clamped = LlamaModelParams::default()
            .with_n_gpu_layers(u32::MAX)
            .n_gpu_layers();

        assert_eq!(clamped, i32::MAX);
    }

    /// An in-range layer count is stored unchanged.
    #[test]
    fn with_n_gpu_layers_sets_value() {
        let stored = LlamaModelParams::default()
            .with_n_gpu_layers(32)
            .n_gpu_layers();

        assert_eq!(stored, 32);
    }

    /// The main GPU index is stored unchanged.
    #[test]
    fn with_main_gpu_sets_value() {
        let stored = LlamaModelParams::default().with_main_gpu(2).main_gpu();

        assert_eq!(stored, 2);
    }

    /// Split mode `None` round-trips through the raw parameter.
    #[test]
    fn with_split_mode_none() {
        let mode = LlamaModelParams::default()
            .with_split_mode(LlamaSplitMode::None)
            .split_mode();

        assert_eq!(mode, Ok(LlamaSplitMode::None));
    }

    /// Split mode `Row` round-trips through the raw parameter.
    #[test]
    fn with_split_mode_row() {
        let mode = LlamaModelParams::default()
            .with_split_mode(LlamaSplitMode::Row)
            .split_mode();

        assert_eq!(mode, Ok(LlamaSplitMode::Row));
    }

    /// `vocab_only` can be switched on.
    #[test]
    fn with_vocab_only_enables() {
        assert!(
            LlamaModelParams::default()
                .with_vocab_only(true)
                .vocab_only()
        );
    }

    /// `vocab_only` can be switched off.
    #[test]
    fn with_vocab_only_disables() {
        assert!(
            !LlamaModelParams::default()
                .with_vocab_only(false)
                .vocab_only()
        );
    }

    /// `use_mmap` can be switched on.
    #[test]
    fn with_use_mmap_enables() {
        assert!(LlamaModelParams::default().with_use_mmap(true).use_mmap());
    }

    /// `use_mmap` can be switched off.
    #[test]
    fn with_use_mmap_disables() {
        assert!(!LlamaModelParams::default().with_use_mmap(false).use_mmap());
    }

    /// `no_alloc` can be switched on.
    #[test]
    fn with_no_alloc_enables() {
        assert!(LlamaModelParams::default().with_no_alloc(true).no_alloc());
    }

    /// `no_alloc` can be switched off.
    #[test]
    fn with_no_alloc_disables() {
        assert!(!LlamaModelParams::default().with_no_alloc(false).no_alloc());
    }

    /// Enabling `no_alloc` overrides an earlier request for mmap.
    #[test]
    fn with_no_alloc_true_disables_mmap() {
        let configured = LlamaModelParams::default()
            .with_use_mmap(true)
            .with_no_alloc(true);

        assert!(configured.no_alloc());
        assert!(!configured.use_mmap());
    }

    /// `no_alloc` is off unless requested.
    #[test]
    fn default_no_alloc_is_false() {
        assert!(!LlamaModelParams::default().no_alloc());
    }

    /// `use_mlock` can be switched on.
    #[test]
    fn with_use_mlock_enables() {
        assert!(LlamaModelParams::default().with_use_mlock(true).use_mlock());
    }

    /// `use_mlock` can be switched off.
    #[test]
    fn with_use_mlock_disables() {
        assert!(
            !LlamaModelParams::default()
                .with_use_mlock(false)
                .use_mlock()
        );
    }

    /// The `Debug` output names every summarised setting.
    #[test]
    fn debug_format_contains_field_names() {
        let rendered = format!("{:?}", LlamaModelParams::default());

        assert!(rendered.contains("n_gpu_layers"));
        assert!(rendered.contains("main_gpu"));
        assert!(rendered.contains("vocab_only"));
        assert!(rendered.contains("use_mmap"));
        assert!(rendered.contains("use_mlock"));
        assert!(rendered.contains("split_mode"));
    }

    /// Chained builder calls do not clobber each other's settings.
    #[test]
    fn builder_chaining_preserves_all_values() {
        let configured = LlamaModelParams::default()
            .with_n_gpu_layers(10)
            .with_main_gpu(1)
            .with_split_mode(LlamaSplitMode::Row)
            .with_vocab_only(true)
            .with_use_mlock(true);

        assert_eq!(configured.n_gpu_layers(), 10);
        assert_eq!(configured.main_gpu(), 1);
        assert_eq!(configured.split_mode(), Ok(LlamaSplitMode::Row));
        assert!(configured.vocab_only());
        assert!(configured.use_mlock());
    }

    /// An empty device list is accepted and reads back as empty.
    #[test]
    fn with_devices_empty_list_succeeds() {
        let outcome = LlamaModelParams::default().with_devices(&[]);

        assert!(outcome.is_ok());
        assert!(outcome.unwrap().devices().is_empty());
    }

    /// An out-of-range device index is reported together with the offending index.
    #[test]
    fn with_devices_invalid_index_returns_error() {
        let error = LlamaModelParams::default()
            .with_devices(&[999_999])
            .unwrap_err();

        assert!(matches!(
            error,
            crate::LlamaCppError::BackendDeviceNotFound(999_999)
        ));
    }

    /// A pattern can be registered on fresh parameters.
    #[test]
    fn add_cpu_buft_override_succeeds() {
        let mut pinned = std::pin::pin!(LlamaModelParams::default());

        assert!(
            pinned
                .as_mut()
                .add_cpu_buft_override(c"test_pattern")
                .is_ok()
        );
    }

    /// A second pattern is rejected because the first slot is already occupied.
    #[test]
    fn add_cpu_buft_override_twice_fails_with_slot_not_empty() {
        let mut pinned = std::pin::pin!(LlamaModelParams::default());
        pinned
            .as_mut()
            .add_cpu_buft_override(c"first_pattern")
            .unwrap();

        let second_error = pinned
            .as_mut()
            .add_cpu_buft_override(c"second_pattern")
            .unwrap_err();

        assert_eq!(second_error, ModelParamsError::SlotNotEmpty);
    }

    /// The mixture-of-experts shortcut succeeds on fresh parameters.
    #[test]
    fn add_cpu_moe_override_succeeds() {
        let mut pinned = std::pin::pin!(LlamaModelParams::default());

        assert!(pinned.as_mut().add_cpu_moe_override().is_ok());
    }

    /// A second key-value override is rejected because the first slot is already occupied.
    #[test]
    fn append_kv_override_twice_fails_with_slot_not_empty() {
        use std::ffi::CString;

        let first_key = CString::new("first_key").unwrap();
        let second_key = CString::new("second_key").unwrap();
        let mut pinned = std::pin::pin!(LlamaModelParams::default());

        pinned
            .as_mut()
            .append_kv_override(&first_key, ParamOverrideValue::Int(1))
            .unwrap();
        let second_error = pinned
            .as_mut()
            .append_kv_override(&second_key, ParamOverrideValue::Int(2))
            .unwrap_err();

        assert_eq!(second_error, ModelParamsError::SlotNotEmpty);
    }

    /// Seventeen devices exceed the supported maximum.
    #[test]
    fn with_devices_too_many_returns_max_exceeded() {
        let oversized_list: Vec<usize> = (0..17).collect();

        let message = LlamaModelParams::default()
            .with_devices(&oversized_list)
            .unwrap_err()
            .to_string();

        assert!(message.contains("Max devices exceeded"));
    }

    /// Selecting backend device 0 reads back as index 0.
    #[test]
    fn with_devices_sets_devices_when_available() {
        #[cfg(feature = "dynamic-backends")]
        crate::load_backends::load_backends().unwrap();

        let available = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() };
        assert!(available > 0, "Test requires at least one backend device");

        let configured = LlamaModelParams::default().with_devices(&[0]).unwrap();
        let selected = configured.devices();

        assert_eq!(selected.len(), 1);
        assert_eq!(selected[0], 0);
    }

    /// The largest possible index is rejected with a backend-device error message.
    #[test]
    fn with_devices_invalid_index_returns_not_found() {
        let message = LlamaModelParams::default()
            .with_devices(&[usize::MAX])
            .unwrap_err()
            .to_string();

        assert!(message.contains("Backend device"));
    }

    /// A key byte above the `c_char` range is reported as an invalid character.
    #[test]
    #[cfg(not(target_os = "windows"))]
    fn append_kv_override_with_high_byte_returns_invalid_character_error() {
        let high_byte_key = c"\xff";
        let mut pinned = std::pin::pin!(LlamaModelParams::default());

        let outcome = pinned
            .as_mut()
            .append_kv_override(high_byte_key, ParamOverrideValue::Int(1));

        assert!(matches!(
            outcome,
            Err(ModelParamsError::InvalidCharacterInKey { byte: 0xff, .. })
        ));
    }

    /// A pattern byte above the `c_char` range is reported as an invalid character.
    #[test]
    #[cfg(not(target_os = "windows"))]
    fn add_cpu_buft_override_with_high_byte_returns_invalid_character_error() {
        let high_byte_pattern = c"\xff";
        let mut pinned = std::pin::pin!(LlamaModelParams::default());

        let outcome = pinned.as_mut().add_cpu_buft_override(high_byte_pattern);

        assert!(matches!(
            outcome,
            Err(ModelParamsError::InvalidCharacterInKey { byte: 0xff, .. })
        ));
    }

    /// Fitting against a model file that does not exist fails with an error.
    #[test]
    #[serial_test::serial]
    fn fit_params_invalid_model_path_returns_error() {
        use crate::context::params::LlamaContextParams;
        use crate::error::FitError;
        use crate::llama_backend::LlamaBackend;

        let _backend = LlamaBackend::init();
        let mut pinned = std::pin::pin!(LlamaModelParams::default());
        let mut context_params = LlamaContextParams::default();
        let mut margins = vec![0usize; crate::max_devices()];

        let result = pinned.as_mut().fit_params(
            c"/nonexistent/path/to/model.gguf",
            &mut context_params,
            &mut margins,
            512,
            llama_cpp_bindings_sys::GGML_LOG_LEVEL_NONE,
        );

        assert!(
            matches!(result, Err(FitError::Aborted | FitError::Reported { .. })),
            "expected Aborted or Reported, got {result:?}"
        );
    }
}