llama_cpp_bindings/model/
params.rs1use crate::LlamaCppError;
4use crate::error::ModelParamsError;
5use crate::model::params::kv_overrides::KvOverrides;
6use crate::model::split_mode::{LlamaSplitMode, LlamaSplitModeParseError};
7use std::ffi::{CStr, c_char};
8use std::fmt::{Debug, Formatter};
9use std::pin::Pin;
10use std::ptr::null;
11
/// Typed accessors over the key/value metadata override list.
pub mod kv_overrides;
/// Value payloads (int/float/bool/string) for a single KV override.
pub mod param_override_value;

/// Maximum number of backend devices this wrapper can hand to llama.cpp.
///
/// Fixed capacity of the pinned device array published via
/// `llama_model_params::devices`.
pub const LLAMA_CPP_MAX_DEVICES: usize = 16;
20
/// Safe builder around llama.cpp's raw `llama_model_params`.
///
/// The raw `params` struct ends up holding *borrowed* pointers into the
/// sibling fields below (`kv_overrides`, `buft_overrides`, `devices`), so
/// the mutating override methods take `Pin<&mut Self>` to keep those
/// targets from moving once a pointer has been published.
pub struct LlamaModelParams {
    /// The raw C parameter struct passed to llama.cpp when loading a model.
    pub params: llama_cpp_bindings_sys::llama_model_params,
    // Backing storage for `params.kv_overrides`; the last element is kept
    // zeroed as the C-side list terminator / free slot.
    kv_overrides: Vec<llama_cpp_bindings_sys::llama_model_kv_override>,
    // Backing storage for `params.tensor_buft_overrides`; terminated by an
    // entry whose `pattern` is null.
    buft_overrides: Vec<llama_cpp_bindings_sys::llama_model_tensor_buft_override>,
    // Pinned, null-terminated device handle list referenced by
    // `params.devices` after `with_devices` is called.
    devices: Pin<Box<[llama_cpp_bindings_sys::ggml_backend_dev_t; LLAMA_CPP_MAX_DEVICES]>>,
}
29
30impl Debug for LlamaModelParams {
31 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
32 f.debug_struct("LlamaModelParams")
33 .field("n_gpu_layers", &self.params.n_gpu_layers)
34 .field("main_gpu", &self.params.main_gpu)
35 .field("vocab_only", &self.params.vocab_only)
36 .field("use_mmap", &self.params.use_mmap)
37 .field("use_mlock", &self.params.use_mlock)
38 .field("split_mode", &self.split_mode())
39 .field("devices", &self.devices)
40 .field("kv_overrides", &"vec of kv_overrides")
41 .finish_non_exhaustive()
42 }
43}
44
impl LlamaModelParams {
    /// Returns a borrowed view over the key/value metadata overrides
    /// currently stored in these parameters.
    #[must_use]
    pub const fn kv_overrides(&self) -> KvOverrides<'_> {
        KvOverrides::new(self)
    }

    /// Writes a key/value metadata override into the first slot and
    /// publishes the list to the raw C params.
    ///
    /// Only slot 0 is ever written (`get_mut(0)`), so a second call on the
    /// same instance fails with [`ModelParamsError::SlotNotEmpty`]; the
    /// zeroed entry pushed at the end serves as the C-side list terminator.
    /// `self` is pinned because `params.kv_overrides` points into the
    /// vector owned by this struct.
    ///
    /// # Errors
    ///
    /// * [`ModelParamsError::NoAvailableSlot`] — the override vector is empty.
    /// * [`ModelParamsError::SlotNotEmpty`] — slot 0 already holds a key.
    /// * [`ModelParamsError::InvalidCharacterInKey`] — a byte of `key` does
    ///   not fit in the platform's `c_char`.
    ///
    /// # Panics
    ///
    /// Panics with an out-of-bounds index if `key` (including its NUL
    /// terminator) exceeds the fixed 128-byte key buffer.
    pub fn append_kv_override(
        mut self: Pin<&mut Self>,
        key: &CStr,
        value: param_override_value::ParamOverrideValue,
    ) -> Result<(), ModelParamsError> {
        let kv_override = self
            .kv_overrides
            .get_mut(0)
            .ok_or(ModelParamsError::NoAvailableSlot)?;

        // A zero first byte marks the slot as free.
        if kv_override.key[0] != 0 {
            return Err(ModelParamsError::SlotNotEmpty);
        }

        // Copy the key including its NUL terminator. On targets where
        // `c_char` is signed, bytes >= 0x80 fail the conversion.
        for (i, &byte) in key.to_bytes_with_nul().iter().enumerate() {
            kv_override.key[i] = c_char::try_from(byte).map_err(|convert_error| {
                ModelParamsError::InvalidCharacterInKey {
                    byte,
                    reason: convert_error.to_string(),
                }
            })?;
        }

        kv_override.tag = value.tag();
        kv_override.__bindgen_anon_1 = value.value();

        // Clear the raw pointer before pushing: the push below may
        // reallocate the vector and invalidate the old buffer address.
        self.params.kv_overrides = null();

        // Push a fresh zeroed sentinel so the list stays terminated.
        self.kv_overrides
            .push(llama_cpp_bindings_sys::llama_model_kv_override {
                key: [0; 128],
                tag: 0,
                __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
                    val_i64: 0,
                },
            });

        // Republish the (possibly relocated) buffer to the C side.
        self.params.kv_overrides = self.kv_overrides.as_ptr();

        Ok(())
    }
}
132
impl LlamaModelParams {
    /// Convenience override that keeps mixture-of-experts FFN tensors
    /// (names matching `\.ffn_(up|down|gate)_(ch|)exps`) on the CPU buffer
    /// type, mirroring llama.cpp's `--cpu-moe` behavior.
    ///
    /// # Errors
    ///
    /// Propagates the errors of [`Self::add_cpu_buft_override`].
    pub fn add_cpu_moe_override(self: Pin<&mut Self>) -> Result<(), ModelParamsError> {
        self.add_cpu_buft_override(c"\\.ffn_(up|down|gate)_(ch|)exps")
    }

    /// Forces tensors whose names match the pattern `key` onto the CPU
    /// buffer type.
    ///
    /// Only the first slot is ever written, so a second call fails with
    /// [`ModelParamsError::SlotNotEmpty`]. `self` is pinned because
    /// `params.tensor_buft_overrides` points into the vector owned by this
    /// struct.
    ///
    /// NOTE(review): the raw `key.as_ptr()` is stored in the override
    /// table, but the `&CStr` lifetime does not force `key` to outlive the
    /// model load — a short-lived `CString` would leave a dangling
    /// pattern pointer. Consider requiring `&'static CStr`; confirm with
    /// callers.
    ///
    /// # Errors
    ///
    /// * [`ModelParamsError::NoAvailableSlot`] — the override vector is empty.
    /// * [`ModelParamsError::SlotNotEmpty`] — the slot already holds a pattern.
    /// * [`ModelParamsError::InvalidCharacterInKey`] — a byte of `key` does
    ///   not fit in the platform's `c_char`.
    pub fn add_cpu_buft_override(
        mut self: Pin<&mut Self>,
        key: &CStr,
    ) -> Result<(), ModelParamsError> {
        let buft_override = self
            .buft_overrides
            .get_mut(0)
            .ok_or(ModelParamsError::NoAvailableSlot)?;

        // A null pattern marks the slot as free / the list terminator.
        if !buft_override.pattern.is_null() {
            return Err(ModelParamsError::SlotNotEmpty);
        }

        // Validate every byte converts to `c_char` before storing the
        // pointer; nothing is copied here.
        for &byte in key.to_bytes_with_nul() {
            c_char::try_from(byte).map_err(|convert_error| {
                ModelParamsError::InvalidCharacterInKey {
                    byte,
                    reason: convert_error.to_string(),
                }
            })?;
        }

        buft_override.pattern = key.as_ptr();
        buft_override.buft = unsafe { llama_cpp_bindings_sys::ggml_backend_cpu_buffer_type() };

        // Clear the raw pointer before pushing: the push below may
        // reallocate the vector and invalidate the old buffer address.
        self.params.tensor_buft_overrides = null();

        // Push a fresh null-pattern sentinel so the list stays terminated.
        self.buft_overrides
            .push(llama_cpp_bindings_sys::llama_model_tensor_buft_override {
                pattern: null(),
                buft: std::ptr::null_mut(),
            });

        // Republish the (possibly relocated) buffer to the C side.
        self.params.tensor_buft_overrides = self.buft_overrides.as_ptr();

        Ok(())
    }
}
190
191impl LlamaModelParams {
192 #[must_use]
194 pub const fn n_gpu_layers(&self) -> i32 {
195 self.params.n_gpu_layers
196 }
197
198 #[must_use]
200 pub const fn main_gpu(&self) -> i32 {
201 self.params.main_gpu
202 }
203
204 #[must_use]
206 pub const fn vocab_only(&self) -> bool {
207 self.params.vocab_only
208 }
209
210 #[must_use]
212 pub const fn use_mmap(&self) -> bool {
213 self.params.use_mmap
214 }
215
216 #[must_use]
218 pub const fn use_mlock(&self) -> bool {
219 self.params.use_mlock
220 }
221
222 pub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError> {
227 LlamaSplitMode::try_from(self.params.split_mode)
228 }
229
230 #[must_use]
232 pub fn devices(&self) -> Vec<usize> {
233 let mut backend_devices = Vec::new();
234 for i in 0..unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
235 let dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(i) };
236 backend_devices.push(dev);
237 }
238 let mut devices = Vec::new();
239 for &dev in self.devices.iter() {
240 if dev.is_null() {
241 break;
242 }
243 let matched_index = backend_devices
244 .iter()
245 .enumerate()
246 .find(|&(_i, &d)| d == dev)
247 .map(|(index, _)| index);
248
249 if let Some(index) = matched_index {
250 devices.push(index);
251 }
252 }
253 devices
254 }
255
256 #[must_use]
264 pub fn with_n_gpu_layers(mut self, n_gpu_layers: u32) -> Self {
265 let n_gpu_layers = i32::try_from(n_gpu_layers).unwrap_or(i32::MAX);
268 self.params.n_gpu_layers = n_gpu_layers;
269 self
270 }
271
272 #[must_use]
276 pub const fn with_main_gpu(mut self, main_gpu: i32) -> Self {
277 self.params.main_gpu = main_gpu;
278 self
279 }
280
281 #[must_use]
283 pub const fn with_vocab_only(mut self, vocab_only: bool) -> Self {
284 self.params.vocab_only = vocab_only;
285 self
286 }
287
288 #[must_use]
298 pub const fn with_use_mmap(mut self, use_mmap: bool) -> Self {
299 self.params.use_mmap = use_mmap;
300 self
301 }
302
303 #[must_use]
305 pub const fn no_alloc(&self) -> bool {
306 self.params.no_alloc
307 }
308
309 #[must_use]
321 pub const fn with_no_alloc(mut self, no_alloc: bool) -> Self {
322 self.params.no_alloc = no_alloc;
323 if no_alloc {
324 self.params.use_mmap = false;
325 }
326 self
327 }
328
329 #[must_use]
331 pub const fn with_use_mlock(mut self, use_mlock: bool) -> Self {
332 self.params.use_mlock = use_mlock;
333 self
334 }
335
336 #[must_use]
338 pub fn with_split_mode(mut self, split_mode: LlamaSplitMode) -> Self {
339 self.params.split_mode = split_mode.into();
340 self
341 }
342
343 pub fn with_devices(mut self, devices: &[usize]) -> Result<Self, LlamaCppError> {
354 for dev in self.devices.iter_mut() {
355 *dev = std::ptr::null_mut();
356 }
357 let max_devices = crate::max_devices().min(LLAMA_CPP_MAX_DEVICES);
359 if devices.len() > max_devices {
360 return Err(LlamaCppError::MaxDevicesExceeded(max_devices));
361 }
362 for (i, &dev) in devices.iter().enumerate() {
363 if dev >= unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
364 return Err(LlamaCppError::BackendDeviceNotFound(dev));
365 }
366 let backend_dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(dev) };
367 self.devices[i] = backend_dev;
368 }
369 self.params.devices = self.devices.as_mut_ptr();
370
371 Ok(self)
372 }
373}
374
375impl Default for LlamaModelParams {
389 fn default() -> Self {
390 let default_params = unsafe { llama_cpp_bindings_sys::llama_model_default_params() };
391 Self {
392 params: default_params,
393 kv_overrides: vec![llama_cpp_bindings_sys::llama_model_kv_override {
395 key: [0; 128],
396 tag: 0,
397 __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
398 val_i64: 0,
399 },
400 }],
401 buft_overrides: vec![llama_cpp_bindings_sys::llama_model_tensor_buft_override {
402 pattern: null(),
403 buft: std::ptr::null_mut(),
404 }],
405 devices: Box::pin([std::ptr::null_mut(); 16]),
406 }
407 }
408}
409
#[cfg(test)]
mod tests {
    //! Unit tests for `LlamaModelParams`.
    //!
    //! Several tests call into the real ggml backend registry, so their
    //! outcome depends on the devices available in the build environment.

    use crate::model::split_mode::LlamaSplitMode;

    use super::LlamaModelParams;

    #[test]
    fn default_params_have_expected_values() {
        let params = LlamaModelParams::default();

        // Values mirror llama.cpp's `llama_model_default_params`.
        assert_eq!(params.n_gpu_layers(), -1);
        assert_eq!(params.main_gpu(), 0);
        assert!(!params.vocab_only());
        assert!(params.use_mmap());
        assert!(!params.use_mlock());
        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer));
        assert!(params.devices().is_empty());
    }

    #[test]
    fn n_gpu_layers_overflow_clamps_to_max() {
        let params = LlamaModelParams::default().with_n_gpu_layers(u32::MAX);

        assert_eq!(params.n_gpu_layers(), i32::MAX);
    }

    #[test]
    fn with_n_gpu_layers_sets_value() {
        let params = LlamaModelParams::default().with_n_gpu_layers(32);

        assert_eq!(params.n_gpu_layers(), 32);
    }

    #[test]
    fn with_main_gpu_sets_value() {
        let params = LlamaModelParams::default().with_main_gpu(2);

        assert_eq!(params.main_gpu(), 2);
    }

    #[test]
    fn with_split_mode_none() {
        let params = LlamaModelParams::default().with_split_mode(LlamaSplitMode::None);

        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::None));
    }

    #[test]
    fn with_split_mode_row() {
        let params = LlamaModelParams::default().with_split_mode(LlamaSplitMode::Row);

        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Row));
    }

    #[test]
    fn with_vocab_only_enables() {
        let params = LlamaModelParams::default().with_vocab_only(true);

        assert!(params.vocab_only());
    }

    #[test]
    fn with_vocab_only_disables() {
        let params = LlamaModelParams::default().with_vocab_only(false);

        assert!(!params.vocab_only());
    }

    #[test]
    fn with_use_mmap_enables() {
        let params = LlamaModelParams::default().with_use_mmap(true);

        assert!(params.use_mmap());
    }

    #[test]
    fn with_use_mmap_disables() {
        let params = LlamaModelParams::default().with_use_mmap(false);

        assert!(!params.use_mmap());
    }

    #[test]
    fn with_no_alloc_enables() {
        let params = LlamaModelParams::default().with_no_alloc(true);

        assert!(params.no_alloc());
    }

    #[test]
    fn with_no_alloc_disables() {
        let params = LlamaModelParams::default().with_no_alloc(false);

        assert!(!params.no_alloc());
    }

    // `no_alloc` and `use_mmap` are mutually exclusive in the builder.
    #[test]
    fn with_no_alloc_true_disables_mmap() {
        let params = LlamaModelParams::default()
            .with_use_mmap(true)
            .with_no_alloc(true);

        assert!(params.no_alloc());
        assert!(!params.use_mmap());
    }

    #[test]
    fn default_no_alloc_is_false() {
        let params = LlamaModelParams::default();

        assert!(!params.no_alloc());
    }

    #[test]
    fn with_use_mlock_enables() {
        let params = LlamaModelParams::default().with_use_mlock(true);

        assert!(params.use_mlock());
    }

    #[test]
    fn with_use_mlock_disables() {
        let params = LlamaModelParams::default().with_use_mlock(false);

        assert!(!params.use_mlock());
    }

    #[test]
    fn debug_format_contains_field_names() {
        let params = LlamaModelParams::default();
        let debug_output = format!("{params:?}");

        assert!(debug_output.contains("n_gpu_layers"));
        assert!(debug_output.contains("main_gpu"));
        assert!(debug_output.contains("vocab_only"));
        assert!(debug_output.contains("use_mmap"));
        assert!(debug_output.contains("use_mlock"));
        assert!(debug_output.contains("split_mode"));
    }

    #[test]
    fn builder_chaining_preserves_all_values() {
        let params = LlamaModelParams::default()
            .with_n_gpu_layers(10)
            .with_main_gpu(1)
            .with_split_mode(LlamaSplitMode::Row)
            .with_vocab_only(true)
            .with_use_mlock(true);

        assert_eq!(params.n_gpu_layers(), 10);
        assert_eq!(params.main_gpu(), 1);
        assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Row));
        assert!(params.vocab_only());
        assert!(params.use_mlock());
    }

    #[test]
    fn with_devices_empty_list_succeeds() {
        let params = LlamaModelParams::default().with_devices(&[]);

        assert!(params.is_ok());
        assert!(params.unwrap().devices().is_empty());
    }

    #[test]
    fn with_devices_invalid_index_returns_error() {
        let result = LlamaModelParams::default().with_devices(&[999_999]);

        assert_eq!(
            result.unwrap_err(),
            crate::LlamaCppError::BackendDeviceNotFound(999_999)
        );
    }

    #[test]
    fn add_cpu_buft_override_succeeds() {
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params.as_mut().add_cpu_buft_override(c"test_pattern");

        assert!(result.is_ok());
    }

    // Only the first override slot is usable; a second call must fail.
    #[test]
    fn add_cpu_buft_override_twice_fails_with_slot_not_empty() {
        let mut params = std::pin::pin!(LlamaModelParams::default());
        params
            .as_mut()
            .add_cpu_buft_override(c"first_pattern")
            .unwrap();
        let result = params.as_mut().add_cpu_buft_override(c"second_pattern");

        assert_eq!(
            result.unwrap_err(),
            crate::error::ModelParamsError::SlotNotEmpty
        );
    }

    #[test]
    fn add_cpu_moe_override_succeeds() {
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params.as_mut().add_cpu_moe_override();

        assert!(result.is_ok());
    }

    // Only slot 0 is ever written, so the second append must fail.
    #[test]
    fn append_kv_override_twice_fails_with_slot_not_empty() {
        use crate::model::params::param_override_value::ParamOverrideValue;
        use std::ffi::CString;

        let mut params = std::pin::pin!(LlamaModelParams::default());
        let key = CString::new("first_key").unwrap();
        params
            .as_mut()
            .append_kv_override(&key, ParamOverrideValue::Int(1))
            .unwrap();

        let key2 = CString::new("second_key").unwrap();
        let result = params
            .as_mut()
            .append_kv_override(&key2, ParamOverrideValue::Int(2));

        assert_eq!(
            result.unwrap_err(),
            crate::error::ModelParamsError::SlotNotEmpty
        );
    }

    #[test]
    fn with_devices_too_many_returns_max_exceeded() {
        // 17 > LLAMA_CPP_MAX_DEVICES (16).
        let too_many: Vec<usize> = (0..17).collect();
        let result = LlamaModelParams::default().with_devices(&too_many);

        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("Max devices exceeded")
        );
    }

    // Environment-dependent: requires a build with at least one backend.
    #[test]
    fn with_devices_sets_devices_when_available() {
        let dev_count = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() };
        assert!(dev_count > 0, "Test requires at least one backend device");

        let params = LlamaModelParams::default().with_devices(&[0]).unwrap();

        assert_eq!(params.devices().len(), 1);
        assert_eq!(params.devices()[0], 0);
    }

    #[test]
    fn with_devices_invalid_index_returns_not_found() {
        let invalid_index = usize::MAX;
        let result = LlamaModelParams::default().with_devices(&[invalid_index]);

        assert!(result.unwrap_err().to_string().contains("Backend device"));
    }

    // NOTE(review): gated off Windows — presumably because of `c_char`
    // signedness differences across targets; confirm the exact reason.
    #[test]
    #[cfg(not(target_os = "windows"))]
    fn append_kv_override_with_high_byte_returns_invalid_character_error() {
        use crate::model::params::param_override_value::ParamOverrideValue;

        let key_bytes: &[u8] = b"\xff\0";
        let key = std::ffi::CStr::from_bytes_with_nul(key_bytes).unwrap();
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params
            .as_mut()
            .append_kv_override(key, ParamOverrideValue::Int(1));

        assert!(matches!(
            result,
            Err(crate::error::ModelParamsError::InvalidCharacterInKey { byte: 0xff, .. })
        ));
    }

    // NOTE(review): same Windows gate as above; confirm the exact reason.
    #[test]
    #[cfg(not(target_os = "windows"))]
    fn add_cpu_buft_override_with_high_byte_returns_invalid_character_error() {
        let key_bytes: &[u8] = b"\xff\0";
        let key = std::ffi::CStr::from_bytes_with_nul(key_bytes).unwrap();
        let mut params = std::pin::pin!(LlamaModelParams::default());
        let result = params.as_mut().add_cpu_buft_override(key);

        assert!(matches!(
            result,
            Err(crate::error::ModelParamsError::InvalidCharacterInKey { byte: 0xff, .. })
        ));
    }
}