// llama_cpp_bindings/model/params.rs
use crate::LlamaCppError;
use crate::error::ModelParamsError;
use crate::model::params::kv_overrides::KvOverrides;
use crate::model::split_mode::{LlamaSplitMode, LlamaSplitModeParseError};
use std::ffi::{CStr, c_char};
use std::fmt::{Debug, Formatter};
use std::pin::Pin;
use std::ptr::null;

pub mod kv_overrides;
pub mod param_override_value;
15pub const LLAMA_CPP_MAX_DEVICES: usize = 16;
20
/// Safe owner of a raw `llama_model_params` plus the Rust-side storage that
/// backs the raw struct's pointer fields (kv overrides, tensor buffer-type
/// overrides, and the device list).
pub struct LlamaModelParams {
    /// The raw FFI params struct handed to llama.cpp when loading a model.
    pub params: llama_cpp_bindings_sys::llama_model_params,
    /// Backing storage for `params.kv_overrides`. The Vec's last element is
    /// kept as a zeroed sentinel entry that terminates the C array.
    kv_overrides: Vec<llama_cpp_bindings_sys::llama_model_kv_override>,
    /// Backing storage for `params.tensor_buft_overrides`. The Vec's last
    /// element is kept as a null-`pattern` sentinel terminator.
    buft_overrides: Vec<llama_cpp_bindings_sys::llama_model_tensor_buft_override>,
    /// Fixed-size, heap-pinned array backing `params.devices`; pinning keeps
    /// the raw pointer valid even when this struct is moved by value.
    devices: Pin<Box<[llama_cpp_bindings_sys::ggml_backend_dev_t; LLAMA_CPP_MAX_DEVICES]>>,
}
29
30impl Debug for LlamaModelParams {
31 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
32 f.debug_struct("LlamaModelParams")
33 .field("n_gpu_layers", &self.params.n_gpu_layers)
34 .field("main_gpu", &self.params.main_gpu)
35 .field("vocab_only", &self.params.vocab_only)
36 .field("use_mmap", &self.params.use_mmap)
37 .field("use_mlock", &self.params.use_mlock)
38 .field("split_mode", &self.split_mode())
39 .field("devices", &self.devices)
40 .field("kv_overrides", &"vec of kv_overrides")
41 .finish()
42 }
43}
44
45impl LlamaModelParams {
46 #[must_use]
58 pub fn kv_overrides(&self) -> KvOverrides<'_> {
59 KvOverrides::new(self)
60 }
61
62 pub fn append_kv_override(
88 mut self: Pin<&mut Self>,
89 key: &CStr,
90 value: param_override_value::ParamOverrideValue,
91 ) -> Result<(), ModelParamsError> {
92 let kv_override = self
93 .kv_overrides
94 .get_mut(0)
95 .ok_or(ModelParamsError::NoAvailableSlot)?;
96
97 if kv_override.key[0] != 0 {
98 return Err(ModelParamsError::SlotNotEmpty);
99 }
100
101 for (i, &byte) in key.to_bytes_with_nul().iter().enumerate() {
102 kv_override.key[i] = c_char::try_from(byte).map_err(|convert_error| {
103 ModelParamsError::InvalidCharacterInKey {
104 byte,
105 reason: convert_error.to_string(),
106 }
107 })?;
108 }
109
110 kv_override.tag = value.tag();
111 kv_override.__bindgen_anon_1 = value.value();
112
113 self.params.kv_overrides = null();
115
116 self.kv_overrides
118 .push(llama_cpp_bindings_sys::llama_model_kv_override {
119 key: [0; 128],
120 tag: 0,
121 __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
122 val_i64: 0,
123 },
124 });
125
126 self.params.kv_overrides = self.kv_overrides.as_ptr();
128
129 Ok(())
130 }
131}
132
133impl LlamaModelParams {
134 pub fn add_cpu_moe_override(self: Pin<&mut Self>) -> Result<(), ModelParamsError> {
140 self.add_cpu_buft_override(c"\\.ffn_(up|down|gate)_(ch|)exps")
141 }
142
143 pub fn add_cpu_buft_override(
150 mut self: Pin<&mut Self>,
151 key: &CStr,
152 ) -> Result<(), ModelParamsError> {
153 let buft_override = self
154 .buft_overrides
155 .get_mut(0)
156 .ok_or(ModelParamsError::NoAvailableSlot)?;
157
158 if !buft_override.pattern.is_null() {
159 return Err(ModelParamsError::SlotNotEmpty);
160 }
161
162 for &byte in key.to_bytes_with_nul() {
163 c_char::try_from(byte).map_err(|convert_error| {
164 ModelParamsError::InvalidCharacterInKey {
165 byte,
166 reason: convert_error.to_string(),
167 }
168 })?;
169 }
170
171 buft_override.pattern = key.as_ptr();
172 buft_override.buft = unsafe { llama_cpp_bindings_sys::ggml_backend_cpu_buffer_type() };
173
174 self.params.tensor_buft_overrides = null();
176
177 self.buft_overrides
179 .push(llama_cpp_bindings_sys::llama_model_tensor_buft_override {
180 pattern: std::ptr::null(),
181 buft: std::ptr::null_mut(),
182 });
183
184 self.params.tensor_buft_overrides = self.buft_overrides.as_ptr();
186
187 Ok(())
188 }
189}
190
191impl LlamaModelParams {
192 #[must_use]
194 pub fn n_gpu_layers(&self) -> i32 {
195 self.params.n_gpu_layers
196 }
197
198 #[must_use]
200 pub fn main_gpu(&self) -> i32 {
201 self.params.main_gpu
202 }
203
204 #[must_use]
206 pub fn vocab_only(&self) -> bool {
207 self.params.vocab_only
208 }
209
210 #[must_use]
212 pub fn use_mmap(&self) -> bool {
213 self.params.use_mmap
214 }
215
216 #[must_use]
218 pub fn use_mlock(&self) -> bool {
219 self.params.use_mlock
220 }
221
222 pub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError> {
227 LlamaSplitMode::try_from(self.params.split_mode)
228 }
229
230 #[must_use]
232 pub fn devices(&self) -> Vec<usize> {
233 let mut backend_devices = Vec::new();
234 for i in 0..unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
235 let dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(i) };
236 backend_devices.push(dev);
237 }
238 let mut devices = Vec::new();
239 for &dev in self.devices.iter() {
240 if dev.is_null() {
241 break;
242 }
243 if let Some((index, _)) = backend_devices
244 .iter()
245 .enumerate()
246 .find(|&(_i, &d)| d == dev)
247 {
248 devices.push(index);
249 }
250 }
251 devices
252 }
253
254 #[must_use]
262 pub fn with_n_gpu_layers(mut self, n_gpu_layers: u32) -> Self {
263 let n_gpu_layers = i32::try_from(n_gpu_layers).unwrap_or(i32::MAX);
266 self.params.n_gpu_layers = n_gpu_layers;
267 self
268 }
269
270 #[must_use]
274 pub fn with_main_gpu(mut self, main_gpu: i32) -> Self {
275 self.params.main_gpu = main_gpu;
276 self
277 }
278
279 #[must_use]
281 pub fn with_vocab_only(mut self, vocab_only: bool) -> Self {
282 self.params.vocab_only = vocab_only;
283 self
284 }
285
286 #[must_use]
288 pub fn with_use_mlock(mut self, use_mlock: bool) -> Self {
289 self.params.use_mlock = use_mlock;
290 self
291 }
292
293 #[must_use]
295 pub fn with_split_mode(mut self, split_mode: LlamaSplitMode) -> Self {
296 self.params.split_mode = split_mode.into();
297 self
298 }
299
300 pub fn with_devices(mut self, devices: &[usize]) -> Result<Self, LlamaCppError> {
311 for dev in self.devices.iter_mut() {
312 *dev = std::ptr::null_mut();
313 }
314 let max_devices = crate::max_devices().min(LLAMA_CPP_MAX_DEVICES);
316 if devices.len() > max_devices {
317 return Err(LlamaCppError::MaxDevicesExceeded(max_devices));
318 }
319 for (i, &dev) in devices.iter().enumerate() {
320 if dev >= unsafe { llama_cpp_bindings_sys::ggml_backend_dev_count() } {
321 return Err(LlamaCppError::BackendDeviceNotFound(dev));
322 }
323 let backend_dev = unsafe { llama_cpp_bindings_sys::ggml_backend_dev_get(dev) };
324 self.devices[i] = backend_dev;
325 }
326 if self.devices.is_empty() {
327 self.params.devices = std::ptr::null_mut();
328 } else {
329 self.params.devices = self.devices.as_mut_ptr();
330 }
331 Ok(self)
332 }
333}
334
335impl Default for LlamaModelParams {
349 fn default() -> Self {
350 let default_params = unsafe { llama_cpp_bindings_sys::llama_model_default_params() };
351 LlamaModelParams {
352 params: default_params,
353 kv_overrides: vec![llama_cpp_bindings_sys::llama_model_kv_override {
355 key: [0; 128],
356 tag: 0,
357 __bindgen_anon_1: llama_cpp_bindings_sys::llama_model_kv_override__bindgen_ty_1 {
358 val_i64: 0,
359 },
360 }],
361 buft_overrides: vec![llama_cpp_bindings_sys::llama_model_tensor_buft_override {
362 pattern: std::ptr::null(),
363 buft: std::ptr::null_mut(),
364 }],
365 devices: Box::pin([std::ptr::null_mut(); 16]),
366 }
367 }
368}
369
#[cfg(test)]
mod tests {
    use crate::model::split_mode::LlamaSplitMode;

    use super::LlamaModelParams;

    /// The defaults mirror `llama_model_default_params()` from llama.cpp.
    #[test]
    fn default_params_have_expected_values() {
        let defaults = LlamaModelParams::default();

        assert_eq!(defaults.main_gpu(), 0);
        assert_eq!(defaults.n_gpu_layers(), -1);
        assert_eq!(defaults.split_mode(), Ok(LlamaSplitMode::Layer));
        assert!(defaults.use_mmap());
        assert!(!defaults.use_mlock());
        assert!(!defaults.vocab_only());
        assert!(defaults.devices().is_empty());
    }

    /// Values that overflow `i32` are clamped rather than wrapped.
    #[test]
    fn n_gpu_layers_overflow_clamps_to_max() {
        let clamped = LlamaModelParams::default().with_n_gpu_layers(u32::MAX);

        assert_eq!(clamped.n_gpu_layers(), i32::MAX);
    }
}