pub struct LlamaModelParams { /* private fields */ }
Expand description
A safe wrapper around llama_model_params.
Implementations§
Source§impl LlamaModelParams
impl LlamaModelParams
Source
pub fn kv_overrides(&self) -> KvOverrides<'_>
pub fn kv_overrides(&self) -> KvOverrides<'_>
See KvOverrides
§Examples
let params = Box::pin(LlamaModelParams::default());
let kv_overrides = params.kv_overrides();
let count = kv_overrides.into_iter().count();
assert_eq!(count, 0);
Source
pub fn append_kv_override(
self: Pin<&mut Self>,
key: &CStr,
value: ParamOverrideValue,
)
pub fn append_kv_override( self: Pin<&mut Self>, key: &CStr, value: ParamOverrideValue, )
Appends a key-value override to the model parameters. It must be pinned as this creates a self-referential struct.
§Examples
use std::pin::pin;
let mut params = pin!(LlamaModelParams::default());
let key = CString::new("key").expect("CString::new failed");
params.as_mut().append_kv_override(&key, ParamOverrideValue::Int(50));
let kv_overrides = params.kv_overrides().into_iter().collect::<Vec<_>>();
assert_eq!(kv_overrides.len(), 1);
let (k, v) = &kv_overrides[0];
assert_eq!(v, &ParamOverrideValue::Int(50));
assert_eq!(k.to_bytes(), b"key", "expected key to be 'key', was {:?}", k);
Source§impl LlamaModelParams
impl LlamaModelParams
Source
pub fn n_gpu_layers(&self) -> i32
pub fn n_gpu_layers(&self) -> i32
Get the number of layers to offload to the GPU.
Source
pub fn vocab_only(&self) -> bool
pub fn vocab_only(&self) -> bool
only load the vocabulary, no weights
Source
pub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
pub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
sets the number of gpu layers to offload to the GPU.
let params = LlamaModelParams::default();
let params = params.with_n_gpu_layers(1);
assert_eq!(params.n_gpu_layers(), 1);
Source
pub fn with_main_gpu(self, main_gpu: i32) -> Self
pub fn with_main_gpu(self, main_gpu: i32) -> Self
sets the main GPU
Source
pub fn with_vocab_only(self, vocab_only: bool) -> Self
pub fn with_vocab_only(self, vocab_only: bool) -> Self
sets vocab_only
Source
pub fn with_use_mlock(self, use_mlock: bool) -> Self
pub fn with_use_mlock(self, use_mlock: bool) -> Self
sets use_mlock
Source
pub fn with_override_arch(self, override_arch: Option<&str>) -> Self
pub fn with_override_arch(self, override_arch: Option<&str>) -> Self
Override model architecture string used when loading.
This is primarily used by MTP to load the draft head architecture
from the same GGUF (for example qwen35_mtp / qwen35moe_mtp).
This API is only available when built with the mtp feature.
Source
pub fn override_arch(&self) -> Option<&str>
pub fn override_arch(&self) -> Option<&str>
Get the currently configured model architecture override.
This API is only available when built with the mtp feature.
Trait Implementations§
Source§impl Debug for LlamaModelParams
impl Debug for LlamaModelParams
Source§impl Default for LlamaModelParams
Default parameters for LlamaModel. (as defined in llama.cpp by llama_model_default_params)
impl Default for LlamaModelParams
Default parameters for LlamaModel. (as defined in llama.cpp by llama_model_default_params)
let params = LlamaModelParams::default();
#[cfg(not(target_os = "macos"))]
assert_eq!(params.n_gpu_layers(), 0, "n_gpu_layers should be 0");
#[cfg(target_os = "macos")]
assert_eq!(params.n_gpu_layers(), -1, "n_gpu_layers should be -1 (all layers)");
assert_eq!(params.main_gpu(), 0, "main_gpu should be 0");
assert_eq!(params.vocab_only(), false, "vocab_only should be false");
assert_eq!(params.use_mmap(), true, "use_mmap should be true");
assert_eq!(params.use_mlock(), false, "use_mlock should be false");