pub struct LlamaModelParams { /* private fields */ }
Expand description
A safe wrapper around llama_model_params.
Implementations§
Source§impl LlamaModelParams
impl LlamaModelParams
Source
pub fn kv_overrides(&self) -> KvOverrides<'_>
pub fn kv_overrides(&self) -> KvOverrides<'_>
See KvOverrides
§Examples
let params = Box::pin(LlamaModelParams::default());
let kv_overrides = params.kv_overrides();
let count = kv_overrides.into_iter().count();
assert_eq!(count, 0);
Source
pub fn append_kv_override(
self: Pin<&mut Self>,
key: &CStr,
value: ParamOverrideValue,
)
pub fn append_kv_override( self: Pin<&mut Self>, key: &CStr, value: ParamOverrideValue, )
Appends a key-value override to the model parameters. It must be pinned as this creates a self-referential struct.
§Examples
use std::pin::pin;
let mut params = pin!(LlamaModelParams::default());
let key = CString::new("key").expect("CString::new failed");
params.as_mut().append_kv_override(&key, ParamOverrideValue::Int(50));
let kv_overrides = params.kv_overrides().into_iter().collect::<Vec<_>>();
assert_eq!(kv_overrides.len(), 1);
let (k, v) = &kv_overrides[0];
assert_eq!(v, &ParamOverrideValue::Int(50));
assert_eq!(k.to_bytes(), b"key", "expected key to be 'key', was {:?}", k);
Source§impl LlamaModelParams
impl LlamaModelParams
Source
pub fn n_gpu_layers(&self) -> i32
pub fn n_gpu_layers(&self) -> i32
Get the number of layers to offload to the GPU.
Source
pub fn vocab_only(&self) -> bool
pub fn vocab_only(&self) -> bool
only load the vocabulary, no weights
Source
pub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
pub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
sets the number of gpu layers to offload to the GPU.
let params = LlamaModelParams::default();
let params = params.with_n_gpu_layers(1);
assert_eq!(params.n_gpu_layers(), 1);
Source
pub fn with_main_gpu(self, main_gpu: i32) -> Self
pub fn with_main_gpu(self, main_gpu: i32) -> Self
sets the main GPU
Source
pub fn with_vocab_only(self, vocab_only: bool) -> Self
pub fn with_vocab_only(self, vocab_only: bool) -> Self
sets vocab_only
Source
pub fn with_use_mlock(self, use_mlock: bool) -> Self
pub fn with_use_mlock(self, use_mlock: bool) -> Self
sets use_mlock
Source
pub fn with_override_arch(self, override_arch: Option<&str>) -> Self
pub fn with_override_arch(self, override_arch: Option<&str>) -> Self
Override model architecture string used when loading.
This is primarily used by MTP to load the draft head architecture
from the same GGUF (for example qwen35_mtp / qwen35moe_mtp).
This API is only available when built with the mtp feature.
Source
pub fn override_arch(&self) -> Option<&str>
pub fn override_arch(&self) -> Option<&str>
Get the currently configured model architecture override.
This API is only available when built with the mtp feature.
Trait Implementations§
Source§impl Debug for LlamaModelParams
impl Debug for LlamaModelParams
Source§impl Default for LlamaModelParams
Default parameters for LlamaModel. (as defined in llama.cpp by llama_model_default_params)
impl Default for LlamaModelParams
Default parameters for LlamaModel. (as defined in llama.cpp by llama_model_default_params)
let params = LlamaModelParams::default();
#[cfg(not(target_os = "macos"))]
assert_eq!(params.n_gpu_layers(), 0, "n_gpu_layers should be 0");
#[cfg(target_os = "macos")]
assert_eq!(params.n_gpu_layers(), -1, "n_gpu_layers should be -1 (all layers)");
assert_eq!(params.main_gpu(), 0, "main_gpu should be 0");
assert_eq!(params.vocab_only(), false, "vocab_only should be false");
assert_eq!(params.use_mmap(), true, "use_mmap should be true");
assert_eq!(params.use_mlock(), false, "use_mlock should be false");