pub struct LlamaModelParams { /* private fields */ }
A safe wrapper around llama_model_params.
Implementations

impl LlamaModelParams
pub fn kv_overrides(&self) -> KvOverrides<'_>
See KvOverrides
Examples
use llama_cpp_2::model::params::LlamaModelParams;

let params = Box::pin(LlamaModelParams::default());
let kv_overrides = params.kv_overrides();
let count = kv_overrides.into_iter().count();
assert_eq!(count, 0);

pub fn append_kv_override(
    self: Pin<&mut Self>,
    key: &CStr,
    value: ParamOverrideValue,
)
Appends a key-value override to the model parameters. It must be pinned as this creates a self-referential struct.
Examples
use std::ffi::CString;
use std::pin::pin;
use llama_cpp_2::model::params::LlamaModelParams;
use llama_cpp_2::model::params::kv_overrides::ParamOverrideValue;

let mut params = pin!(LlamaModelParams::default());
let key = CString::new("key").expect("CString::new failed");
params.as_mut().append_kv_override(&key, ParamOverrideValue::Int(50));
let kv_overrides = params.kv_overrides().into_iter().collect::<Vec<_>>();
assert_eq!(kv_overrides.len(), 1);
let (k, v) = &kv_overrides[0];
assert_eq!(v, &ParamOverrideValue::Int(50));
assert_eq!(k.to_bytes(), b"key", "expected key to be 'key', was {:?}", k);

impl LlamaModelParams
pub fn add_cpu_moe_override(self: Pin<&mut Self>)
Adds buffer type overrides to move all mixture-of-experts layers to the CPU.
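A minimal sketch (not from the crate docs) of calling this, following the pinning pattern shown for append_kv_override above:

use std::pin::pin;
use llama_cpp_2::model::params::LlamaModelParams;

let mut params = pin!(LlamaModelParams::default());
// Keep all mixture-of-experts expert layers on the CPU.
params.as_mut().add_cpu_moe_override();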
pub fn add_cpu_buft_override(self: Pin<&mut Self>, key: &CStr)
Appends a buffer type override to the model parameters, moving layers whose names match the given pattern to the CPU. It must be pinned, as this creates a self-referential struct.
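A hedged sketch; the pattern string below is illustrative and assumes llama.cpp's tensor-name matching syntax:

use std::ffi::CString;
use std::pin::pin;
use llama_cpp_2::model::params::LlamaModelParams;

let mut params = pin!(LlamaModelParams::default());
// Illustrative pattern: move expert feed-forward tensors to the CPU.
// The exact pattern syntax is an assumption, not specified by this crate.
let pattern = CString::new("ffn_.*_exps").expect("CString::new failed");
params.as_mut().add_cpu_buft_override(&pattern);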
impl LlamaModelParams

pub fn n_gpu_layers(&self) -> i32
Get the number of layers to offload to the GPU.
pub fn vocab_only(&self) -> bool
Returns whether only the vocabulary is loaded, without weights.
pub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError>
Get the split mode.

Errors

Returns LlamaSplitModeParseError if an unknown split mode is encountered.
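For example (a sketch; an Err is only expected if llama.cpp reports a split mode this crate does not recognize):

use llama_cpp_2::model::params::{LlamaModelParams, LlamaSplitMode};

let params = LlamaModelParams::default();
match params.split_mode() {
    Ok(mode) => assert_eq!(mode, LlamaSplitMode::Layer),
    Err(e) => eprintln!("unrecognized split mode: {e:?}"),
}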
pub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
Sets the number of layers to offload to the GPU.
Examples

use llama_cpp_2::model::params::LlamaModelParams;

let params = LlamaModelParams::default();
let params = params.with_n_gpu_layers(1);
assert_eq!(params.n_gpu_layers(), 1);

pub fn with_main_gpu(self, main_gpu: i32) -> Self
Sets the main GPU.

For this option to take effect, split_mode must be set to LlamaSplitMode::None (single-GPU mode).
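A sketch combining the two setters; GPU index 1 is an arbitrary example value:

use llama_cpp_2::model::params::{LlamaModelParams, LlamaSplitMode};

// Single-GPU mode, running the whole model on GPU 1.
let params = LlamaModelParams::default()
    .with_split_mode(LlamaSplitMode::None)
    .with_main_gpu(1);
assert_eq!(params.main_gpu(), 1);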
pub fn with_vocab_only(self, vocab_only: bool) -> Self
Sets vocab_only.
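For example:

use llama_cpp_2::model::params::LlamaModelParams;

// Useful for tokenizer-only workloads: skip loading the weights.
let params = LlamaModelParams::default().with_vocab_only(true);
assert!(params.vocab_only());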
pub fn with_use_mlock(self, use_mlock: bool) -> Self
Sets use_mlock.
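For example:

use llama_cpp_2::model::params::LlamaModelParams;

// Ask the OS to keep the model resident in RAM
// (may require raised memory-lock limits).
let params = LlamaModelParams::default().with_use_mlock(true);
assert!(params.use_mlock());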
pub fn with_split_mode(self, split_mode: LlamaSplitMode) -> Self
Sets split_mode.
pub fn with_devices(self, devices: &[usize]) -> Result<Self, LLamaCppError>
Sets the devices to use.
The devices are specified as indices that correspond to the ggml backend device indices.
The maximum number of devices is 16.
You don’t need to specify CPU or ACCEL devices.
Errors
Returns LLamaCppError::BackendDeviceNotFound if any device index is invalid.
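A sketch; device index 0 is an assumption and only resolves if a matching ggml backend device exists at runtime, so the error case is handled:

use llama_cpp_2::model::params::LlamaModelParams;

// Index 0 refers to the first ggml backend device, if any is available.
match LlamaModelParams::default().with_devices(&[0]) {
    Ok(params) => assert_eq!(params.devices().len(), 1),
    Err(e) => eprintln!("backend device not found: {e:?}"),
}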
Trait Implementations
impl Debug for LlamaModelParams
impl Default for LlamaModelParams

Default parameters for LlamaModel, as defined in llama.cpp by llama_model_default_params.
use llama_cpp_2::model::params::{LlamaModelParams, LlamaSplitMode};

let params = LlamaModelParams::default();
assert_eq!(params.n_gpu_layers(), 999, "n_gpu_layers should be 999");
assert_eq!(params.main_gpu(), 0, "main_gpu should be 0");
assert!(!params.vocab_only(), "vocab_only should be false");
assert!(params.use_mmap(), "use_mmap should be true");
assert!(!params.use_mlock(), "use_mlock should be false");
assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer), "split_mode should be LAYER");
assert_eq!(params.devices().len(), 0, "devices should be empty");