pub struct LlamaModelParams { /* private fields */ }
A safe wrapper around llama_model_params.
Implementations

impl LlamaModelParams
pub fn kv_overrides(&self) -> KvOverrides<'_>
See KvOverrides
Examples
use llama_cpp_2::model::params::LlamaModelParams;

let params = Box::pin(LlamaModelParams::default());
let kv_overrides = params.kv_overrides();
let count = kv_overrides.into_iter().count();
assert_eq!(count, 0);

pub fn append_kv_override(
    self: Pin<&mut Self>,
    key: &CStr,
    value: ParamOverrideValue,
)
Appends a key-value override to the model parameters. It must be pinned as this creates a self-referential struct.
Examples
use std::ffi::CString;
use std::pin::pin;
use llama_cpp_2::model::params::LlamaModelParams;
use llama_cpp_2::model::params::kv_overrides::ParamOverrideValue;

let mut params = pin!(LlamaModelParams::default());
let key = CString::new("key").expect("CString::new failed");
params.as_mut().append_kv_override(&key, ParamOverrideValue::Int(50));
let kv_overrides = params.kv_overrides().into_iter().collect::<Vec<_>>();
assert_eq!(kv_overrides.len(), 1);
let (k, v) = &kv_overrides[0];
assert_eq!(v, &ParamOverrideValue::Int(50));
assert_eq!(k.to_bytes(), b"key", "expected key to be 'key', was {:?}", k);

impl LlamaModelParams
pub fn add_cpu_moe_override(self: Pin<&mut Self>)
Adds buffer type overrides to move all mixture-of-experts layers to the CPU.
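A minimal sketch (not from the crate docs) of calling this, following the pinning pattern shown for append_kv_override above:

use std::pin::pin;
use llama_cpp_2::model::params::LlamaModelParams;

let mut params = pin!(LlamaModelParams::default());
// Keep all mixture-of-experts expert layers on the CPU.
params.as_mut().add_cpu_moe_override();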
pub fn add_cpu_buft_override(self: Pin<&mut Self>, key: &CStr)
Appends a buffer type override to the model parameters, moving layers whose names match the given pattern to the CPU. It must be pinned, as this creates a self-referential struct.
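A hedged sketch; the pattern string below is illustrative and assumes llama.cpp's tensor-name matching syntax:

use std::ffi::CString;
use std::pin::pin;
use llama_cpp_2::model::params::LlamaModelParams;

let mut params = pin!(LlamaModelParams::default());
// Illustrative pattern: move expert feed-forward tensors to the CPU.
// The exact pattern syntax is an assumption, not specified by this crate.
let pattern = CString::new("ffn_.*_exps").expect("CString::new failed");
params.as_mut().add_cpu_buft_override(&pattern);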
impl LlamaModelParams

pub fn n_gpu_layers(&self) -> i32
Get the number of layers to offload to the GPU.
pub fn vocab_only(&self) -> bool
Returns whether only the vocabulary is loaded, without weights.
pub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError>
Get the split mode.

Errors

Returns LlamaSplitModeParseError if an unknown split mode is encountered.
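For example (a sketch; an Err is only expected if llama.cpp reports a split mode this crate does not recognize):

use llama_cpp_2::model::params::{LlamaModelParams, LlamaSplitMode};

let params = LlamaModelParams::default();
match params.split_mode() {
    Ok(mode) => assert_eq!(mode, LlamaSplitMode::Layer),
    Err(e) => eprintln!("unrecognized split mode: {e:?}"),
}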
pub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
Sets the number of layers to offload to the GPU.
Examples

use llama_cpp_2::model::params::LlamaModelParams;

let params = LlamaModelParams::default();
let params = params.with_n_gpu_layers(1);
assert_eq!(params.n_gpu_layers(), 1);

pub fn with_main_gpu(self, main_gpu: i32) -> Self
Sets the main GPU.

For this option to take effect, split_mode must be set to LlamaSplitMode::None (single-GPU mode).
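A sketch combining the two setters; GPU index 1 is an arbitrary example value:

use llama_cpp_2::model::params::{LlamaModelParams, LlamaSplitMode};

// Single-GPU mode, running the whole model on GPU 1.
let params = LlamaModelParams::default()
    .with_split_mode(LlamaSplitMode::None)
    .with_main_gpu(1);
assert_eq!(params.main_gpu(), 1);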
pub fn with_vocab_only(self, vocab_only: bool) -> Self
Sets vocab_only.
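For example:

use llama_cpp_2::model::params::LlamaModelParams;

// Useful for tokenizer-only workloads: skip loading the weights.
let params = LlamaModelParams::default().with_vocab_only(true);
assert!(params.vocab_only());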
pub fn with_use_mlock(self, use_mlock: bool) -> Self
Sets use_mlock.
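For example:

use llama_cpp_2::model::params::LlamaModelParams;

// Ask the OS to keep the model resident in RAM
// (may require raised memory-lock limits).
let params = LlamaModelParams::default().with_use_mlock(true);
assert!(params.use_mlock());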
pub fn with_split_mode(self, split_mode: LlamaSplitMode) -> Self
Sets split_mode.
pub fn with_devices(self, devices: &[usize]) -> Result<Self, LLamaCppError>
Sets the devices to use.
The devices are specified as indices that correspond to the ggml backend device indices.
The maximum number of devices is 16.
You don’t need to specify CPU or ACCEL devices.
Errors
Returns LLamaCppError::BackendDeviceNotFound if any device index is invalid.
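A sketch; device index 0 is an assumption and only resolves if a matching ggml backend device exists at runtime, so the error case is handled:

use llama_cpp_2::model::params::LlamaModelParams;

// Index 0 refers to the first ggml backend device, if any is available.
match LlamaModelParams::default().with_devices(&[0]) {
    Ok(params) => assert_eq!(params.devices().len(), 1),
    Err(e) => eprintln!("backend device not found: {e:?}"),
}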
Trait Implementations
impl Debug for LlamaModelParams
impl Default for LlamaModelParams

Default parameters for LlamaModel, as defined in llama.cpp by llama_model_default_params.
use llama_cpp_2::model::params::{LlamaModelParams, LlamaSplitMode};

let params = LlamaModelParams::default();
assert_eq!(params.n_gpu_layers(), 999, "n_gpu_layers should be 999");
assert_eq!(params.main_gpu(), 0, "main_gpu should be 0");
assert!(!params.vocab_only(), "vocab_only should be false");
assert!(params.use_mmap(), "use_mmap should be true");
assert!(!params.use_mlock(), "use_mlock should be false");
assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer), "split_mode should be LAYER");
assert_eq!(params.devices().len(), 0, "devices should be empty");