pub struct LlamaModelParams {
pub params: llama_model_params,
/* private fields */
}Expand description
A safe wrapper around llama_model_params.
Fields§
§params: llama_model_params
The underlying llama_model_params from the C API.
Implementations§
Source§impl LlamaModelParams
impl LlamaModelParams
Sourcepub const fn kv_overrides(&self) -> KvOverrides<'_>
pub const fn kv_overrides(&self) -> KvOverrides<'_>
See KvOverrides
§Examples
let params = Box::pin(LlamaModelParams::default());
let kv_overrides = params.kv_overrides();
let count = kv_overrides.into_iter().count();
assert_eq!(count, 0);
Sourcepub fn append_kv_override(
self: Pin<&mut Self>,
key: &CStr,
value: ParamOverrideValue,
) -> Result<(), ModelParamsError>
pub fn append_kv_override( self: Pin<&mut Self>, key: &CStr, value: ParamOverrideValue, ) -> Result<(), ModelParamsError>
Appends a key-value override to the model parameters. It must be pinned as this creates a self-referential struct.
§Errors
Returns ModelParamsError if the internal override vector has no available slot,
the slot is not empty, or the key contains invalid characters.
§Examples
use std::pin::pin;
let mut params = pin!(LlamaModelParams::default());
let key = CString::new("key").expect("CString::new failed");
params.as_mut().append_kv_override(&key, ParamOverrideValue::Int(50)).unwrap();
let kv_overrides = params.kv_overrides().into_iter().collect::<Vec<_>>();
assert_eq!(kv_overrides.len(), 1);
let (k, v) = &kv_overrides[0];
assert_eq!(v, &ParamOverrideValue::Int(50));
assert_eq!(k.to_bytes(), b"key", "expected key to be 'key', was {:?}", k);
Source§impl LlamaModelParams
impl LlamaModelParams
Sourcepub fn add_cpu_moe_override(
self: Pin<&mut Self>,
) -> Result<(), ModelParamsError>
pub fn add_cpu_moe_override( self: Pin<&mut Self>, ) -> Result<(), ModelParamsError>
Adds buffer type overrides to move all mixture-of-experts layers to CPU.
§Errors
Returns ModelParamsError if the internal override vector has no available slot,
the slot is not empty, or the key contains invalid characters.
Sourcepub fn add_cpu_buft_override(
self: Pin<&mut Self>,
key: &CStr,
) -> Result<(), ModelParamsError>
pub fn add_cpu_buft_override( self: Pin<&mut Self>, key: &CStr, ) -> Result<(), ModelParamsError>
Appends a buffer type override to the model parameters, to move layers matching pattern to CPU. It must be pinned as this creates a self-referential struct.
§Errors
Returns ModelParamsError if the internal override vector has no available slot,
the slot is not empty, or the key contains invalid characters.
Source§impl LlamaModelParams
impl LlamaModelParams
Sourcepub const fn n_gpu_layers(&self) -> i32
pub const fn n_gpu_layers(&self) -> i32
Get the number of layers to offload to the GPU.
Sourcepub const fn vocab_only(&self) -> bool
pub const fn vocab_only(&self) -> bool
only load the vocabulary, no weights
Sourcepub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError>
pub fn split_mode(&self) -> Result<LlamaSplitMode, LlamaSplitModeParseError>
get the split mode
§Errors
Returns LlamaSplitModeParseError if an unknown split mode is encountered.
Sourcepub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
pub fn with_n_gpu_layers(self, n_gpu_layers: u32) -> Self
sets the number of gpu layers to offload to the GPU.
let params = LlamaModelParams::default();
let params = params.with_n_gpu_layers(1);
assert_eq!(params.n_gpu_layers(), 1);
Sourcepub const fn with_main_gpu(self, main_gpu: i32) -> Self
pub const fn with_main_gpu(self, main_gpu: i32) -> Self
sets the main GPU
To use this option, set split_mode to LlamaSplitMode::None, which enables single GPU mode.
Sourcepub const fn with_vocab_only(self, vocab_only: bool) -> Self
pub const fn with_vocab_only(self, vocab_only: bool) -> Self
sets vocab_only
Sourcepub const fn with_use_mmap(self, use_mmap: bool) -> Self
pub const fn with_use_mmap(self, use_mmap: bool) -> Self
sets use_mmap
§Examples
let params = LlamaModelParams::default().with_use_mmap(false);
assert!(!params.use_mmap());
Sourcepub const fn with_no_alloc(self, no_alloc: bool) -> Self
pub const fn with_no_alloc(self, no_alloc: bool) -> Self
Set no_alloc. When enabled, tensor data is not allocated.
Incompatible with use_mmap, so enabling this also disables mmap.
§Examples
let params = LlamaModelParams::default().with_no_alloc(true);
assert!(params.no_alloc());
assert!(!params.use_mmap());
Sourcepub const fn with_use_mlock(self, use_mlock: bool) -> Self
pub const fn with_use_mlock(self, use_mlock: bool) -> Self
sets use_mlock
Sourcepub fn with_split_mode(self, split_mode: LlamaSplitMode) -> Self
pub fn with_split_mode(self, split_mode: LlamaSplitMode) -> Self
sets split_mode
Sourcepub fn with_devices(self, devices: &[usize]) -> Result<Self, LlamaCppError>
pub fn with_devices(self, devices: &[usize]) -> Result<Self, LlamaCppError>
sets devices
The devices are specified as indices that correspond to the ggml backend device indices.
The maximum number of devices is 16.
You don’t need to specify CPU or ACCEL devices.
§Errors
Returns LlamaCppError::BackendDeviceNotFound if any device index is invalid.
Source§impl LlamaModelParams
impl LlamaModelParams
Sourcepub fn fit_params(
self: Pin<&mut Self>,
model_path: &CStr,
context_params: &mut LlamaContextParams,
margins: &mut [usize],
n_ctx_min: u32,
log_level: ggml_log_level,
) -> Result<FitResult, FitError>
pub fn fit_params( self: Pin<&mut Self>, model_path: &CStr, context_params: &mut LlamaContextParams, margins: &mut [usize], n_ctx_min: u32, log_level: ggml_log_level, ) -> Result<FitResult, FitError>
Automatically fit model and context parameters to available device memory.
Wraps llama.cpp’s common_fit_params. Given a model path, available per-device memory
margins, and a minimum context size, it fills in n_gpu_layers, tensor_split, and
tensor_buft_overrides to fit the model to the available VRAM, and may reduce
cparams.n_ctx if needed. On success the model and context params are updated in place.
§Requirements
Per the C API docstring, only parameters that still hold their default value are modified. In practice this means:
- n_gpu_layers must be at its default (-1). Do not call with_n_gpu_layers before this.
- No tensor_buft_overrides may be set. Do not call add_cpu_buft_override or add_cpu_moe_override before this.
- cparams.n_ctx is only auto-selected if it is 0; otherwise it is left alone.
§Arguments
- model_path — path to the GGUF model file as a C string.
- context_params — context parameters; n_ctx may be modified (see above).
- margins — memory margin per device in bytes. Must have at least crate::max_devices() elements.
- n_ctx_min — minimum context size to preserve when reducing memory usage.
- log_level — minimum log level for fitting output; lower levels go to the debug log.
§Thread safety
This function is not thread safe: the underlying C call mutates the global llama logger state.
§Errors
Returns one of the FitError variants matching the vendored wrapper’s status code.
Trait Implementations§
Source§impl Debug for LlamaModelParams
impl Debug for LlamaModelParams
Source§impl Default for LlamaModelParams
Default parameters for LlamaModel. (as defined in llama.cpp by llama_model_default_params)
impl Default for LlamaModelParams
Default parameters for LlamaModel. (as defined in llama.cpp by llama_model_default_params)
use llama_cpp_bindings::model::split_mode::LlamaSplitMode;
let params = LlamaModelParams::default();
assert_eq!(params.n_gpu_layers(), -1, "n_gpu_layers should be -1");
assert_eq!(params.main_gpu(), 0, "main_gpu should be 0");
assert_eq!(params.vocab_only(), false, "vocab_only should be false");
assert_eq!(params.use_mmap(), true, "use_mmap should be true");
assert_eq!(params.use_mlock(), false, "use_mlock should be false");
assert_eq!(params.split_mode(), Ok(LlamaSplitMode::Layer), "split_mode should be LAYER");
assert_eq!(params.devices().len(), 0, "devices should be empty");
Auto Trait Implementations§
impl Freeze for LlamaModelParams
impl RefUnwindSafe for LlamaModelParams
impl !Send for LlamaModelParams
impl !Sync for LlamaModelParams
impl Unpin for LlamaModelParams
impl UnsafeUnpin for LlamaModelParams
impl UnwindSafe for LlamaModelParams
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more