pub struct LlamaContextParams {
pub context_params: llama_context_params,
}Expand description
A safe wrapper around llama_context_params.
Generally this should be created with Default::default() and then modified with with_* methods.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let ctx_params = LlamaContextParams::default()
.with_n_ctx(NonZeroU32::new(2048));
assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));Fields§
§context_params: llama_context_paramsImplementations§
Source§impl LlamaContextParams
impl LlamaContextParams
Sourcepub fn with_n_ctx(self, n_ctx: Option<NonZeroU32>) -> Self
pub fn with_n_ctx(self, n_ctx: Option<NonZeroU32>) -> Self
Set the size of the context
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
let params = params.with_n_ctx(NonZeroU32::new(2048));
assert_eq!(params.n_ctx(), NonZeroU32::new(2048));Sourcepub const fn n_ctx(&self) -> Option<NonZeroU32>
pub const fn n_ctx(&self) -> Option<NonZeroU32>
Sourcepub const fn with_n_batch(self, n_batch: u32) -> Self
pub const fn with_n_batch(self, n_batch: u32) -> Self
Set the n_batch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_batch(2048);
assert_eq!(params.n_batch(), 2048);Sourcepub const fn n_batch(&self) -> u32
pub const fn n_batch(&self) -> u32
Get the n_batch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_batch(), 2048);Sourcepub const fn with_n_ubatch(self, n_ubatch: u32) -> Self
pub const fn with_n_ubatch(self, n_ubatch: u32) -> Self
Set the n_ubatch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_ubatch(512);
assert_eq!(params.n_ubatch(), 512);Sourcepub const fn n_ubatch(&self) -> u32
pub const fn n_ubatch(&self) -> u32
Get the n_ubatch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_ubatch(), 512);Sourcepub const fn with_flash_attention_policy(
self,
policy: llama_flash_attn_type,
) -> Self
pub const fn with_flash_attention_policy( self, policy: llama_flash_attn_type, ) -> Self
Set the flash attention policy using llama.cpp enum
Sourcepub const fn flash_attention_policy(&self) -> llama_flash_attn_type
pub const fn flash_attention_policy(&self) -> llama_flash_attn_type
Get the flash attention policy
Sourcepub const fn with_offload_kqv(self, enabled: bool) -> Self
pub const fn with_offload_kqv(self, enabled: bool) -> Self
Set the offload_kqv parameter to control offloading KV cache & KQV ops to GPU
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_offload_kqv(false);
assert_eq!(params.offload_kqv(), false);Sourcepub const fn offload_kqv(&self) -> bool
pub const fn offload_kqv(&self) -> bool
Get the offload_kqv parameter
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.offload_kqv(), true);Sourcepub fn with_rope_scaling_type(self, rope_scaling_type: RopeScalingType) -> Self
pub fn with_rope_scaling_type(self, rope_scaling_type: RopeScalingType) -> Self
Set the type of rope scaling.
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, RopeScalingType};
let params = LlamaContextParams::default()
.with_rope_scaling_type(RopeScalingType::Linear);
assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);Sourcepub fn rope_scaling_type(&self) -> RopeScalingType
pub fn rope_scaling_type(&self) -> RopeScalingType
Get the type of rope scaling.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.rope_scaling_type(), llama_cpp_bindings::context::params::RopeScalingType::Unspecified);Sourcepub const fn with_rope_freq_base(self, rope_freq_base: f32) -> Self
pub const fn with_rope_freq_base(self, rope_freq_base: f32) -> Self
Set the rope frequency base.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_rope_freq_base(0.5);
assert_eq!(params.rope_freq_base(), 0.5);Sourcepub const fn rope_freq_base(&self) -> f32
pub const fn rope_freq_base(&self) -> f32
Get the rope frequency base.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.rope_freq_base(), 0.0);Sourcepub const fn with_rope_freq_scale(self, rope_freq_scale: f32) -> Self
pub const fn with_rope_freq_scale(self, rope_freq_scale: f32) -> Self
Set the rope frequency scale.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_rope_freq_scale(0.5);
assert_eq!(params.rope_freq_scale(), 0.5);Sourcepub const fn rope_freq_scale(&self) -> f32
pub const fn rope_freq_scale(&self) -> f32
Get the rope frequency scale.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.rope_freq_scale(), 0.0);Sourcepub const fn n_threads(&self) -> i32
pub const fn n_threads(&self) -> i32
Get the number of threads.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.n_threads(), 4);Sourcepub const fn n_threads_batch(&self) -> i32
pub const fn n_threads_batch(&self) -> i32
Get the number of threads allocated for batches.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.n_threads_batch(), 4);Sourcepub const fn with_n_threads(self, n_threads: i32) -> Self
pub const fn with_n_threads(self, n_threads: i32) -> Self
Set the number of threads.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_threads(8);
assert_eq!(params.n_threads(), 8);Sourcepub const fn with_n_threads_batch(self, n_threads: i32) -> Self
pub const fn with_n_threads_batch(self, n_threads: i32) -> Self
Set the number of threads allocated for batches.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_threads_batch(8);
assert_eq!(params.n_threads_batch(), 8);Sourcepub const fn embeddings(&self) -> bool
pub const fn embeddings(&self) -> bool
Check whether embeddings are enabled
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert!(!params.embeddings());Sourcepub const fn with_embeddings(self, embedding: bool) -> Self
pub const fn with_embeddings(self, embedding: bool) -> Self
Enable the use of embeddings
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_embeddings(true);
assert!(params.embeddings());Sourcepub fn with_cb_eval(self, cb_eval: ggml_backend_sched_eval_callback) -> Self
pub fn with_cb_eval(self, cb_eval: ggml_backend_sched_eval_callback) -> Self
Set the evaluation callback.
§Examples
extern "C" fn cb_eval_fn(
t: *mut llama_cpp_bindings_sys::ggml_tensor,
ask: bool,
user_data: *mut std::ffi::c_void,
) -> bool {
false
}
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));Sourcepub const fn with_cb_eval_user_data(
self,
cb_eval_user_data: *mut c_void,
) -> Self
pub const fn with_cb_eval_user_data( self, cb_eval_user_data: *mut c_void, ) -> Self
Set the evaluation callback user data.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
let user_data = std::ptr::null_mut();
let params = params.with_cb_eval_user_data(user_data);Sourcepub fn with_pooling_type(self, pooling_type: LlamaPoolingType) -> Self
pub fn with_pooling_type(self, pooling_type: LlamaPoolingType) -> Self
Set the type of pooling.
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, LlamaPoolingType};
let params = LlamaContextParams::default()
.with_pooling_type(LlamaPoolingType::Last);
assert_eq!(params.pooling_type(), LlamaPoolingType::Last);Sourcepub fn pooling_type(&self) -> LlamaPoolingType
pub fn pooling_type(&self) -> LlamaPoolingType
Get the type of pooling.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.pooling_type(), llama_cpp_bindings::context::params::LlamaPoolingType::Unspecified);Sourcepub const fn with_swa_full(self, enabled: bool) -> Self
pub const fn with_swa_full(self, enabled: bool) -> Self
Set whether to use full sliding window attention
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_swa_full(false);
assert_eq!(params.swa_full(), false);Sourcepub const fn swa_full(&self) -> bool
pub const fn swa_full(&self) -> bool
Get whether full sliding window attention is enabled
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.swa_full(), true);Sourcepub const fn with_n_seq_max(self, n_seq_max: u32) -> Self
pub const fn with_n_seq_max(self, n_seq_max: u32) -> Self
Set the max number of sequences (i.e. distinct states for recurrent models)
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_seq_max(64);
assert_eq!(params.n_seq_max(), 64);Sourcepub const fn n_seq_max(&self) -> u32
pub const fn n_seq_max(&self) -> u32
Get the max number of sequences (i.e. distinct states for recurrent models)
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_seq_max(), 1);Sourcepub fn with_type_k(self, type_k: KvCacheType) -> Self
pub fn with_type_k(self, type_k: KvCacheType) -> Self
Set the KV cache data type for K
use llama_cpp_bindings::context::params::{LlamaContextParams, KvCacheType};
let params = LlamaContextParams::default().with_type_k(KvCacheType::Q4_0);
assert_eq!(params.type_k(), KvCacheType::Q4_0);
Sourcepub fn type_k(&self) -> KvCacheType
pub fn type_k(&self) -> KvCacheType
Get the KV cache data type for K
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
let _ = params.type_k();Sourcepub fn with_type_v(self, type_v: KvCacheType) -> Self
pub fn with_type_v(self, type_v: KvCacheType) -> Self
Set the KV cache data type for V
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, KvCacheType};
let params = LlamaContextParams::default().with_type_v(KvCacheType::Q4_1);
assert_eq!(params.type_v(), KvCacheType::Q4_1);Sourcepub fn type_v(&self) -> KvCacheType
pub fn type_v(&self) -> KvCacheType
Get the KV cache data type for V
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
let _ = params.type_v();Sourcepub fn with_attention_type(self, attention_type: LlamaAttentionType) -> Self
pub fn with_attention_type(self, attention_type: LlamaAttentionType) -> Self
Set the attention type
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, LlamaAttentionType};
let params = LlamaContextParams::default()
.with_attention_type(LlamaAttentionType::NonCausal);
assert_eq!(params.attention_type(), LlamaAttentionType::NonCausal);Sourcepub fn attention_type(&self) -> LlamaAttentionType
pub fn attention_type(&self) -> LlamaAttentionType
Get the attention type
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.attention_type(), llama_cpp_bindings::context::params::LlamaAttentionType::Unspecified);Sourcepub const fn with_yarn_ext_factor(self, yarn_ext_factor: f32) -> Self
pub const fn with_yarn_ext_factor(self, yarn_ext_factor: f32) -> Self
Set the YaRN extrapolation factor
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_ext_factor(1.0);
assert!((params.yarn_ext_factor() - 1.0).abs() < f32::EPSILON);Sourcepub const fn yarn_ext_factor(&self) -> f32
pub const fn yarn_ext_factor(&self) -> f32
Get the YaRN extrapolation factor
Sourcepub const fn with_yarn_attn_factor(self, yarn_attn_factor: f32) -> Self
pub const fn with_yarn_attn_factor(self, yarn_attn_factor: f32) -> Self
Set the YaRN attention factor
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_attn_factor(2.0);
assert!((params.yarn_attn_factor() - 2.0).abs() < f32::EPSILON);Sourcepub const fn yarn_attn_factor(&self) -> f32
pub const fn yarn_attn_factor(&self) -> f32
Get the YaRN attention factor
Sourcepub const fn with_yarn_beta_fast(self, yarn_beta_fast: f32) -> Self
pub const fn with_yarn_beta_fast(self, yarn_beta_fast: f32) -> Self
Set the YaRN low correction dim
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_beta_fast(32.0);
assert!((params.yarn_beta_fast() - 32.0).abs() < f32::EPSILON);Sourcepub const fn yarn_beta_fast(&self) -> f32
pub const fn yarn_beta_fast(&self) -> f32
Get the YaRN low correction dim
Sourcepub const fn with_yarn_beta_slow(self, yarn_beta_slow: f32) -> Self
pub const fn with_yarn_beta_slow(self, yarn_beta_slow: f32) -> Self
Set the YaRN high correction dim
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_beta_slow(1.0);
assert!((params.yarn_beta_slow() - 1.0).abs() < f32::EPSILON);Sourcepub const fn yarn_beta_slow(&self) -> f32
pub const fn yarn_beta_slow(&self) -> f32
Get the YaRN high correction dim
Sourcepub const fn with_yarn_orig_ctx(self, yarn_orig_ctx: u32) -> Self
pub const fn with_yarn_orig_ctx(self, yarn_orig_ctx: u32) -> Self
Set the YaRN original context size
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_orig_ctx(4096);
assert_eq!(params.yarn_orig_ctx(), 4096);Sourcepub const fn yarn_orig_ctx(&self) -> u32
pub const fn yarn_orig_ctx(&self) -> u32
Get the YaRN original context size
Sourcepub const fn with_defrag_thold(self, defrag_thold: f32) -> Self
pub const fn with_defrag_thold(self, defrag_thold: f32) -> Self
Set the KV cache defragmentation threshold
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_defrag_thold(0.1);
assert!((params.defrag_thold() - 0.1).abs() < f32::EPSILON);Sourcepub const fn defrag_thold(&self) -> f32
pub const fn defrag_thold(&self) -> f32
Get the KV cache defragmentation threshold
Sourcepub const fn with_no_perf(self, no_perf: bool) -> Self
pub const fn with_no_perf(self, no_perf: bool) -> Self
Set whether performance timings are disabled
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_no_perf(true);
assert!(params.no_perf());Sourcepub const fn with_op_offload(self, op_offload: bool) -> Self
pub const fn with_op_offload(self, op_offload: bool) -> Self
Set whether to offload ops to GPU
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_op_offload(false);
assert!(!params.op_offload());Sourcepub const fn op_offload(&self) -> bool
pub const fn op_offload(&self) -> bool
Get whether ops are offloaded to GPU
Sourcepub const fn with_kv_unified(self, kv_unified: bool) -> Self
pub const fn with_kv_unified(self, kv_unified: bool) -> Self
Set whether to use a unified KV cache buffer across input sequences
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_kv_unified(true);
assert!(params.kv_unified());Sourcepub const fn kv_unified(&self) -> bool
pub const fn kv_unified(&self) -> bool
Get whether a unified KV cache buffer is used across input sequences
Trait Implementations§
Source§impl Clone for LlamaContextParams
impl Clone for LlamaContextParams
Source§fn clone(&self) -> LlamaContextParams
fn clone(&self) -> LlamaContextParams
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for LlamaContextParams
impl Debug for LlamaContextParams
Source§impl Default for LlamaContextParams
Default parameters for LlamaContext. (as defined in llama.cpp by llama_context_default_params)
impl Default for LlamaContextParams
Default parameters for LlamaContext. (as defined in llama.cpp by llama_context_default_params)
use llama_cpp_bindings::context::params::{LlamaContextParams, RopeScalingType};
let params = LlamaContextParams::default();
assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);impl Send for LlamaContextParams
SAFETY: we do not currently allow setting or reading the pointers that would otherwise prevent this type from being automatically Send or Sync.