pub struct LlamaContextParams {
pub context_params: llama_context_params,
}
A safe wrapper around llama_context_params.
Generally this should be created with Default::default() and then modified with with_* methods.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let ctx_params = LlamaContextParams::default()
.with_n_ctx(NonZeroU32::new(2048));
assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));Fields§
§context_params: llama_context_paramsImplementations§
Source§impl LlamaContextParams
impl LlamaContextParams
Sourcepub fn with_n_ctx(self, n_ctx: Option<NonZeroU32>) -> Self
pub fn with_n_ctx(self, n_ctx: Option<NonZeroU32>) -> Self
Set the size of the context
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
let params = params.with_n_ctx(NonZeroU32::new(2048));
assert_eq!(params.n_ctx(), NonZeroU32::new(2048));Sourcepub const fn n_ctx(&self) -> Option<NonZeroU32>
pub const fn n_ctx(&self) -> Option<NonZeroU32>
Sourcepub const fn with_n_batch(self, n_batch: u32) -> Self
pub const fn with_n_batch(self, n_batch: u32) -> Self
Set the n_batch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_batch(2048);
assert_eq!(params.n_batch(), 2048);Sourcepub const fn n_batch(&self) -> u32
pub const fn n_batch(&self) -> u32
Get the n_batch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_batch(), 2048);Sourcepub const fn with_n_ubatch(self, n_ubatch: u32) -> Self
pub const fn with_n_ubatch(self, n_ubatch: u32) -> Self
Set the n_ubatch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_ubatch(512);
assert_eq!(params.n_ubatch(), 512);Sourcepub const fn n_ubatch(&self) -> u32
pub const fn n_ubatch(&self) -> u32
Get the n_ubatch
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_ubatch(), 512);Sourcepub const fn with_flash_attention_policy(
self,
policy: llama_flash_attn_type,
) -> Self
pub const fn with_flash_attention_policy( self, policy: llama_flash_attn_type, ) -> Self
Set the flash attention policy using llama.cpp enum
Sourcepub const fn flash_attention_policy(&self) -> llama_flash_attn_type
pub const fn flash_attention_policy(&self) -> llama_flash_attn_type
Get the flash attention policy
Sourcepub const fn with_offload_kqv(self, enabled: bool) -> Self
pub const fn with_offload_kqv(self, enabled: bool) -> Self
Set the offload_kqv parameter to control offloading KV cache & KQV ops to GPU
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_offload_kqv(false);
assert_eq!(params.offload_kqv(), false);Sourcepub const fn offload_kqv(&self) -> bool
pub const fn offload_kqv(&self) -> bool
Get the offload_kqv parameter
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.offload_kqv(), true);Sourcepub fn with_rope_scaling_type(self, rope_scaling_type: RopeScalingType) -> Self
pub fn with_rope_scaling_type(self, rope_scaling_type: RopeScalingType) -> Self
Set the type of rope scaling.
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, RopeScalingType};
let params = LlamaContextParams::default()
.with_rope_scaling_type(RopeScalingType::Linear);
assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);Sourcepub fn rope_scaling_type(&self) -> RopeScalingType
pub fn rope_scaling_type(&self) -> RopeScalingType
Get the type of rope scaling.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.rope_scaling_type(), llama_cpp_bindings::context::params::RopeScalingType::Unspecified);Sourcepub const fn with_rope_freq_base(self, rope_freq_base: f32) -> Self
pub const fn with_rope_freq_base(self, rope_freq_base: f32) -> Self
Set the rope frequency base.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_rope_freq_base(0.5);
assert_eq!(params.rope_freq_base(), 0.5);Sourcepub const fn rope_freq_base(&self) -> f32
pub const fn rope_freq_base(&self) -> f32
Get the rope frequency base.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.rope_freq_base(), 0.0);Sourcepub const fn with_rope_freq_scale(self, rope_freq_scale: f32) -> Self
pub const fn with_rope_freq_scale(self, rope_freq_scale: f32) -> Self
Set the rope frequency scale.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_rope_freq_scale(0.5);
assert_eq!(params.rope_freq_scale(), 0.5);Sourcepub const fn rope_freq_scale(&self) -> f32
pub const fn rope_freq_scale(&self) -> f32
Get the rope frequency scale.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.rope_freq_scale(), 0.0);Sourcepub const fn n_threads(&self) -> i32
pub const fn n_threads(&self) -> i32
Get the number of threads.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.n_threads(), 4);Sourcepub const fn n_threads_batch(&self) -> i32
pub const fn n_threads_batch(&self) -> i32
Get the number of threads allocated for batches.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.n_threads_batch(), 4);Sourcepub const fn with_n_threads(self, n_threads: i32) -> Self
pub const fn with_n_threads(self, n_threads: i32) -> Self
Set the number of threads.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_threads(8);
assert_eq!(params.n_threads(), 8);Sourcepub const fn with_n_threads_batch(self, n_threads: i32) -> Self
pub const fn with_n_threads_batch(self, n_threads: i32) -> Self
Set the number of threads allocated for batches.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_threads_batch(8);
assert_eq!(params.n_threads_batch(), 8);Sourcepub const fn embeddings(&self) -> bool
pub const fn embeddings(&self) -> bool
Check whether embeddings are enabled
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert!(!params.embeddings());Sourcepub const fn with_embeddings(self, embedding: bool) -> Self
pub const fn with_embeddings(self, embedding: bool) -> Self
Enable the use of embeddings
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_embeddings(true);
assert!(params.embeddings());Sourcepub fn with_cb_eval(self, cb_eval: ggml_backend_sched_eval_callback) -> Self
pub fn with_cb_eval(self, cb_eval: ggml_backend_sched_eval_callback) -> Self
Set the evaluation callback.
§Examples
extern "C" fn cb_eval_fn(
t: *mut llama_cpp_bindings_sys::ggml_tensor,
ask: bool,
user_data: *mut std::ffi::c_void,
) -> bool {
false
}
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));Sourcepub const fn with_cb_eval_user_data(
self,
cb_eval_user_data: *mut c_void,
) -> Self
pub const fn with_cb_eval_user_data( self, cb_eval_user_data: *mut c_void, ) -> Self
Set the evaluation callback user data.
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
let user_data = std::ptr::null_mut();
let params = params.with_cb_eval_user_data(user_data);Sourcepub fn with_pooling_type(self, pooling_type: LlamaPoolingType) -> Self
pub fn with_pooling_type(self, pooling_type: LlamaPoolingType) -> Self
Set the type of pooling.
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, LlamaPoolingType};
let params = LlamaContextParams::default()
.with_pooling_type(LlamaPoolingType::Last);
assert_eq!(params.pooling_type(), LlamaPoolingType::Last);Sourcepub fn pooling_type(&self) -> LlamaPoolingType
pub fn pooling_type(&self) -> LlamaPoolingType
Get the type of pooling.
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.pooling_type(), llama_cpp_bindings::context::params::LlamaPoolingType::Unspecified);Sourcepub const fn with_swa_full(self, enabled: bool) -> Self
pub const fn with_swa_full(self, enabled: bool) -> Self
Set whether to use full sliding window attention
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_swa_full(false);
assert_eq!(params.swa_full(), false);Sourcepub const fn swa_full(&self) -> bool
pub const fn swa_full(&self) -> bool
Get whether full sliding window attention is enabled
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.swa_full(), true);Sourcepub const fn with_n_seq_max(self, n_seq_max: u32) -> Self
pub const fn with_n_seq_max(self, n_seq_max: u32) -> Self
Set the max number of sequences (i.e. distinct states for recurrent models)
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_n_seq_max(64);
assert_eq!(params.n_seq_max(), 64);Sourcepub const fn n_seq_max(&self) -> u32
pub const fn n_seq_max(&self) -> u32
Get the max number of sequences (i.e. distinct states for recurrent models)
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_seq_max(), 1);Sourcepub fn with_type_k(self, type_k: KvCacheType) -> Self
pub fn with_type_k(self, type_k: KvCacheType) -> Self
Set the KV cache data type for K
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, KvCacheType};
let params = LlamaContextParams::default().with_type_k(KvCacheType::Q4_0);
assert_eq!(params.type_k(), KvCacheType::Q4_0);
Sourcepub fn type_k(&self) -> KvCacheType
pub fn type_k(&self) -> KvCacheType
Get the KV cache data type for K
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
let _ = params.type_k();Sourcepub fn with_type_v(self, type_v: KvCacheType) -> Self
pub fn with_type_v(self, type_v: KvCacheType) -> Self
Set the KV cache data type for V
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, KvCacheType};
let params = LlamaContextParams::default().with_type_v(KvCacheType::Q4_1);
assert_eq!(params.type_v(), KvCacheType::Q4_1);Sourcepub fn type_v(&self) -> KvCacheType
pub fn type_v(&self) -> KvCacheType
Get the KV cache data type for V
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
let _ = params.type_v();Sourcepub fn with_attention_type(self, attention_type: LlamaAttentionType) -> Self
pub fn with_attention_type(self, attention_type: LlamaAttentionType) -> Self
Set the attention type
§Examples
use llama_cpp_bindings::context::params::{LlamaContextParams, LlamaAttentionType};
let params = LlamaContextParams::default()
.with_attention_type(LlamaAttentionType::NonCausal);
assert_eq!(params.attention_type(), LlamaAttentionType::NonCausal);Sourcepub fn attention_type(&self) -> LlamaAttentionType
pub fn attention_type(&self) -> LlamaAttentionType
Get the attention type
§Examples
let params = llama_cpp_bindings::context::params::LlamaContextParams::default();
assert_eq!(params.attention_type(), llama_cpp_bindings::context::params::LlamaAttentionType::Unspecified);Sourcepub const fn with_yarn_ext_factor(self, yarn_ext_factor: f32) -> Self
pub const fn with_yarn_ext_factor(self, yarn_ext_factor: f32) -> Self
Set the YaRN extrapolation factor
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_ext_factor(1.0);
assert!((params.yarn_ext_factor() - 1.0).abs() < f32::EPSILON);Sourcepub const fn yarn_ext_factor(&self) -> f32
pub const fn yarn_ext_factor(&self) -> f32
Get the YaRN extrapolation factor
Sourcepub const fn with_yarn_attn_factor(self, yarn_attn_factor: f32) -> Self
pub const fn with_yarn_attn_factor(self, yarn_attn_factor: f32) -> Self
Set the YaRN attention factor
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_attn_factor(2.0);
assert!((params.yarn_attn_factor() - 2.0).abs() < f32::EPSILON);Sourcepub const fn yarn_attn_factor(&self) -> f32
pub const fn yarn_attn_factor(&self) -> f32
Get the YaRN attention factor
Sourcepub const fn with_yarn_beta_fast(self, yarn_beta_fast: f32) -> Self
pub const fn with_yarn_beta_fast(self, yarn_beta_fast: f32) -> Self
Set the YaRN low correction dim
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_beta_fast(32.0);
assert!((params.yarn_beta_fast() - 32.0).abs() < f32::EPSILON);Sourcepub const fn yarn_beta_fast(&self) -> f32
pub const fn yarn_beta_fast(&self) -> f32
Get the YaRN low correction dim
Sourcepub const fn with_yarn_beta_slow(self, yarn_beta_slow: f32) -> Self
pub const fn with_yarn_beta_slow(self, yarn_beta_slow: f32) -> Self
Set the YaRN high correction dim
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_beta_slow(1.0);
assert!((params.yarn_beta_slow() - 1.0).abs() < f32::EPSILON);Sourcepub const fn yarn_beta_slow(&self) -> f32
pub const fn yarn_beta_slow(&self) -> f32
Get the YaRN high correction dim
Sourcepub const fn with_yarn_orig_ctx(self, yarn_orig_ctx: u32) -> Self
pub const fn with_yarn_orig_ctx(self, yarn_orig_ctx: u32) -> Self
Set the YaRN original context size
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_yarn_orig_ctx(4096);
assert_eq!(params.yarn_orig_ctx(), 4096);Sourcepub const fn yarn_orig_ctx(&self) -> u32
pub const fn yarn_orig_ctx(&self) -> u32
Get the YaRN original context size
Sourcepub const fn with_defrag_thold(self, defrag_thold: f32) -> Self
pub const fn with_defrag_thold(self, defrag_thold: f32) -> Self
Set the KV cache defragmentation threshold
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_defrag_thold(0.1);
assert!((params.defrag_thold() - 0.1).abs() < f32::EPSILON);Sourcepub const fn defrag_thold(&self) -> f32
pub const fn defrag_thold(&self) -> f32
Get the KV cache defragmentation threshold
Sourcepub const fn with_no_perf(self, no_perf: bool) -> Self
pub const fn with_no_perf(self, no_perf: bool) -> Self
Set whether performance timings are disabled
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_no_perf(true);
assert!(params.no_perf());Sourcepub const fn with_op_offload(self, op_offload: bool) -> Self
pub const fn with_op_offload(self, op_offload: bool) -> Self
Set whether to offload ops to GPU
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_op_offload(false);
assert!(!params.op_offload());Sourcepub const fn op_offload(&self) -> bool
pub const fn op_offload(&self) -> bool
Get whether ops are offloaded to GPU
Sourcepub const fn with_kv_unified(self, kv_unified: bool) -> Self
pub const fn with_kv_unified(self, kv_unified: bool) -> Self
Set whether to use a unified KV cache buffer across input sequences
§Examples
use llama_cpp_bindings::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
.with_kv_unified(true);
assert!(params.kv_unified());Sourcepub const fn kv_unified(&self) -> bool
pub const fn kv_unified(&self) -> bool
Get whether a unified KV cache buffer is used across input sequences
Trait Implementations§
Source§impl Clone for LlamaContextParams
impl Clone for LlamaContextParams
Source§fn clone(&self) -> LlamaContextParams
fn clone(&self) -> LlamaContextParams
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for LlamaContextParams
impl Debug for LlamaContextParams
Source§impl Default for LlamaContextParams
Default parameters for LlamaContext. (as defined in llama.cpp by llama_context_default_params)
impl Default for LlamaContextParams
Default parameters for LlamaContext. (as defined in llama.cpp by llama_context_default_params)
use llama_cpp_bindings::context::params::{LlamaContextParams, RopeScalingType};
let params = LlamaContextParams::default();
assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);impl Send for LlamaContextParams
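SAFETY: we do not currently allow setting or reading the pointers that cause this to not be automatically Send or Sync.
NOTE(review): this justification looks outdated — the context_params field is public and with_cb_eval_user_data accepts a raw *mut c_void, so a pointer can in fact be set through this type; confirm that the Send/Sync implementations are still sound.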
impl Sync for LlamaContextParams
Auto Trait Implementations§
impl Freeze for LlamaContextParams
impl RefUnwindSafe for LlamaContextParams
impl Unpin for LlamaContextParams
impl UnsafeUnpin for LlamaContextParams
impl UnwindSafe for LlamaContextParams
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more