use std::ffi::{CString, NulError};
use std::ptr::null;
/// Requested quantization type for a whole model file.
///
/// Discriminants are cast directly to `llama_cpp_sys_4::llama_ftype` (see the
/// `From` impl below), so they must match the C `LLAMA_FTYPE_MOSTLY_*` values;
/// gaps in the numbering correspond to formats not exposed by this wrapper.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
#[allow(missing_docs)]
pub enum LlamaFtype {
    AllF32 = 0,
    MostlyF16 = 1,
    MostlyQ4_0 = 2,
    MostlyQ4_1 = 3,
    MostlyQ8_0 = 7,
    MostlyQ5_0 = 8,
    MostlyQ5_1 = 9,
    MostlyQ2K = 10,
    MostlyQ3KS = 11,
    MostlyQ3KM = 12,
    MostlyQ3KL = 13,
    MostlyQ4KS = 14,
    MostlyQ4KM = 15,
    MostlyQ5KS = 16,
    MostlyQ5KM = 17,
    MostlyQ6K = 18,
    MostlyIQ2XXS = 19,
    MostlyIQ2XS = 20,
    MostlyQ2KS = 21,
    MostlyIQ3XS = 22,
    MostlyIQ3XXS = 23,
    MostlyIQ1S = 24,
    MostlyIQ4NL = 25,
    MostlyIQ3S = 26,
    MostlyIQ3M = 27,
    MostlyIQ2S = 28,
    MostlyIQ2M = 29,
    MostlyIQ4XS = 30,
    MostlyIQ1M = 31,
    MostlyBF16 = 32,
    MostlyTQ1_0 = 36,
    MostlyTQ2_0 = 37,
    MostlyMXFP4Moe = 38,
    MostlyNVFP4 = 39,
    #[cfg(feature = "q1")]
    MostlyQ1_0 = 40,
    #[cfg(feature = "q1")]
    MostlyQ1_0_G128 = 41,
}
impl LlamaFtype {
    /// Canonical short name of the format, e.g. `"Q4_K_M"`.
    ///
    /// Every string returned here is accepted by [`from_name`](Self::from_name).
    #[must_use]
    pub fn name(self) -> &'static str {
        match self {
            Self::AllF32 => "F32",
            Self::MostlyF16 => "F16",
            Self::MostlyQ4_0 => "Q4_0",
            Self::MostlyQ4_1 => "Q4_1",
            Self::MostlyQ8_0 => "Q8_0",
            Self::MostlyQ5_0 => "Q5_0",
            Self::MostlyQ5_1 => "Q5_1",
            Self::MostlyQ2K => "Q2_K",
            Self::MostlyQ3KS => "Q3_K_S",
            Self::MostlyQ3KM => "Q3_K_M",
            Self::MostlyQ3KL => "Q3_K_L",
            Self::MostlyQ4KS => "Q4_K_S",
            Self::MostlyQ4KM => "Q4_K_M",
            Self::MostlyQ5KS => "Q5_K_S",
            Self::MostlyQ5KM => "Q5_K_M",
            Self::MostlyQ6K => "Q6_K",
            Self::MostlyIQ2XXS => "IQ2_XXS",
            Self::MostlyIQ2XS => "IQ2_XS",
            Self::MostlyQ2KS => "Q2_K_S",
            Self::MostlyIQ3XS => "IQ3_XS",
            Self::MostlyIQ3XXS => "IQ3_XXS",
            Self::MostlyIQ1S => "IQ1_S",
            Self::MostlyIQ4NL => "IQ4_NL",
            Self::MostlyIQ3S => "IQ3_S",
            Self::MostlyIQ3M => "IQ3_M",
            Self::MostlyIQ2S => "IQ2_S",
            Self::MostlyIQ2M => "IQ2_M",
            Self::MostlyIQ4XS => "IQ4_XS",
            Self::MostlyIQ1M => "IQ1_M",
            Self::MostlyBF16 => "BF16",
            Self::MostlyTQ1_0 => "TQ1_0",
            Self::MostlyTQ2_0 => "TQ2_0",
            Self::MostlyMXFP4Moe => "MXFP4_MOE",
            Self::MostlyNVFP4 => "NVFP4",
            #[cfg(feature = "q1")]
            Self::MostlyQ1_0 => "Q1_0",
            #[cfg(feature = "q1")]
            Self::MostlyQ1_0_G128 => "Q1_0_g128",
        }
    }
    /// Human-readable size / quality summary for the format.
    #[must_use]
    pub fn description(self) -> &'static str {
        match self {
            Self::AllF32 => "26.00 GB @ 7B — full precision reference",
            Self::MostlyF16 => "14.00 GB @ 7B — +0.0020 ppl vs Mistral-7B",
            Self::MostlyBF16 => "14.00 GB @ 7B — -0.0050 ppl vs Mistral-7B",
            Self::MostlyQ8_0 => " 7.96 GB @ 8B — +0.0026 ppl",
            Self::MostlyQ6K => " 6.14 GB @ 8B — +0.0217 ppl",
            Self::MostlyQ5KM => " 5.33 GB @ 8B — +0.0569 ppl",
            Self::MostlyQ5KS => " 5.21 GB @ 8B — +0.1049 ppl",
            Self::MostlyQ5_1 => " 5.65 GB @ 8B — +0.1062 ppl",
            Self::MostlyQ5_0 => " 5.21 GB @ 8B — +0.1316 ppl",
            Self::MostlyQ4KM => " 4.58 GB @ 8B — +0.1754 ppl [recommended]",
            Self::MostlyQ4KS => " 4.37 GB @ 8B — +0.2689 ppl",
            Self::MostlyQ4_1 => " 4.78 GB @ 8B — +0.4511 ppl",
            Self::MostlyQ4_0 => " 4.34 GB @ 8B — +0.4685 ppl",
            Self::MostlyQ3KL => " 4.03 GB @ 8B — +0.5562 ppl",
            Self::MostlyQ3KM => " 3.74 GB @ 8B — +0.6569 ppl",
            Self::MostlyQ3KS => " 3.41 GB @ 8B — +1.6321 ppl",
            Self::MostlyQ2KS => " 2.96 GB @ 8B — +3.1836 ppl",
            Self::MostlyQ2K => " 2.96 GB @ 8B — +3.5199 ppl",
            Self::MostlyIQ4XS => " 4.25 bpw non-linear",
            Self::MostlyIQ4NL => " 4.50 bpw non-linear",
            Self::MostlyIQ3S => " 3.44 bpw",
            Self::MostlyIQ3M => " 3.66 bpw",
            Self::MostlyIQ3XS => " 3.3 bpw",
            Self::MostlyIQ3XXS => " 3.06 bpw",
            Self::MostlyIQ2M => " 2.7 bpw",
            Self::MostlyIQ2S => " 2.5 bpw",
            Self::MostlyIQ2XS => " 2.31 bpw",
            Self::MostlyIQ2XXS => " 2.06 bpw",
            Self::MostlyIQ1M => " 1.75 bpw — extreme compression",
            Self::MostlyIQ1S => " 1.56 bpw — extreme compression",
            Self::MostlyTQ1_0 => " 1.69 bpw ternary",
            Self::MostlyTQ2_0 => " 2.06 bpw ternary",
            Self::MostlyMXFP4Moe => "MXFP4 MoE layers",
            Self::MostlyNVFP4 => "NVFP4",
            #[cfg(feature = "q1")]
            Self::MostlyQ1_0 => " 1.50 bpw — binary Q1_0 (block 32)",
            #[cfg(feature = "q1")]
            Self::MostlyQ1_0_G128 => " 1.125 bpw — binary Q1_0_g128 (block 128)",
        }
    }
    /// Parses a format name, case-insensitively; inverse of [`name`](Self::name).
    ///
    /// Returns `None` for unrecognized names.
    #[must_use]
    pub fn from_name(name: &str) -> Option<Self> {
        let upper = name.to_uppercase();
        match upper.as_str() {
            "F32" => Some(Self::AllF32),
            "F16" => Some(Self::MostlyF16),
            "BF16" => Some(Self::MostlyBF16),
            "Q4_0" => Some(Self::MostlyQ4_0),
            "Q4_1" => Some(Self::MostlyQ4_1),
            "Q8_0" => Some(Self::MostlyQ8_0),
            "Q5_0" => Some(Self::MostlyQ5_0),
            "Q5_1" => Some(Self::MostlyQ5_1),
            "Q2_K" => Some(Self::MostlyQ2K),
            "Q2_K_S" => Some(Self::MostlyQ2KS),
            "Q3_K_S" => Some(Self::MostlyQ3KS),
            "Q3_K_M" => Some(Self::MostlyQ3KM),
            "Q3_K_L" => Some(Self::MostlyQ3KL),
            "Q4_K_S" => Some(Self::MostlyQ4KS),
            "Q4_K_M" => Some(Self::MostlyQ4KM),
            "Q5_K_S" => Some(Self::MostlyQ5KS),
            "Q5_K_M" => Some(Self::MostlyQ5KM),
            "Q6_K" => Some(Self::MostlyQ6K),
            "IQ1_S" => Some(Self::MostlyIQ1S),
            "IQ1_M" => Some(Self::MostlyIQ1M),
            "IQ2_XXS" => Some(Self::MostlyIQ2XXS),
            "IQ2_XS" => Some(Self::MostlyIQ2XS),
            "IQ2_S" => Some(Self::MostlyIQ2S),
            "IQ2_M" => Some(Self::MostlyIQ2M),
            "IQ3_XXS" => Some(Self::MostlyIQ3XXS),
            "IQ3_XS" => Some(Self::MostlyIQ3XS),
            "IQ3_S" => Some(Self::MostlyIQ3S),
            "IQ3_M" => Some(Self::MostlyIQ3M),
            "IQ4_NL" => Some(Self::MostlyIQ4NL),
            "IQ4_XS" => Some(Self::MostlyIQ4XS),
            "TQ1_0" => Some(Self::MostlyTQ1_0),
            "TQ2_0" => Some(Self::MostlyTQ2_0),
            "MXFP4_MOE" => Some(Self::MostlyMXFP4Moe),
            "NVFP4" => Some(Self::MostlyNVFP4),
            #[cfg(feature = "q1")]
            "Q1_0" => Some(Self::MostlyQ1_0),
            // `upper` is already uppercased, so only the uppercase spelling
            // can ever match; a mixed-case "Q1_0_g128" alternative here would
            // be dead code.
            #[cfg(feature = "q1")]
            "Q1_0_G128" => Some(Self::MostlyQ1_0_G128),
            _ => None,
        }
    }
    /// All variants, grouped by family from highest precision to most
    /// compressed (display / selection order, not discriminant order).
    #[must_use]
    pub fn all() -> &'static [Self] {
        &[
            Self::AllF32,
            Self::MostlyF16,
            Self::MostlyBF16,
            Self::MostlyQ8_0,
            Self::MostlyQ6K,
            Self::MostlyQ5KM,
            Self::MostlyQ5KS,
            Self::MostlyQ5_1,
            Self::MostlyQ5_0,
            Self::MostlyQ4KM,
            Self::MostlyQ4KS,
            Self::MostlyQ4_1,
            Self::MostlyQ4_0,
            Self::MostlyQ3KL,
            Self::MostlyQ3KM,
            Self::MostlyQ3KS,
            Self::MostlyQ2KS,
            Self::MostlyQ2K,
            Self::MostlyIQ4XS,
            Self::MostlyIQ4NL,
            Self::MostlyIQ3S,
            Self::MostlyIQ3M,
            Self::MostlyIQ3XS,
            Self::MostlyIQ3XXS,
            Self::MostlyIQ2M,
            Self::MostlyIQ2S,
            Self::MostlyIQ2XS,
            Self::MostlyIQ2XXS,
            Self::MostlyIQ1M,
            Self::MostlyIQ1S,
            Self::MostlyTQ1_0,
            Self::MostlyTQ2_0,
            Self::MostlyMXFP4Moe,
            Self::MostlyNVFP4,
            #[cfg(feature = "q1")]
            Self::MostlyQ1_0,
            #[cfg(feature = "q1")]
            Self::MostlyQ1_0_G128,
        ]
    }
}
impl From<LlamaFtype> for llama_cpp_sys_4::llama_ftype {
    /// Casts the variant's discriminant to the raw `llama_ftype` value.
    fn from(value: LlamaFtype) -> Self {
        value as llama_cpp_sys_4::llama_ftype
    }
}
impl std::fmt::Display for LlamaFtype {
    /// Formats as the canonical short name (same string as `name()`).
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str(self.name())
    }
}
/// Tensor data type.
///
/// Discriminants are cast directly to `llama_cpp_sys_4::ggml_type` (see the
/// `From` impl below), so the values must stay in sync with the C enum; gaps
/// in the numbering are values this wrapper does not expose.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
#[allow(missing_docs)]
pub enum GgmlType {
F32 = 0,
F16 = 1,
Q4_0 = 2,
Q4_1 = 3,
Q5_0 = 6,
Q5_1 = 7,
Q8_0 = 8,
Q8_1 = 9,
Q2K = 10,
Q3K = 11,
Q4K = 12,
Q5K = 13,
Q6K = 14,
Q8K = 15,
IQ2XXS = 16,
IQ2XS = 17,
IQ3XXS = 18,
IQ1S = 19,
IQ4NL = 20,
IQ3S = 21,
IQ2S = 22,
IQ4XS = 23,
I8 = 24,
I16 = 25,
I32 = 26,
I64 = 27,
F64 = 28,
IQ1M = 29,
BF16 = 30,
TQ1_0 = 34,
TQ2_0 = 35,
MXFP4 = 39,
// Without the `q1` feature, NVFP4 occupies raw value 40.
#[cfg(not(feature = "q1"))]
NVFP4 = 40,
// With the `q1` feature, Q1_0/Q1_0_G128 take 40/41 and NVFP4 shifts to 42.
// NOTE(review): this assumes the linked llama.cpp build uses the same
// feature-dependent numbering — confirm against the sys crate's headers.
#[cfg(feature = "q1")]
Q1_0 = 40,
#[cfg(feature = "q1")]
Q1_0_G128 = 41,
#[cfg(feature = "q1")]
NVFP4 = 42,
}
impl From<GgmlType> for llama_cpp_sys_4::ggml_type {
    /// Casts the variant's discriminant to the raw `ggml_type` value.
    fn from(value: GgmlType) -> Self {
        value as llama_cpp_sys_4::ggml_type
    }
}
impl TryFrom<llama_cpp_sys_4::ggml_type> for GgmlType {
/// The unrecognized raw value is handed back unchanged on failure.
type Error = llama_cpp_sys_4::ggml_type;
/// Maps a raw `ggml_type` value back onto the wrapper enum.
///
/// Values not represented here (including `GGML_TYPE_COUNT`, which this
/// module uses as the "unset" sentinel in `QuantizeParams::to_raw`) are
/// returned as `Err`. The arms must stay value-for-value in sync with the
/// discriminants declared on `GgmlType`, including the `q1`-feature
/// remapping of 40–42.
fn try_from(v: llama_cpp_sys_4::ggml_type) -> Result<Self, Self::Error> {
match v {
0 => Ok(Self::F32),
1 => Ok(Self::F16),
2 => Ok(Self::Q4_0),
3 => Ok(Self::Q4_1),
6 => Ok(Self::Q5_0),
7 => Ok(Self::Q5_1),
8 => Ok(Self::Q8_0),
9 => Ok(Self::Q8_1),
10 => Ok(Self::Q2K),
11 => Ok(Self::Q3K),
12 => Ok(Self::Q4K),
13 => Ok(Self::Q5K),
14 => Ok(Self::Q6K),
15 => Ok(Self::Q8K),
16 => Ok(Self::IQ2XXS),
17 => Ok(Self::IQ2XS),
18 => Ok(Self::IQ3XXS),
19 => Ok(Self::IQ1S),
20 => Ok(Self::IQ4NL),
21 => Ok(Self::IQ3S),
22 => Ok(Self::IQ2S),
23 => Ok(Self::IQ4XS),
24 => Ok(Self::I8),
25 => Ok(Self::I16),
26 => Ok(Self::I32),
27 => Ok(Self::I64),
28 => Ok(Self::F64),
29 => Ok(Self::IQ1M),
30 => Ok(Self::BF16),
34 => Ok(Self::TQ1_0),
35 => Ok(Self::TQ2_0),
39 => Ok(Self::MXFP4),
#[cfg(not(feature = "q1"))]
40 => Ok(Self::NVFP4),
#[cfg(feature = "q1")]
40 => Ok(Self::Q1_0),
#[cfg(feature = "q1")]
41 => Ok(Self::Q1_0_G128),
#[cfg(feature = "q1")]
42 => Ok(Self::NVFP4),
_ => Err(v),
}
}
}
/// One importance-matrix entry: a tensor name plus its importance data.
#[derive(Debug, Clone)]
pub struct ImatrixEntry {
    name: CString,
    data: Vec<f32>,
}
impl ImatrixEntry {
    /// Creates an entry from a tensor `name` and its importance `data`.
    ///
    /// # Errors
    /// Returns [`NulError`] when `name` contains an interior NUL byte.
    pub fn new(name: impl Into<Vec<u8>>, data: Vec<f32>) -> Result<Self, NulError> {
        let name = CString::new(name)?;
        Ok(Self { name, data })
    }
    /// Tensor name as UTF-8, or `""` when the bytes are not valid UTF-8.
    #[must_use]
    pub fn name_str(&self) -> &str {
        match self.name.to_str() {
            Ok(s) => s,
            Err(_) => "",
        }
    }
    /// Number of `f32` values stored for this tensor.
    #[must_use]
    pub fn len(&self) -> usize {
        self.data.len()
    }
    /// `true` when no data values are stored.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }
}
/// An importance matrix: an ordered collection of [`ImatrixEntry`] values.
#[derive(Debug, Clone, Default)]
pub struct Imatrix {
    entries: Vec<ImatrixEntry>,
}
impl Imatrix {
    /// Creates an empty importance matrix.
    #[must_use]
    pub fn new() -> Self {
        Imatrix {
            entries: Vec::new(),
        }
    }
    /// Appends `entry` to the matrix, preserving insertion order.
    pub fn push(&mut self, entry: ImatrixEntry) {
        self.entries.push(entry);
    }
    /// Number of entries currently held.
    #[must_use]
    pub fn len(&self) -> usize {
        self.entries.len()
    }
    /// `true` when the matrix holds no entries.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}
/// Overrides the quantization type for tensors whose name matches `pattern`.
#[derive(Debug, Clone)]
pub struct TensorTypeOverride {
    pattern: CString,
    ty: GgmlType,
}
impl TensorTypeOverride {
    /// Creates an override from a tensor-name `pattern` and target type `ty`.
    ///
    /// # Errors
    /// Returns [`NulError`] when `pattern` contains an interior NUL byte.
    pub fn new(pattern: impl Into<Vec<u8>>, ty: GgmlType) -> Result<Self, NulError> {
        let pattern = CString::new(pattern)?;
        Ok(Self { pattern, ty })
    }
    /// Pattern as UTF-8, or `""` when the bytes are not valid UTF-8.
    #[must_use]
    pub fn pattern_str(&self) -> &str {
        match self.pattern.to_str() {
            Ok(s) => s,
            Err(_) => "",
        }
    }
    /// Target tensor type for matching tensors.
    #[must_use]
    pub fn ty(&self) -> GgmlType {
        self.ty
    }
}
/// Typed payload for a [`KvOverride`], mirroring the variants of the C
/// `llama_model_kv_override` union (see `QuantizeParams::to_raw`).
#[derive(Debug, Clone, PartialEq)]
pub enum KvOverrideValue {
/// 64-bit integer override (`val_i64`).
Int(i64),
/// 64-bit float override (`val_f64`).
Float(f64),
/// Boolean override (`val_bool`).
Bool(bool),
/// Fixed 128-byte C string buffer (`val_str`), copied into the union
/// verbatim. NOTE(review): termination is not validated here — the caller
/// must supply a NUL-terminated buffer.
Str([std::os::raw::c_char; 128]),
}
/// A metadata key/value override to apply during quantization.
#[derive(Debug, Clone)]
pub struct KvOverride {
    key: CString,
    /// Typed value for the override.
    pub value: KvOverrideValue,
}
impl KvOverride {
    /// Creates an override binding metadata `key` to `value`.
    ///
    /// # Errors
    /// Returns [`NulError`] when `key` contains an interior NUL byte.
    pub fn new(key: impl Into<Vec<u8>>, value: KvOverrideValue) -> Result<Self, NulError> {
        let key = CString::new(key)?;
        Ok(Self { key, value })
    }
}
/// Safe builder for `llama_model_quantize_params`.
///
/// Public fields mirror the scalar C parameters one-to-one; list-valued
/// parameters (imatrix, KV overrides, tensor-type overrides, pruned layers)
/// are kept private and marshalled into sentinel-terminated C arrays by
/// `to_raw`.
#[derive(Debug, Clone)]
#[allow(clippy::struct_excessive_bools)]
pub struct QuantizeParams {
/// Thread count, passed through to the C `nthread` field.
pub nthread: i32,
/// Target quantization file type.
pub ftype: LlamaFtype,
/// Output-tensor type override; `None` maps to `GGML_TYPE_COUNT` (unset).
pub output_tensor_type: Option<GgmlType>,
/// Token-embedding type override; `None` maps to `GGML_TYPE_COUNT` (unset).
pub token_embedding_type: Option<GgmlType>,
/// Mirrors the C `allow_requantize` flag.
pub allow_requantize: bool,
/// Mirrors the C `quantize_output_tensor` flag.
pub quantize_output_tensor: bool,
/// Mirrors the C `only_copy` flag.
pub only_copy: bool,
/// Mirrors the C `pure_` flag.
pub pure: bool,
/// Mirrors the C `keep_split` flag.
pub keep_split: bool,
/// Mirrors the C `dry_run` flag.
pub dry_run: bool,
// List-valued parameters; empty lists become null pointers in `to_raw`.
imatrix: Vec<ImatrixEntry>,
kv_overrides: Vec<KvOverride>,
tt_overrides: Vec<TensorTypeOverride>,
prune_layers: Vec<i32>,
}
impl QuantizeParams {
#[must_use]
pub fn new(ftype: LlamaFtype) -> Self {
let d = unsafe { llama_cpp_sys_4::llama_model_quantize_default_params() };
Self {
nthread: d.nthread,
ftype,
output_tensor_type: GgmlType::try_from(d.output_tensor_type).ok(),
token_embedding_type: GgmlType::try_from(d.token_embedding_type).ok(),
allow_requantize: d.allow_requantize,
quantize_output_tensor: d.quantize_output_tensor,
only_copy: d.only_copy,
pure: d.pure_,
keep_split: d.keep_split,
dry_run: d.dry_run,
imatrix: Vec::new(),
kv_overrides: Vec::new(),
tt_overrides: Vec::new(),
prune_layers: Vec::new(),
}
}
#[must_use]
pub fn with_nthread(mut self, n: i32) -> Self {
self.nthread = n;
self
}
#[must_use]
pub fn with_output_tensor_type(mut self, ty: GgmlType) -> Self {
self.output_tensor_type = Some(ty);
self
}
#[must_use]
pub fn with_token_embedding_type(mut self, ty: GgmlType) -> Self {
self.token_embedding_type = Some(ty);
self
}
#[must_use]
pub fn with_allow_requantize(mut self, v: bool) -> Self {
self.allow_requantize = v;
self
}
#[must_use]
pub fn with_quantize_output_tensor(mut self, v: bool) -> Self {
self.quantize_output_tensor = v;
self
}
#[must_use]
pub fn with_only_copy(mut self, v: bool) -> Self {
self.only_copy = v;
self
}
#[must_use]
pub fn with_pure(mut self, v: bool) -> Self {
self.pure = v;
self
}
#[must_use]
pub fn with_keep_split(mut self, v: bool) -> Self {
self.keep_split = v;
self
}
#[must_use]
pub fn with_dry_run(mut self, v: bool) -> Self {
self.dry_run = v;
self
}
#[must_use]
pub fn with_imatrix(mut self, imatrix: Imatrix) -> Self {
self.imatrix = imatrix.entries;
self
}
#[must_use]
pub fn with_imatrix_entry(mut self, entry: ImatrixEntry) -> Self {
self.imatrix.push(entry);
self
}
#[must_use]
pub fn with_kv_override(mut self, kv: KvOverride) -> Self {
self.kv_overrides.push(kv);
self
}
#[must_use]
pub fn with_tensor_type_override(mut self, ov: TensorTypeOverride) -> Self {
self.tt_overrides.push(ov);
self
}
#[must_use]
pub fn with_pruned_layer(mut self, layer: i32) -> Self {
self.prune_layers.push(layer);
self
}
#[must_use]
pub fn with_pruned_layers(mut self, layers: impl IntoIterator<Item = i32>) -> Self {
self.prune_layers.extend(layers);
self
}
pub(crate) fn to_raw(&self) -> RawQuantizeParamsGuard<'_> {
let imatrix_c: Vec<llama_cpp_sys_4::llama_model_imatrix_data> = self
.imatrix
.iter()
.map(|e| llama_cpp_sys_4::llama_model_imatrix_data {
name: e.name.as_ptr(),
data: e.data.as_ptr(),
size: e.data.len(),
})
.chain(std::iter::once(llama_cpp_sys_4::llama_model_imatrix_data {
name: null(),
data: null(),
size: 0,
}))
.collect();
let kv_c: Vec<llama_cpp_sys_4::llama_model_kv_override> = self
.kv_overrides
.iter()
.map(|kv| {
let mut raw = llama_cpp_sys_4::llama_model_kv_override {
key: [0; 128],
tag: 0,
__bindgen_anon_1: llama_cpp_sys_4::llama_model_kv_override__bindgen_ty_1 {
val_i64: 0,
},
};
let bytes = kv.key.to_bytes_with_nul();
let copy_len = bytes.len().min(128);
for (dst, &src) in raw.key.iter_mut().zip(bytes[..copy_len].iter()) {
*dst = src as std::os::raw::c_char;
}
match &kv.value {
KvOverrideValue::Int(v) => {
raw.tag = llama_cpp_sys_4::LLAMA_KV_OVERRIDE_TYPE_INT;
raw.__bindgen_anon_1 =
llama_cpp_sys_4::llama_model_kv_override__bindgen_ty_1 {
val_i64: *v,
};
}
KvOverrideValue::Float(v) => {
raw.tag = llama_cpp_sys_4::LLAMA_KV_OVERRIDE_TYPE_FLOAT;
raw.__bindgen_anon_1 =
llama_cpp_sys_4::llama_model_kv_override__bindgen_ty_1 {
val_f64: *v,
};
}
KvOverrideValue::Bool(v) => {
raw.tag = llama_cpp_sys_4::LLAMA_KV_OVERRIDE_TYPE_BOOL;
raw.__bindgen_anon_1 =
llama_cpp_sys_4::llama_model_kv_override__bindgen_ty_1 {
val_bool: *v,
};
}
KvOverrideValue::Str(s) => {
raw.tag = llama_cpp_sys_4::LLAMA_KV_OVERRIDE_TYPE_STR;
raw.__bindgen_anon_1 =
llama_cpp_sys_4::llama_model_kv_override__bindgen_ty_1 {
val_str: *s,
};
}
}
raw
})
.chain(std::iter::once(llama_cpp_sys_4::llama_model_kv_override {
key: [0; 128],
tag: 0,
__bindgen_anon_1: llama_cpp_sys_4::llama_model_kv_override__bindgen_ty_1 {
val_i64: 0,
},
}))
.collect();
let tt_c: Vec<llama_cpp_sys_4::llama_model_tensor_override> = self
.tt_overrides
.iter()
.map(|ov| llama_cpp_sys_4::llama_model_tensor_override {
pattern: ov.pattern.as_ptr(),
type_: ov.ty as llama_cpp_sys_4::ggml_type,
})
.chain(std::iter::once(
llama_cpp_sys_4::llama_model_tensor_override {
pattern: null(),
type_: llama_cpp_sys_4::GGML_TYPE_COUNT,
},
))
.collect();
let mut prune_c = self.prune_layers.clone();
prune_c.push(-1);
let raw = llama_cpp_sys_4::llama_model_quantize_params {
nthread: self.nthread,
ftype: self.ftype as llama_cpp_sys_4::llama_ftype,
output_tensor_type: self
.output_tensor_type
.map(|t| t as llama_cpp_sys_4::ggml_type)
.unwrap_or(llama_cpp_sys_4::GGML_TYPE_COUNT),
token_embedding_type: self
.token_embedding_type
.map(|t| t as llama_cpp_sys_4::ggml_type)
.unwrap_or(llama_cpp_sys_4::GGML_TYPE_COUNT),
allow_requantize: self.allow_requantize,
quantize_output_tensor: self.quantize_output_tensor,
only_copy: self.only_copy,
pure_: self.pure,
keep_split: self.keep_split,
dry_run: self.dry_run,
imatrix: if self.imatrix.is_empty() {
null()
} else {
imatrix_c.as_ptr()
},
kv_overrides: if self.kv_overrides.is_empty() {
null()
} else {
kv_c.as_ptr()
},
tt_overrides: if self.tt_overrides.is_empty() {
null()
} else {
tt_c.as_ptr()
},
prune_layers: if self.prune_layers.is_empty() {
null()
} else {
prune_c.as_ptr()
},
};
RawQuantizeParamsGuard {
raw,
_imatrix_c: imatrix_c,
_kv_c: kv_c,
_tt_c: tt_c,
_prune_c: prune_c,
_marker: std::marker::PhantomData,
}
}
}
/// Owns the raw FFI parameter struct plus every sentinel-terminated buffer
/// its embedded pointers reference. `raw` is only valid while this guard is
/// alive; the pointers also borrow from the originating `QuantizeParams`
/// (imatrix names/data, override patterns), which the phantom lifetime pins.
pub(crate) struct RawQuantizeParamsGuard<'a> {
/// The C parameter struct handed to llama.cpp.
pub(crate) raw: llama_cpp_sys_4::llama_model_quantize_params,
// Backing storage for the pointer fields inside `raw`.
_imatrix_c: Vec<llama_cpp_sys_4::llama_model_imatrix_data>,
_kv_c: Vec<llama_cpp_sys_4::llama_model_kv_override>,
_tt_c: Vec<llama_cpp_sys_4::llama_model_tensor_override>,
_prune_c: Vec<i32>,
// Ties the guard to the borrowed `QuantizeParams` so pointers into its
// CStrings and data vectors cannot outlive them.
_marker: std::marker::PhantomData<&'a QuantizeParams>,
}
/// Enables or disables attention rotation via the `LLAMA_ATTN_ROT_DISABLE`
/// environment variable: sets it to `"1"` when `disabled`, removes it otherwise.
pub fn set_attn_rot_disabled(disabled: bool) {
    // `set_var`/`remove_var` are unsafe in newer editions; the allow covers
    // editions where the unsafe block is unnecessary.
    #[allow(unused_unsafe)]
    unsafe {
        if disabled {
            std::env::set_var("LLAMA_ATTN_ROT_DISABLE", "1");
        } else {
            std::env::remove_var("LLAMA_ATTN_ROT_DISABLE");
        }
    }
}
/// Reports whether `LLAMA_ATTN_ROT_DISABLE` is set to a nonzero integer.
///
/// Unset, non-integer, or zero values all count as "not disabled".
#[must_use]
pub fn attn_rot_disabled() -> bool {
    match std::env::var("LLAMA_ATTN_ROT_DISABLE") {
        Ok(raw) => match raw.parse::<i32>() {
            Ok(n) => n != 0,
            Err(_) => false,
        },
        Err(_) => false,
    }
}