use llama_crab_sys as sys;
/// Associates a tensor name with the [`GgmlType`] that should be applied to it.
///
/// Values of this type are turned into raw `sys::llama_model_tensor_override`
/// entries by `to_c_array`.
#[derive(Debug, Clone)]
pub struct BufferTypeOverride {
    /// Tensor name; `to_c_array` copies it into the C `pattern` string.
    pub tensor_name: String,
    /// Type selector; `to_c_array` converts it with [`GgmlType::as_raw`].
    pub ggml_type: GgmlType,
}
/// Safe, copyable selector for a raw `sys::ggml_type` value.
///
/// Each variant maps to exactly one raw constant; the mapping is implemented
/// by `GgmlType::as_raw`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GgmlType {
    /// Maps to `GGML_TYPE_F32`.
    F32,
    /// Maps to `GGML_TYPE_F16`.
    F16,
    /// Maps to `GGML_TYPE_Q4_0`.
    Q4_0,
    /// Maps to `GGML_TYPE_Q4_1`.
    Q4_1,
    /// Maps to `GGML_TYPE_Q5_0`.
    Q5_0,
    /// Maps to `GGML_TYPE_Q5_1`.
    Q5_1,
    /// Maps to `GGML_TYPE_Q8_0`.
    Q8_0,
    /// Maps to `GGML_TYPE_Q2_K`.
    Q2K,
    /// Maps to `GGML_TYPE_Q3_K`.
    Q3K,
    /// Maps to `GGML_TYPE_Q4_K`.
    Q4K,
    /// Maps to `GGML_TYPE_Q5_K`.
    Q5K,
    /// Maps to `GGML_TYPE_Q6_K`.
    Q6K,
}
impl GgmlType {
#[must_use]
pub const fn as_raw(self) -> sys::ggml_type {
match self {
Self::F32 => sys::ggml_type::GGML_TYPE_F32,
Self::F16 => sys::ggml_type::GGML_TYPE_F16,
Self::Q4_0 => sys::ggml_type::GGML_TYPE_Q4_0,
Self::Q4_1 => sys::ggml_type::GGML_TYPE_Q4_1,
Self::Q5_0 => sys::ggml_type::GGML_TYPE_Q5_0,
Self::Q5_1 => sys::ggml_type::GGML_TYPE_Q5_1,
Self::Q8_0 => sys::ggml_type::GGML_TYPE_Q8_0,
Self::Q2K => sys::ggml_type::GGML_TYPE_Q2_K,
Self::Q3K => sys::ggml_type::GGML_TYPE_Q3_K,
Self::Q4K => sys::ggml_type::GGML_TYPE_Q4_K,
Self::Q5K => sys::ggml_type::GGML_TYPE_Q5_K,
Self::Q6K => sys::ggml_type::GGML_TYPE_Q6_K,
}
}
}
impl BufferTypeOverride {
#[must_use]
pub fn new(tensor_name: impl Into<String>, ggml_type: GgmlType) -> Self {
Self {
tensor_name: tensor_name.into(),
ggml_type,
}
}
}
/// Converts a slice of [`BufferTypeOverride`] into raw
/// `sys::llama_model_tensor_override` entries, one per input item and in the
/// same order.
///
/// For every item the tensor name is copied into a newly allocated
/// NUL-terminated C string and the type is converted with
/// [`GgmlType::as_raw`].
///
/// # Memory
///
/// Ownership of each C string is released with `std::ffi::CString::into_raw`,
/// so the `pattern` pointers in the returned entries are **leaked**: nothing in
/// this function (or in the returned `Vec`'s destructor) frees them. To reclaim
/// an allocation, pass its pointer to `std::ffi::CString::from_raw` exactly
/// once, after the consumer is done with it.
///
/// NOTE(review): no terminating sentinel entry is appended here — confirm
/// whether the consuming C API expects a terminated array or an explicit length.
///
/// # Panics
///
/// Panics if any `tensor_name` contains an interior NUL byte, because such a
/// string cannot be represented as a C string.
pub(crate) fn to_c_array(items: &[BufferTypeOverride]) -> Vec<sys::llama_model_tensor_override> {
    items
        .iter()
        .map(|o| {
            let pattern = std::ffi::CString::new(o.tensor_name.as_str())
                .unwrap_or_else(|err| {
                    // Name the offending input so the failure is diagnosable
                    // instead of a bare `unwrap` on `NulError`.
                    panic!(
                        "tensor override pattern {:?} contains an interior NUL byte at index {}",
                        o.tensor_name,
                        err.nul_position()
                    )
                })
                // Hand the allocation over as a raw pointer; see `# Memory`.
                .into_raw();
            sys::llama_model_tensor_override {
                pattern: pattern.cast_const(),
                type_: o.ggml_type.as_raw(),
            }
        })
        .collect()
}