use llama_crab_sys as sys;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[allow(dead_code)]
pub enum SplitMode {
None,
#[default]
Layer,
Row,
Tensor,
}
impl From<SplitMode> for sys::llama_split_mode {
fn from(s: SplitMode) -> Self {
match s {
SplitMode::None => Self::LLAMA_SPLIT_MODE_NONE,
SplitMode::Layer => Self::LLAMA_SPLIT_MODE_LAYER,
SplitMode::Row => Self::LLAMA_SPLIT_MODE_ROW,
SplitMode::Tensor => Self::LLAMA_SPLIT_MODE_TENSOR,
}
}
}
#[derive(Debug, Clone)]
pub struct LlamaModelParams {
n_gpu_layers: i32,
split_mode: SplitMode,
main_gpu: i32,
vocab_only: bool,
use_mmap: bool,
use_mlock: bool,
}
impl Default for LlamaModelParams {
fn default() -> Self {
let raw = unsafe { sys::llama_model_default_params() };
Self {
n_gpu_layers: raw.n_gpu_layers,
split_mode: match raw.split_mode {
sys::llama_split_mode::LLAMA_SPLIT_MODE_NONE => SplitMode::None,
sys::llama_split_mode::LLAMA_SPLIT_MODE_LAYER => SplitMode::Layer,
sys::llama_split_mode::LLAMA_SPLIT_MODE_ROW => SplitMode::Row,
sys::llama_split_mode::LLAMA_SPLIT_MODE_TENSOR => SplitMode::Tensor,
#[allow(unreachable_patterns)]
_ => SplitMode::Layer,
},
main_gpu: raw.main_gpu,
vocab_only: raw.vocab_only,
use_mmap: raw.use_mmap,
use_mlock: raw.use_mlock,
}
}
}
impl LlamaModelParams {
#[must_use]
pub fn with_n_gpu_layers(mut self, n: i32) -> Self {
self.n_gpu_layers = n;
self
}
#[must_use]
pub fn with_split_mode(mut self, mode: SplitMode) -> Self {
self.split_mode = mode;
self
}
#[must_use]
pub fn with_main_gpu(mut self, idx: i32) -> Self {
self.main_gpu = idx;
self
}
#[must_use]
pub fn with_vocab_only(mut self, yes: bool) -> Self {
self.vocab_only = yes;
self
}
#[must_use]
pub fn with_use_mmap(mut self, yes: bool) -> Self {
self.use_mmap = yes;
self
}
#[must_use]
pub fn with_use_mlock(mut self, yes: bool) -> Self {
self.use_mlock = yes;
self
}
#[allow(clippy::missing_const_for_fn)]
pub(crate) fn build(&self) -> sys::llama_model_params {
let mut raw = unsafe { sys::llama_model_default_params() };
raw.n_gpu_layers = self.n_gpu_layers;
raw.split_mode = self.split_mode.into();
raw.main_gpu = self.main_gpu;
raw.vocab_only = self.vocab_only;
raw.use_mmap = self.use_mmap;
raw.use_mlock = self.use_mlock;
raw
}
}