use std::ptr;
use llama_cpp_sys::{
llama_context_default_params, llama_context_params, llama_model_default_params,
llama_model_params, llama_split_mode,
};
/// Model-level options that this crate converts into llama.cpp's `llama_model_params`.
///
/// The `Default` implementation seeds every field from `llama_model_default_params()`,
/// and the `From<LlamaParams>` conversion copies each field back into the raw struct.
pub struct LlamaParams {
/// Copied into `llama_model_params::n_gpu_layers`.
/// NOTE(review): presumably the number of layers offloaded to the GPU — confirm against `llama.h`.
pub n_gpu_layers: u32,
/// Converted into `llama_model_params::split_mode` through the `SplitMode` conversions.
pub split_mode: SplitMode,
/// Copied into `llama_model_params::main_gpu`.
/// NOTE(review): presumably the index of the primary GPU — confirm against `llama.h`.
pub main_gpu: u32,
/// Copied into `llama_model_params::vocab_only`.
/// NOTE(review): presumably loads only the vocabulary without weights — confirm against `llama.h`.
pub vocab_only: bool,
/// Copied into `llama_model_params::use_mmap`.
/// NOTE(review): presumably enables memory-mapped model loading — confirm against `llama.h`.
pub use_mmap: bool,
/// Copied into `llama_model_params::use_mlock`.
/// NOTE(review): presumably asks the OS to keep the model resident in RAM — confirm against `llama.h`.
pub use_mlock: bool,
}
/// Safe counterpart of llama.cpp's raw `llama_split_mode` value.
///
/// Each variant corresponds to one `LLAMA_SPLIT_MODE_*` value of the bindings; the
/// `From` implementations in this module convert in both directions.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions need a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum SplitMode {
    /// Corresponds to `LLAMA_SPLIT_MODE_NONE`.
    /// NOTE(review): presumably "do not split, use a single GPU" — confirm against `llama.h`.
    None,
    /// Corresponds to `LLAMA_SPLIT_MODE_LAYER`.
    /// NOTE(review): presumably splits whole layers across GPUs — confirm against `llama.h`.
    Layer,
    /// Corresponds to `LLAMA_SPLIT_MODE_ROW`.
    /// NOTE(review): presumably splits tensor rows across GPUs — confirm against `llama.h`.
    Row,
}
/// Lowers the safe [`SplitMode`] enum into the raw `llama_split_mode` used by the bindings.
impl From<SplitMode> for llama_split_mode {
    /// Returns the `LLAMA_SPLIT_MODE_*` value that corresponds to `mode`.
    fn from(mode: SplitMode) -> Self {
        match mode {
            SplitMode::None => Self::LLAMA_SPLIT_MODE_NONE,
            SplitMode::Layer => Self::LLAMA_SPLIT_MODE_LAYER,
            SplitMode::Row => Self::LLAMA_SPLIT_MODE_ROW,
        }
    }
}
/// Lifts a raw `llama_split_mode` from the bindings into the safe [`SplitMode`] enum.
impl From<llama_split_mode> for SplitMode {
    /// Maps each known `LLAMA_SPLIT_MODE_*` value onto its [`SplitMode`] variant.
    ///
    /// # Panics
    ///
    /// Panics through `unimplemented!()` when `raw` is none of the three modes handled here.
    fn from(raw: llama_split_mode) -> Self {
        // NOTE(review): presumably required when the bindings expose the modes as plain
        // constants rather than enum variants — confirm whether this allow is still needed.
        #![allow(non_upper_case_globals)]
        match raw {
            llama_split_mode::LLAMA_SPLIT_MODE_NONE => Self::None,
            llama_split_mode::LLAMA_SPLIT_MODE_LAYER => Self::Layer,
            llama_split_mode::LLAMA_SPLIT_MODE_ROW => Self::Row,
            // Any other raw value has no safe counterpart yet.
            _ => unimplemented!(),
        }
    }
}
impl Default for LlamaParams {
fn default() -> Self {
let c_params = unsafe { llama_model_default_params() };
Self {
n_gpu_layers: c_params.n_gpu_layers as u32,
split_mode: c_params.split_mode.into(),
main_gpu: c_params.main_gpu as u32,
vocab_only: c_params.vocab_only,
use_mmap: c_params.use_mmap,
use_mlock: c_params.use_mlock,
}
}
}
impl From<LlamaParams> for llama_model_params {
fn from(value: LlamaParams) -> Self {
llama_model_params {
n_gpu_layers: value.n_gpu_layers as i32,
split_mode: value.split_mode.into(),
main_gpu: value.main_gpu as i32,
tensor_split: ptr::null_mut(),
progress_callback: None,
progress_callback_user_data: ptr::null_mut(),
kv_overrides: ptr::null_mut(),
vocab_only: value.vocab_only,
use_mmap: value.use_mmap,
use_mlock: value.use_mlock,
}
}
}
/// Thread settings applied to the llama.cpp context that computes embeddings.
///
/// `as_context_params` copies both fields into the matching `llama_context_params` fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddingsParams {
    /// Copied into `llama_context_params::n_threads`.
    /// NOTE(review): presumably the thread count for single-token work — confirm against `llama.h`.
    pub n_threads: u32,
    /// Copied into `llama_context_params::n_threads_batch`.
    /// NOTE(review): presumably the thread count for batch processing — confirm against `llama.h`.
    pub n_threads_batch: u32,
}
impl EmbeddingsParams {
    /// Builds the raw `llama_context_params` for an embeddings context.
    ///
    /// Starts from `llama_context_default_params()`, enables embeddings, applies the
    /// configured thread counts, and sets `n_ctx`, `n_batch` and `n_ubatch` to
    /// `batch_capacity`.
    ///
    /// A `batch_capacity` larger than `u32::MAX` is clamped to `u32::MAX` instead of
    /// silently wrapping around (which could turn a huge request into a tiny or zero size).
    pub(crate) fn as_context_params(&self, batch_capacity: usize) -> llama_context_params {
        // Clamp before narrowing so the `as` cast below can never truncate.
        let capacity = batch_capacity.min(u32::MAX as usize) as u32;

        // SAFETY: the call takes no arguments, so no pointer or lifetime preconditions
        // arise on this side. NOTE(review): assumes the bindings match the linked llama.cpp.
        let mut ctx_params = unsafe { llama_context_default_params() };

        ctx_params.embeddings = true;
        ctx_params.n_threads = self.n_threads;
        ctx_params.n_threads_batch = self.n_threads_batch;

        // Context size, logical batch and physical micro-batch all share one capacity.
        ctx_params.n_ctx = capacity;
        ctx_params.n_batch = capacity;
        ctx_params.n_ubatch = capacity;

        ctx_params
    }
}
impl Default for EmbeddingsParams {
    /// Uses one thread fewer than the number of physical cores for both settings,
    /// but never fewer than one thread.
    fn default() -> Self {
        // Presumably one core is left free for the rest of the process. The subtraction
        // saturates and the result is floored at 1 so a single-core host does not end up
        // with a thread count of zero (and a reported count of 0 cannot underflow).
        // The final cast cannot truncate in practice: core counts are far below `u32::MAX`.
        let threads = num_cpus::get_physical().saturating_sub(1).max(1) as u32;

        Self {
            n_threads: threads,
            n_threads_batch: threads,
        }
    }
}