use crate::llama_batch::BatchAddError;
use std::ffi::NulError;
use std::fmt::Debug;
use std::num::NonZeroI32;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;
pub mod context;
pub mod llama_backend;
pub mod llama_batch;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;
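/// A convenience `Result` alias whose error type is [`LLamaCppError`].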
pub type Result<T> = std::result::Result<T, LLamaCppError>;
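/// All errors that can be produced by this crate, aggregated into a single enum.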
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LLamaCppError {
#[error("BackendAlreadyInitialized")]
BackendAlreadyInitialized,
#[error("{0}")]
ChatTemplateError(#[from] ChatTemplateError),
#[error("{0}")]
DecodeError(#[from] DecodeError),
#[error("{0}")]
EncodeError(#[from] EncodeError),
#[error("{0}")]
LlamaModelLoadError(#[from] LlamaModelLoadError),
#[error("{0}")]
LlamaContextLoadError(#[from] LlamaContextLoadError),
#[error("{0}")]
BatchAddError(#[from] BatchAddError),
#[error(transparent)]
EmbeddingError(#[from] EmbeddingsError),
}
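/// An error that can occur when retrieving a model's chat template.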
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
#[error("chat template not found - returned null pointer")]
MissingTemplate,
#[error("null byte in string {0}")]
NullError(#[from] NulError),
#[error(transparent)]
Utf8Error(#[from] std::str::Utf8Error),
}
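/// An error that can occur when reading a metadata key or value from a model.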
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
#[error("null byte in string {0}")]
NullError(#[from] NulError),
#[error("FromUtf8Error {0}")]
FromUtf8Error(#[from] FromUtf8Error),
#[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
NegativeReturn(i32),
}
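/// An error that can occur when creating a context from a model.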
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
#[error("null reference from llama.cpp")]
NullReturn,
}
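/// An error that can occur when decoding a batch, mapped from llama.cpp's non-zero return codes.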
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
#[error("Decode Error 1: NoKvCacheSlot")]
NoKvCacheSlot,
#[error("Decode Error -1: n_tokens == 0")]
NTokensZero,
#[error("Decode Error {0}: unknown")]
Unknown(c_int),
}
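/// An error that can occur when encoding a batch, mapped from llama.cpp's non-zero return codes.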
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
#[error("Encode Error 1: NoKvCacheSlot")]
NoKvCacheSlot,
#[error("Encode Error -1: n_tokens == 0")]
NTokensZero,
#[error("Encode Error {0}: unknown")]
Unknown(c_int),
}
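/// An error that can occur when fetching embeddings from a context.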
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
#[error("Embeddings weren't enabled in the context options")]
NotEnabled,
#[error("Logits were not enabled for the given token")]
LogitsNotEnabled,
#[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
NonePoolType,
}
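/// Convert a non-zero decode return code into the corresponding [`DecodeError`].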
impl From<NonZeroI32> for DecodeError {
fn from(value: NonZeroI32) -> Self {
match value.get() {
1 => DecodeError::NoKvCacheSlot,
-1 => DecodeError::NTokensZero,
i => DecodeError::Unknown(i),
}
}
}
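/// Convert a non-zero encode return code into the corresponding [`EncodeError`].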
impl From<NonZeroI32> for EncodeError {
fn from(value: NonZeroI32) -> Self {
match value.get() {
1 => EncodeError::NoKvCacheSlot,
-1 => EncodeError::NTokensZero,
i => EncodeError::Unknown(i),
}
}
}
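/// An error that can occur when loading a model.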
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
#[error("null byte in string {0}")]
NullError(#[from] NulError),
#[error("null result from llama cpp")]
NullResult,
#[error("failed to convert path {0} to str")]
PathToStrError(PathBuf),
}
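/// An error that can occur when initializing a LoRA adapter.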
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
#[error("null byte in string {0}")]
NullError(#[from] NulError),
#[error("null result from llama cpp")]
NullResult,
#[error("failed to convert path {0} to str")]
PathToStrError(PathBuf),
}
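/// An error that can occur when applying a LoRA adapter to a context.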
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
#[error("error code from llama cpp")]
ErrorResult(i32),
}
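/// An error that can occur when removing a LoRA adapter from a context.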
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
#[error("error code from llama cpp")]
ErrorResult(i32),
}
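/// Get the current time in microseconds according to llama.cpp.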
#[must_use]
pub fn llama_time_us() -> i64 {
unsafe { fellhorn_llama_cpp_sys_2::llama_time_us() }
}
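/// Get the maximum number of devices supported by llama.cpp.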
#[must_use]
pub fn max_devices() -> usize {
unsafe { fellhorn_llama_cpp_sys_2::llama_max_devices() }
}
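/// Check whether memory mapping (mmap) is supported by llama.cpp.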
#[must_use]
pub fn mmap_supported() -> bool {
unsafe { fellhorn_llama_cpp_sys_2::llama_supports_mmap() }
}
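/// Check whether memory locking (mlock) is supported by llama.cpp.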
#[must_use]
pub fn mlock_supported() -> bool {
unsafe { fellhorn_llama_cpp_sys_2::llama_supports_mlock() }
}
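/// An error that can occur when converting a token to a string.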
#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
#[error("Unknown Token Type")]
UnknownTokenType,
#[error("Insufficient Buffer Space {0}")]
InsufficientBufferSpace(c_int),
#[error("FromUtf8Error {0}")]
FromUtf8Error(#[from] FromUtf8Error),
}
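/// An error that can occur when converting a string into tokens.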
#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
#[error("{0}")]
NulError(#[from] NulError),
#[error("{0}")]
CIntConversionError(#[from] std::num::TryFromIntError),
}
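/// An error that can occur when creating a new chat message.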
#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
#[error("{0}")]
NulError(#[from] NulError),
}
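/// An error that can occur when applying a chat template.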
#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
#[error("{0}")]
NulError(#[from] NulError),
#[error("{0}")]
FromUtf8Error(#[from] FromUtf8Error),
}
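/// Get the current time in microseconds according to ggml.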
#[must_use]
pub fn ggml_time_us() -> i64 {
unsafe { fellhorn_llama_cpp_sys_2::ggml_time_us() }
}
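/// Check whether mlock is supported; this calls the same llama.cpp function as [`mlock_supported`].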
#[must_use]
pub fn llama_supports_mlock() -> bool {
unsafe { fellhorn_llama_cpp_sys_2::llama_supports_mlock() }
}
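/// Options controlling how llama.cpp and ggml logs are forwarded to `tracing`; see [`send_logs_to_tracing`].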
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
disabled: bool,
}
impl LogOptions {
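/// Enable or disable forwarding of logs to `tracing`. Logs are enabled by default.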
pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
self.disabled = !enabled;
self
}
}
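// C callback installed into llama.cpp and ggml: it drops messages when logging is
// disabled or the level is filtered out, appends GGML_LOG_LEVEL_CONT fragments to
// the currently buffered line, and otherwise emits complete lines (buffering
// partial ones until a trailing newline arrives).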
extern "C" fn logs_to_trace(
level: fellhorn_llama_cpp_sys_2::ggml_log_level,
text: *const ::std::os::raw::c_char,
data: *mut ::std::os::raw::c_void,
) {
use std::borrow::Borrow;
let log_state = unsafe { &*(data as *const log::State) };
if log_state.options.disabled {
return;
}
if !log_state.is_enabled_for_level(level) {
log_state.update_previous_level_for_disabled_log(level);
return;
}
let text = unsafe { std::ffi::CStr::from_ptr(text) };
let text = text.to_string_lossy();
let text: &str = text.borrow();
if level == fellhorn_llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
log_state.cont_buffered_log(text);
} else if text.ends_with('\n') {
log_state.emit_non_cont_line(level, text);
} else {
log_state.buffer_non_cont(level, text);
}
}
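/// Redirect llama.cpp and ggml log output to the `tracing` ecosystem by installing
/// `logs_to_trace` as the log callback for both libraries.
///
/// A sketch of typical usage (the crate name in the `use` line is an assumption;
/// substitute this crate's actual name):
///
/// ```ignore
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// // Install the forwarding callbacks once, early in program startup.
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
/// ```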
pub fn send_logs_to_tracing(options: LogOptions) {
let llama_heap_state = Box::as_ref(
log::LLAMA_STATE
.get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
) as *const _;
let ggml_heap_state = Box::as_ref(
log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
) as *const _;
unsafe {
fellhorn_llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
fellhorn_llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
}
}
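// A minimal sketch (added for illustration, not part of the upstream API) showing
// how the error conversions and `LogOptions` above behave; it uses only items
// defined in this file.
#[cfg(test)]
mod lib_sketch_tests {
    use super::{DecodeError, EncodeError, LogOptions};
    use std::num::NonZeroI32;

    #[test]
    fn decode_and_encode_errors_map_from_return_codes() {
        // 1 means "no KV cache slot", -1 means "n_tokens == 0"; anything else is Unknown.
        assert_eq!(
            DecodeError::from(NonZeroI32::new(1).unwrap()),
            DecodeError::NoKvCacheSlot
        );
        assert_eq!(
            DecodeError::from(NonZeroI32::new(-1).unwrap()),
            DecodeError::NTokensZero
        );
        assert_eq!(
            EncodeError::from(NonZeroI32::new(7).unwrap()),
            EncodeError::Unknown(7)
        );
    }

    #[test]
    fn log_options_toggle_logging() {
        // `with_logs_enabled(false)` sets the internal `disabled` flag; `true` clears it.
        let options = LogOptions::default().with_logs_enabled(false);
        assert!(options.disabled);
        let options = options.with_logs_enabled(true);
        assert!(!options.disabled);
    }
}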