use std::ffi::{c_char, NulError};
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

/// Inference contexts and the decode/encode API.
pub mod context;
/// Backend initialization and management.
pub mod llama_backend;
/// Batches of tokens to pass to the model.
pub mod llama_batch;
mod log;
/// Models and model loading.
pub mod model;
/// Multimodal (mtmd) support, enabled by the `mtmd` feature.
#[cfg(feature = "mtmd")]
pub mod mtmd;
/// Token sampling.
pub mod sampling;
/// Timing information.
pub mod timing;
/// Token values and helpers.
pub mod token;
/// Token type classification.
pub mod token_type;

/// A `Result` alias whose error type is [`LlamaCppError`].
pub type Result<T> = std::result::Result<T, LlamaCppError>;

/// The top-level error type for this crate. All other error types in the crate convert into it.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaCppError {
    /// The backend was already initialized.
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    /// There was an error while getting the chat template from a model.
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    /// There was an error while decoding a batch.
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    /// There was an error while encoding a batch.
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    /// There was an error while loading a model.
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    /// There was an error while creating a context.
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    /// There was an error while adding a token to a batch.
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    /// There was an error in the embeddings API.
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    /// The requested backend device was not found.
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    /// More devices were requested than llama.cpp supports.
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
}

/// An error that can occur when fetching a model's chat template.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    /// The chat template was not found (llama.cpp returned a null pointer).
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    /// A string contained an interior null byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    /// The chat template was not valid UTF-8.
    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

/// An error that can occur when reading a metadata value from a model.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    /// A string contained an interior null byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    /// The metadata value was not valid UTF-8.
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    /// llama.cpp returned a negative value, most likely because the index or key does not exist.
    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

/// An error that can occur when creating a context from a model.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    /// llama.cpp returned a null pointer.
    #[error("null reference from llama.cpp")]
    NullReturn,
}

/// An error that can occur when decoding a batch.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    /// No KV cache slot was available for the batch.
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    /// The batch contained no tokens.
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    /// An unknown error code was returned.
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

/// An error that can occur when encoding a batch.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    /// No KV cache slot was available for the batch.
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    /// The batch contained no tokens.
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    /// An unknown error code was returned.
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

/// An error that can occur when fetching embeddings from a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    /// Embeddings were not enabled in the context options.
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    /// Logits were not enabled for the given token.
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    /// Sequence embeddings cannot be used with a model that only supports `LLAMA_POOLING_TYPE_NONE`.
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

/// An error that can occur when working with grammars.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    /// The grammar root was not found in the grammar string.
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    /// A trigger word contained null bytes.
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    /// The grammar string or root contained null bytes.
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    /// llama.cpp returned a null grammar.
    #[error("Grammar call returned null")]
    NullGrammar,
}

impl From<NonZeroI32> for DecodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

impl From<NonZeroI32> for EncodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

/// An error that can occur when loading a model.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    /// The model path contained an interior null byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    /// llama.cpp returned a null pointer.
    #[error("null result from llama cpp")]
    NullResult,
    /// The model path could not be converted to a `str`.
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

/// An error that can occur when initializing a LoRA adapter.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    /// The adapter path contained an interior null byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    /// llama.cpp returned a null pointer.
    #[error("null result from llama cpp")]
    NullResult,
    /// The adapter path could not be converted to a `str`.
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

/// An error that can occur when applying a LoRA adapter to a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    /// llama.cpp returned an error code.
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

/// An error that can occur when removing a LoRA adapter from a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    /// llama.cpp returned an error code.
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

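/// Get the current time in microseconds, as reported by llama.cpp.
///
/// A minimal usage sketch (the `llama_cpp_2` crate path below is an assumption):
///
/// ```no_run
/// # use llama_cpp_2::llama_time_us;
/// let start = llama_time_us();
/// // ... do some work ...
/// let elapsed_us = llama_time_us() - start;
/// println!("elapsed: {elapsed_us} us");
/// ```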
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

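/// Get the maximum number of devices that llama.cpp supports.
///
/// A minimal usage sketch (the `llama_cpp_2` crate path below is an assumption):
///
/// ```no_run
/// # use llama_cpp_2::max_devices;
/// println!("llama.cpp supports up to {} devices", max_devices());
/// ```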
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

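/// Returns `true` if llama.cpp was built with support for memory-mapped (mmap) model files.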
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

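/// Returns `true` if llama.cpp was built with support for locking model memory in RAM (mlock).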
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

/// An error that can occur when converting a token to a string.
#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    /// The token type was unknown.
    #[error("Unknown Token Type")]
    UnknownTokenType,
    /// The provided buffer was too small.
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    /// The token bytes were not valid UTF-8.
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

/// An error that can occur when tokenizing a string.
#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    /// The string contained an interior null byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// A length could not be converted to a C `int`.
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

/// An error that can occur when creating a new chat message.
#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    /// A string contained an interior null byte.
    #[error("{0}")]
    NulError(#[from] NulError),
}

/// An error that can occur when applying a chat template.
#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    /// A string contained an interior null byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// The rendered template was not valid UTF-8.
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

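/// Get the current time in microseconds, as reported by ggml.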
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

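/// Returns `true` if mlock is supported. This mirrors the llama.cpp function name; see also
/// [`mlock_supported`].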
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

/// The type of a backend device, mirroring ggml's `GGML_BACKEND_DEVICE_TYPE_*` constants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    /// A CPU device.
    Cpu,
    /// An accelerator device.
    Accelerator,
    /// A discrete GPU.
    Gpu,
    /// An integrated GPU.
    IntegratedGpu,
    /// A device type not known to this crate.
    Unknown,
}

/// A compute device reported by the ggml backend registry.
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    /// The index of the device in the backend registry.
    pub index: usize,
    /// The name of the device.
    pub name: String,
    /// A human-readable description of the device.
    pub description: String,
    /// The name of the backend that exposes the device.
    pub backend: String,
    /// Total device memory in bytes.
    pub memory_total: usize,
    /// Free device memory in bytes.
    pub memory_free: usize,
    /// The type of the device.
    pub device_type: LlamaBackendDeviceType,
}

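/// List the devices known to the ggml backend registry, together with their reported properties.
///
/// A minimal usage sketch (the `llama_cpp_2` crate path below is an assumption):
///
/// ```no_run
/// # use llama_cpp_2::list_llama_ggml_backend_devices;
/// for dev in list_llama_ggml_backend_devices() {
///     println!(
///         "{} [{}]: {} of {} bytes free",
///         dev.name, dev.backend, dev.memory_free, dev.memory_total
///     );
/// }
/// ```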
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    // Convert a possibly-null C string into an owned `String`.
    fn cstr_to_string(ptr: *const c_char) -> String {
        if ptr.is_null() {
            String::new()
        } else {
            unsafe { std::ffi::CStr::from_ptr(ptr) }
                .to_string_lossy()
                .to_string()
        }
    }

    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        // Zero-initialize the props struct and let ggml fill it in.
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

/// Options for forwarding llama.cpp and ggml logs to `tracing`.
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
    /// Enable or disable forwarding of logs to `tracing`. Logs are enabled by default.
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

// C callback registered with llama.cpp and ggml; forwards their log messages to `tracing`.
extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    // `data` is a pointer to the `log::State` installed by `send_logs_to_tracing`.
    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

    // `GGML_LOG_LEVEL_CONT` continues the previous message; complete lines are emitted
    // immediately, while partial lines are buffered until the newline arrives.
    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}

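/// Redirect llama.cpp and ggml log output to the `tracing` ecosystem instead of stderr.
///
/// A minimal usage sketch (the `llama_cpp_2` crate path below is an assumption):
///
/// ```no_run
/// # use llama_cpp_2::{send_logs_to_tracing, LogOptions};
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
/// ```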
pub fn send_logs_to_tracing(options: LogOptions) {
    // The log states live in statics in the `log` module, so the raw pointers handed to the C
    // callbacks below stay valid for the lifetime of the program.
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}