use std::ffi::{c_char, CStr, CString, NulError};
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

pub mod context;
pub mod gguf;
pub mod llama_backend;
pub mod llama_batch;
#[cfg(feature = "llguidance")]
pub(crate) mod llguidance_sampler;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod openai;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;

pub use crate::context::session::LlamaStateSeqFlags;

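/// Returns `true` when an FFI status code from `llama_cpp_sys_2` equals `LLAMA_RS_STATUS_OK`.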
pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    status == llama_cpp_sys_2::LLAMA_RS_STATUS_OK
}

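/// Converts an FFI status code into a plain `i32`, mainly for error reporting.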
pub(crate) fn status_to_i32(status: llama_cpp_sys_2::llama_rs_status) -> i32 {
    status as i32
}

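/// Convenience alias for `std::result::Result` with [`LlamaCppError`] as the error type.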
pub type Result<T> = std::result::Result<T, LlamaCppError>;

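/// The top-level error type for this crate, aggregating the more specific errors below.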
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaCppError {
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
    #[error("JsonSchemaToGrammarError: {0}")]
    JsonSchemaToGrammarError(String),
}

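/// Errors that can occur when fetching a model's chat template.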
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

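/// Errors that can occur when reading metadata key/value pairs from a model.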
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

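/// Errors that can occur when creating a llama context from a loaded model.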
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    #[error("null reference from llama.cpp")]
    NullReturn,
}

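/// Failures reported by llama.cpp when decoding a batch.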
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

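/// Failures reported by llama.cpp when encoding a batch.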
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

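/// Errors that can occur when extracting embeddings from a context.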
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

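/// Errors that can occur when building a sampler from a grammar string.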
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    #[error("Grammar call returned null")]
    NullGrammar,
}

impl From<NonZeroI32> for DecodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

impl From<NonZeroI32> for EncodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

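/// Errors that can occur when loading a model from disk.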
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

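/// Errors that can occur when initializing a LoRA adapter.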
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

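/// Errors that can occur when applying a LoRA adapter to a context.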
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

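/// Errors that can occur when removing a LoRA adapter from a context.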
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

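/// Returns the current time in microseconds, as measured by llama.cpp.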
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

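/// Returns the maximum number of devices supported by llama.cpp.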
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

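/// Returns `true` if memory mapping (mmap) is supported by this build of llama.cpp.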
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

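/// Returns `true` if memory locking (mlock) is supported by this build of llama.cpp.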
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

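/// Converts a JSON schema (given as a JSON string) into a GBNF grammar string using llama.cpp.
///
/// # Errors
///
/// Returns [`LlamaCppError::JsonSchemaToGrammarError`] if the schema contains an interior NUL
/// byte, if the conversion fails on the llama.cpp side, or if the produced grammar is not
/// valid UTF-8.
///
/// A minimal usage sketch (illustrative only; it assumes this crate is consumed as
/// `llama_cpp_2` and is not compiled as a doc test):
///
/// ```ignore
/// // Constrain generation to objects with a required string field "name".
/// let schema = r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
/// let grammar = llama_cpp_2::json_schema_to_grammar(schema)
///     .expect("schema should convert to a GBNF grammar");
/// println!("{grammar}");
/// ```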
pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
    let schema_cstr = CString::new(schema_json)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
    let mut out = std::ptr::null_mut();
    let rc = unsafe {
        llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
    };

    let result = {
        if !status_is_ok(rc) || out.is_null() {
            return Err(LlamaCppError::JsonSchemaToGrammarError(format!(
                "ffi error {}",
                status_to_i32(rc)
            )));
        }
        let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
        // Avoid `?` here so the FFI string below is always freed, even when the
        // returned bytes are not valid UTF-8.
        String::from_utf8(grammar_bytes)
            .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))
    };

    unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };
    result
}

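/// Errors that can occur when converting a token to a string.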
#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    #[error("Unknown Token Type")]
    UnknownTokenType,
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

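/// Errors that can occur when converting a string into tokens.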
#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

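/// Errors that can occur when constructing a new chat message.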
#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    #[error("{0}")]
    NulError(#[from] NulError),
}

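/// Errors that can occur when applying a chat template to a set of messages.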
#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
    #[error("invalid grammar trigger data")]
    InvalidGrammarTriggerType,
}

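/// Errors that can occur when parsing chat output returned by llama.cpp.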
#[derive(Debug, thiserror::Error)]
pub enum ChatParseError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    Utf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
}

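/// Errors that can occur when a sampler accepts a token.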
#[derive(Debug, thiserror::Error)]
pub enum SamplerAcceptError {
    #[error("ffi error {0}")]
    FfiError(i32),
}

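/// Returns the current time in microseconds, as measured by ggml.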
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

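/// Returns `true` if mlock is supported; this performs the same llama.cpp check as
/// [`mlock_supported`].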
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

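/// The kind of device exposed by a ggml backend.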
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    Cpu,
    Accelerator,
    Gpu,
    IntegratedGpu,
    Unknown,
}

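/// A ggml backend device visible to llama.cpp, including its reported name, description,
/// owning backend, memory sizes (in bytes), and device type.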
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    pub index: usize,
    pub name: String,
    pub description: String,
    pub backend: String,
    pub memory_total: usize,
    pub memory_free: usize,
    pub device_type: LlamaBackendDeviceType,
}

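/// Enumerates all ggml backend devices and returns their properties as [`LlamaBackendDevice`]s.
///
/// A minimal usage sketch (illustrative only; it assumes this crate is consumed as
/// `llama_cpp_2` and is not compiled as a doc test):
///
/// ```ignore
/// // Print every device ggml can see, with free/total memory in bytes.
/// for dev in llama_cpp_2::list_llama_ggml_backend_devices() {
///     println!(
///         "{} ({}) via {}: {}/{} bytes free, type {:?}",
///         dev.name, dev.description, dev.backend, dev.memory_free, dev.memory_total, dev.device_type
///     );
/// }
/// ```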
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        fn cstr_to_string(ptr: *const c_char) -> String {
            if ptr.is_null() {
                String::new()
            } else {
                unsafe { std::ffi::CStr::from_ptr(ptr) }
                    .to_string_lossy()
                    .to_string()
            }
        }
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

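/// Options that control how llama.cpp and ggml log output is forwarded to `tracing`.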
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
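    /// Enables or disables forwarding of llama.cpp / ggml logs. Forwarding is enabled by
    /// default (`LogOptions::default()` leaves logs enabled).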
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

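/// C callback installed via `llama_log_set` / `ggml_log_set`. It reads the per-module
/// [`log::State`] passed as user data, honors the configured log level, and either emits a
/// line immediately or buffers it so that continuation (`GGML_LOG_LEVEL_CONT`) messages are
/// stitched back together.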
extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}

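/// Routes llama.cpp and ggml log output through `tracing`, using the given [`LogOptions`].
///
/// A minimal usage sketch (illustrative only; it assumes this crate is consumed as
/// `llama_cpp_2`, that a `tracing_subscriber` dependency is available, and it is not compiled
/// as a doc test):
///
/// ```ignore
/// // Install a subscriber first, then route llama.cpp / ggml logs through it.
/// tracing_subscriber::fmt::init();
/// llama_cpp_2::send_logs_to_tracing(llama_cpp_2::LogOptions::default());
/// ```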
pub fn send_logs_to_tracing(options: LogOptions) {
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}