use std::ffi::{c_char, CStr, CString, NulError};
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

pub mod context;
pub mod gguf;
pub mod llama_backend;
pub mod llama_batch;
#[cfg(feature = "llguidance")]
pub(crate) mod llguidance_sampler;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod openai;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;

/// Returns `true` if the given FFI status code indicates success.
pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    status == llama_cpp_sys_2::LLAMA_RS_STATUS_OK
}

/// Converts an FFI status code into a plain `i32` for error reporting.
pub(crate) fn status_to_i32(status: llama_cpp_sys_2::llama_rs_status) -> i32 {
    status as i32
}

/// A `Result` alias with [`LlamaCppError`] as the error type.
pub type Result<T> = std::result::Result<T, LlamaCppError>;

/// All errors that can occur in this crate.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaCppError {
    /// The backend was already initialized.
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    /// There was an error while getting or applying the chat template.
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    /// There was an error while decoding a batch.
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    /// There was an error while encoding a batch.
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    /// There was an error loading a model.
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    /// There was an error creating a new model context.
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    /// There was an error adding a token to a batch.
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    /// There was an error fetching embeddings.
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    /// The requested backend device index does not exist.
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    /// More devices were requested than llama.cpp supports.
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
    /// Converting a JSON schema to a grammar failed.
    #[error("JsonSchemaToGrammarError: {0}")]
    JsonSchemaToGrammarError(String),
}

/// An error that can occur when getting or applying a chat template.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    /// The chat template was not found; llama.cpp returned a null pointer.
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    /// The string contained a null byte and could not be converted to a C string.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    /// The chat template was not valid UTF-8.
    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

/// An error that can occur when reading a metadata key or value from a model.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    /// The provided string contained a null byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    /// The returned data was not valid UTF-8.
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    /// A negative return value, likely due to a missing index or key.
    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

/// An error that can occur when creating a new llama context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    /// llama.cpp returned a null reference.
    #[error("null reference from llama.cpp")]
    NullReturn,
}

/// An error that can occur when decoding a batch.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    /// No KV cache slot was available for the batch.
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    /// The batch contained zero tokens.
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    /// An unknown error code was returned by llama.cpp.
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

/// An error that can occur when encoding a batch.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    /// No KV cache slot was available for the batch.
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    /// The batch contained zero tokens.
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    /// An unknown error code was returned by llama.cpp.
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

/// An error that can occur when fetching embeddings.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    /// Embeddings were not enabled in the context options.
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    /// Logits were not enabled for the given token.
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    /// Sequence embeddings cannot be used with a model that only supports `LLAMA_POOLING_TYPE_NONE`.
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

/// An error that can occur when building a grammar sampler.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    /// The grammar root was not found in the grammar string.
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    /// A trigger word contained null bytes.
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    /// The grammar string or root contained null bytes.
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    /// llama.cpp returned a null grammar.
    #[error("Grammar call returned null")]
    NullGrammar,
}

impl From<NonZeroI32> for DecodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

impl From<NonZeroI32> for EncodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

/// An error that can occur when loading a model.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    /// The path contained a null byte and could not be converted to a C string.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    /// llama.cpp returned a null result.
    #[error("null result from llama cpp")]
    NullResult,
    /// The path could not be converted to a UTF-8 string.
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

/// An error that can occur when initializing a LoRA adapter.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    /// The path contained a null byte and could not be converted to a C string.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    /// llama.cpp returned a null result.
    #[error("null result from llama cpp")]
    NullResult,
    /// The path could not be converted to a UTF-8 string.
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

/// An error that can occur when applying a LoRA adapter to a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    /// llama.cpp returned a non-zero error code.
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

/// An error that can occur when removing a LoRA adapter from a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    /// llama.cpp returned a non-zero error code.
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

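/// Get the current time in microseconds, as measured by llama.cpp.
///
/// A minimal usage sketch (not taken from the original docs); it assumes this crate is
/// consumed under the name `llama_cpp_2`:
///
/// ```no_run
/// use llama_cpp_2::llama_time_us;
///
/// let start = llama_time_us();
/// // ... do some work ...
/// let end = llama_time_us();
/// assert!(end >= start);
/// ```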
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

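/// Get the maximum number of devices supported by llama.cpp.
///
/// A minimal usage sketch (not taken from the original docs); it assumes this crate is
/// consumed under the name `llama_cpp_2`:
///
/// ```no_run
/// use llama_cpp_2::max_devices;
///
/// println!("llama.cpp supports up to {} devices", max_devices());
/// ```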
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

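/// Returns `true` if memory mapping (mmap) is supported by this llama.cpp build.
///
/// A minimal usage sketch (not taken from the original docs):
///
/// ```no_run
/// use llama_cpp_2::mmap_supported;
///
/// println!("mmap supported: {}", mmap_supported());
/// ```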
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

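/// Returns `true` if mlock is supported by this llama.cpp build.
///
/// A minimal usage sketch (not taken from the original docs):
///
/// ```no_run
/// use llama_cpp_2::mlock_supported;
///
/// println!("mlock supported: {}", mlock_supported());
/// ```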
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

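/// Convert a JSON Schema string into a llama.cpp grammar string.
///
/// # Errors
///
/// Returns [`LlamaCppError::JsonSchemaToGrammarError`] if the schema contains a null
/// byte, the FFI call reports an error, or the produced grammar is not valid UTF-8.
///
/// A minimal usage sketch (not taken from the original docs); it assumes this crate is
/// consumed under the name `llama_cpp_2`:
///
/// ```no_run
/// use llama_cpp_2::json_schema_to_grammar;
///
/// let schema = r#"{"type": "object", "properties": {"answer": {"type": "string"}}}"#;
/// let grammar = json_schema_to_grammar(schema).expect("schema should convert");
/// assert!(!grammar.is_empty());
/// ```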
pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
    let schema_cstr = CString::new(schema_json)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
    let mut out = std::ptr::null_mut();
    let rc = unsafe {
        llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
    };

    let result = {
        if !status_is_ok(rc) || out.is_null() {
            return Err(LlamaCppError::JsonSchemaToGrammarError(format!(
                "ffi error {}",
                status_to_i32(rc)
            )));
        }
        // Copy the grammar out of the C buffer; the buffer itself is freed below.
        let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
        // Avoid `?` here so the C string is still freed if UTF-8 validation fails.
        String::from_utf8(grammar_bytes)
            .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))
    };

    unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };
    result
}

/// Failed to convert a token to a string.
#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    /// The token type was unknown.
    #[error("Unknown Token Type")]
    UnknownTokenType,
    /// There was insufficient buffer space to hold the token string.
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    /// The token bytes were not valid UTF-8.
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

/// Failed to convert a string to a sequence of tokens.
#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    /// The string contained a null byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// Failed to convert a length to a C integer.
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

/// Failed to create a new chat message.
#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    /// The string contained a null byte.
    #[error("{0}")]
    NulError(#[from] NulError),
}

/// Failed to apply a chat template.
#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    /// A string contained a null byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// The rendered template was not valid UTF-8.
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
    /// llama.cpp returned a null result.
    #[error("null result from llama.cpp")]
    NullResult,
    /// llama.cpp returned an error code.
    #[error("ffi error {0}")]
    FfiError(i32),
    /// The grammar trigger data was invalid.
    #[error("invalid grammar trigger data")]
    InvalidGrammarTriggerType,
}

/// Failed to parse a chat response.
#[derive(Debug, thiserror::Error)]
pub enum ChatParseError {
    /// A string contained a null byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// The response was not valid UTF-8.
    #[error("{0}")]
    Utf8Error(#[from] FromUtf8Error),
    /// llama.cpp returned a null result.
    #[error("null result from llama.cpp")]
    NullResult,
    /// llama.cpp returned an error code.
    #[error("ffi error {0}")]
    FfiError(i32),
}

/// Failed to accept a token into a sampler.
#[derive(Debug, thiserror::Error)]
pub enum SamplerAcceptError {
    /// llama.cpp returned an error code.
    #[error("ffi error {0}")]
    FfiError(i32),
}

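/// Get the current time in microseconds according to ggml's timer.
///
/// A minimal usage sketch (not taken from the original docs); it assumes this crate is
/// consumed under the name `llama_cpp_2`:
///
/// ```no_run
/// use llama_cpp_2::ggml_time_us;
///
/// let start = ggml_time_us();
/// // ... do some work ...
/// let elapsed_us = ggml_time_us() - start;
/// assert!(elapsed_us >= 0);
/// ```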
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

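/// Returns `true` if llama.cpp was built with mlock support.
///
/// This wraps the same llama.cpp call as [`mlock_supported`]. A minimal usage sketch
/// (not taken from the original docs):
///
/// ```no_run
/// use llama_cpp_2::llama_supports_mlock;
///
/// if llama_supports_mlock() {
///     println!("mlock is available; model weights can be pinned in RAM");
/// }
/// ```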
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

/// The type of a ggml backend device.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    /// A CPU device.
    Cpu,
    /// An accelerator device.
    Accelerator,
    /// A discrete GPU.
    Gpu,
    /// An integrated GPU.
    IntegratedGpu,
    /// A device type not recognized by this crate.
    Unknown,
}

/// A description of a ggml backend device.
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    /// The index of the device, as used by ggml.
    pub index: usize,
    /// The name of the device.
    pub name: String,
    /// A human-readable description of the device.
    pub description: String,
    /// The name of the backend that exposes the device.
    pub backend: String,
    /// Total device memory, in bytes.
    pub memory_total: usize,
    /// Free device memory, in bytes.
    pub memory_free: usize,
    /// The type of the device.
    pub device_type: LlamaBackendDeviceType,
}

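/// Enumerate the ggml backend devices that are available to llama.cpp.
///
/// A minimal usage sketch (not taken from the original docs); it assumes this crate is
/// consumed under the name `llama_cpp_2` and that the backend has already been
/// initialized (for example via this crate's `llama_backend` module):
///
/// ```no_run
/// use llama_cpp_2::list_llama_ggml_backend_devices;
///
/// for device in list_llama_ggml_backend_devices() {
///     println!(
///         "{}: {} [{} of {} bytes free]",
///         device.index, device.name, device.memory_free, device.memory_total
///     );
/// }
/// ```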
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    // Convert a possibly-null C string into an owned `String`.
    fn cstr_to_string(ptr: *const c_char) -> String {
        if ptr.is_null() {
            String::new()
        } else {
            unsafe { std::ffi::CStr::from_ptr(ptr) }
                .to_string_lossy()
                .to_string()
        }
    }
    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        // ggml fills in the zero-initialized properties struct for this device.
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

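/// Options for controlling how llama.cpp and ggml log output is forwarded to `tracing`.
///
/// A minimal usage sketch (not taken from the original docs); it assumes this crate is
/// consumed under the name `llama_cpp_2`:
///
/// ```no_run
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// // Silence all llama.cpp / ggml log output.
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(false));
/// ```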
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
    /// If enabled, logs are forwarded to `tracing`. If disabled, all logs are suppressed.
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    // SAFETY: `data` is the `log::State` pointer registered in `send_logs_to_tracing`,
    // which stays alive for the rest of the program.
    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    // Remember the level of skipped lines so that any continuation lines that follow
    // them can be handled consistently.
    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

    // GGML_LOG_LEVEL_CONT marks a continuation of the previous message; otherwise emit
    // complete lines immediately and buffer partial lines until they finish.
    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}

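/// Register a llama.cpp / ggml log callback that forwards log lines to the `tracing` crate.
///
/// The log state is created with `get_or_init`, so only the [`LogOptions`] passed to the
/// first call take effect; later calls re-register the callback with the same state.
///
/// A minimal usage sketch (not taken from the original docs); it assumes this crate is
/// consumed under the name `llama_cpp_2` and that `tracing-subscriber` is available as a
/// separate dependency:
///
/// ```no_run
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// tracing_subscriber::fmt::init();
/// send_logs_to_tracing(LogOptions::default());
/// ```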
pub fn send_logs_to_tracing(options: LogOptions) {
    // The log states are created once (via `get_or_init`), so the pointers handed to
    // llama.cpp and ggml remain valid for the life of the program.
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}