use std::ffi::{c_char, CStr, CString, NulError};
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

pub mod context;
pub mod llama_backend;
pub mod llama_batch;
#[cfg(feature = "llguidance")]
pub(crate) mod llguidance_sampler;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod openai;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;

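// Helpers for interpreting `llama_rs_status` codes returned by the C shim.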
pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    status == llama_cpp_sys_2::LLAMA_RS_STATUS_OK
}

pub(crate) fn status_to_i32(status: llama_cpp_sys_2::llama_rs_status) -> i32 {
    status as i32
}

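/// A convenience `Result` type whose error variant is [`LlamaCppError`].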
pub type Result<T> = std::result::Result<T, LlamaCppError>;

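/// The top-level errors that can occur when interacting with llama.cpp through this crate.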
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaCppError {
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
    #[error("JsonSchemaToGrammarError: {0}")]
    JsonSchemaToGrammarError(String),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    #[error("null reference from llama.cpp")]
    NullReturn,
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    #[error("Grammar call returned null")]
    NullGrammar,
}

impl From<NonZeroI32> for DecodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

impl From<NonZeroI32> for EncodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

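/// Get the llama.cpp wall-clock time, in microseconds.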
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

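/// Get the maximum number of devices supported by llama.cpp.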
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

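/// Check whether memory-mapped (mmap) model loading is supported by this build of llama.cpp.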
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

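/// Check whether `mlock` (locking model memory into RAM) is supported by this build of llama.cpp.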
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

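/// Convert a JSON schema to a GBNF grammar string using llama.cpp's built-in converter.
///
/// # Errors
///
/// Returns [`LlamaCppError::JsonSchemaToGrammarError`] if the schema contains a null byte,
/// the FFI call reports an error, or the returned grammar is not valid UTF-8.
///
/// # Examples
///
/// A minimal sketch, assuming this crate is built as `llama_cpp_2`; the schema below is
/// purely illustrative:
///
/// ```no_run
/// # fn main() -> llama_cpp_2::Result<()> {
/// let schema = r#"{"type": "object", "properties": {"name": {"type": "string"}}}"#;
/// let grammar = llama_cpp_2::json_schema_to_grammar(schema)?;
/// println!("{grammar}");
/// # Ok(())
/// # }
/// ```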
pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
    let schema_cstr = CString::new(schema_json)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
    let mut out = std::ptr::null_mut();
    let rc = unsafe {
        llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
    };

    if !status_is_ok(rc) || out.is_null() {
        return Err(LlamaCppError::JsonSchemaToGrammarError(format!(
            "ffi error {}",
            status_to_i32(rc)
        )));
    }

    // Copy the grammar out of the C buffer and free it before the UTF-8 check so an
    // invalid-UTF-8 early return cannot leak the allocation.
    let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
    unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };

    String::from_utf8(grammar_bytes)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))
}

#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    #[error("Unknown Token Type")]
    UnknownTokenType,
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    #[error("{0}")]
    NulError(#[from] NulError),
}

#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
    #[error("invalid grammar trigger data")]
    InvalidGrammarTriggerType,
}

#[derive(Debug, thiserror::Error)]
pub enum ChatParseError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    Utf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
}

#[derive(Debug, thiserror::Error)]
pub enum SamplerAcceptError {
    #[error("ffi error {0}")]
    FfiError(i32),
}

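/// Get the ggml wall-clock time, in microseconds.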
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

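/// Check whether `mlock` is supported. This calls the same underlying llama.cpp function as
/// [`mlock_supported`].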
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

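/// The kind of device reported by the ggml backend registry.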
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    /// A CPU device.
    Cpu,
    /// An accelerator device.
    Accelerator,
    /// A GPU device.
    Gpu,
    /// An integrated GPU device.
    IntegratedGpu,
    /// A device type this crate does not recognize.
    Unknown,
}

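/// A compute device discovered through the ggml backend registry.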
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    /// Index of the device in the ggml backend device registry.
    pub index: usize,
    /// Name of the device, as reported by ggml.
    pub name: String,
    /// Description of the device, as reported by ggml.
    pub description: String,
    /// Name of the backend that exposes the device.
    pub backend: String,
    /// Total device memory, in bytes.
    pub memory_total: usize,
    /// Free device memory, in bytes.
    pub memory_free: usize,
    /// The kind of device (CPU, GPU, accelerator, ...).
    pub device_type: LlamaBackendDeviceType,
}

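/// List all devices known to the ggml backend registry, together with their reported
/// memory statistics and device type.
///
/// # Examples
///
/// A minimal sketch, assuming this crate is built as `llama_cpp_2`:
///
/// ```no_run
/// for dev in llama_cpp_2::list_llama_ggml_backend_devices() {
///     println!(
///         "{} [{}] ({}): {} of {} bytes free",
///         dev.name, dev.backend, dev.description, dev.memory_free, dev.memory_total
///     );
/// }
/// ```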
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        fn cstr_to_string(ptr: *const c_char) -> String {
            if ptr.is_null() {
                String::new()
            } else {
                unsafe { std::ffi::CStr::from_ptr(ptr) }
                    .to_string_lossy()
                    .to_string()
            }
        }
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

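/// Options for controlling how llama.cpp and ggml logs are forwarded to `tracing`.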
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
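    /// Enable or disable forwarding of logs. Logs are enabled by default
    /// (`LogOptions::default()` leaves `disabled` as `false`).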
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

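// Log callback registered with both llama.cpp and ggml. llama.cpp may emit a message in
// pieces, using `GGML_LOG_LEVEL_CONT` to continue the previous message, so partial lines are
// buffered in the per-module `log::State` until a trailing newline is seen.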
550extern "C" fn logs_to_trace(
551 level: llama_cpp_sys_2::ggml_log_level,
552 text: *const ::std::os::raw::c_char,
553 data: *mut ::std::os::raw::c_void,
554) {
555 use std::borrow::Borrow;
560
561 let log_state = unsafe { &*(data as *const log::State) };
562
563 if log_state.options.disabled {
564 return;
565 }
566
567 if !log_state.is_enabled_for_level(level) {
569 log_state.update_previous_level_for_disabled_log(level);
570 return;
571 }
572
573 let text = unsafe { std::ffi::CStr::from_ptr(text) };
574 let text = text.to_string_lossy();
575 let text: &str = text.borrow();
576
577 if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
583 log_state.cont_buffered_log(text);
584 } else if text.ends_with('\n') {
585 log_state.emit_non_cont_line(level, text);
586 } else {
587 log_state.buffer_non_cont(level, text);
588 }
589}
590
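/// Redirect llama.cpp and ggml logging to `tracing`.
///
/// Note that the [`LogOptions`] are captured the first time this is called; subsequent calls
/// re-register the callbacks but do not replace the stored options.
///
/// # Examples
///
/// A minimal sketch, assuming this crate is built as `llama_cpp_2`:
///
/// ```no_run
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
/// ```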
pub fn send_logs_to_tracing(options: LogOptions) {
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}