use std::ffi::{c_char, CStr, CString, NulError};
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

pub mod context;
pub mod gguf;
pub mod llama_backend;
pub mod llama_batch;
#[cfg(feature = "llguidance")]
pub(crate) mod llguidance_sampler;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod openai;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;

pub use crate::context::session::LlamaStateSeqFlags;

/// Returns `true` when an FFI status code reports success.
pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    status == llama_cpp_sys_2::LLAMA_RS_STATUS_OK
}

/// Converts an FFI status code to a plain `i32` for error reporting.
pub(crate) fn status_to_i32(status: llama_cpp_sys_2::llama_rs_status) -> i32 {
    status as i32
}

/// Convenience `Result` type for this crate, with [`LlamaCppError`] as the error.
pub type Result<T> = std::result::Result<T, LlamaCppError>;

/// All errors that can occur in this crate.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaCppError {
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
    #[error("JsonSchemaToGrammarError: {0}")]
    JsonSchemaToGrammarError(String),
}

/// Errors that can occur when retrieving or applying a chat template.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

/// Errors that can occur when reading model metadata values.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

/// Errors that can occur when creating a llama context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    #[error("null reference from llama.cpp")]
    NullReturn,
}

/// Failed to decode a batch.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

/// Failed to run an encoder on a batch.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

/// Failed to get embeddings from a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

/// Errors that can occur when building a grammar sampler.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    #[error("Grammar call returned null")]
    NullGrammar,
}

impl From<NonZeroI32> for DecodeError {
    /// Maps a non-zero decode status code onto a [`DecodeError`].
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

impl From<NonZeroI32> for EncodeError {
    /// Maps a non-zero encode status code onto an [`EncodeError`].
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

/// Errors that can occur when loading a model.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

/// Errors that can occur when initializing a LoRA adapter.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

/// Errors that can occur when applying a LoRA adapter to a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

/// Errors that can occur when removing a LoRA adapter from a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

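/// Get the current time in microseconds, according to llama.cpp.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`:
///
/// ```no_run
/// let start = llama_cpp_2::llama_time_us();
/// // ... do some work ...
/// let elapsed_us = llama_cpp_2::llama_time_us() - start;
/// println!("took {elapsed_us} µs");
/// ```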
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

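/// Get the maximum number of devices that llama.cpp supports.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`:
///
/// ```no_run
/// let max = llama_cpp_2::max_devices();
/// println!("llama.cpp supports up to {max} devices");
/// ```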
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

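/// Check whether memory-mapped (mmap) model loading is supported.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`:
///
/// ```no_run
/// if llama_cpp_2::mmap_supported() {
///     println!("models can be memory-mapped");
/// }
/// ```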
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

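/// Check whether locking model memory into RAM (mlock) is supported.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`:
///
/// ```no_run
/// if llama_cpp_2::mlock_supported() {
///     println!("model memory can be locked into RAM");
/// }
/// ```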
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

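/// Convert a JSON schema into a llama.cpp GBNF grammar string.
///
/// # Errors
///
/// Returns [`LlamaCppError::JsonSchemaToGrammarError`] if the schema contains
/// a null byte, the FFI conversion reports an error, or the resulting grammar
/// is not valid UTF-8.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`:
///
/// ```no_run
/// let grammar = llama_cpp_2::json_schema_to_grammar(r#"{"type": "object"}"#)
///     .expect("schema conversion should succeed");
/// assert!(grammar.contains("root ::="));
/// ```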
pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
    let schema_cstr = CString::new(schema_json)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
    let mut out = std::ptr::null_mut();
    let rc = unsafe {
        llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
    };

    let result = if !status_is_ok(rc) || out.is_null() {
        Err(LlamaCppError::JsonSchemaToGrammarError(format!(
            "ffi error {}",
            status_to_i32(rc)
        )))
    } else {
        let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
        String::from_utf8(grammar_bytes)
            .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))
    };

    // Free the string allocated by llama.cpp on every path, so that an error
    // status can never leak the buffer.
    if !out.is_null() {
        unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };
    }
    result
}

#[cfg(test)]
mod tests {
    use super::json_schema_to_grammar;

    #[test]
    fn json_schema_string_api_returns_grammar() {
        let schema = r#"{
            "type": "object",
            "properties": {
                "city": { "type": "string" },
                "unit": { "enum": ["c", "f"] }
            },
            "required": ["city"]
        }"#;

        let grammar =
            json_schema_to_grammar(schema).expect("string-based schema conversion should succeed");

        assert!(grammar.contains("root ::="));
    }
}

/// Failed to convert a token to a string.
#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    #[error("Unknown Token Type")]
    UnknownTokenType,
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

/// Failed to convert a string to a token sequence.
#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

/// Failed to create a new chat message.
#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    #[error("{0}")]
    NulError(#[from] NulError),
}

/// Failed to apply a chat template to a list of messages.
#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
    #[error("invalid grammar trigger data")]
    InvalidGrammarTriggerType,
}

/// Failed to parse a model response as chat output.
#[derive(Debug, thiserror::Error)]
pub enum ChatParseError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    Utf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
}

/// Failed to accept a token into a sampler.
#[derive(Debug, thiserror::Error)]
pub enum SamplerAcceptError {
    #[error("ffi error {0}")]
    FfiError(i32),
}

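/// Get the current time in microseconds, according to GGML.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`:
///
/// ```no_run
/// let start = llama_cpp_2::ggml_time_us();
/// let elapsed_us = llama_cpp_2::ggml_time_us() - start;
/// ```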
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

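/// Check whether mlock is supported; this is equivalent to [`mlock_supported`].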
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

/// The kind of compute device reported by a GGML backend.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    /// A CPU device.
    Cpu,
    /// An accelerator device that is neither a plain CPU nor a GPU.
    Accelerator,
    /// A discrete GPU.
    Gpu,
    /// A GPU integrated with the CPU.
    IntegratedGpu,
    /// A device type this crate does not recognize.
    Unknown,
}

/// A GGML backend device, as reported by llama.cpp.
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    /// Index of the device in the backend registry.
    pub index: usize,
    /// Name of the device.
    pub name: String,
    /// Human-readable description of the device.
    pub description: String,
    /// Name of the backend that exposes the device.
    pub backend: String,
    /// Total device memory, in bytes.
    pub memory_total: usize,
    /// Free device memory, in bytes.
    pub memory_free: usize,
    /// The kind of device (CPU, GPU, accelerator, ...).
    pub device_type: LlamaBackendDeviceType,
}

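/// List the GGML backend devices that llama.cpp can see, along with their
/// reported memory and device type.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`:
///
/// ```no_run
/// // Print every visible device with its free/total memory in MiB.
/// for dev in llama_cpp_2::list_llama_ggml_backend_devices() {
///     println!(
///         "[{}] {} ({}) via {}: {}/{} MiB free",
///         dev.index,
///         dev.name,
///         dev.description,
///         dev.backend,
///         dev.memory_free / (1024 * 1024),
///         dev.memory_total / (1024 * 1024),
///     );
/// }
/// ```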
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    fn cstr_to_string(ptr: *const c_char) -> String {
        if ptr.is_null() {
            String::new()
        } else {
            unsafe { std::ffi::CStr::from_ptr(ptr) }
                .to_string_lossy()
                .to_string()
        }
    }

    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        // `ggml_backend_dev_get_props` fills in the whole struct, so a zeroed
        // value is a safe starting point.
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

/// Options for mapping llama.cpp and GGML logs into `tracing`.
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
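    /// Set whether logs are forwarded to `tracing`. Logs are enabled by
    /// default; passing `false` suppresses all llama.cpp and GGML output.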
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    // `data` is the heap-allocated `log::State` registered in
    // `send_logs_to_tracing`; it lives for the remainder of the program.
    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    // Skip logs below the enabled level, but remember the level so that any
    // continuation lines belonging to a disabled log are also dropped.
    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

    // GGML_LOG_LEVEL_CONT continues the previous message, so append it to the
    // buffered line; otherwise emit complete lines immediately and buffer
    // partial ones until their trailing newline arrives.
    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}

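/// Redirect llama.cpp and GGML log output to the `tracing` ecosystem instead
/// of stderr. The registered state lives for the rest of the program, so this
/// is intended to be called once at startup.
///
/// A sketch of usage; the example assumes the crate is named `llama_cpp_2`
/// and that a `tracing` subscriber is installed separately:
///
/// ```no_run
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
/// ```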
pub fn send_logs_to_tracing(options: LogOptions) {
    // The callback state must outlive every future log call, so it is kept in
    // lazily-initialized statics and handed to llama.cpp as raw pointers.
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        // Install the same callback for both the llama.cpp and GGML log streams.
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}