1use std::ffi::{c_char, CStr, CString, NulError};
18use std::fmt::Debug;
19use std::num::NonZeroI32;
20
21use crate::llama_batch::BatchAddError;
22use std::os::raw::c_int;
23use std::path::PathBuf;
24use std::string::FromUtf8Error;
25
26pub mod context;
27pub mod gguf;
28pub mod llama_backend;
29pub mod llama_batch;
30#[cfg(feature = "llguidance")]
31pub(crate) mod llguidance_sampler;
32mod log;
33pub mod model;
34#[cfg(feature = "mtmd")]
35pub mod mtmd;
36pub mod sampling;
37pub mod timing;
38pub mod token;
39pub mod token_type;
40
41pub use crate::context::session::LlamaStateSeqFlags;
42
/// Reports whether `status` is the success code `LLAMA_RS_STATUS_OK`.
///
/// Only compiled when the `common` feature is enabled.
#[cfg(feature = "common")]
pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    let ok = llama_cpp_sys_2::LLAMA_RS_STATUS_OK;
    status == ok
}
47
/// Crate-wide result alias: a standard `Result` whose error type is fixed to
/// [`LlamaCppError`].
pub type Result<T> = std::result::Result<T, LlamaCppError>;
50
51#[derive(Debug, Eq, PartialEq, thiserror::Error)]
53pub enum LlamaCppError {
54 #[error("BackendAlreadyInitialized")]
57 BackendAlreadyInitialized,
58 #[error("{0}")]
60 ChatTemplateError(#[from] ChatTemplateError),
61 #[error("{0}")]
63 DecodeError(#[from] DecodeError),
64 #[error("{0}")]
66 EncodeError(#[from] EncodeError),
67 #[error("{0}")]
69 LlamaModelLoadError(#[from] LlamaModelLoadError),
70 #[error("{0}")]
72 LlamaContextLoadError(#[from] LlamaContextLoadError),
73 #[error["{0}"]]
75 BatchAddError(#[from] BatchAddError),
76 #[error(transparent)]
78 EmbeddingError(#[from] EmbeddingsError),
79 #[error("Backend device {0} not found")]
82 BackendDeviceNotFound(usize),
83 #[error("Max devices exceeded. Max devices is {0}")]
85 MaxDevicesExceeded(usize),
86 #[cfg(feature = "common")]
88 #[error("JsonSchemaToGrammarError: {0}")]
89 JsonSchemaToGrammarError(String),
90 #[cfg(feature = "common")]
92 #[error("{0}")]
93 FitError(#[from] crate::model::params::FitError),
94}
95
96#[derive(Debug, Eq, PartialEq, thiserror::Error)]
98pub enum ChatTemplateError {
99 #[error("chat template not found - returned null pointer")]
101 MissingTemplate,
102
103 #[error("null byte in string {0}")]
105 NullError(#[from] NulError),
106
107 #[error(transparent)]
109 Utf8Error(#[from] std::str::Utf8Error),
110}
111
112#[derive(Debug, Eq, PartialEq, thiserror::Error)]
114pub enum MetaValError {
115 #[error("null byte in string {0}")]
117 NullError(#[from] NulError),
118
119 #[error("FromUtf8Error {0}")]
121 FromUtf8Error(#[from] FromUtf8Error),
122
123 #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
125 NegativeReturn(i32),
126}
127
128#[derive(Debug, Eq, PartialEq, thiserror::Error)]
130pub enum LlamaContextLoadError {
131 #[error("null reference from llama.cpp")]
133 NullReturn,
134}
135
136#[derive(Debug, Eq, PartialEq, thiserror::Error)]
138pub enum DecodeError {
139 #[error("Decode Error 1: NoKvCacheSlot")]
141 NoKvCacheSlot,
142 #[error("Decode Error -1: n_tokens == 0")]
144 NTokensZero,
145 #[error("Decode Error {0}: unknown")]
147 Unknown(c_int),
148}
149
150#[derive(Debug, Eq, PartialEq, thiserror::Error)]
152pub enum EncodeError {
153 #[error("Encode Error 1: NoKvCacheSlot")]
155 NoKvCacheSlot,
156 #[error("Encode Error -1: n_tokens == 0")]
158 NTokensZero,
159 #[error("Encode Error {0}: unknown")]
161 Unknown(c_int),
162}
163
164#[derive(Debug, Eq, PartialEq, thiserror::Error)]
166pub enum EmbeddingsError {
167 #[error("Embeddings weren't enabled in the context options")]
169 NotEnabled,
170 #[error("Logits were not enabled for the given token")]
172 LogitsNotEnabled,
173 #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
175 NonePoolType,
176}
177
178#[derive(Debug, Eq, PartialEq, thiserror::Error)]
180pub enum GrammarError {
181 #[error("Grammar root not found in grammar string")]
183 RootNotFound,
184 #[error("Trigger word contains null bytes")]
186 TriggerWordNullBytes,
187 #[error("Grammar string or root contains null bytes")]
189 GrammarNullBytes,
190 #[error("Grammar call returned null")]
192 NullGrammar,
193}
194
195impl From<NonZeroI32> for DecodeError {
197 fn from(value: NonZeroI32) -> Self {
198 match value.get() {
199 1 => DecodeError::NoKvCacheSlot,
200 -1 => DecodeError::NTokensZero,
201 i => DecodeError::Unknown(i),
202 }
203 }
204}
205
206impl From<NonZeroI32> for EncodeError {
208 fn from(value: NonZeroI32) -> Self {
209 match value.get() {
210 1 => EncodeError::NoKvCacheSlot,
211 -1 => EncodeError::NTokensZero,
212 i => EncodeError::Unknown(i),
213 }
214 }
215}
216
217#[derive(Debug, Eq, PartialEq, thiserror::Error)]
219pub enum LlamaModelLoadError {
220 #[error("null byte in string {0}")]
222 NullError(#[from] NulError),
223 #[error("null result from llama cpp")]
225 NullResult,
226 #[error("failed to convert path {0} to str")]
228 PathToStrError(PathBuf),
229}
230
231#[derive(Debug, Eq, PartialEq, thiserror::Error)]
233pub enum LlamaLoraAdapterInitError {
234 #[error("null byte in string {0}")]
236 NullError(#[from] NulError),
237 #[error("null result from llama cpp")]
239 NullResult,
240 #[error("failed to convert path {0} to str")]
242 PathToStrError(PathBuf),
243}
244
245#[derive(Debug, Eq, PartialEq, thiserror::Error)]
247pub enum LlamaLoraAdapterSetError {
248 #[error("error code from llama cpp")]
250 ErrorResult(i32),
251}
252
253#[derive(Debug, Eq, PartialEq, thiserror::Error)]
255pub enum LlamaLoraAdapterRemoveError {
256 #[error("error code from llama cpp")]
258 ErrorResult(i32),
259}
260
261#[must_use]
270pub fn llama_time_us() -> i64 {
271 unsafe { llama_cpp_sys_2::llama_time_us() }
272}
273
274#[must_use]
281pub fn max_devices() -> usize {
282 unsafe { llama_cpp_sys_2::llama_max_devices() }
283}
284
285#[must_use]
294pub fn mmap_supported() -> bool {
295 unsafe { llama_cpp_sys_2::llama_supports_mmap() }
296}
297
298#[must_use]
307pub fn mlock_supported() -> bool {
308 unsafe { llama_cpp_sys_2::llama_supports_mlock() }
309}
310
/// Converts a JSON schema, given as a string, into a grammar string via the
/// `llama_rs_json_schema_to_grammar` FFI helper.
///
/// Only compiled when the `common` feature is enabled.
///
/// # Errors
///
/// Returns [`LlamaCppError::JsonSchemaToGrammarError`] when
/// - `schema_json` contains an interior NUL byte,
/// - the FFI call reports a non-OK status or yields a null string, or
/// - the returned grammar is not valid UTF-8.
#[cfg(feature = "common")]
pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
    let schema_cstr = CString::new(schema_json)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
    let mut out = std::ptr::null_mut();
    // SAFETY: `schema_cstr` is a live NUL-terminated string and `out` is a
    // valid location for the out-pointer for the duration of the call.
    let rc = unsafe {
        llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
    };

    // NOTE(review): if the wrapper can fail *after* storing a non-null `out`,
    // that buffer is not released on this path — confirm against the C side.
    if !status_is_ok(rc) || out.is_null() {
        return Err(LlamaCppError::JsonSchemaToGrammarError(format!(
            "ffi error {}",
            rc
        )));
    }

    // Deliberately no `?` from here on: an early return would skip the
    // `llama_rs_string_free` below and leak the FFI-allocated buffer.
    // SAFETY: `out` is non-null (checked above); assumed to point at a
    // NUL-terminated string produced by the call above.
    let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
    let result = String::from_utf8(grammar_bytes)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()));

    // SAFETY: `out` is non-null, was produced by the FFI call above and has
    // not been freed yet; the bytes were copied out before this point.
    unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };
    result
}
337
// Unit tests for the string-based JSON-schema conversion. Only built for test
// runs with the `common` feature enabled, because that is when
// `json_schema_to_grammar` exists.
#[cfg(all(test, feature = "common"))]
mod tests {
    use super::json_schema_to_grammar;

    // A small object schema must convert successfully, and the resulting
    // grammar must define a `root` rule.
    #[test]
    fn json_schema_string_api_returns_grammar() {
        let schema = r#"{
            "type": "object",
            "properties": {
                "city": { "type": "string" },
                "unit": { "enum": ["c", "f"] }
            },
            "required": ["city"]
        }"#;

        let grammar =
            json_schema_to_grammar(schema).expect("string-based schema conversion should succeed");

        assert!(grammar.contains("root ::="));
    }
}
359
360#[derive(Debug, thiserror::Error, Clone)]
362#[non_exhaustive]
363pub enum TokenToStringError {
364 #[error("Unknown Token Type")]
366 UnknownTokenType,
367 #[error("Insufficient Buffer Space {0}")]
369 InsufficientBufferSpace(c_int),
370 #[error("FromUtf8Error {0}")]
372 FromUtf8Error(#[from] FromUtf8Error),
373}
374
375#[derive(Debug, thiserror::Error)]
377pub enum StringToTokenError {
378 #[error("{0}")]
380 NulError(#[from] NulError),
381 #[error("{0}")]
382 CIntConversionError(#[from] std::num::TryFromIntError),
384}
385
386#[derive(Debug, thiserror::Error)]
388pub enum NewLlamaChatMessageError {
389 #[error("{0}")]
391 NulError(#[from] NulError),
392}
393
394#[derive(Debug, thiserror::Error)]
396pub enum ApplyChatTemplateError {
397 #[error("{0}")]
399 NulError(#[from] NulError),
400 #[error("{0}")]
402 FromUtf8Error(#[from] FromUtf8Error),
403 #[error("null result from llama.cpp")]
405 NullResult,
406 #[error("ffi error {0}")]
408 FfiError(i32),
409}
410
411#[derive(Debug, thiserror::Error)]
413pub enum SamplerAcceptError {
414 #[error("ffi error {0}")]
416 FfiError(i32),
417}
418
419#[must_use]
437pub fn ggml_time_us() -> i64 {
438 unsafe { llama_cpp_sys_2::ggml_time_us() }
439}
440
441#[must_use]
453pub fn llama_supports_mlock() -> bool {
454 unsafe { llama_cpp_sys_2::llama_supports_mlock() }
455}
456
/// Kind of a backend device, as classified by
/// `list_llama_ggml_backend_devices`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LlamaBackendDeviceType {
    /// Mapped from `GGML_BACKEND_DEVICE_TYPE_CPU`.
    Cpu,
    /// Mapped from `GGML_BACKEND_DEVICE_TYPE_ACCEL`.
    Accelerator,
    /// Mapped from `GGML_BACKEND_DEVICE_TYPE_GPU`.
    Gpu,
    /// Mapped from `GGML_BACKEND_DEVICE_TYPE_IGPU`.
    IntegratedGpu,
    /// Any device type value not covered by the variants above.
    Unknown,
}
471
472#[derive(Debug, Clone)]
476pub struct LlamaBackendDevice {
477 pub index: usize,
481 pub name: String,
483 pub description: String,
485 pub backend: String,
487 pub memory_total: usize,
489 pub memory_free: usize,
491 pub device_type: LlamaBackendDeviceType,
493}
494
495#[must_use]
497pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
498 let mut devices = Vec::new();
499 for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
500 fn cstr_to_string(ptr: *const c_char) -> String {
501 if ptr.is_null() {
502 String::new()
503 } else {
504 unsafe { std::ffi::CStr::from_ptr(ptr) }
505 .to_string_lossy()
506 .to_string()
507 }
508 }
509 let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
510 let props = unsafe {
511 let mut props = std::mem::zeroed();
512 llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
513 props
514 };
515 let name = cstr_to_string(props.name);
516 let description = cstr_to_string(props.description);
517 let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
518 let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
519 let backend = cstr_to_string(backend_name);
520 let memory_total = props.memory_total;
521 let memory_free = props.memory_free;
522 let device_type = match props.type_ {
523 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
524 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
525 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
526 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
527 _ => LlamaBackendDeviceType::Unknown,
528 };
529 devices.push(LlamaBackendDevice {
530 index: i,
531 name,
532 description,
533 backend,
534 memory_total,
535 memory_free,
536 device_type,
537 });
538 }
539 devices
540}
541
/// Options for `send_logs_to_tracing`.
///
/// The default value has logging enabled (`disabled` is `false`).
#[derive(Clone, Debug, Default)]
pub struct LogOptions {
    // Stored inverted so that `Default` (all-false) means "logs enabled".
    disabled: bool,
}

impl LogOptions {
    /// Returns the options with log forwarding switched on (`true`) or off
    /// (`false`).
    #[must_use]
    pub fn with_logs_enabled(self, enabled: bool) -> Self {
        Self { disabled: !enabled }
    }
}
557
558extern "C" fn logs_to_trace(
559 level: llama_cpp_sys_2::ggml_log_level,
560 text: *const ::std::os::raw::c_char,
561 data: *mut ::std::os::raw::c_void,
562) {
563 use std::borrow::Borrow;
568
569 let log_state = unsafe { &*(data as *const log::State) };
570
571 if log_state.options.disabled {
572 return;
573 }
574
575 if !log_state.is_enabled_for_level(level) {
577 log_state.update_previous_level_for_disabled_log(level);
578 return;
579 }
580
581 let text = unsafe { std::ffi::CStr::from_ptr(text) };
582 let text = text.to_string_lossy();
583 let text: &str = text.borrow();
584
585 if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
591 log_state.cont_buffered_log(text);
592 } else if text.ends_with('\n') {
593 log_state.emit_non_cont_line(level, text);
594 } else {
595 log_state.buffer_non_cont(level, text);
596 }
597}
598
599pub fn send_logs_to_tracing(options: LogOptions) {
601 let llama_heap_state = Box::as_ref(
608 log::LLAMA_STATE
609 .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
610 ) as *const _;
611 let ggml_heap_state = Box::as_ref(
612 log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
613 ) as *const _;
614
615 unsafe {
616 llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
618 llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
619 }
620}