use std::ffi::{c_char, CStr, CString, NulError};
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

pub mod context;
pub mod gguf;
pub mod llama_backend;
pub mod llama_batch;
#[cfg(feature = "llguidance")]
pub(crate) mod llguidance_sampler;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod openai;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;

pub use crate::context::session::LlamaStateSeqFlags;

pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    status == llama_cpp_sys_2::LLAMA_RS_STATUS_OK
}

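/// A convenience `Result` alias that uses [`LlamaCppError`] as the error type.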
pub type Result<T> = std::result::Result<T, LlamaCppError>;

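/// The top-level errors that can occur in this crate.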
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaCppError {
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
    #[error("JsonSchemaToGrammarError: {0}")]
    JsonSchemaToGrammarError(String),
    #[error("{0}")]
    FitError(#[from] crate::model::params::FitError),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    #[error("null reference from llama.cpp")]
    NullReturn,
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    #[error("Grammar call returned null")]
    NullGrammar,
}

impl From<NonZeroI32> for DecodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

impl From<NonZeroI32> for EncodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

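/// Get the current time in microseconds, as reported by llama.cpp (`llama_time_us`).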
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

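/// Get the maximum number of devices supported by llama.cpp (`llama_max_devices`).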
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

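/// Check whether memory mapping of model files is supported (`llama_supports_mmap`).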
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

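/// Check whether locking model memory with mlock is supported (`llama_supports_mlock`).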
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

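/// Convert a JSON schema string into a GBNF grammar string using llama.cpp.
///
/// # Errors
///
/// Returns [`LlamaCppError::JsonSchemaToGrammarError`] if the schema contains a null byte,
/// if the FFI call reports a failure, or if the returned grammar is not valid UTF-8.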
pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
    let schema_cstr = CString::new(schema_json)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
    let mut out = std::ptr::null_mut();
    let rc = unsafe {
        llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
    };

    let result = {
        if !status_is_ok(rc) || out.is_null() {
            return Err(LlamaCppError::JsonSchemaToGrammarError(format!(
                "ffi error {rc}"
            )));
        }
        let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
        let grammar = String::from_utf8(grammar_bytes)
            .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
        Ok(grammar)
    };

    unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };
    result
}

#[cfg(test)]
mod tests {
    use super::json_schema_to_grammar;

    #[test]
    fn json_schema_string_api_returns_grammar() {
        let schema = r#"{
            "type": "object",
            "properties": {
                "city": { "type": "string" },
                "unit": { "enum": ["c", "f"] }
            },
            "required": ["city"]
        }"#;

        let grammar =
            json_schema_to_grammar(schema).expect("string-based schema conversion should succeed");

        assert!(grammar.contains("root ::="));
    }
}

#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    #[error("Unknown Token Type")]
    UnknownTokenType,
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    #[error("{0}")]
    NulError(#[from] NulError),
}

#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
    #[error("invalid grammar trigger data")]
    InvalidGrammarTriggerType,
}

#[derive(Debug, thiserror::Error)]
pub enum ChatParseError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    Utf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
}

#[derive(Debug, thiserror::Error)]
pub enum SamplerAcceptError {
    #[error("ffi error {0}")]
    FfiError(i32),
}

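/// Get the current time in microseconds, as reported by ggml (`ggml_time_us`).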
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

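/// Check whether mlock is supported (`llama_supports_mlock`); same FFI call as [`mlock_supported`].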
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

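/// The kind of device reported by the ggml backend registry.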
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    Cpu,
    Accelerator,
    Gpu,
    IntegratedGpu,
    Unknown,
}

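/// Metadata for a single ggml backend device, as returned by
/// [`list_llama_ggml_backend_devices`].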
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    pub index: usize,
    pub name: String,
    pub description: String,
    pub backend: String,
    pub memory_total: usize,
    pub memory_free: usize,
    pub device_type: LlamaBackendDeviceType,
}

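/// List the devices known to the ggml backend registry, including their name, description,
/// backend name, memory statistics, and device type.
///
/// A minimal usage sketch (assuming this crate is published as `llama_cpp_2`):
///
/// ```no_run
/// // Print every device the ggml backend registry reports.
/// for dev in llama_cpp_2::list_llama_ggml_backend_devices() {
///     println!("{}: {} ({:?})", dev.index, dev.name, dev.device_type);
/// }
/// ```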
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        fn cstr_to_string(ptr: *const c_char) -> String {
            if ptr.is_null() {
                String::new()
            } else {
                unsafe { std::ffi::CStr::from_ptr(ptr) }
                    .to_string_lossy()
                    .to_string()
            }
        }
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

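/// Options that control how llama.cpp and ggml log output is forwarded to `tracing`.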
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
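    /// Enable or disable forwarding of log lines to `tracing`. Forwarding is enabled by default.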
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

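    // GGML_LOG_LEVEL_CONT lines continue the previous message, so they are appended to the
    // buffered line; complete (newline-terminated) lines are emitted immediately, and partial
    // lines are buffered until the rest arrives.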
    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}

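/// Redirect llama.cpp and ggml log output to the `tracing` crate.
///
/// A minimal usage sketch (assuming this crate is published as `llama_cpp_2`):
///
/// ```no_run
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// // Forward all llama.cpp / ggml log lines to the active tracing subscriber.
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
/// ```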
pub fn send_logs_to_tracing(options: LogOptions) {
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}