1use std::ffi::{c_char, CStr, CString, NulError};
18use std::fmt::Debug;
19use std::num::NonZeroI32;
20
21use crate::llama_batch::BatchAddError;
22use std::os::raw::c_int;
23use std::path::PathBuf;
24use std::string::FromUtf8Error;
25
26pub mod context;
27pub mod gguf;
28pub mod llama_backend;
29pub mod llama_batch;
30#[cfg(feature = "llguidance")]
31pub(crate) mod llguidance_sampler;
32mod log;
33pub mod model;
34#[cfg(feature = "mtmd")]
35pub mod mtmd;
36pub mod openai;
37pub mod sampling;
38pub mod timing;
39pub mod token;
40pub mod token_type;
41
42pub use crate::context::session::LlamaStateSeqFlags;
43
/// Returns `true` when an FFI status code from the llama.cpp helper layer
/// equals `LLAMA_RS_STATUS_OK`.
pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    status == llama_cpp_sys_2::LLAMA_RS_STATUS_OK
}
47
/// Convenience `Result` alias with [`LlamaCppError`] as the error type.
pub type Result<T> = std::result::Result<T, LlamaCppError>;
50
51#[derive(Debug, Eq, PartialEq, thiserror::Error)]
53pub enum LlamaCppError {
54 #[error("BackendAlreadyInitialized")]
57 BackendAlreadyInitialized,
58 #[error("{0}")]
60 ChatTemplateError(#[from] ChatTemplateError),
61 #[error("{0}")]
63 DecodeError(#[from] DecodeError),
64 #[error("{0}")]
66 EncodeError(#[from] EncodeError),
67 #[error("{0}")]
69 LlamaModelLoadError(#[from] LlamaModelLoadError),
70 #[error("{0}")]
72 LlamaContextLoadError(#[from] LlamaContextLoadError),
73 #[error["{0}"]]
75 BatchAddError(#[from] BatchAddError),
76 #[error(transparent)]
78 EmbeddingError(#[from] EmbeddingsError),
79 #[error("Backend device {0} not found")]
82 BackendDeviceNotFound(usize),
83 #[error("Max devices exceeded. Max devices is {0}")]
85 MaxDevicesExceeded(usize),
86 #[error("JsonSchemaToGrammarError: {0}")]
88 JsonSchemaToGrammarError(String),
89}
90
/// Errors that can occur when retrieving a model's chat template.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    /// llama.cpp returned a null pointer for the requested template.
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    /// A string passed across the FFI boundary contained an interior NUL byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    /// The template bytes returned by llama.cpp were not valid UTF-8.
    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}
106
/// Errors that can occur when reading model metadata values.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    /// A string passed across the FFI boundary contained an interior NUL byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    /// The metadata bytes were not valid UTF-8.
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    /// llama.cpp returned a negative value, typically meaning the index or key
    /// was not found.
    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}
122
/// Errors that can occur when creating a llama context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    /// llama.cpp returned a null context pointer.
    #[error("null reference from llama.cpp")]
    NullReturn,
}
130
/// Errors produced by `llama_decode`, mapped from its raw return code
/// (see the `From<NonZeroI32>` impl below).
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    /// Return code 1: no free slot in the KV cache for the batch.
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    /// Return code -1: the batch contained zero tokens.
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    /// Any other non-zero return code from llama.cpp.
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}
144
/// Errors produced by `llama_encode`, mapped from its raw return code
/// (see the `From<NonZeroI32>` impl below).
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    /// Return code 1: no free slot in the KV cache for the batch.
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    /// Return code -1: the batch contained zero tokens.
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    /// Any other non-zero return code from llama.cpp.
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}
158
/// Errors that can occur when fetching embeddings from a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    /// Embeddings were not enabled in the context options.
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    /// Logits were not enabled for the token whose embedding was requested.
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    /// Sequence embeddings are unavailable with `LLAMA_POOLING_TYPE_NONE`.
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}
172
/// Errors that can occur when constructing a grammar sampler.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    /// The grammar string does not define the requested root rule.
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    /// A trigger word contained an interior NUL byte.
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    /// The grammar string or root name contained an interior NUL byte.
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    /// llama.cpp returned a null grammar sampler.
    #[error("Grammar call returned null")]
    NullGrammar,
}
189
190impl From<NonZeroI32> for DecodeError {
192 fn from(value: NonZeroI32) -> Self {
193 match value.get() {
194 1 => DecodeError::NoKvCacheSlot,
195 -1 => DecodeError::NTokensZero,
196 i => DecodeError::Unknown(i),
197 }
198 }
199}
200
201impl From<NonZeroI32> for EncodeError {
203 fn from(value: NonZeroI32) -> Self {
204 match value.get() {
205 1 => EncodeError::NoKvCacheSlot,
206 -1 => EncodeError::NTokensZero,
207 i => EncodeError::Unknown(i),
208 }
209 }
210}
211
/// Errors that can occur when loading a model from disk.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    /// The model path contained an interior NUL byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    /// llama.cpp returned a null model pointer.
    #[error("null result from llama cpp")]
    NullResult,
    /// The path could not be represented as a `&str` for the FFI call.
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}
225
/// Errors that can occur when initializing a LoRA adapter.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    /// The adapter path contained an interior NUL byte.
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    /// llama.cpp returned a null adapter pointer.
    #[error("null result from llama cpp")]
    NullResult,
    /// The path could not be represented as a `&str` for the FFI call.
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}
239
/// Errors that can occur when applying a LoRA adapter to a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    /// llama.cpp reported a non-zero error code.
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}
247
/// Errors that can occur when removing a LoRA adapter from a context.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    /// llama.cpp reported a non-zero error code.
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}
255
/// Returns the current time in microseconds, as reported by llama.cpp.
#[must_use]
pub fn llama_time_us() -> i64 {
    // SAFETY: `llama_time_us` takes no arguments and has no preconditions.
    unsafe { llama_cpp_sys_2::llama_time_us() }
}
268
/// Returns the maximum number of devices llama.cpp supports.
#[must_use]
pub fn max_devices() -> usize {
    // SAFETY: `llama_max_devices` takes no arguments and has no preconditions.
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}
279
/// Returns whether llama.cpp was built with memory-mapped file (mmap) support.
#[must_use]
pub fn mmap_supported() -> bool {
    // SAFETY: `llama_supports_mmap` takes no arguments and has no preconditions.
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}
292
/// Returns whether llama.cpp was built with mlock support.
///
/// NOTE(review): [`llama_supports_mlock`] later in this file wraps the same
/// FFI call; both are kept for backward compatibility.
#[must_use]
pub fn mlock_supported() -> bool {
    // SAFETY: `llama_supports_mlock` takes no arguments and has no preconditions.
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}
305
306pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
308 let schema_cstr = CString::new(schema_json)
309 .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
310 let mut out = std::ptr::null_mut();
311 let rc = unsafe {
312 llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
313 };
314
315 let result = {
316 if !status_is_ok(rc) || out.is_null() {
317 return Err(LlamaCppError::JsonSchemaToGrammarError(format!(
318 "ffi error {}",
319 rc
320 )));
321 }
322 let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
323 let grammar = String::from_utf8(grammar_bytes)
324 .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
325 Ok(grammar)
326 };
327
328 unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };
329 result
330}
331
#[cfg(test)]
mod tests {
    use super::json_schema_to_grammar;

    /// Smoke test: a small but representative JSON schema converts to a GBNF
    /// grammar that at least defines the mandatory `root` rule.
    #[test]
    fn json_schema_string_api_returns_grammar() {
        let schema = r#"{
            "type": "object",
            "properties": {
                "city": { "type": "string" },
                "unit": { "enum": ["c", "f"] }
            },
            "required": ["city"]
        }"#;

        let grammar =
            json_schema_to_grammar(schema).expect("string-based schema conversion should succeed");

        assert!(grammar.contains("root ::="));
    }
}
353
/// Errors that can occur when converting a token to a string.
#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    /// The token's type is not recognized.
    #[error("Unknown Token Type")]
    UnknownTokenType,
    /// The output buffer was too small; payload is the size reported by llama.cpp.
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    /// The token bytes were not valid UTF-8.
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}
368
/// Errors that can occur when tokenizing a string.
#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    /// The input string contained an interior NUL byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// A length did not fit into a C `int`.
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}
379
/// Errors that can occur when constructing a chat message.
#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    /// The role or content string contained an interior NUL byte.
    #[error("{0}")]
    NulError(#[from] NulError),
}
387
/// Errors that can occur when applying a chat template to messages.
#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    /// An input string contained an interior NUL byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// The rendered template bytes were not valid UTF-8.
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
    /// llama.cpp returned a null result.
    #[error("null result from llama.cpp")]
    NullResult,
    /// llama.cpp reported a non-zero error code.
    #[error("ffi error {0}")]
    FfiError(i32),
    /// The grammar trigger data returned by llama.cpp had an unexpected type.
    #[error("invalid grammar trigger data")]
    InvalidGrammarTriggerType,
}
407
/// Errors that can occur when parsing chat output.
#[derive(Debug, thiserror::Error)]
pub enum ChatParseError {
    /// An input string contained an interior NUL byte.
    #[error("{0}")]
    NulError(#[from] NulError),
    /// The parsed bytes were not valid UTF-8.
    #[error("{0}")]
    Utf8Error(#[from] FromUtf8Error),
    /// llama.cpp returned a null result.
    #[error("null result from llama.cpp")]
    NullResult,
    /// llama.cpp reported a non-zero error code.
    #[error("ffi error {0}")]
    FfiError(i32),
}
424
/// Errors that can occur when a sampler accepts a token.
#[derive(Debug, thiserror::Error)]
pub enum SamplerAcceptError {
    /// llama.cpp reported a non-zero error code.
    #[error("ffi error {0}")]
    FfiError(i32),
}
432
/// Returns the current time in microseconds, as reported by ggml.
#[must_use]
pub fn ggml_time_us() -> i64 {
    // SAFETY: `ggml_time_us` takes no arguments and has no preconditions.
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}
454
/// Returns whether llama.cpp was built with mlock support.
///
/// NOTE(review): this wraps the same FFI call as [`mlock_supported`] above;
/// both names are kept so existing callers keep working.
#[must_use]
pub fn llama_supports_mlock() -> bool {
    // SAFETY: `llama_supports_mlock` takes no arguments and has no preconditions.
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}
470
/// The kind of a ggml backend device, mapped from `GGML_BACKEND_DEVICE_TYPE_*`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    /// A CPU device.
    Cpu,
    /// An accelerator device (`GGML_BACKEND_DEVICE_TYPE_ACCEL`).
    Accelerator,
    /// A discrete GPU.
    Gpu,
    /// An integrated GPU.
    IntegratedGpu,
    /// Any device type this crate does not recognize.
    Unknown,
}
485
/// A ggml backend device, as reported by
/// [`list_llama_ggml_backend_devices`].
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    // Index of the device in ggml's device list.
    pub index: usize,
    // Device name reported by ggml.
    pub name: String,
    // Human-readable device description reported by ggml.
    pub description: String,
    // Name of the backend registry this device belongs to (e.g. its backend's name).
    pub backend: String,
    // Total device memory in bytes.
    pub memory_total: usize,
    // Free device memory in bytes.
    pub memory_free: usize,
    // The kind of device (CPU, GPU, ...).
    pub device_type: LlamaBackendDeviceType,
}
508
509#[must_use]
511pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
512 let mut devices = Vec::new();
513 for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
514 fn cstr_to_string(ptr: *const c_char) -> String {
515 if ptr.is_null() {
516 String::new()
517 } else {
518 unsafe { std::ffi::CStr::from_ptr(ptr) }
519 .to_string_lossy()
520 .to_string()
521 }
522 }
523 let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
524 let props = unsafe {
525 let mut props = std::mem::zeroed();
526 llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
527 props
528 };
529 let name = cstr_to_string(props.name);
530 let description = cstr_to_string(props.description);
531 let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
532 let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
533 let backend = cstr_to_string(backend_name);
534 let memory_total = props.memory_total;
535 let memory_free = props.memory_free;
536 let device_type = match props.type_ {
537 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
538 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
539 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
540 llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
541 _ => LlamaBackendDeviceType::Unknown,
542 };
543 devices.push(LlamaBackendDevice {
544 index: i,
545 name,
546 description,
547 backend,
548 memory_total,
549 memory_free,
550 device_type,
551 });
552 }
553 devices
554}
555
/// Options controlling how llama.cpp/ggml logs are forwarded to tracing.
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    // When true, the log callback drops all messages.
    disabled: bool,
}
561
impl LogOptions {
    /// Returns `self` with log forwarding enabled or disabled.
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        // The struct stores the inverted flag, hence the negation.
        self.disabled = !enabled;
        self
    }
}
571
/// C log callback registered with `llama_log_set`/`ggml_log_set`; forwards
/// llama.cpp/ggml log lines into the crate's `log::State`.
extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    // SAFETY assumption: `data` is the `*const log::State` registered in
    // `send_logs_to_tracing`, which lives in a process-lifetime OnceLock —
    // TODO confirm no other registration path exists.
    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    // A level that is filtered out still updates the level bookkeeping so that
    // later continuation lines can be attributed to the right level.
    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    // NOTE(review): assumes `text` is non-null and NUL-terminated; llama.cpp
    // appears to guarantee this for its log callback — verify upstream.
    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

    // CONT messages continue the previously buffered line; a complete line
    // (ends with '\n') is emitted immediately; anything else is buffered
    // until it is terminated.
    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}
612
/// Installs [`logs_to_trace`] as the log callback for both llama.cpp and ggml.
///
/// NOTE(review): the per-module states are created once via
/// `OnceLock::get_or_init`, so `options` passed on any later call are
/// ignored — confirm this first-call-wins behavior is intended.
pub fn send_logs_to_tracing(options: LogOptions) {
    // The boxed states live in process-lifetime OnceLocks, so the raw
    // pointers handed to the C side below remain valid forever.
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    // SAFETY: the callback and the state pointers outlive every log call
    // (see note above); the cast to *mut matches the C signature but the
    // callback only reads through it.
    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}