use std::ffi::{c_char, CStr, CString, NulError};
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

pub mod context;
pub mod llama_backend;
pub mod llama_batch;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod openai;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;

pub(crate) fn status_is_ok(status: llama_cpp_sys_2::llama_rs_status) -> bool {
    status == llama_cpp_sys_2::LLAMA_RS_STATUS_OK
}

pub(crate) fn status_to_i32(status: llama_cpp_sys_2::llama_rs_status) -> i32 {
    status as i32
}

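/// A `Result` alias for this crate, with [`LlamaCppError`] as the error type.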
pub type Result<T> = std::result::Result<T, LlamaCppError>;

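/// The top-level error type for this crate. Most of the more specific error
/// enums below convert into it via `From`.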
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaCppError {
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
    #[error("JsonSchemaToGrammarError: {0}")]
    JsonSchemaToGrammarError(String),
}

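/// Errors that can occur when retrieving a model's chat template.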
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

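/// Errors that can occur when reading model metadata values.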
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

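/// Errors that can occur when creating a context from a model.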
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    #[error("null reference from llama.cpp")]
    NullReturn,
}

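/// Failure modes of decoding, mapped from the non-zero return codes of `llama_decode`.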
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

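/// Failure modes of encoding, mapped from the non-zero return codes of `llama_encode`.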
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

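/// Errors that can occur when fetching embeddings from a context.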
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

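/// Errors that can occur when building a grammar sampler.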
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum GrammarError {
    #[error("Grammar root not found in grammar string")]
    RootNotFound,
    #[error("Trigger word contains null bytes")]
    TriggerWordNullBytes,
    #[error("Grammar string or root contains null bytes")]
    GrammarNullBytes,
    #[error("Grammar call returned null")]
    NullGrammar,
}

impl From<NonZeroI32> for DecodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

impl From<NonZeroI32> for EncodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

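/// Errors that can occur when loading a model from disk.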
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

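/// Errors that can occur when initializing a LoRA adapter from a file.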
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

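/// Errors that can occur when applying a LoRA adapter to a context.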
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

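/// Errors that can occur when removing a LoRA adapter from a context.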
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

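/// Get the current time in microseconds, as reported by llama.cpp (`llama_time_us`).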
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

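/// Get the maximum number of devices supported by this llama.cpp build (`llama_max_devices`).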
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

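/// Check whether memory-mapping of model files is supported (`llama_supports_mmap`).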
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

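/// Check whether locking model memory with mlock is supported (`llama_supports_mlock`).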
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

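/// Convert a JSON schema into a llama.cpp GBNF grammar string.
///
/// # Errors
///
/// Returns [`LlamaCppError::JsonSchemaToGrammarError`] if the schema contains an
/// interior null byte, the FFI call reports an error, or the returned grammar is
/// not valid UTF-8.
///
/// Illustrative sketch (assumes this crate is imported as `llama_cpp_2`):
///
/// ```ignore
/// let grammar = llama_cpp_2::json_schema_to_grammar(
///     r#"{"type": "object", "properties": {"answer": {"type": "string"}}}"#,
/// )
/// .expect("schema converts to a grammar");
/// println!("{grammar}");
/// ```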
pub fn json_schema_to_grammar(schema_json: &str) -> Result<String> {
    let schema_cstr = CString::new(schema_json)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))?;
    let mut out = std::ptr::null_mut();
    let rc = unsafe {
        llama_cpp_sys_2::llama_rs_json_schema_to_grammar(schema_cstr.as_ptr(), false, &mut out)
    };

    if !status_is_ok(rc) || out.is_null() {
        return Err(LlamaCppError::JsonSchemaToGrammarError(format!(
            "ffi error {}",
            status_to_i32(rc)
        )));
    }

    // Copy the grammar out of the C buffer, then free the buffer before the
    // fallible UTF-8 conversion so it cannot leak on the error path.
    let grammar_bytes = unsafe { CStr::from_ptr(out) }.to_bytes().to_vec();
    unsafe { llama_cpp_sys_2::llama_rs_string_free(out) };

    String::from_utf8(grammar_bytes)
        .map_err(|err| LlamaCppError::JsonSchemaToGrammarError(err.to_string()))
}

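/// Errors that can occur when converting a token to a string.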
#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    #[error("Unknown Token Type")]
    UnknownTokenType,
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

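/// Errors that can occur when converting a string into tokens.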
#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

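/// Errors that can occur when constructing a new chat message.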
#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    #[error("{0}")]
    NulError(#[from] NulError),
}

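/// Errors that can occur when applying a chat template to a list of messages.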
#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
    #[error("invalid grammar trigger data")]
    InvalidGrammarTriggerType,
}

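/// Errors that can occur when parsing chat output returned by llama.cpp.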
#[derive(Debug, thiserror::Error)]
pub enum ChatParseError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    Utf8Error(#[from] FromUtf8Error),
    #[error("null result from llama.cpp")]
    NullResult,
    #[error("ffi error {0}")]
    FfiError(i32),
}

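/// Errors that can occur when accepting a token into a sampler.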
#[derive(Debug, thiserror::Error)]
pub enum SamplerAcceptError {
    #[error("ffi error {0}")]
    FfiError(i32),
}

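/// Get the current time in microseconds, as reported by ggml (`ggml_time_us`).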
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

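/// Check whether locking model memory with mlock is supported. This wraps the
/// same `llama_supports_mlock` call as [`mlock_supported`].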
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

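/// The kind of compute device reported by the ggml backend registry.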
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    Cpu,
    Accelerator,
    Gpu,
    IntegratedGpu,
    Unknown,
}

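/// A compute device reported by the ggml backend registry. `memory_total` and
/// `memory_free` are reported in bytes.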
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    pub index: usize,
    pub name: String,
    pub description: String,
    pub backend: String,
    pub memory_total: usize,
    pub memory_free: usize,
    pub device_type: LlamaBackendDeviceType,
}

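/// List the compute devices known to the ggml backend registry.
///
/// Illustrative sketch (assumes this crate is imported as `llama_cpp_2`):
///
/// ```ignore
/// for dev in llama_cpp_2::list_llama_ggml_backend_devices() {
///     println!(
///         "{} [{}] {}: {} / {} bytes free",
///         dev.index, dev.backend, dev.name, dev.memory_free, dev.memory_total
///     );
/// }
/// ```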
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        fn cstr_to_string(ptr: *const c_char) -> String {
            if ptr.is_null() {
                String::new()
            } else {
                unsafe { std::ffi::CStr::from_ptr(ptr) }
                    .to_string_lossy()
                    .to_string()
            }
        }
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

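/// Options controlling how llama.cpp and ggml log output is forwarded to
/// `tracing` by [`send_logs_to_tracing`].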
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
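    /// Enable or disable forwarding of logs. Logs are enabled by default.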
    #[must_use]
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

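// C-ABI callback registered with `llama_log_set` / `ggml_log_set`. It forwards each
// log record to the `log::State` passed through `data`, buffering partial lines and
// merging `GGML_LOG_LEVEL_CONT` continuations before emitting them to `tracing`.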
extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}

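/// Redirect llama.cpp and ggml log output to `tracing`.
///
/// The callback state is stored in process-wide statics, so the [`LogOptions`]
/// passed on the first call are the ones that take effect; later calls re-register
/// the callback but do not replace the stored options.
///
/// Illustrative sketch (assumes this crate is imported as `llama_cpp_2` and a
/// `tracing` subscriber is installed elsewhere):
///
/// ```ignore
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
/// ```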
pub fn send_logs_to_tracing(options: LogOptions) {
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}