//! Error types for the llama.cpp bindings (`llama_cpp_bindings/error.rs`).
use std::ffi::NulError;
use std::num::NonZeroI32;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

use crate::llama_batch::BatchAddError;
8
9/// A failable result from a llama.cpp function.
10pub type Result<TValue> = std::result::Result<TValue, LlamaCppError>;
11
12/// All errors that can occur in the llama-cpp crate.
13#[derive(Debug, Eq, PartialEq, thiserror::Error)]
14pub enum LlamaCppError {
15    /// The backend was already initialized. This can generally be ignored as initializing the backend
16    /// is idempotent.
17    #[error("BackendAlreadyInitialized")]
18    BackendAlreadyInitialized,
19    /// There was an error while get the chat template from model.
20    #[error("{0}")]
21    ChatTemplateError(#[from] ChatTemplateError),
22    /// There was an error while decoding a batch.
23    #[error("{0}")]
24    DecodeError(#[from] DecodeError),
25    /// There was an error while encoding a batch.
26    #[error("{0}")]
27    EncodeError(#[from] EncodeError),
28    /// There was an error loading a model.
29    #[error("{0}")]
30    LlamaModelLoadError(#[from] LlamaModelLoadError),
31    /// There was an error creating a new model context.
32    #[error("{0}")]
33    LlamaContextLoadError(#[from] LlamaContextLoadError),
34    /// There was an error adding a token to a batch.
35    #[error["{0}"]]
36    BatchAddError(#[from] BatchAddError),
37    /// see [`EmbeddingsError`]
38    #[error(transparent)]
39    EmbeddingError(#[from] EmbeddingsError),
40    // See [`LlamaSamplerError`]
41    /// Backend device not found
42    #[error("Backend device {0} not found")]
43    BackendDeviceNotFound(usize),
44    /// Max devices exceeded
45    #[error("Max devices exceeded. Max devices is {0}")]
46    MaxDevicesExceeded(usize),
47    /// Failed to convert JSON schema to grammar.
48    #[error("JsonSchemaToGrammarError: {0}")]
49    JsonSchemaToGrammarError(String),
50}
51
52/// There was an error while getting the chat template from a model.
53#[derive(Debug, Eq, PartialEq, thiserror::Error)]
54pub enum ChatTemplateError {
55    /// gguf has no chat template (by that name)
56    #[error("chat template not found - returned null pointer")]
57    MissingTemplate,
58
59    /// chat template contained a null byte
60    #[error("null byte in string {0}")]
61    NullError(#[from] NulError),
62
63    /// The chat template was not valid utf8.
64    #[error(transparent)]
65    Utf8Error(#[from] std::str::Utf8Error),
66}
67
68/// Failed fetching metadata value
69#[derive(Debug, Eq, PartialEq, thiserror::Error)]
70pub enum MetaValError {
71    /// The provided string contains an unexpected null-byte
72    #[error("null byte in string {0}")]
73    NullError(#[from] NulError),
74
75    /// The returned data contains invalid UTF8 data
76    #[error("FromUtf8Error {0}")]
77    FromUtf8Error(#[from] FromUtf8Error),
78
79    /// Got negative return value. This happens if the key or index queried does not exist.
80    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
81    NegativeReturn(i32),
82}
83
84/// Failed to Load context
85#[derive(Debug, Eq, PartialEq, thiserror::Error)]
86pub enum LlamaContextLoadError {
87    /// llama.cpp returned null
88    #[error("null reference from llama.cpp")]
89    NullReturn,
90}
91
92/// Failed to decode a batch.
93#[derive(Debug, Eq, PartialEq, thiserror::Error)]
94pub enum DecodeError {
95    /// No kv cache slot was available.
96    #[error("Decode Error 1: NoKvCacheSlot")]
97    NoKvCacheSlot,
98    /// The computation was aborted by the abort callback.
99    #[error("Decode Error 2: Aborted")]
100    Aborted,
101    /// The number of tokens in the batch was 0.
102    #[error("Decode Error -1: n_tokens == 0")]
103    NTokensZero,
104    /// An unknown error occurred.
105    #[error("Decode Error {0}: unknown")]
106    Unknown(c_int),
107}
108
109/// Failed to decode a batch.
110#[derive(Debug, Eq, PartialEq, thiserror::Error)]
111pub enum EncodeError {
112    /// No kv cache slot was available.
113    #[error("Encode Error 1: NoKvCacheSlot")]
114    NoKvCacheSlot,
115    /// The number of tokens in the batch was 0.
116    #[error("Encode Error -1: n_tokens == 0")]
117    NTokensZero,
118    /// An unknown error occurred.
119    #[error("Encode Error {0}: unknown")]
120    Unknown(c_int),
121}
122
123/// When embedding related functions fail
124#[derive(Debug, Eq, PartialEq, thiserror::Error)]
125pub enum EmbeddingsError {
126    /// Embeddings weren't enabled in the context options
127    #[error("Embeddings weren't enabled in the context options")]
128    NotEnabled,
129    /// Logits weren't enabled for the given token
130    #[error("Logits were not enabled for the given token")]
131    LogitsNotEnabled,
132    /// The given sequence index exceeds the max sequence id
133    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
134    NonePoolType,
135    /// The embedding dimension does not fit into a usize.
136    #[error("Invalid embedding dimension: {0}")]
137    InvalidEmbeddingDimension(#[source] std::num::TryFromIntError),
138}
139
140/// When logits-related functions fail
141#[derive(Debug, Eq, PartialEq, thiserror::Error)]
142pub enum LogitsError {
143    /// The logits data pointer is null.
144    #[error("logits data pointer is null")]
145    NullLogits,
146    /// The requested token index has not been initialized for logits.
147    #[error("logit for token index {0} is not initialized")]
148    TokenNotInitialized(i32),
149    /// The token index exceeds the context size.
150    #[error("token index {token_index} exceeds context size {context_size}")]
151    TokenIndexExceedsContext {
152        /// The token index that was requested.
153        token_index: u32,
154        /// The context size.
155        context_size: u32,
156    },
157    /// The vocabulary size does not fit into a usize.
158    #[error("n_vocab does not fit into usize: {0}")]
159    VocabSizeOverflow(#[source] std::num::TryFromIntError),
160    /// The token index does not fit into a u32.
161    #[error("token_index does not fit into u32: {0}")]
162    TokenIndexOverflow(#[source] std::num::TryFromIntError),
163}
164
165/// Errors that can occur when initializing a grammar sampler
166#[derive(Debug, Eq, PartialEq, thiserror::Error)]
167pub enum GrammarError {
168    /// The grammar root was not found in the grammar string
169    #[error("Grammar root not found in grammar string")]
170    RootNotFound,
171    /// The trigger word contains null bytes
172    #[error("Trigger word contains null bytes: {0}")]
173    TriggerWordNullBytes(NulError),
174    /// The grammar string or root contains null bytes
175    #[error("Grammar string or root contains null bytes: {0}")]
176    GrammarNullBytes(NulError),
177    /// A string contains null bytes
178    #[error("String contains null bytes: {0}")]
179    NulError(#[from] NulError),
180    /// The grammar call returned null
181    #[error("Grammar initialization failed: {0}")]
182    NullGrammar(String),
183    /// An integer value exceeded the allowed range
184    #[error("Integer overflow: {0}")]
185    IntegerOverflow(String),
186    /// An error from the llguidance library
187    #[error("llguidance error: {0}")]
188    LlguidanceError(String),
189}
190
191/// Errors that can occur when creating a sampling configuration.
192#[derive(Debug, Eq, PartialEq, thiserror::Error)]
193pub enum SamplingError {
194    /// An integer value exceeded the allowed range
195    #[error("Integer overflow: {0}")]
196    IntegerOverflow(String),
197}
198
199/// Errors that can occur when sampling a token.
200#[derive(Debug, Eq, PartialEq, thiserror::Error)]
201pub enum SampleError {
202    /// A C++ exception was thrown during sampling
203    #[error("C++ exception during sampling: {0}")]
204    CppException(String),
205
206    /// An invalid argument was passed to the sampler
207    #[error("Invalid argument passed to sampler")]
208    InvalidArgument,
209}
210
211/// Decode a error from llama.cpp into a [`DecodeError`].
212impl From<NonZeroI32> for DecodeError {
213    fn from(value: NonZeroI32) -> Self {
214        match value.get() {
215            1 => Self::NoKvCacheSlot,
216            2 => Self::Aborted,
217            -1 => Self::NTokensZero,
218            error_code => Self::Unknown(error_code),
219        }
220    }
221}
222
223/// Encode a error from llama.cpp into a [`EncodeError`].
224impl From<NonZeroI32> for EncodeError {
225    fn from(value: NonZeroI32) -> Self {
226        match value.get() {
227            1 => Self::NoKvCacheSlot,
228            -1 => Self::NTokensZero,
229            error_code => Self::Unknown(error_code),
230        }
231    }
232}
233
234/// An error that can occur when loading a model.
235#[derive(Debug, Eq, PartialEq, thiserror::Error)]
236pub enum LlamaModelLoadError {
237    /// There was a null byte in a provided string and thus it could not be converted to a C string.
238    #[error("null byte in string {0}")]
239    NullError(#[from] NulError),
240    /// llama.cpp returned a nullptr - this could be many different causes.
241    #[error("null result from llama cpp")]
242    NullResult,
243    /// Failed to convert the path to a rust str. This means the path was not valid unicode
244    #[error("failed to convert path {0} to str")]
245    PathToStrError(PathBuf),
246    /// The model file does not exist at the given path.
247    #[error("model file not found: {0}")]
248    FileNotFound(PathBuf),
249}
250
251/// An error that can occur when loading a model.
252#[derive(Debug, Eq, PartialEq, thiserror::Error)]
253pub enum LlamaLoraAdapterInitError {
254    /// There was a null byte in a provided string and thus it could not be converted to a C string.
255    #[error("null byte in string {0}")]
256    NullError(#[from] NulError),
257    /// llama.cpp returned a nullptr - this could be many different causes.
258    #[error("null result from llama cpp")]
259    NullResult,
260    /// Failed to convert the path to a rust str. This means the path was not valid unicode
261    #[error("failed to convert path {0} to str")]
262    PathToStrError(PathBuf),
263    /// The adapter file does not exist at the given path.
264    #[error("adapter file not found: {0}")]
265    FileNotFound(PathBuf),
266}
267
268/// An error that can occur when loading a model.
269#[derive(Debug, Eq, PartialEq, thiserror::Error)]
270pub enum LlamaLoraAdapterSetError {
271    /// llama.cpp returned a non-zero error code.
272    #[error("error code from llama cpp")]
273    ErrorResult(i32),
274}
275
276/// An error that can occur when loading a model.
277#[derive(Debug, Eq, PartialEq, thiserror::Error)]
278pub enum LlamaLoraAdapterRemoveError {
279    /// llama.cpp returned a non-zero error code.
280    #[error("error code from llama cpp")]
281    ErrorResult(i32),
282}
283
284/// An error that can occur when converting a token to a string.
285#[derive(Debug, thiserror::Error, Clone)]
286#[non_exhaustive]
287pub enum TokenToStringError {
288    /// the token type was unknown
289    #[error("Unknown Token Type")]
290    UnknownTokenType,
291    /// There was insufficient buffer space to convert the token to a string.
292    #[error("Insufficient Buffer Space {0}")]
293    InsufficientBufferSpace(c_int),
294    /// The token was not valid utf8.
295    #[error("FromUtf8Error {0}")]
296    FromUtf8Error(#[from] FromUtf8Error),
297    /// An integer conversion failed.
298    #[error("Integer conversion error: {0}")]
299    IntConversionError(#[from] std::num::TryFromIntError),
300}
301
302/// Failed to convert a string to a token sequence.
303#[derive(Debug, thiserror::Error)]
304pub enum StringToTokenError {
305    /// the string contained a null byte and thus could not be converted to a c string.
306    #[error("{0}")]
307    NulError(#[from] NulError),
308    #[error("{0}")]
309    /// Failed to convert a provided integer to a [`c_int`].
310    CIntConversionError(#[from] std::num::TryFromIntError),
311}
312
313/// Failed to apply model chat template.
314#[derive(Debug, thiserror::Error)]
315pub enum NewLlamaChatMessageError {
316    /// the string contained a null byte and thus could not be converted to a c string.
317    #[error("{0}")]
318    NulError(#[from] NulError),
319}
320
321/// Failed to apply model chat template.
322#[derive(Debug, thiserror::Error)]
323pub enum ApplyChatTemplateError {
324    /// the string contained a null byte and thus could not be converted to a c string.
325    #[error("{0}")]
326    NulError(#[from] NulError),
327    /// the string could not be converted to utf8.
328    #[error("{0}")]
329    FromUtf8Error(#[from] FromUtf8Error),
330    /// llama.cpp returned a null pointer for the template result.
331    #[error("null result from llama.cpp")]
332    NullResult,
333    /// llama.cpp returned an error code.
334    #[error("ffi error {0}")]
335    FfiError(i32),
336    /// invalid grammar trigger data returned by llama.cpp.
337    #[error("invalid grammar trigger data")]
338    InvalidGrammarTriggerType,
339    /// An integer conversion failed.
340    #[error("Integer conversion error: {0}")]
341    IntConversionError(#[from] std::num::TryFromIntError),
342}
343
344/// Failed to parse a chat response.
345#[derive(Debug, thiserror::Error)]
346pub enum ChatParseError {
347    /// the string contained a null byte and thus could not be converted to a c string.
348    #[error("{0}")]
349    NulError(#[from] NulError),
350    /// the string could not be converted to utf8.
351    #[error("{0}")]
352    Utf8Error(#[from] FromUtf8Error),
353    /// llama.cpp returned a null pointer for the parse result.
354    #[error("null result from llama.cpp")]
355    NullResult,
356    /// llama.cpp returned an error code.
357    #[error("ffi error {0}")]
358    FfiError(i32),
359}
360
361/// Failed to accept a token in a sampler.
362#[derive(Debug, thiserror::Error)]
363pub enum SamplerAcceptError {
364    /// A C++ exception was thrown during accept
365    #[error("C++ exception during sampler accept: {0}")]
366    CppException(String),
367
368    /// An invalid argument was passed (null sampler or null error pointer)
369    #[error("Invalid argument passed to sampler accept")]
370    InvalidArgument,
371}
372
373/// Errors that can occur when modifying model parameters.
374#[derive(Debug, Eq, PartialEq, thiserror::Error)]
375pub enum ModelParamsError {
376    /// The internal override vector has no available slot.
377    #[error("No available slot in override vector")]
378    NoAvailableSlot,
379    /// The first override slot is not empty.
380    #[error("Override slot is not empty")]
381    SlotNotEmpty,
382    /// A character in the key is not a valid C char.
383    #[error("Invalid character in key: byte {byte}, {reason}")]
384    InvalidCharacterInKey {
385        /// The byte value that failed conversion.
386        byte: u8,
387        /// The reason the conversion failed.
388        reason: String,
389    },
390}
391
392/// Failed to sample a token from the data array.
393#[derive(Debug, Eq, PartialEq, thiserror::Error)]
394pub enum TokenSamplingError {
395    /// The sampler did not select any token.
396    #[error("No token was selected by the sampler")]
397    NoTokenSelected,
398}
399
#[cfg(test)]
mod tests {
    use std::num::NonZeroI32;

    use super::{DecodeError, EncodeError};

    /// Build a [`DecodeError`] from a raw non-zero llama.cpp return code.
    /// (Extracted to remove the `NonZeroI32::new(..).expect(..)` boilerplate
    /// that was duplicated in every test.)
    fn decode(code: i32) -> DecodeError {
        DecodeError::from(NonZeroI32::new(code).expect("test codes are non-zero"))
    }

    /// Build an [`EncodeError`] from a raw non-zero llama.cpp return code.
    fn encode(code: i32) -> EncodeError {
        EncodeError::from(NonZeroI32::new(code).expect("test codes are non-zero"))
    }

    #[test]
    fn decode_error_no_kv_cache_slot() {
        let error = decode(1);
        assert_eq!(error, DecodeError::NoKvCacheSlot);
        assert_eq!(error.to_string(), "Decode Error 1: NoKvCacheSlot");
    }

    #[test]
    fn decode_error_n_tokens_zero() {
        let error = decode(-1);
        assert_eq!(error, DecodeError::NTokensZero);
        assert_eq!(error.to_string(), "Decode Error -1: n_tokens == 0");
    }

    #[test]
    fn decode_error_aborted() {
        let error = decode(2);
        assert_eq!(error, DecodeError::Aborted);
        assert_eq!(error.to_string(), "Decode Error 2: Aborted");
    }

    #[test]
    fn decode_error_unknown() {
        let error = decode(42);
        assert_eq!(error, DecodeError::Unknown(42));
        assert_eq!(error.to_string(), "Decode Error 42: unknown");
    }

    #[test]
    fn encode_error_no_kv_cache_slot() {
        let error = encode(1);
        assert_eq!(error, EncodeError::NoKvCacheSlot);
        assert_eq!(error.to_string(), "Encode Error 1: NoKvCacheSlot");
    }

    #[test]
    fn encode_error_n_tokens_zero() {
        let error = encode(-1);
        assert_eq!(error, EncodeError::NTokensZero);
        assert_eq!(error.to_string(), "Encode Error -1: n_tokens == 0");
    }

    #[test]
    fn encode_error_unknown() {
        let error = encode(99);
        assert_eq!(error, EncodeError::Unknown(99));
        assert_eq!(error.to_string(), "Encode Error 99: unknown");
    }
}