Skip to main content

oxillama_runtime/
error.rs

1//! Error types for the inference runtime.
2
3use thiserror::Error;
4
5/// Result type alias for runtime operations.
6pub type RuntimeResult<T> = Result<T, RuntimeError>;
7
8/// Errors that can occur during inference.
9#[derive(Error, Debug)]
10pub enum RuntimeError {
11    /// No model has been loaded yet.
12    #[error("no model loaded")]
13    ModelNotLoaded,
14
15    /// Tokenizer is not available because neither `tokenizer-wasm` nor `tokenizer-onig`
16    /// feature is enabled.
17    ///
18    /// Enable the `tokenizer-wasm` feature (default, pure Rust) to use
19    /// the HuggingFace tokenizers library.
20    #[error("tokenizer not available: rebuild with the `tokenizer-wasm` feature enabled")]
21    TokenizerNotAvailable,
22
23    /// Tokenizer initialization or encoding/decoding failed.
24    #[error("tokenizer error: {message}")]
25    TokenizerError {
26        /// Description of the tokenizer error.
27        message: String,
28    },
29
30    /// Sampling operation failed.
31    #[error("sampling error: {message}")]
32    SamplingError {
33        /// Description of the sampling error.
34        message: String,
35    },
36
37    /// KV cache has reached its maximum capacity.
38    #[error("KV cache full: maximum context length {max_ctx} reached")]
39    KvCacheFull {
40        /// Maximum context length supported.
41        max_ctx: usize,
42    },
43
44    /// Model file could not be loaded.
45    #[error("model loading error: {message}")]
46    ModelLoadError {
47        /// Description of the loading error.
48        message: String,
49    },
50
51    /// Generation was interrupted or cancelled.
52    #[error("generation cancelled")]
53    Cancelled,
54
55    /// Error propagated from architecture layer.
56    #[error("architecture error: {0}")]
57    Arch(#[from] oxillama_arch::ArchError),
58
59    /// Error propagated from GGUF parser.
60    #[error("GGUF error: {0}")]
61    Gguf(#[from] oxillama_gguf::GgufError),
62
63    /// Error propagated from quantization kernel.
64    #[error("quantization error: {0}")]
65    Quant(#[from] oxillama_quant::QuantError),
66
67    /// I/O error during model loading.
68    #[error("I/O error: {0}")]
69    Io(#[from] std::io::Error),
70
71    /// Grammar error from GBNF constrained sampling.
72    #[error("grammar error: {0}")]
73    Grammar(#[from] crate::sampling::grammar::GrammarError),
74
75    /// Attention computation error.
76    #[error("attention error: {message}")]
77    AttentionError {
78        /// Description of the attention error.
79        message: String,
80    },
81
82    /// Snapshot format version is incompatible with this runtime.
83    #[error("snapshot incompatible: {detail}")]
84    SnapshotIncompatible {
85        /// Details about the incompatibility.
86        detail: String,
87    },
88
89    /// Model fingerprint in snapshot does not match the file on disk.
90    #[error("model fingerprint mismatch: expected={expected}, found={found}, detail={detail}")]
91    ModelFingerprintMismatch {
92        /// The fingerprint expected (from snapshot).
93        expected: String,
94        /// The fingerprint found (computed from disk).
95        found: String,
96        /// Additional detail about the mismatch.
97        detail: String,
98    },
99
100    /// Offload pager read past end of backing store.
101    #[error(
102        "offload: unexpected EOF at offset {offset}, needed {needed} bytes, {available} available"
103    )]
104    OffloadEof {
105        /// Byte offset at which the read was attempted.
106        offset: u64,
107        /// Number of bytes requested.
108        needed: usize,
109        /// Number of bytes available from `offset` to end.
110        available: usize,
111    },
112
113    /// A tensor name was not found in the weight offset map.
114    #[error("tensor not found in weight map: {0}")]
115    TensorNotFound(String),
116
117    /// An internal RwLock or Mutex was poisoned.
118    #[error("lock poisoned")]
119    LockPoisoned,
120
121    /// Speculative-engine snapshot format is incompatible with this runtime.
122    #[error("speculative snapshot incompatible: {0}")]
123    SpecSnapshotIncompatible(String),
124
125    /// Cannot pool an empty sequence (seq_len = 0).
126    #[error("cannot pool empty sequence")]
127    EmptySequence,
128}