oxillama_runtime/error.rs
1//! Error types for the inference runtime.
2
3use thiserror::Error;
4
5/// Result type alias for runtime operations.
6pub type RuntimeResult<T> = Result<T, RuntimeError>;
7
8/// Errors that can occur during inference.
9#[derive(Error, Debug)]
10pub enum RuntimeError {
11 /// No model has been loaded yet.
12 #[error("no model loaded")]
13 ModelNotLoaded,
14
15 /// Tokenizer is not available because neither `tokenizer-wasm` nor `tokenizer-onig`
16 /// feature is enabled.
17 ///
18 /// Enable the `tokenizer-wasm` feature (default, pure Rust) to use
19 /// the HuggingFace tokenizers library.
20 #[error("tokenizer not available: rebuild with the `tokenizer-wasm` feature enabled")]
21 TokenizerNotAvailable,
22
23 /// Tokenizer initialization or encoding/decoding failed.
24 #[error("tokenizer error: {message}")]
25 TokenizerError {
26 /// Description of the tokenizer error.
27 message: String,
28 },
29
30 /// Sampling operation failed.
31 #[error("sampling error: {message}")]
32 SamplingError {
33 /// Description of the sampling error.
34 message: String,
35 },
36
37 /// KV cache has reached its maximum capacity.
38 #[error("KV cache full: maximum context length {max_ctx} reached")]
39 KvCacheFull {
40 /// Maximum context length supported.
41 max_ctx: usize,
42 },
43
44 /// Model file could not be loaded.
45 #[error("model loading error: {message}")]
46 ModelLoadError {
47 /// Description of the loading error.
48 message: String,
49 },
50
51 /// Generation was interrupted or cancelled.
52 #[error("generation cancelled")]
53 Cancelled,
54
55 /// Error propagated from architecture layer.
56 #[error("architecture error: {0}")]
57 Arch(#[from] oxillama_arch::ArchError),
58
59 /// Error propagated from GGUF parser.
60 #[error("GGUF error: {0}")]
61 Gguf(#[from] oxillama_gguf::GgufError),
62
63 /// Error propagated from quantization kernel.
64 #[error("quantization error: {0}")]
65 Quant(#[from] oxillama_quant::QuantError),
66
67 /// I/O error during model loading.
68 #[error("I/O error: {0}")]
69 Io(#[from] std::io::Error),
70
71 /// Grammar error from GBNF constrained sampling.
72 #[error("grammar error: {0}")]
73 Grammar(#[from] crate::sampling::grammar::GrammarError),
74
75 /// Attention computation error.
76 #[error("attention error: {message}")]
77 AttentionError {
78 /// Description of the attention error.
79 message: String,
80 },
81
82 /// Snapshot format version is incompatible with this runtime.
83 #[error("snapshot incompatible: {detail}")]
84 SnapshotIncompatible {
85 /// Details about the incompatibility.
86 detail: String,
87 },
88
89 /// Model fingerprint in snapshot does not match the file on disk.
90 #[error("model fingerprint mismatch: expected={expected}, found={found}, detail={detail}")]
91 ModelFingerprintMismatch {
92 /// The fingerprint expected (from snapshot).
93 expected: String,
94 /// The fingerprint found (computed from disk).
95 found: String,
96 /// Additional detail about the mismatch.
97 detail: String,
98 },
99
100 /// Offload pager read past end of backing store.
101 #[error(
102 "offload: unexpected EOF at offset {offset}, needed {needed} bytes, {available} available"
103 )]
104 OffloadEof {
105 /// Byte offset at which the read was attempted.
106 offset: u64,
107 /// Number of bytes requested.
108 needed: usize,
109 /// Number of bytes available from `offset` to end.
110 available: usize,
111 },
112
113 /// A tensor name was not found in the weight offset map.
114 #[error("tensor not found in weight map: {0}")]
115 TensorNotFound(String),
116
117 /// An internal RwLock or Mutex was poisoned.
118 #[error("lock poisoned")]
119 LockPoisoned,
120
121 /// Speculative-engine snapshot format is incompatible with this runtime.
122 #[error("speculative snapshot incompatible: {0}")]
123 SpecSnapshotIncompatible(String),
124
125 /// Cannot pool an empty sequence (seq_len = 0).
126 #[error("cannot pool empty sequence")]
127 EmptySequence,
128}