kizzasi_tokenizer/
error.rs

//! Error types for kizzasi-tokenizer
//!
//! Provides comprehensive error types with detailed context for debugging
//! and user-friendly error messages.
5
6use thiserror::Error;
7
8/// Result type alias for tokenizer operations
9pub type TokenizerResult<T> = Result<T, TokenizerError>;
10
11/// Errors that can occur in tokenizer operations
12#[derive(Error, Debug)]
13pub enum TokenizerError {
14    /// Configuration parameter is invalid
15    ///
16    /// This error includes detailed context about what parameter was invalid
17    /// and why, to help users fix their configuration.
18    #[error("Invalid configuration: {0}")]
19    InvalidConfig(String),
20
21    /// Array dimensions don't match expected values
22    ///
23    /// Provides both the expected and actual dimensions to help diagnose
24    /// shape mismatches in multi-dimensional arrays.
25    #[error("Dimension mismatch in {context}: expected {expected}, got {got}")]
26    DimensionMismatch {
27        expected: usize,
28        got: usize,
29        context: String,
30    },
31
32    /// Signal encoding operation failed
33    ///
34    /// Wraps detailed information about why encoding failed, such as
35    /// invalid input ranges, NaN values, or algorithmic failures.
36    #[error("Encoding failed in {operation}: {reason}")]
37    EncodingError { operation: String, reason: String },
38
39    /// Signal decoding operation failed
40    ///
41    /// Includes context about which decoding step failed and why.
42    #[error("Decoding failed in {operation}: {reason}")]
43    DecodingError { operation: String, reason: String },
44
45    /// Codebook has not been initialized before use
46    ///
47    /// This typically happens when trying to use a VQ-VAE tokenizer
48    /// before training or loading a pretrained codebook.
49    #[error("Codebook not initialized: {hint}")]
50    CodebookNotInitialized { hint: String },
51
52    /// Value is outside the valid range
53    ///
54    /// Provides the offending value and the valid range for debugging.
55    #[error("Value out of range in {context}: {value} not in [{min}, {max}]")]
56    ValueOutOfRange {
57        value: f32,
58        min: f32,
59        max: f32,
60        context: String,
61    },
62
63    /// Error from kizzasi-core crate
64    #[error("Core error: {0}")]
65    CoreError(#[from] kizzasi_core::CoreError),
66
67    /// Internal error that should not happen under normal circumstances
68    ///
69    /// If you encounter this error, it likely indicates a bug in the library.
70    /// Please report it with the full error message and context.
71    #[error("Internal error (please report this): {0}")]
72    InternalError(String),
73
74    /// Invalid input data provided to a function
75    ///
76    /// This error provides context about what was invalid in the input,
77    /// such as empty arrays, NaN values, or incorrect types.
78    #[error("Invalid input in {operation}: {reason}")]
79    InvalidInput { operation: String, reason: String },
80
81    /// Serialization or deserialization failed
82    #[error("Serialization error: {0}")]
83    SerializationError(String),
84
85    /// File I/O operation failed
86    #[error("I/O error: {0}")]
87    IoError(#[from] std::io::Error),
88
89    /// Training operation failed
90    #[error("Training error at epoch {epoch}: {reason}")]
91    TrainingError { epoch: usize, reason: String },
92
93    /// Numerical computation resulted in invalid values (NaN, Inf)
94    #[error("Numerical error in {operation}: {reason}")]
95    NumericalError { operation: String, reason: String },
96
97    /// Resource limit exceeded
98    #[error("Resource limit exceeded: {resource} - {details}")]
99    ResourceLimitExceeded { resource: String, details: String },
100
101    /// Feature not yet implemented
102    #[error("Feature not implemented: {0}")]
103    NotImplemented(String),
104}
105
106impl TokenizerError {
107    /// Create a dimension mismatch error with context
108    pub fn dim_mismatch(expected: usize, got: usize, context: impl Into<String>) -> Self {
109        Self::DimensionMismatch {
110            expected,
111            got,
112            context: context.into(),
113        }
114    }
115
116    /// Create an encoding error with context
117    pub fn encoding(operation: impl Into<String>, reason: impl Into<String>) -> Self {
118        Self::EncodingError {
119            operation: operation.into(),
120            reason: reason.into(),
121        }
122    }
123
124    /// Create a decoding error with context
125    pub fn decoding(operation: impl Into<String>, reason: impl Into<String>) -> Self {
126        Self::DecodingError {
127            operation: operation.into(),
128            reason: reason.into(),
129        }
130    }
131
132    /// Create a value out of range error with context
133    pub fn out_of_range(value: f32, min: f32, max: f32, context: impl Into<String>) -> Self {
134        Self::ValueOutOfRange {
135            value,
136            min,
137            max,
138            context: context.into(),
139        }
140    }
141
142    /// Create an invalid input error with context
143    pub fn invalid_input(operation: impl Into<String>, reason: impl Into<String>) -> Self {
144        Self::InvalidInput {
145            operation: operation.into(),
146            reason: reason.into(),
147        }
148    }
149
150    /// Create a numerical error with context
151    pub fn numerical(operation: impl Into<String>, reason: impl Into<String>) -> Self {
152        Self::NumericalError {
153            operation: operation.into(),
154            reason: reason.into(),
155        }
156    }
157
158    /// Create a training error with context
159    pub fn training(epoch: usize, reason: impl Into<String>) -> Self {
160        Self::TrainingError {
161            epoch,
162            reason: reason.into(),
163        }
164    }
165}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `rendered` contains each of the given `fragments`.
    fn assert_contains_all(rendered: &str, fragments: &[&str]) {
        for &fragment in fragments {
            assert!(rendered.contains(fragment));
        }
    }

    /// The rendered messages must include every piece of context supplied
    /// to the constructor.
    #[test]
    fn test_error_messages() {
        let dim_text = TokenizerError::dim_mismatch(10, 20, "input validation").to_string();
        assert_contains_all(&dim_text, &["10", "20", "input validation"]);

        let enc_text = TokenizerError::encoding("VQ-VAE", "codebook lookup failed").to_string();
        assert_contains_all(&enc_text, &["VQ-VAE", "codebook lookup failed"]);
    }

    /// Each helper constructor must yield its corresponding variant.
    #[test]
    fn test_helper_constructors() {
        let range_err = TokenizerError::out_of_range(1.5, 0.0, 1.0, "quantization");
        let is_range_variant = matches!(range_err, TokenizerError::ValueOutOfRange { .. });
        assert!(is_range_variant);

        let input_err = TokenizerError::invalid_input("encode", "empty array");
        let is_input_variant = matches!(input_err, TokenizerError::InvalidInput { .. });
        assert!(is_input_variant);
    }
}