inference/
error.rs

1//! Error types for the inference engine.
2
3use thiserror::Error;
4
5/// Errors that can occur during inference operations.
6#[derive(Error, Debug)]
7pub enum InferenceError {
8    /// Model not found or failed to download
9    #[error("Model not found: {0}")]
10    ModelNotFound(String),
11
12    /// Failed to load model weights
13    #[error("Failed to load model: {0}")]
14    ModelLoadError(String),
15
16    /// Tokenization error
17    #[error("Tokenization failed: {0}")]
18    TokenizationError(String),
19
20    /// Inference/forward pass error
21    #[error("Inference failed: {0}")]
22    InferenceError(String),
23
24    /// Invalid input
25    #[error("Invalid input: {0}")]
26    InvalidInput(String),
27
28    /// IO error
29    #[error("IO error: {0}")]
30    IoError(#[from] std::io::Error),
31
32    /// ONNX Runtime error
33    #[error("ONNX Runtime error: {0}")]
34    OrtError(String),
35
36    /// HuggingFace Hub error
37    #[error("HuggingFace Hub error: {0}")]
38    HubError(String),
39
40    /// External extraction provider error (EXT-1)
41    #[error("Extraction failed: {0}")]
42    ExtractionFailed(String),
43
44    /// Cross-encoder reranker is at capacity — caller should fall back to unranked results.
45    ///
46    /// Root cause of the LoCoMo SIGTERM (DAK-5893): 8 concurrent bench recall requests
47    /// saturated the 2-session ONNX pool; the 7th/8th request waited >120s for a mutex slot,
48    /// triggering client-side timeout + 8 retries = ~19-minute stall per question.
49    /// Fix: return immediately so the API falls back to unranked results rather than queuing.
50    #[error("Cross-encoder reranker at capacity ({active}/{max} active)")]
51    Overloaded { active: usize, max: usize },
52
53    /// Candle tensor/model error (requires `candle` feature).
54    #[error("Candle error: {0}")]
55    CandleError(String),
56
57    /// GGUF model loading or quantization error (requires `candle` feature).
58    #[error("GGUF error: {0}")]
59    GgufError(String),
60
61    /// Binary quantization error — dimension mismatch or empty input.
62    #[error("Binary quantization error: {0}")]
63    BinaryQuantError(String),
64}
65
66impl From<ort::Error> for InferenceError {
67    fn from(err: ort::Error) -> Self {
68        InferenceError::OrtError(err.to_string())
69    }
70}
71
72impl From<tokenizers::Error> for InferenceError {
73    fn from(err: tokenizers::Error) -> Self {
74        InferenceError::TokenizationError(err.to_string())
75    }
76}
77
78/// Result type for inference operations.
79pub type Result<T> = std::result::Result<T, InferenceError>;
inference/error.rs

inference/
error.rs