dakera-inference 0.11.74

Embedded inference engine for Dakera - generates embeddings locally via ONNX Runtime
Documentation
//! Error types for the inference engine.

use thiserror::Error;

/// Errors that can occur during inference operations.
///
/// `Display` is generated by `thiserror` from the `#[error("...")]` attribute on each
/// variant. Most variants carry a `String` payload that is interpolated into the message
/// after a fixed, variant-specific prefix.
#[derive(Error, Debug)]
pub enum InferenceError {
    /// Model not found or failed to download.
    ///
    /// The payload is rendered after the `Model not found: ` prefix.
    #[error("Model not found: {0}")]
    ModelNotFound(String),

    /// Failed to load model weights.
    ///
    /// The payload is rendered after the `Failed to load model: ` prefix.
    #[error("Failed to load model: {0}")]
    ModelLoadError(String),

    /// Tokenization error.
    ///
    /// The `From<tokenizers::Error>` impl in this file produces this variant, storing the
    /// source error's `Display` text as the payload.
    #[error("Tokenization failed: {0}")]
    TokenizationError(String),

    /// Inference/forward pass error.
    ///
    /// This variant shares its name with the enum, so it is written
    /// `InferenceError::InferenceError(..)` at use sites.
    #[error("Inference failed: {0}")]
    InferenceError(String),

    /// Invalid input.
    ///
    /// The payload is rendered after the `Invalid input: ` prefix.
    #[error("Invalid input: {0}")]
    InvalidInput(String),

    /// IO error.
    ///
    /// Wraps the original `std::io::Error` rather than a string. The `#[from]` attribute
    /// makes `thiserror` generate `From<std::io::Error>`, so `?` converts I/O errors into
    /// this variant automatically.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// ONNX Runtime error.
    ///
    /// The `From<ort::Error>` impl in this file produces this variant, storing the source
    /// error's `Display` text as the payload.
    #[error("ONNX Runtime error: {0}")]
    OrtError(String),

    /// HuggingFace Hub error.
    ///
    /// The payload is rendered after the `HuggingFace Hub error: ` prefix.
    #[error("HuggingFace Hub error: {0}")]
    HubError(String),

    /// External extraction provider error (EXT-1).
    ///
    /// The payload is rendered after the `Extraction failed: ` prefix.
    #[error("Extraction failed: {0}")]
    ExtractionFailed(String),

    /// Cross-encoder reranker is at capacity — caller should fall back to unranked results.
    ///
    /// Root cause of the LoCoMo SIGTERM (DAK-5893): 8 concurrent bench recall requests
    /// saturated the 2-session ONNX pool; the 7th/8th request waited >120s for a mutex slot,
    /// triggering client-side timeout + 8 retries = ~19-minute stall per question.
    /// Fix: return immediately so the API falls back to unranked results rather than queuing.
    ///
    /// The `active` and `max` fields are rendered in the message as `({active}/{max} active)`.
    /// NOTE(review): presumably `active` is the number of reranker calls in flight when the
    /// request was rejected and `max` is the configured limit — confirm against the call
    /// site that constructs this variant.
    #[error("Cross-encoder reranker at capacity ({active}/{max} active)")]
    Overloaded { active: usize, max: usize },
}

impl From<ort::Error> for InferenceError {
    fn from(err: ort::Error) -> Self {
        InferenceError::OrtError(err.to_string())
    }
}

impl From<tokenizers::Error> for InferenceError {
    fn from(err: tokenizers::Error) -> Self {
        InferenceError::TokenizationError(err.to_string())
    }
}

/// Result type for inference operations.
///
/// Shorthand for `std::result::Result` with the error type fixed to [`InferenceError`],
/// so fallible functions can be declared as returning `Result<T>`. Importing this alias
/// by name shadows the prelude `Result` in that scope.
pub type Result<T> = std::result::Result<T, InferenceError>;