use thiserror::Error;
/// Shorthand for a [`Result`](std::result::Result) whose error type is
/// [`RuntimeError`].
pub type RuntimeResult<T> = std::result::Result<T, RuntimeError>;
#[derive(Error, Debug)]
pub enum RuntimeError {
#[error("no model loaded")]
ModelNotLoaded,
#[error("tokenizer not available: rebuild with the `tokenizer-wasm` feature enabled")]
TokenizerNotAvailable,
#[error("tokenizer error: {message}")]
TokenizerError {
message: String,
},
#[error("sampling error: {message}")]
SamplingError {
message: String,
},
#[error("KV cache full: maximum context length {max_ctx} reached")]
KvCacheFull {
max_ctx: usize,
},
#[error("model loading error: {message}")]
ModelLoadError {
message: String,
},
#[error("generation cancelled")]
Cancelled,
#[error("architecture error: {0}")]
Arch(#[from] oxillama_arch::ArchError),
#[error("GGUF error: {0}")]
Gguf(#[from] oxillama_gguf::GgufError),
#[error("quantization error: {0}")]
Quant(#[from] oxillama_quant::QuantError),
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("grammar error: {0}")]
Grammar(#[from] crate::sampling::grammar::GrammarError),
#[error("attention error: {message}")]
AttentionError {
message: String,
},
}