pub mod attention;
pub mod kv_cache;
pub mod quantize;
pub mod sampling;
pub mod scheduler;
pub mod speculative;
pub use attention::{AttentionConfig, AttentionDispatch, AttentionKind, HeadConfig};
pub use kv_cache::{
BlockId, CacheEvictionPolicy, CacheStats, PagedKvCache, PagedKvCacheConfig, SequenceId,
};
pub use quantize::{QuantConfig, QuantMethod, QuantizedTensor, QuantizedWeight};
pub use sampling::{BeamSearchConfig, MirostatState, SamplingOutput, SamplingParams, TokenSampler};
pub use scheduler::{
ContinuousBatchScheduler, Priority, SchedulerBudget, SchedulerConfig, SchedulerOutput,
Sequence, SequenceGroup, SequenceStatus,
};
pub use speculative::{
AcceptanceMethod, DraftModelConfig, SpeculativeDecoder, SpeculativeDecoderConfig,
SpeculativeOutput,
};
/// Error type shared by the components declared in this module.
///
/// Every variant wraps a human-readable message; the variant itself tells the
/// caller which category the failure belongs to. Equality is total (all
/// payloads are `String`), so the type implements `Eq` in addition to
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransformerError {
    /// A cache-related failure, with its message.
    CacheError(String),
    /// A scheduler-related failure, with its message.
    SchedulerError(String),
    /// An attention-related failure, with its message.
    AttentionError(String),
    /// A sampling-related failure, with its message.
    SamplingError(String),
    /// A quantization-related failure, with its message.
    QuantizationError(String),
    /// A speculative-decoding-related failure, with its message.
    SpeculativeError(String),
    /// A configuration value was rejected, with the reason.
    InvalidConfig(String),
}
impl std::fmt::Display for TransformerError {
    /// Writes the error as `<category>: <message>`, e.g. `cache error: ...`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the category label and borrow the payload first, so the actual
        // formatting happens in exactly one place below.
        let (label, detail) = match self {
            Self::CacheError(detail) => ("cache error", detail),
            Self::SchedulerError(detail) => ("scheduler error", detail),
            Self::AttentionError(detail) => ("attention error", detail),
            Self::SamplingError(detail) => ("sampling error", detail),
            Self::QuantizationError(detail) => ("quantization error", detail),
            Self::SpeculativeError(detail) => ("speculative error", detail),
            Self::InvalidConfig(detail) => ("invalid config", detail),
        };
        write!(f, "{label}: {detail}")
    }
}
// Opts `TransformerError` into the standard error trait. The body is empty, so
// every trait method keeps its default implementation (no custom `source`).
impl std::error::Error for TransformerError {}
/// Shorthand for a `Result` whose error type is [`TransformerError`].
pub type TransformerResult<T> = Result<T, TransformerError>;