abyo-speculate 0.5.0

Pure Rust Speculative Decoding library for local LLMs — vanilla SD + Medusa, Qwen2 + Llama, batch-1 optimised
Documentation
//! Speculative Decoding (SD) method implementations.
//!
//! Each method exposes the same `Decoder` trait so the [`crate::SpeculateEngine`]
//! can dispatch generically.

pub mod eagle;
pub mod eagle3;
pub mod medusa;
pub mod vanilla;

/// Identifier for which Speculative Decoding algorithm to use.
///
/// This is a fieldless enum that derives `Copy`, `Eq` and `Hash`, so values
/// can be passed by value and used as map keys.
///
/// # Serialization
///
/// The serde representation is derived from the *variant identifiers* using
/// the `kebab-case` rename rule, not from [`Method::name`]. The two sets of
/// strings are therefore not interchangeable: for example `Vanilla` is
/// `"vanilla"` on the wire while `name()` returns `"vanilla-sd"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
// Variant names are lower-cased / kebab-cased by serde when (de)serializing.
#[serde(rename_all = "kebab-case")]
pub enum Method {
    /// Plain autoregressive — no speculation, useful as a baseline.
    Autoregressive,
    /// Leviathan et al. 2023 — separate draft model + rejection sampling.
    Vanilla,
    /// Cai et al. 2024 — multiple decoding heads, no separate draft model.
    Medusa,
    /// Li et al. 2024 — feature-level draft + dynamic tree.
    Eagle2,
    /// Li et al. 2025 — multi-layer feature draft.
    Eagle3,
}

impl Method {
    /// Human-readable name (matches the published paper convention).
    pub const fn name(self) -> &'static str {
        match self {
            Method::Autoregressive => "autoregressive",
            Method::Vanilla => "vanilla-sd",
            Method::Medusa => "medusa",
            Method::Eagle2 => "eagle-2",
            Method::Eagle3 => "eagle-3",
        }
    }

    /// Whether this method requires a separate draft / heads asset
    /// loaded alongside the target. Medusa counts even though its heads
    /// aren't a "draft model" in the Leviathan sense — they're still a
    /// separate artifact that must be attached.
    pub const fn needs_draft_model(self) -> bool {
        matches!(
            self,
            Method::Vanilla | Method::Medusa | Method::Eagle2 | Method::Eagle3
        )
    }
}