pub mod hub;
pub mod llama;
pub mod llama_local;
pub mod loader;
pub mod mock;
pub mod phi3;
pub mod phi3_local;
pub mod quantized_llama;
pub mod quantized_llama_local;
pub mod quantized_qwen2;
pub mod quantized_qwen2_local;
pub mod qwen2;
pub mod qwen2_local;
use crate::Result;
pub trait Decoder {
fn vocab_size(&self) -> usize;
fn history(&self) -> &[u32];
fn history_len(&self) -> usize {
self.history().len()
}
fn reset(&mut self);
fn observe(&mut self, ids: &[u32]) -> Result<()>;
fn next_logits(&mut self) -> Result<Vec<f32>>;
fn batched_logits(&mut self, drafts: &[u32]) -> Result<Vec<Vec<f32>>>;
fn rollback_to(&mut self, len: usize) -> Result<()>;
fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>> {
let _ = (text, add_special_tokens);
Err(crate::Error::UnsupportedMethod {
method: "encode",
reason: "this decoder has no bundled tokenizer".into(),
})
}
fn decode(&self, ids: &[u32], skip_special_tokens: bool) -> Result<String> {
let _ = (ids, skip_special_tokens);
Err(crate::Error::UnsupportedMethod {
method: "decode",
reason: "this decoder has no bundled tokenizer".into(),
})
}
fn eos_token_ids(&self) -> Vec<u32> {
Vec::new()
}
}
pub trait TreeDecoder: Decoder {
fn last_hidden_state(&mut self) -> Result<candle_core::Tensor> {
Err(crate::Error::UnsupportedMethod {
method: "last_hidden_state",
reason: "this TreeDecoder does not expose hidden states".into(),
})
}
fn tree_logits(&mut self, tree: &crate::tree::DraftTree) -> Result<Vec<Vec<f32>>> {
let _ = tree;
Err(crate::Error::UnsupportedMethod {
method: "tree_logits",
reason: "this TreeDecoder does not implement tree-attention forward".into(),
})
}
fn apply_lm_head(&self, hidden: &candle_core::Tensor) -> Result<candle_core::Tensor> {
let _ = hidden;
Err(crate::Error::UnsupportedMethod {
method: "apply_lm_head",
reason: "this TreeDecoder does not expose its lm_head".into(),
})
}
fn last_hidden_states_multi(
&mut self,
layers: &[usize],
) -> Result<(candle_core::Tensor, Vec<candle_core::Tensor>)> {
let _ = layers;
Err(crate::Error::UnsupportedMethod {
method: "last_hidden_states_multi",
reason: "this TreeDecoder does not expose intermediate layers".into(),
})
}
fn num_hidden_layers(&self) -> usize {
0
}
fn embed_tokens(&self, input_ids: &candle_core::Tensor) -> Result<candle_core::Tensor> {
let _ = input_ids;
Err(crate::Error::UnsupportedMethod {
method: "embed_tokens",
reason: "this TreeDecoder does not expose its embedding table".into(),
})
}
fn tree_logits_keep_kv(
&mut self,
tree: &crate::tree::DraftTree,
) -> Result<(Vec<Vec<f32>>, Vec<candle_core::Tensor>)> {
let _ = tree;
Err(crate::Error::UnsupportedMethod {
method: "tree_logits_keep_kv",
reason: "this TreeDecoder hasn't implemented the EAGLE fast path yet".into(),
})
}
fn observe_returning_last_hidden(
&mut self,
ids: &[u32],
) -> Result<candle_core::Tensor> {
self.observe(ids)?;
self.last_hidden_state()
}
fn commit_tree_path(
&mut self,
tree: &crate::tree::DraftTree,
accepted_indices: &[usize],
) -> Result<()> {
let _ = (tree, accepted_indices);
Err(crate::Error::UnsupportedMethod {
method: "commit_tree_path",
reason: "this TreeDecoder hasn't implemented the EAGLE fast path yet".into(),
})
}
}