pub mod hub;
pub mod llama;
pub mod llama_local;
pub mod loader;
pub mod mock;
pub mod phi3;
pub mod phi3_local;
pub mod quantized_llama;
pub mod quantized_llama_local;
pub mod quantized_qwen2;
pub mod quantized_qwen2_local;
pub mod qwen2;
pub mod qwen2_local;
use crate::Result;
pub trait Decoder {
fn vocab_size(&self) -> usize;
fn history(&self) -> &[u32];
fn history_len(&self) -> usize {
self.history().len()
}
fn reset(&mut self);
fn observe(&mut self, ids: &[u32]) -> Result<()>;
fn next_logits(&mut self) -> Result<Vec<f32>>;
fn batched_logits(&mut self, drafts: &[u32]) -> Result<Vec<Vec<f32>>>;
fn rollback_to(&mut self, len: usize) -> Result<()>;
fn encode(&self, text: &str, add_special_tokens: bool) -> Result<Vec<u32>> {
let _ = (text, add_special_tokens);
Err(crate::Error::UnsupportedMethod {
method: "encode",
reason: "this decoder has no bundled tokenizer".into(),
})
}
fn decode(&self, ids: &[u32], skip_special_tokens: bool) -> Result<String> {
let _ = (ids, skip_special_tokens);
Err(crate::Error::UnsupportedMethod {
method: "decode",
reason: "this decoder has no bundled tokenizer".into(),
})
}
fn eos_token_ids(&self) -> Vec<u32> {
Vec::new()
}
}
pub trait TreeDecoder: Decoder {
fn last_hidden_state(&mut self) -> Result<candle_core::Tensor>;
fn tree_logits(&mut self, tree: &crate::tree::DraftTree) -> Result<Vec<Vec<f32>>>;
fn apply_lm_head(&self, hidden: &candle_core::Tensor) -> Result<candle_core::Tensor> {
let _ = hidden;
Err(crate::Error::UnsupportedMethod {
method: "apply_lm_head",
reason: "this TreeDecoder does not expose its lm_head".into(),
})
}
fn last_hidden_states_multi(
&mut self,
layers: &[usize],
) -> Result<(candle_core::Tensor, Vec<candle_core::Tensor>)> {
let _ = layers;
Err(crate::Error::UnsupportedMethod {
method: "last_hidden_states_multi",
reason: "this TreeDecoder does not expose intermediate layers".into(),
})
}
fn num_hidden_layers(&self) -> usize {
0
}
fn embed_tokens(&self, input_ids: &candle_core::Tensor) -> Result<candle_core::Tensor> {
let _ = input_ids;
Err(crate::Error::UnsupportedMethod {
method: "embed_tokens",
reason: "this TreeDecoder does not expose its embedding table".into(),
})
}
}