use crate::error::Result;
use numr::runtime::Runtime;
use numr::tensor::Tensor;
/// Outcome record returned (inside a `Vec`) by
/// [`SpeculativeOps::verify_speculative_tokens`].
///
/// This file only declares the fields; their exact semantics are defined by
/// whichever backend fills the struct in. The per-field notes below that are
/// marked `NOTE(review)` are assumptions drawn from the field names and should
/// be confirmed against an implementation.
#[derive(Clone, Debug)]
pub struct VerificationResult {
    /// Count of accepted tokens.
    ///
    /// NOTE(review): presumably equal to `accepted_tokens.len()` — confirm.
    pub num_accepted: usize,

    /// Ids of the accepted tokens.
    ///
    /// NOTE(review): presumably the accepted prefix of the drafted tokens, in
    /// draft order — confirm.
    pub accepted_tokens: Vec<u32>,

    /// Id of one additional token reported alongside the accepted ones.
    ///
    /// NOTE(review): presumably the token sampled from the target distribution
    /// after the accepted tokens — confirm.
    pub bonus_token: u32,

    /// Position of the first rejected token.
    ///
    /// The type is signed. NOTE(review): presumably a negative value (e.g.
    /// `-1`) is used when nothing was rejected — confirm before relying on it.
    pub first_rejection_pos: i32,
}
/// Tensor operations for verifying speculatively drafted tokens, generic over
/// the numr runtime `R`.
///
/// Only the signatures are declared here; there are no default method bodies,
/// so every implementor must provide all three methods. Tensor shapes, dtypes
/// and the precise numerical contract are not fixed by this trait definition.
/// The notes marked `NOTE(review)` are assumptions to confirm against an
/// implementation.
pub trait SpeculativeOps<R>
where
    R: Runtime,
{
    /// Verifies `draft_tokens` using the draft and target probability tensors
    /// and returns a list of [`VerificationResult`] values.
    ///
    /// * `draft_probs` – probabilities associated with the draft model.
    /// * `target_probs` – probabilities associated with the target model.
    /// * `draft_tokens` – the drafted token ids, passed as a tensor.
    /// * `seed` – 64-bit seed. NOTE(review): presumably makes any random
    ///   sampling reproducible — confirm.
    ///
    /// NOTE(review): the returned `Vec` presumably holds one entry per batch
    /// element — confirm.
    ///
    /// # Errors
    ///
    /// Failures are reported through the crate's `Result` alias; the concrete
    /// error conditions are determined by the implementor.
    fn verify_speculative_tokens(
        &self,
        draft_probs: &Tensor<R>,
        target_probs: &Tensor<R>,
        draft_tokens: &Tensor<R>,
        seed: u64,
    ) -> Result<Vec<VerificationResult>>;

    /// Computes acceptance probabilities from the draft and target
    /// probability tensors, returning a pair of tensors.
    ///
    /// NOTE(review): the meaning of each element of the returned tuple is not
    /// visible in this file (presumably acceptance probabilities plus a
    /// residual/adjusted distribution) — confirm.
    ///
    /// # Errors
    ///
    /// Failures are reported through the crate's `Result` alias; the concrete
    /// error conditions are determined by the implementor.
    fn compute_acceptance_probs(
        &self,
        draft_probs: &Tensor<R>,
        target_probs: &Tensor<R>,
    ) -> Result<(Tensor<R>, Tensor<R>)>;

    /// Computes an expected-token tensor from `acceptance_rates`.
    ///
    /// NOTE(review): presumably the expected number of tokens produced per
    /// speculative step for the given acceptance rates — confirm.
    ///
    /// # Errors
    ///
    /// Failures are reported through the crate's `Result` alias; the concrete
    /// error conditions are determined by the implementor.
    fn compute_expected_tokens(&self, acceptance_rates: &Tensor<R>) -> Result<Tensor<R>>;
}