// slm_inference 0.1.0

pub mod hf;
pub use hf::HfModelInfo;
pub mod core;
pub mod errors;
pub mod inference;

use std::path::Path;
use std::result::Result;

pub use crate::errors::*;
pub use crate::inference::SlmInference;

/// A token value that can be viewed as a raw `i32`.
///
/// Implementors must be [`Copy`] (supertrait bound); the other traits in this file take
/// tokens by value.
pub trait SlmToken: Copy {
    /// Returns this token as an `i32`.
    ///
    /// NOTE(review): presumably the backend's vocabulary id for the token; confirm against
    /// the implementors.
    fn as_i32(&self) -> i32;
}

/// A mutable collection of tokens of type `T`.
///
/// `SlmContext::Batch` is bound by this trait, and `SlmContext::decode` takes such a batch
/// by mutable reference.
pub trait SlmBatch<T>
where
    T: SlmToken,
{
    /// Adds one token to the batch.
    ///
    /// NOTE(review): the parameter meanings below are inferred from their names only;
    /// confirm against the implementors.
    ///
    /// * `token` - the token to add.
    /// * `pos` - presumably the token's position within its sequence.
    /// * `seq_ids` - presumably the ids of the sequences the token belongs to.
    /// * `logits` - presumably whether logits should be produced for this entry.
    ///
    /// # Errors
    ///
    /// Returns a `BatchError` when the implementor cannot add the token.
    fn add(
        &mut self,
        token: T,
        pos: usize,
        seq_ids: &[i32],
        logits: bool,
    ) -> Result<(), BatchError>;

    /// Clears the batch in place.
    ///
    /// NOTE(review): presumably leaves the batch empty and reusable; confirm.
    fn clear(&mut self);

    /// Returns the number of tokens reported by the batch.
    fn n_tokens(&self) -> usize;
}

/// A configuration value from which a model can be loaded.
///
/// The associated types tie a configuration to the model it produces and to that model's
/// context type: `Model::Context` is constrained to be `Self::Context`.
pub trait SlmModelConfig {
    /// Context type used by `Self::Model`.
    type Context: SlmContext;
    /// Model type returned by [`load_gguf`](Self::load_gguf).
    type Model: SlmModel<Context = Self::Context>;
    /// Consumes the configuration and loads a model from `path`.
    ///
    /// NOTE(review): the method and error names suggest `path` points at a GGUF model file;
    /// confirm against the implementors.
    ///
    /// # Errors
    ///
    /// Returns a `GgufLoaderError` when the implementor fails to load the model.
    fn load_gguf(self, path: impl AsRef<Path>) -> Result<Self::Model, GgufLoaderError>;
}

/// A model from which contexts of type `Self::Context` can be built.
pub trait SlmModel {
    /// Context type produced by the builder returned from [`context`](Self::context).
    type Context: SlmContext;
    /// Returns a builder whose `build` yields a `Self::Context`.
    ///
    /// The concrete builder type is chosen by the implementor and is opaque to callers.
    fn context(&self) -> impl SlmContextBuilder<Self::Context>;
}

/// A by-value builder that produces a `T`.
///
/// Both methods take `self` by value, so configuration calls are chained and the builder is
/// used up by [`build`](Self::build).
pub trait SlmContextBuilder<T> {
    /// Consumes the builder and produces the `T`.
    ///
    /// # Errors
    ///
    /// Returns a `ContextBuilderError` when the implementor cannot construct the value.
    fn build(self) -> Result<T, ContextBuilderError>;
    /// Consumes the builder and returns a builder of the same type.
    ///
    /// NOTE(review): presumably stores the sampling settings (`temperature`, `top_k`,
    /// `top_p`) for the context being built; valid ranges and exact semantics are defined
    /// by the implementor — confirm.
    fn with_sampler(self, temperature: f32, top_k: i32, top_p: f32) -> Self;
}

/// Operations offered by an inference context: creating batches, decoding them, sampling
/// tokens, and converting between text and tokens.
///
/// NOTE(review): parameter meanings documented below as "presumably" are inferred from
/// names only; confirm against the implementors.
pub trait SlmContext {
    /// Token type used by this context.
    type Token: SlmToken;
    /// Batch type accepted by [`decode`](Self::decode); it holds `Self::Token` values.
    type Batch: SlmBatch<Self::Token>;
    /// Creates a new batch.
    ///
    /// `tokens` and `sequences` are presumably capacity limits (token count and sequence
    /// count) for the batch.
    ///
    /// # Errors
    ///
    /// Returns a `BatchError` when the implementor cannot create the batch.
    fn new_batch(&self, tokens: usize, sequences: usize) -> Result<Self::Batch, BatchError>;
    /// Returns the maximum batch length for this context (presumably counted in tokens).
    fn max_batch_len(&self) -> usize;
    /// Decodes `batch`; both the context and the batch are borrowed mutably.
    ///
    /// # Errors
    ///
    /// Returns a `DecodeError` when the implementor reports a decoding failure.
    fn decode(&mut self, batch: &mut Self::Batch) -> Result<(), DecodeError>;
    /// Samples a token.
    ///
    /// `logit_idx` presumably selects which decoded batch entry's logits to sample from.
    /// The meaning of `Ok(None)` is not visible here (possibly "no token / end of
    /// generation") — confirm.
    ///
    /// # Errors
    ///
    /// Returns a `SamplingError` when the implementor reports a sampling failure.
    fn sample(&mut self, logit_idx: usize) -> Result<Option<Self::Token>, SamplingError>;
    /// Converts `token` to its byte representation.
    ///
    /// * `buffer_size` - presumably the size of the scratch buffer used for the conversion.
    /// * `special` - presumably whether special tokens are rendered.
    /// * `lstrip` - presumably an optional count of leading characters to strip.
    ///
    /// The result is raw bytes rather than a `String`; whether it is valid UTF-8 is not
    /// established here.
    ///
    /// # Errors
    ///
    /// Returns a `TokenToStringError` when the implementor cannot convert the token.
    fn token_to_bytes(
        &self,
        token: Self::Token,
        buffer_size: usize,
        special: bool,
        lstrip: Option<usize>,
    ) -> Result<Vec<u8>, TokenToStringError>;
    /// Converts `str` into a vector of tokens.
    ///
    /// * `add_special` - presumably whether special tokens (e.g. begin-of-sequence) are added.
    /// * `parse_special` - presumably whether special-token text inside `str` is recognised.
    ///
    /// # Errors
    ///
    /// Returns a `StringToTokenError` when the implementor cannot tokenize the input.
    fn str_to_tokens(
        &self,
        str: &str,
        add_special: bool,
        parse_special: bool,
    ) -> Result<Vec<Self::Token>, StringToTokenError>;
}