//! Crate root: declares the internal modules and re-exports their public
//! items, together with `ggml` and `memmap2::Mmap`, so that users can reach
//! everything from a single path.
//!
//! It also defines [`InferenceParameters`] and [`InferenceError`].
#![deny(missing_docs)]
use thiserror::Error;
// Private implementation modules; their public items are re-exported below.
mod inference_session;
mod loader;
mod quantize;
mod vocabulary;
// Modules that are exposed directly as part of the public API.
pub mod model;
pub mod util;
// Re-export of the `ggml` crate, plus an alias for its `Type` enum.
pub use ggml;
pub use ggml::Type as ElementType;
// Items from the `inference_session` module.
pub use inference_session::{
InferenceRequest, InferenceSession, InferenceSessionConfig, InferenceSnapshot, InferenceStats,
ModelKVMemoryType, SnapshotError,
};
// Items from the `loader` module.
pub use loader::{
load, load_progress_callback_stdout, ContainerType, FileType, LoadError, LoadProgress, Loader,
TensorLoader,
};
// Re-exported so callers can name the memory-map type without depending on `memmap2` directly.
pub use memmap2::Mmap;
// Convenience re-exports of items that are also reachable through the public `model`/`util` modules.
pub use model::{Hyperparameters, KnownModel, Model, ModelParameters, OutputRequest};
pub use quantize::{quantize, QuantizeError, QuantizeProgress};
pub use util::TokenUtf8Buffer;
pub use vocabulary::{TokenBias, TokenId, Vocabulary};
/// Tunable settings for an inference run.
///
/// Use [`InferenceParameters::default`] for the stock values and override
/// individual fields with struct-update syntax:
/// `InferenceParameters { top_k: 20, ..Default::default() }`.
// NOTE(review): the code that consumes these fields is not in this file; the
// per-field descriptions below follow the field names and should be confirmed
// against the sampler / session implementation.
#[derive(Clone, Debug, PartialEq)]
pub struct InferenceParameters {
    /// Number of threads to use (default: `8`).
    pub n_threads: usize,
    /// Batch size (default: `8`).
    pub n_batch: usize,
    /// Top-k sampling setting (default: `40`).
    pub top_k: usize,
    /// Top-p sampling setting (default: `0.95`).
    pub top_p: f32,
    /// Penalty applied to repeated tokens (default: `1.30`).
    pub repeat_penalty: f32,
    /// Sampling temperature (default: `0.80`).
    pub temperature: f32,
    /// Per-token biases (default: `TokenBias::default()`).
    pub bias_tokens: TokenBias,
    /// Number of most recent tokens considered by the repetition penalty
    /// (default: `512`).
    pub repetition_penalty_last_n: usize,
}
impl Default for InferenceParameters {
fn default() -> Self {
Self {
n_threads: 8,
n_batch: 8,
top_k: 40,
top_p: 0.95,
repeat_penalty: 1.30,
temperature: 0.80,
bias_tokens: TokenBias::default(),
repetition_penalty_last_n: 512,
}
}
}
#[derive(Error, Debug)]
pub enum InferenceError {
#[error("an invalid token was encountered during tokenization")]
TokenizationFailed,
#[error("the context window is full")]
ContextFull,
#[error("reached end of text")]
EndOfText,
#[error("the user-specified callback returned an error")]
UserCallback(Box<dyn std::error::Error>),
}