// Public submodules. Every module is declared unconditionally except `server`,
// which is compiled only when the `server` feature is enabled.
pub mod backend;
pub mod gguf;
pub mod huggingface;
pub mod model;
// NOTE(review): `rag` is declared without a feature gate, while the `rag::...`
// re-exports in this file sit behind `#[cfg(feature = "rag")]` — confirm the
// module gates its own contents, or that this asymmetry is intended.
pub mod rag;
pub mod sampling;
#[cfg(feature = "server")]
pub mod server;
pub mod tensor;
pub mod tokenizer;
// Re-exports: surface selected items from the submodules at this module's top
// level so callers can name them without spelling out the submodule path.
pub use backend::{default_backend, Backend, BackendError};
pub use gguf::{GgufBuilder, GgufData, GgufFile, GgufReader, GgufWriter, TensorToWrite};
// Everything re-exported from `model` comes through the single list below.
pub use model::{
Architecture, InferenceContext, KVCache, LlamaModel, Model, ModelConfig, ModelError,
ModelLoader, load_llama_model,
LoraAdapter, LoraAdapters, LoraConfig,
MoeConfig, MoeExpert, MoeLayer, MoeRouter, MoeStats,
SpeculativeConfig, SpeculativeDecoder, SpeculativeStats,
EmbeddingConfig, EmbeddingError, EmbeddingExtractor, PoolingStrategy, TruncationStrategy,
cosine_similarity, dot_product, euclidean_distance, find_nearest,
CachedPrefix, PrefixId, PrefixSharing, PromptCache, PromptCacheConfig, PromptCacheStats,
};
pub use sampling::{
Grammar, GrammarSampler, GbnfGrammar, JsonGrammar, RegexGrammar,
MirostatConfig, Sampler, SamplerConfig,
};
pub use tensor::{DType, Tensor, TensorError, TensorStorage};
pub use tokenizer::{Tokenizer, TokenizerError};
pub use huggingface::{HfClient, HfError, HfFileInfo, format_bytes};
// The `rag` re-exports are only available when the `rag` feature is enabled.
#[cfg(feature = "rag")]
pub use rag::{RagConfig, RagStore, RagError, RagResult, Document, NewDocument, RagContextBuilder, TextChunker};
/// Aggregate error type wrapping failures from I/O and from the `gguf`,
/// `tensor` and `backend` submodules.
///
/// Each variant carries its source error via `#[from]`, so the corresponding
/// `From` conversion is generated and `?` can lift any of these errors into
/// [`Error`]. The `Display` output is the variant's prefix followed by the
/// wrapped error's own message.
///
/// NOTE(review): other error types re-exported by this file (`ModelError`,
/// `TokenizerError`, `HfError`, `EmbeddingError`, `RagError`) have no variant
/// here — confirm whether that is intentional.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An underlying [`std::io::Error`].
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
/// An error produced by the `gguf` module.
#[error("GGUF error: {0}")]
Gguf(#[from] gguf::GgufError),
/// An error produced by the `tensor` module.
#[error("Tensor error: {0}")]
Tensor(#[from] tensor::TensorError),
/// An error produced by the `backend` module.
#[error("Backend error: {0}")]
Backend(#[from] backend::BackendError),
}
/// Convenience alias for [`std::result::Result`] with the error type fixed to
/// this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;