// alith-models 0.4.3
//
// Load and Download LLM Models, Metadata, and Tokenizers
// Documentation
pub mod config_json;
pub mod general;
pub mod llm;
pub mod tokenizer;
use super::gguf::tools::{gguf_file::GgufFile, gguf_layers::GgufLayers};
use general::GeneralMetadata;
use llm::Architecture;
use tokenizer::TokenizerMetadata;

/// Metadata describing a local LLM, assembled from a single parsed GGUF file.
///
/// Built by `LocalLLMMetadata::from_gguf_path`, which derives every field
/// from the same `GgufFile`.
pub struct LocalLLMMetadata {
    /// General metadata, produced by `GeneralMetadata::from_gguf`.
    pub general: GeneralMetadata,
    /// Architecture metadata, produced by `Architecture::from_gguf`; the
    /// context-size and context-length queries are answered from it.
    pub llm: Architecture,
    /// Tokenizer metadata, produced by `TokenizerMetadata::from_gguf`.
    pub tokenizer: TokenizerMetadata,
    /// The parsed GGUF file the other fields were derived from.
    pub gguf_file: GgufFile,
    /// Layer information, produced by `GgufLayers::from_tensors` from the
    /// file's tensors.
    pub layers: GgufLayers,
}

impl LocalLLMMetadata {
    /// Opens the GGUF file at `path`, parses it, and derives all metadata
    /// sections from the parsed file.
    ///
    /// # Errors
    ///
    /// Propagates the error if the file cannot be opened, if `GgufFile::read`
    /// fails, or if any of the `from_gguf` constructors fails.
    pub fn from_gguf_path(path: &std::path::Path) -> crate::Result<Self> {
        let mut reader = std::fs::File::open(path)?;
        let gguf: GgufFile = GgufFile::read(&mut reader)?;

        Ok(Self {
            general: GeneralMetadata::from_gguf(&gguf)?,
            llm: Architecture::from_gguf(&gguf)?,
            tokenizer: TokenizerMetadata::from_gguf(&gguf)?,
            layers: GgufLayers::from_tensors(&gguf.tensors),
            // Moved in last: every initializer above still borrows `gguf`.
            gguf_file: gguf,
        })
    }

    /// Returns the size reported by the underlying GGUF file.
    ///
    /// The visible implementation always returns `Ok`; the `Result` wrapper
    /// is kept for interface compatibility.
    pub fn estimate_model_size(&self) -> crate::Result<u64> {
        Ok(self.gguf_file.size())
    }

    /// Estimates the context size for the given `ctx_size` and optional
    /// `batch_size` by delegating to `Architecture::estimate_context_size`.
    pub fn estimate_context_size(&self, ctx_size: u64, batch_size: Option<u64>) -> u64 {
        self.llm.estimate_context_size(ctx_size, batch_size)
    }

    /// Returns the average per-block size: the total size of all blocks plus
    /// the estimated context size, divided (integer division) by the number
    /// of blocks.
    ///
    /// # Errors
    ///
    /// Returns an error when the model reports zero blocks, instead of
    /// panicking on a division by zero.
    pub fn average_layer_size_bytes(
        &self,
        ctx_size: u64,
        batch_size: Option<u64>,
    ) -> crate::Result<u64> {
        let total_layers_size = self.layers.total_size_blocks_bytes();
        let block_count = self.layers.count_blocks();
        let context_size = self.estimate_context_size(ctx_size, batch_size);
        let total_size = total_layers_size + context_size;

        match total_size.checked_div(block_count) {
            Some(average) => Ok(average),
            // The crate error type already accepts `std::io::Error` (see the
            // `?` on `File::open` in `from_gguf_path`), so reuse that
            // conversion rather than assuming a crate-specific constructor.
            None => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "cannot compute average layer size: model has no blocks",
            )
            .into()),
        }
    }

    /// Returns the context length reported by the architecture metadata.
    pub fn context_length(&self) -> u64 {
        self.llm.context_length()
    }
}

/// Debug output covers the general, architecture, and tokenizer sections
/// only; `gguf_file` and `layers` are not included in the output.
impl std::fmt::Debug for LocalLLMMetadata {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fields are labelled with their type names rather than the struct's
        // field names.
        f.debug_struct("LocalLLMMetadata")
            .field("GeneralMetadata", &self.general)
            .field("Architecture", &self.llm)
            .field("TokenizerMetadata", &self.tokenizer)
            .finish()
    }
}