use super::{
GgufPresetTrait, HfTokenTrait, LLMChatTemplate, LocalLLMModel, hf_loader::HuggingFaceLoader,
metadata::LocalLLMMetadata,
};
use crate::tokenizer::Tokenizer;
use loaders::{hf::GgufHfLoader, local::GgufLocalLoader, preset::GgufPresetLoader};
use std::sync::Arc;
use tools::gguf_tokenizer::convert_gguf_to_hf_tokenizer;
pub mod loaders;
pub mod memory;
pub mod preset;
pub mod tools;
/// Aggregates the GGUF model sources (preset, local file, Hugging Face URL)
/// together with the shared `HuggingFaceLoader` configuration.
///
/// `GgufLoader::load` picks the source: a local quant file path wins, then a
/// Hugging Face quant file URL, otherwise the preset loader is used.
#[derive(Default, Clone)]
pub struct GgufLoader {
/// Used by `load` when neither a local path nor a Hugging Face URL is set.
pub gguf_preset_loader: GgufPresetLoader,
/// Used by `load` when `local_quant_file_path` is set.
pub gguf_local_loader: GgufLocalLoader,
/// Used by `load` when `hf_quant_file_url` is set and no local path is.
pub gguf_hf_loader: GgufHfLoader,
/// Passed by reference to the Hugging Face and preset loaders; also holds
/// the `hf_token` / `hf_token_env_var` fields exposed via `HfTokenTrait`.
pub hf_loader: HuggingFaceLoader,
}
impl GgufLoader {
    /// Creates a loader with every sub-loader in its default state.
    pub fn new() -> Self {
        Default::default()
    }

    /// Loads the model from the first configured source.
    ///
    /// Sources are tried in this order:
    /// 1. the local loader, when `local_quant_file_path` is set;
    /// 2. the Hugging Face loader, when `hf_quant_file_url` is set;
    /// 3. the preset loader otherwise.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the selected sub-loader returns.
    pub fn load(&mut self) -> crate::Result<LocalLLMModel> {
        // An explicit local file takes precedence over any remote source.
        if self.gguf_local_loader.local_quant_file_path.is_some() {
            return self.gguf_local_loader.load();
        }

        if self.gguf_hf_loader.hf_quant_file_url.is_some() {
            return self.gguf_hf_loader.load(&self.hf_loader);
        }

        // Nothing explicit was configured: fall back to the preset.
        self.gguf_preset_loader.load(&self.hf_loader)
    }
}
/// Lets a bare [`GgufLoader`] use the builder methods of [`GgufLoaderTrait`].
impl GgufLoaderTrait for GgufLoader {
    /// Returns the loader itself; it is its own builder state.
    fn gguf_loader(&mut self) -> &mut Self {
        self
    }
}
/// Exposes the token settings stored on the inner `hf_loader`.
impl HfTokenTrait for GgufLoader {
    /// Mutable access to `hf_loader.hf_token`.
    fn hf_token_mut(&mut self) -> &mut Option<String> {
        let hf = &mut self.hf_loader;
        &mut hf.hf_token
    }

    /// Mutable access to `hf_loader.hf_token_env_var`
    /// (presumably the name of the environment variable holding the token;
    /// verify against `HuggingFaceLoader`).
    fn hf_token_env_var_mut(&mut self) -> &mut String {
        let hf = &mut self.hf_loader;
        &mut hf.hf_token_env_var
    }
}
/// Exposes the inner preset loader so `GgufPresetTrait` methods can configure it.
impl GgufPresetTrait for GgufLoader {
    /// Mutable access to the `gguf_preset_loader` field.
    fn preset_loader(&mut self) -> &mut GgufPresetLoader {
        let Self {
            gguf_preset_loader, ..
        } = self;
        gguf_preset_loader
    }
}
/// Builds the tokenizer for a GGUF model.
///
/// When `local_tokenizer_path` is given, the tokenizer JSON at that path is
/// tried first. If that fails (a warning is logged) or no path was given, the
/// GGML tokenizer stored in `model_metadata` is converted instead.
///
/// # Errors
///
/// Fails when the metadata carries no GGML tokenizer to fall back on, or when
/// converting / wrapping the GGML tokenizer fails.
pub(crate) fn load_tokenizer(
    local_tokenizer_path: &Option<std::path::PathBuf>,
    model_metadata: &LocalLLMMetadata,
) -> crate::Result<Arc<Tokenizer>> {
    let embedded = &model_metadata.tokenizer.ggml;

    // Resolve which GGML tokenizer to convert; a successfully loaded local
    // tokenizer short-circuits the whole function.
    let ggml = match local_tokenizer_path {
        Some(path) => match Tokenizer::new_from_tokenizer_json(path) {
            Ok(loaded) => return Ok(Arc::new(loaded)),
            Err(e) => {
                crate::warn!("Failed to load tokenizer from local path: {}", e);
                match embedded {
                    Some(ggml) => ggml,
                    None => {
                        crate::bail!("GGML tokenizer model not found.");
                    }
                }
            }
        },
        None => match embedded {
            Some(ggml) => ggml,
            None => {
                crate::bail!("No tokenizer found in model metadata")
            }
        },
    };

    let converted = convert_gguf_to_hf_tokenizer(ggml)?;
    Ok(Arc::new(Tokenizer::new_from_tokenizer(converted)?))
}
/// Builds the chat template for a GGUF model.
///
/// When `local_tokenizer_config_path` is given, the template is read from
/// that path. If reading fails, a warning is logged and the template is
/// derived from the tokenizer section of `model_metadata` instead. Without a
/// path, the metadata is used directly.
///
/// # Errors
///
/// Returns the error from `LLMChatTemplate::from_gguf_tokenizer` when the
/// metadata fallback fails.
pub(crate) fn load_chat_template(
    local_tokenizer_config_path: &Option<std::path::PathBuf>,
    model_metadata: &LocalLLMMetadata,
) -> crate::Result<LLMChatTemplate> {
    // Shared fallback: derive the template from the GGUF tokenizer metadata.
    let from_metadata = || LLMChatTemplate::from_gguf_tokenizer(&model_metadata.tokenizer);

    match local_tokenizer_config_path {
        None => from_metadata(),
        Some(config_path) => LLMChatTemplate::from_local_path(config_path).or_else(|e| {
            crate::warn!("Failed to load chat template from local path: {}", e);
            from_metadata()
        }),
    }
}
/// Builder-style setters for configuring a [`GgufLoader`].
///
/// Implementors only provide [`gguf_loader`](GgufLoaderTrait::gguf_loader);
/// every other method is a chainable setter that writes one option onto the
/// loader it returns.
pub trait GgufLoaderTrait {
    /// Returns the [`GgufLoader`] that the setters below mutate.
    fn gguf_loader(&mut self) -> &mut GgufLoader;

    /// Sets the model id on both the Hugging Face loader and the local loader.
    fn model_id<S: AsRef<str>>(&mut self, model_id: S) -> &mut Self {
        let id = model_id.as_ref();
        self.gguf_loader().gguf_hf_loader.model_id = Some(id.into());
        self.gguf_loader().gguf_local_loader.model_id = Some(id.into());
        self
    }

    /// Sets the path of a local quantized model file.
    fn local_quant_file_path<S: Into<std::path::PathBuf>>(
        &mut self,
        local_quant_file_path: S,
    ) -> &mut Self {
        let quant_path: std::path::PathBuf = local_quant_file_path.into();
        self.gguf_loader().gguf_local_loader.local_quant_file_path = Some(quant_path);
        self
    }

    /// Sets the local config path used by the local loader.
    fn local_config_path<P: AsRef<std::path::Path>>(&mut self, local_config_path: P) -> &mut Self {
        let config_path = local_config_path.as_ref().to_path_buf();
        self.gguf_loader().gguf_local_loader.local_config_path = Some(config_path);
        self
    }

    /// Sets the local tokenizer path used by the local loader.
    fn local_tokenizer_path<P: AsRef<std::path::Path>>(
        &mut self,
        local_tokenizer_path: P,
    ) -> &mut Self {
        let tokenizer_path = local_tokenizer_path.as_ref().to_path_buf();
        self.gguf_loader().gguf_local_loader.local_tokenizer_path = Some(tokenizer_path);
        self
    }

    /// Sets the local tokenizer-config path used by the local loader.
    fn local_tokenizer_config_path<P: AsRef<std::path::Path>>(
        &mut self,
        local_tokenizer_config_path: P,
    ) -> &mut Self {
        let tokenizer_config_path = local_tokenizer_config_path.as_ref().to_path_buf();
        self.gguf_loader()
            .gguf_local_loader
            .local_tokenizer_config_path = Some(tokenizer_config_path);
        self
    }

    /// Sets the Hugging Face URL of the quantized model file.
    fn hf_quant_file_url<S: Into<String>>(&mut self, hf_quant_file_url: S) -> &mut Self {
        let url: String = hf_quant_file_url.into();
        self.gguf_loader().gguf_hf_loader.hf_quant_file_url = Some(url);
        self
    }

    /// Sets the Hugging Face repo id to fetch the config from.
    fn hf_config_repo_id<S: Into<String>>(&mut self, hf_config_repo_id: S) -> &mut Self {
        let repo_id: String = hf_config_repo_id.into();
        self.gguf_loader().gguf_hf_loader.hf_config_repo_id = Some(repo_id);
        self
    }

    /// Sets the Hugging Face repo id to fetch the tokenizer from.
    fn hf_tokenizer_repo_id<S: Into<String>>(&mut self, hf_tokenizer_repo_id: S) -> &mut Self {
        let repo_id: String = hf_tokenizer_repo_id.into();
        self.gguf_loader().gguf_hf_loader.hf_tokenizer_repo_id = Some(repo_id);
        self
    }

    /// Sets the Hugging Face repo id to fetch the tokenizer config from.
    fn hf_tokenizer_config_repo_id<S: Into<String>>(
        &mut self,
        hf_tokenizer_config_repo_id: S,
    ) -> &mut Self {
        let repo_id: String = hf_tokenizer_config_repo_id.into();
        self.gguf_loader()
            .gguf_hf_loader
            .hf_tokenizer_config_repo_id = Some(repo_id);
        self
    }
}