use std::sync::Once;
use hf_hub::Repo;
use tokenizers::Tokenizer;
use tracing::Level;
use xgrammar::{
TOKENIZER_ALLOW_PATTERN,
huggingface_hub::{self, Params, compile_glob_pattern},
};
/// Guards subscriber installation so it is attempted at most once per process.
static INIT: Once = Once::new();

/// Installs a global `tracing` formatting subscriber capped at `Level::INFO`.
///
/// The function is registered through `#[ctor::ctor]`, so it is invoked
/// automatically as a constructor rather than being called explicitly.
/// Installation is best-effort: if a global subscriber is already set, the
/// existing one is kept and the failure is silently ignored.
#[ctor::ctor]
fn auto_init_subscriber() {
    INIT.call_once(|| {
        // `try_init` instead of `init`: `init` panics when a global default
        // subscriber has already been installed, and a panic raised from a
        // constructor cannot be handled by anyone. Keeping whichever
        // subscriber is already in place is the safe outcome here.
        let _ = tracing_subscriber::fmt()
            .with_max_level(Level::INFO)
            .try_init();
    });
}
/// Downloads the tokenizer files of a Hugging Face model and loads its
/// `tokenizer.json`.
///
/// The download is restricted to files matching `TOKENIZER_ALLOW_PATTERN`;
/// `tokenizer.json` is then read from the directory returned by
/// `snapshot_download`.
///
/// # Arguments
///
/// * `model_id` - Repository identifier passed to `Repo::model`.
///
/// # Errors
///
/// Returns a descriptive `String` when the allow pattern fails to compile,
/// when the snapshot download fails, or when the tokenizer file cannot be
/// loaded.
// NOTE(review): presumably this helper is shared by several targets and not
// every one of them calls it, hence the lint suppression — confirm.
#[allow(dead_code)]
pub fn load_tokenizer(model_id: &str) -> Result<Tokenizer, String> {
    let allow_patterns = compile_glob_pattern(TOKENIZER_ALLOW_PATTERN)
        .map_err(|e| format!("Failed to compile glob pattern: {}", e))?;

    let download_options = Some(Params {
        allow_patterns: Some(allow_patterns),
        ..Default::default()
    });

    let snapshot_dir =
        huggingface_hub::snapshot_download(Repo::model(model_id.to_string()), download_options)
            .map_err(|e| format!("Failed to download tokenizer: {}", e))?;

    // Hand the path over as-is: `Tokenizer::from_file` accepts any
    // `AsRef<Path>`, so there is no need for a `to_str().unwrap()` round-trip
    // that would panic on a non-UTF-8 directory.
    Tokenizer::from_file(snapshot_dir.join("tokenizer.json"))
        .map_err(|e| format!("Failed to load tokenizer from file: {}", e))
}