use std::sync::Arc;
use wordchipper::{
Tokenizer,
UnifiedTokenVocab,
disk_cache::WordchipperDiskCache,
};
// Command-line arguments that pick which model to load.
//
// Plain `//` comments are used on purpose: `clap` derive turns `///` doc
// comments on fields into help text, which would change the CLI output.
//
// NOTE(review): the group is declared `required = true`, yet its only member
// (`model`) carries a `default_value`. Confirm whether clap lets the default
// satisfy the required group; if not, `--model` must always be passed
// explicitly and the default is never used.
#[derive(clap::Args, Debug)]
#[group(required = true, multiple = false)]
pub struct ModelSelectorArgs {
// Model identifier passed via `--model`; the declared default is
// "openai:r50k_base". Presumably a `provider:name` pair — the accepted
// format is defined by `wordchipper::load_vocab`, not by this file.
#[arg(long, default_value = "openai:r50k_base")]
model: String,
}
impl ModelSelectorArgs {
    /// Returns the selected model identifier as a string slice.
    pub fn model(&self) -> &str {
        self.model.as_str()
    }

    /// Loads the vocabulary for the selected model.
    ///
    /// Delegates to [`wordchipper::load_vocab`] with the model identifier and
    /// the supplied `disk_cache`, then returns a clone of the handle exposed
    /// by the loaded value's `vocab()` accessor.
    ///
    /// # Errors
    ///
    /// Returns whatever error `wordchipper::load_vocab` reports, boxed as a
    /// `dyn std::error::Error`.
    pub fn load_vocab(
        &self,
        disk_cache: &mut WordchipperDiskCache,
    ) -> Result<Arc<UnifiedTokenVocab<u32>>, Box<dyn std::error::Error>> {
        let loaded = wordchipper::load_vocab(self.model(), disk_cache)?;
        Ok(loaded.vocab().clone())
    }

    /// Builds a tokenizer for the selected model.
    ///
    /// The vocabulary is obtained through [`Self::load_vocab`] and handed to
    /// a default-configured `wordchipper::TokenizerOptions` builder.
    ///
    /// # Errors
    ///
    /// Propagates any error produced while loading the vocabulary.
    pub fn load_tokenizer(
        &self,
        disk_cache: &mut WordchipperDiskCache,
    ) -> Result<Arc<Tokenizer<u32>>, Box<dyn std::error::Error>> {
        let vocab = self.load_vocab(disk_cache)?;
        Ok(wordchipper::TokenizerOptions::default().build(vocab))
    }
}