use tokie::Tokenizer;
/// Walks through the basic `Tokenizer` operations: load a pretrained
/// tokenizer, encode a sample string twice (second argument `true`, then
/// `false`), decode the first encoding, count the tokens of a sentence, and
/// round-trip the tokenizer through a `.tkz` file.
///
/// # Panics
///
/// Panics with a message naming the failed step if loading, decoding,
/// saving, or reloading the tokenizer fails.
fn main() {
    // Shared by both `encode` calls so they are guaranteed to see the same text.
    const SAMPLE_TEXT: &str = "Hello, world!";
    // Shared by the save and the reload so they cannot drift apart.
    // NOTE(review): this path is hard-coded and Unix-style; confirm the
    // example is not expected to run on platforms without `/tmp`.
    const TKZ_PATH: &str = "/tmp/bert.tkz";

    let tokenizer = Tokenizer::from_pretrained("bert-base-uncased")
        .expect("failed to load pretrained tokenizer \"bert-base-uncased\"");
    println!("Loaded BERT tokenizer (vocab_size={})", tokenizer.vocab_size());

    // First encoding: second argument is `true`.
    let encoding = tokenizer.encode(SAMPLE_TEXT, true);
    println!("\nEncoding for \"{}\" (with special tokens):", SAMPLE_TEXT);
    println!(" ids: {:?}", encoding.ids);
    println!(" attention_mask: {:?}", encoding.attention_mask);
    println!(" type_ids: {:?}", encoding.type_ids);

    // Second encoding of the same text: second argument is `false`.
    let encoding_raw = tokenizer.encode(SAMPLE_TEXT, false);
    println!("\nWithout special tokens:");
    println!(" ids: {:?}", encoding_raw.ids);

    // Decode the ids of the first encoding back into text.
    let decoded = tokenizer
        .decode(&encoding.ids)
        .expect("failed to decode token ids back into text");
    println!("\nDecoded: \"{}\"", decoded);

    let count = tokenizer.count_tokens("The quick brown fox jumps over the lazy dog.");
    println!("\nToken count: {}", count);

    // Save the tokenizer to disk, then load it again from the same path.
    tokenizer
        .to_file(TKZ_PATH)
        .expect("failed to write tokenizer to /tmp/bert.tkz");
    let fast = Tokenizer::from_file(TKZ_PATH)
        .expect("failed to load tokenizer from /tmp/bert.tkz");
    println!("\nLoaded from .tkz (vocab_size={})", fast.vocab_size());
}