tokens_of/tokens_of.rs
1//! Real-BPE token count of the files passed as arguments. Zero assumptions: it
2//! counts the exact bytes of each file with the real cl100k + o200k tiktoken
3//! (when built `--features real-tokens`), so token figures are tied to the same
4//! source that was compiled and executed elsewhere — not a paraphrased snippet.
5//!
6//! cargo run -p agentic-eval --example tokens_of --features real-tokens -- FILE...
7
8use agentic_eval::tokens::Model;
9use std::fs;
10
11fn main() {
12 let cl = Model::OpenAiGpt4;
13 let o2 = Model::OpenAiGpt4o;
14 println!(
15 "tokenizer exact: cl100k={} o200k={}",
16 cl.is_exact(),
17 o2.is_exact()
18 );
19 println!("{:>7} {:>7} file", "cl100k", "o200k");
20 for path in std::env::args().skip(1) {
21 match fs::read_to_string(&path) {
22 Ok(s) => println!("{:>7} {:>7} {}", cl.count(&s), o2.count(&s), path),
23 Err(e) => println!(" ERR ERR {path}: {e}"),
24 }
25 }
26}