Skip to main content

tokens_of/
tokens_of.rs

1//! Real-BPE token count of the files passed as arguments. Zero assumptions: it
2//! counts the exact bytes of each file with the real cl100k + o200k tiktoken
3//! (when built `--features real-tokens`), so token figures are tied to the same
4//! source that was compiled and executed elsewhere — not a paraphrased snippet.
5//!
6//!   cargo run -p agentic-eval --example tokens_of --features real-tokens -- FILE...
7
8use agentic_eval::tokens::Model;
9use std::fs;
10
11fn main() {
12    let cl = Model::OpenAiGpt4;
13    let o2 = Model::OpenAiGpt4o;
14    println!(
15        "tokenizer exact: cl100k={} o200k={}",
16        cl.is_exact(),
17        o2.is_exact()
18    );
19    println!("{:>7} {:>7}   file", "cl100k", "o200k");
20    for path in std::env::args().skip(1) {
21        match fs::read_to_string(&path) {
22            Ok(s) => println!("{:>7} {:>7}   {}", cl.count(&s), o2.count(&s), path),
23            Err(e) => println!("    ERR     ERR   {path}: {e}"),
24        }
25    }
26}