/// Returns a rough token count for `text`.
///
/// The count is built from two kinds of units:
///
/// * every maximal run of alphanumeric characters and underscores counts as
///   one token, and
/// * every other non-whitespace character counts as one token by itself.
///
/// Whitespace contributes nothing to the count, but it does end the current
/// word run.
pub fn estimate_tokens(text: &str) -> usize {
    text.chars()
        .fold((0usize, false), |(count, prev_is_word), c| {
            let is_word = c == '_' || c.is_alphanumeric();
            // A word character opens a token only at the start of a run;
            // any other character is a token unless it is whitespace.
            let opens_token = if is_word {
                !prev_is_word
            } else {
                !c.is_whitespace()
            };
            (count + usize::from(opens_token), is_word)
        })
        .0
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Text without any visible characters yields no tokens.
    #[test]
    fn empty_and_whitespace_are_zero() {
        let empty = "";
        let blank = " \n\t ";
        assert_eq!(estimate_tokens(empty), 0);
        assert_eq!(estimate_tokens(blank), 0);
    }

    /// Word runs and individual punctuation characters each count once:
    /// `fn`, `add`, `(`, `a`, `,`, `b`, `)`.
    #[test]
    fn counts_words_and_punctuation() {
        let signature = "fn add(a, b)";
        assert_eq!(estimate_tokens(signature), 7);
    }

    /// A function whose body is replaced by an ellipsis must cost fewer
    /// tokens than the same function with its body spelled out.
    #[test]
    fn elided_body_is_cheaper_than_full_body() {
        let skeleton_cost = estimate_tokens("fn f() { … }");
        let full_cost = estimate_tokens("fn f() {\n let x = compute(1, 2, 3);\n x\n}");
        assert!(skeleton_cost < full_cost);
    }
}