/// Estimates how many tokens `text` contains using a four-characters-per-token heuristic.
///
/// The count is based on Unicode scalar values (`char`s) rather than bytes and is rounded up,
/// so the empty string yields `0` and any non-empty input yields at least `1`.
///
/// The result saturates at `u32::MAX` instead of wrapping when the input is too large for the
/// estimate to fit in a `u32`.
pub fn approx_token_count(text: &str) -> u32 {
    const CHARS_PER_TOKEN: usize = 4;

    // Keep the count in `usize`: narrowing it with `as u32` up front would silently wrap for
    // inputs of 2^32 or more characters and produce a wildly small estimate.
    let chars = text.chars().count();

    // Ceiling division written without a `chars + 3` bias, so it cannot overflow.
    let tokens = chars / CHARS_PER_TOKEN + usize::from(chars % CHARS_PER_TOKEN != 0);

    // Clamp before narrowing. On targets where `usize` is narrower than `u32` the bound itself
    // truncates, but `tokens` can never exceed it there, so the final cast is always lossless.
    tokens.min(u32::MAX as usize) as u32
}
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII inputs: the estimate is the character count divided by four, rounded up.
    #[test]
    fn linear() {
        let cases: [(String, u32); 4] = [
            (String::new(), 0),
            (String::from("abcd"), 1),
            (String::from("abcde"), 2),
            ("x".repeat(400), 100),
        ];

        for (input, expected) in &cases {
            assert_eq!(approx_token_count(input), *expected);
        }
    }

    /// Two CJK characters occupy six UTF-8 bytes; a byte-based estimate would give 2, not 1.
    #[test]
    fn cjk_counts_chars_not_bytes() {
        let two_chars = "中文";
        assert_eq!(approx_token_count(two_chars), 1);
    }
}