/// Returns a rough token estimate for `text`.
///
/// The estimate is the UTF-8 byte length of `text` divided by four,
/// rounded up, so an empty string yields `0` and any non-empty string
/// yields at least `1`.
#[must_use]
pub fn estimate_tokens(text: &str) -> usize {
    // Heuristic ratio used by this module: four bytes per token.
    const BYTES_PER_TOKEN: usize = 4;

    let byte_len = text.len();
    byte_len.div_ceil(BYTES_PER_TOKEN)
}
/// Returns the longest prefix of `text` that fits within `max_tokens`
/// estimated tokens, preferring to end on a complete line.
///
/// The budget is converted to bytes at four bytes per token. If `text`
/// already fits, it is returned unchanged. Otherwise the text is cut at the
/// byte budget (backed up to the nearest UTF-8 character boundary) and, if
/// that prefix contains a newline, trimmed further so the result ends right
/// after the last newline. When the prefix contains no newline the raw
/// prefix is returned, which may end mid-line.
///
/// Very large budgets are treated as "no limit": the byte budget saturates
/// at `usize::MAX` instead of overflowing.
#[must_use]
pub fn truncate_to_budget(text: &str, max_tokens: usize) -> &str {
    // Saturate so that a huge `max_tokens` cannot overflow (which would panic
    // in debug builds and wrap to a tiny budget in release builds).
    let max_bytes = max_tokens.saturating_mul(4);
    if text.len() <= max_bytes {
        return text;
    }

    // Here `max_bytes < text.len()`. Step back to a character boundary so the
    // slice below cannot panic; index 0 is always a boundary, so this stops.
    let mut end = max_bytes;
    while !text.is_char_boundary(end) {
        end -= 1;
    }

    let head = &text[..end];
    // Keep whole lines when possible: cut just after the last newline.
    head.rfind('\n')
        .map_or(head, |newline| &head[..=newline])
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- estimate_tokens -------------------------------------------------

    #[test]
    fn estimate_empty() {
        assert_eq!(estimate_tokens(""), 0);
    }

    #[test]
    fn estimate_short_text() {
        // 5 bytes rounds up to 2 tokens.
        assert_eq!(estimate_tokens("hello"), 2);
    }

    #[test]
    fn estimate_longer_text() {
        let text = "a".repeat(400);
        assert_eq!(estimate_tokens(&text), 100);
    }

    // --- truncate_to_budget ----------------------------------------------

    #[test]
    fn truncate_within_budget() {
        let text = "line one\nline two\nline three\n";
        let result = truncate_to_budget(text, 1000);
        assert_eq!(result, text);
    }

    #[test]
    fn truncate_at_line_boundary() {
        // 5 tokens -> 20 bytes, which lands inside "line three"; the result
        // is trimmed back to the end of the last complete line.
        let text = "line one\nline two\nline three\n";
        let result = truncate_to_budget(text, 5);
        assert_eq!(result, "line one\nline two\n");
    }

    #[test]
    fn truncate_empty() {
        assert_eq!(truncate_to_budget("", 100), "");
    }

    #[test]
    fn truncate_single_long_line() {
        // With no newline to snap to, the result is exactly the byte budget.
        // An upper-bound-only check would also accept an empty result.
        let text = "a".repeat(100);
        let result = truncate_to_budget(&text, 10);
        assert_eq!(result.len(), 40);
        assert!(text.starts_with(result));
    }

    #[test]
    fn truncate_respects_char_boundaries() {
        // "€" is three bytes, so the 4-byte cut falls inside the second
        // character and must back up to byte 3 instead of panicking.
        let result = truncate_to_budget("€€€", 1);
        assert_eq!(result, "€");
    }

    #[test]
    fn truncate_zero_budget() {
        assert_eq!(truncate_to_budget("abc", 0), "");
    }
}