context_builder/
token_count.rs1use ignore::DirEntry;
2use once_cell::sync::Lazy;
3use std::collections::BTreeMap;
4use std::fs;
5use std::path::Path;
6use tiktoken_rs::{CoreBPE, cl100k_base};
8
9static TOKENIZER: Lazy<CoreBPE> = Lazy::new(|| cl100k_base().unwrap());
11
12pub fn estimate_tokens(text: &str) -> usize {
14 TOKENIZER.encode_with_special_tokens(text).len()
15}
16
17pub fn count_file_tokens(base_path: &Path, entry: &DirEntry, line_numbers: bool) -> usize {
19 let file_path = entry.path();
20 let relative_path = file_path.strip_prefix(base_path).unwrap_or(file_path);
21
22 let mut token_count = estimate_tokens(&format!(
24 "\n## File: `{}`\n\n- Size: {} bytes\n- Modified: {}\n\n",
25 relative_path.display(),
26 entry.metadata().map(|m| m.len()).unwrap_or(0),
27 "Unknown"
28 )); token_count += estimate_tokens("```\n```");
32
33 if let Ok(content) = fs::read_to_string(file_path) {
35 if line_numbers {
36 let lines_with_numbers: String = content
38 .lines()
39 .enumerate()
40 .map(|(i, line)| format!("{:>4} | {}\n", i + 1, line))
41 .collect();
42 token_count += estimate_tokens(&lines_with_numbers);
43 } else {
44 token_count += estimate_tokens(&content);
45 }
46 }
47
48 token_count
49}
50
51pub fn count_tree_tokens(tree: &BTreeMap<String, crate::tree::FileNode>, depth: usize) -> usize {
53 let mut token_count = 0;
54
55 let indent = " ".repeat(depth);
57
58 for (name, node) in tree {
59 match node {
60 crate::tree::FileNode::File => {
61 token_count += estimate_tokens(&format!("{}- 📄 {}\n", indent, name));
62 }
63 crate::tree::FileNode::Directory(children) => {
64 token_count += estimate_tokens(&format!("{}- 📁 {}\n", indent, name));
65 token_count += count_tree_tokens(children, depth + 1);
66 }
67 }
68 }
69
70 token_count
71}
72
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeMap;

    /// Pins the token counts for a short sentence and a small code snippet.
    #[test]
    fn test_estimate_tokens() {
        let cases = [
            ("Hello, world!", 4),
            ("fn main() {\n println!(\"Hello, world!\");\n}", 12),
        ];

        for (input, expected) in cases {
            assert_eq!(estimate_tokens(input), expected);
        }
    }

    /// Pins the token count for a tree with one top-level file and one
    /// directory holding a single file.
    #[test]
    fn test_count_tree_tokens() {
        let nested = BTreeMap::from([("file2.md".to_string(), crate::tree::FileNode::File)]);
        let root = BTreeMap::from([
            ("file1.rs".to_string(), crate::tree::FileNode::File),
            ("src".to_string(), crate::tree::FileNode::Directory(nested)),
        ]);

        assert_eq!(count_tree_tokens(&root, 0), 23);
    }
}