// token_count/output/debug.rs
use crate::output::OutputFormatter;
use crate::tokenizers::TokenizationResult;
/// Formatter that renders a [`TokenizationResult`] as a verbose,
/// human-readable debug report (model metadata, token count, context-window
/// usage, and per-token details when available).
pub struct DebugFormatter;
9impl OutputFormatter for DebugFormatter {
10 fn format(&self, result: &TokenizationResult) -> String {
11 let percentage =
12 (result.token_count as f64 / result.model_info.context_window as f64) * 100.0;
13
14 let mut output = format!(
15 "Model: {} ({})\nTokens: {}\nContext window: {} tokens ({:.4}% used)\n",
16 result.model_info.name,
17 result.model_info.encoding,
18 result.token_count,
19 result.model_info.context_window,
20 percentage
21 );
22
23 if let Some(details) = &result.token_details {
25 if details.is_empty() {
26 output.push_str("\nNo tokens to display (empty input)");
27 } else {
28 let ids: Vec<String> = details.iter().map(|d| d.id.to_string()).collect();
30 output.push_str(&format!("\nToken IDs: [{}]", ids.join(", ")));
31
32 output.push_str("\nDecoded tokens:");
34 for (i, detail) in details.iter().enumerate() {
35 output.push_str(&format!("\n [{}] {} → {:?}", i, detail.id, detail.text));
36 }
37
38 if result.token_count > 10 {
40 output.push_str(&format!(
41 "\n\n(Showing first 10 of {} tokens)",
42 result.token_count
43 ));
44 }
45 }
46 } else {
47 output.push_str(
48 "\n\nNote: Token IDs not available for this model (estimation-based tokenization)",
49 );
50 }
51
52 output
53 }
54}
55
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::ModelInfo;

    /// A result without token details should render the summary header plus
    /// the "Token IDs not available" note.
    #[test]
    fn test_debug_formatter() {
        let info = ModelInfo {
            name: "gpt-4".to_string(),
            encoding: "cl100k_base".to_string(),
            context_window: 128000,
            description: "GPT-4".to_string(),
        };
        let result = TokenizationResult {
            token_count: 2,
            model_info: info,
            token_details: None,
        };

        let rendered = DebugFormatter.format(&result);
        for expected in ["Model: gpt-4", "Tokens: 2", "Token IDs not available"] {
            assert!(rendered.contains(expected));
        }
    }
}
80}