// token_count/output/debug.rs
1//! Debug formatter - outputs token IDs and sample decoded tokens
2
3use crate::output::OutputFormatter;
4use crate::tokenizers::TokenizationResult;
5
6/// Debug formatter that outputs token IDs and sample decoded tokens
7pub struct DebugFormatter;
8
9impl OutputFormatter for DebugFormatter {
10    fn format(&self, result: &TokenizationResult) -> String {
11        let percentage =
12            (result.token_count as f64 / result.model_info.context_window as f64) * 100.0;
13
14        let mut output = format!(
15            "Model: {} ({})\nTokens: {}\nContext window: {} tokens ({:.4}% used)\n",
16            result.model_info.name,
17            result.model_info.encoding,
18            result.token_count,
19            result.model_info.context_window,
20            percentage
21        );
22
23        // Add token details if available
24        if let Some(details) = &result.token_details {
25            if details.is_empty() {
26                output.push_str("\nNo tokens to display (empty input)");
27            } else {
28                // Show token IDs
29                let ids: Vec<String> = details.iter().map(|d| d.id.to_string()).collect();
30                output.push_str(&format!("\nToken IDs: [{}]", ids.join(", ")));
31
32                // Show decoded tokens
33                output.push_str("\nDecoded tokens:");
34                for (i, detail) in details.iter().enumerate() {
35                    output.push_str(&format!("\n  [{}] {} → {:?}", i, detail.id, detail.text));
36                }
37
38                // Note if truncated
39                if result.token_count > 10 {
40                    output.push_str(&format!(
41                        "\n\n(Showing first 10 of {} tokens)",
42                        result.token_count
43                    ));
44                }
45            }
46        } else {
47            output.push_str(
48                "\n\nNote: Token IDs not available for this model (estimation-based tokenization)",
49            );
50        }
51
52        output
53    }
54}
55
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::ModelInfo;

    /// Without per-token details the formatter must still report the model,
    /// the token count, and the "IDs not available" note.
    #[test]
    fn test_debug_formatter() {
        let model_info = ModelInfo {
            name: "gpt-4".to_string(),
            encoding: "cl100k_base".to_string(),
            context_window: 128000,
            description: "GPT-4".to_string(),
        };
        let result = TokenizationResult {
            token_count: 2,
            model_info,
            token_details: None,
        };

        let rendered = DebugFormatter.format(&result);
        for expected in ["Model: gpt-4", "Tokens: 2", "Token IDs not available"] {
            assert!(rendered.contains(expected), "missing substring: {}", expected);
        }
    }
}