Skip to main content

token_count/output/
mod.rs

1//! Output formatting for different verbosity levels
2//!
3//! This module provides output formatters that display tokenization results
4//! in different formats based on user preferences.
5//!
6//! # Formatters
7//!
8//! Three formatters are available via the [`OutputFormatter`] trait:
9//!
10//! - [`SimpleFormatter`] - Verbosity 0 (default): Just the token count
11//! - [`VerboseFormatter`] - Verbosity 1-2: Model info and context window usage
12//! - [`DebugFormatter`] - Verbosity 3+: Debug information for troubleshooting
13//!
14//! # Example
15//!
16//! ```
17//! use token_count::output::{select_formatter, OutputFormatter};
18//! use token_count::tokenizers::{TokenizationResult, ModelInfo};
19//!
20//! let result = TokenizationResult {
21//!     token_count: 2,
22//!     model_info: ModelInfo {
23//!         name: "gpt-4".to_string(),
24//!         encoding: "cl100k_base".to_string(),
25//!         context_window: 128000,
26//!         description: "GPT-4".to_string(),
27//!     },
28//! };
29//!
30//! // Simple output (verbosity 0)
31//! let simple = select_formatter(0);
32//! assert_eq!(simple.format(&result), "2");
33//!
34//! // Verbose output (verbosity 1)
35//! let verbose = select_formatter(1);
36//! let output = verbose.format(&result);
37//! assert!(output.contains("Model: gpt-4"));
38//! assert!(output.contains("Tokens: 2"));
39//! ```
40//!
41//! # Strategy Pattern
42//!
//! The formatters use the Strategy pattern, allowing easy extension with
//! new output formats without modifying the existing formatters. To add a new formatter:
45//!
46//! 1. Create a new struct (e.g., `JsonFormatter`)
47//! 2. Implement the [`OutputFormatter`] trait
48//! 3. Update [`select_formatter`] to return it for the appropriate verbosity
49//!
50//! # Verbosity Levels
51//!
52//! | Level | Flag | Formatter | Output |
53//! |-------|------|-----------|--------|
54//! | 0 | (default) | Simple | `2` |
55//! | 1-2 | `-v`, `-vv` | Verbose | Model info + context % |
56//! | 3+ | `-vvv` | Debug | Diagnostic information |
57
58pub mod debug;
59pub mod simple;
60pub mod verbose;
61
62pub use debug::DebugFormatter;
63pub use simple::SimpleFormatter;
64pub use verbose::VerboseFormatter;
65
66use crate::tokenizers::{ModelInfo, TokenizationResult};
67
/// Trait for formatting tokenization output
///
/// Each output strategy implements this trait. [`select_formatter`] returns
/// an implementor as `Box<dyn OutputFormatter>`, so callers can render a
/// result without naming the concrete formatter that was chosen.
pub trait OutputFormatter {
    /// Format the tokenization result as a string
    ///
    /// Borrows `result` and returns the rendered text as an owned `String`.
    fn format(&self, result: &TokenizationResult) -> String;
}
73
74/// Select the appropriate formatter based on verbosity level
75///
76/// - 0: Simple (number only)
77/// - 1-2: Verbose (model info, context window percentage)
78/// - 3+: Debug (token IDs, decoded tokens)
79pub fn select_formatter(verbosity: u8) -> Box<dyn OutputFormatter> {
80    match verbosity {
81        0 => Box::new(SimpleFormatter),
82        1 | 2 => Box::new(VerboseFormatter),
83        _ => Box::new(DebugFormatter),
84    }
85}
86
/// Extended tokenization result with token details for debug mode
///
/// Reserved for future enhancement in v0.2.0 when full token ID
/// decoding will be implemented in the debug formatter.
// The type is a placeholder for that future work, so unused-code warnings
// are silenced rather than removing the definition.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct DetailedResult {
    /// Token count (presumably the same figure as
    /// `TokenizationResult::token_count` - confirm when this type is wired in).
    pub token_count: usize,
    /// Information about the model the count refers to.
    pub model_info: ModelInfo,
    /// Token IDs for the input (NOTE(review): assumed to be one ID per
    /// counted token - confirm once decoding is implemented).
    pub token_ids: Vec<usize>,
    /// Sample token strings (NOTE(review): presumably decoded text for some
    /// of `token_ids` - confirm once decoding is implemented).
    pub sample_tokens: Vec<String>,
}
99
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the same sample result used in the module-level example.
    fn sample_result() -> TokenizationResult {
        TokenizationResult {
            token_count: 2,
            model_info: ModelInfo {
                name: "gpt-4".to_string(),
                encoding: "cl100k_base".to_string(),
                context_window: 128000,
                description: "GPT-4".to_string(),
            },
        }
    }

    /// Checks that each verbosity band yields a formatter with the expected
    /// observable output, rather than merely constructing and dropping it.
    #[test]
    fn test_formatter_selection() {
        let result = sample_result();

        // Verbosity 0: only the token count is printed.
        assert_eq!(select_formatter(0).format(&result), "2");

        // Verbosity 1 and 2 both map to the verbose formatter.
        for level in [1u8, 2] {
            let output = select_formatter(level).format(&result);
            assert!(
                output.contains("Model: gpt-4"),
                "verbosity {level} should report the model name, got: {output}"
            );
            assert!(
                output.contains("Tokens: 2"),
                "verbosity {level} should report the token count, got: {output}"
            );
        }

        // Verbosity 3 and above map to the debug formatter. Its exact output
        // is owned by the `debug` module, so only check that selection and
        // formatting succeed across the whole upper range.
        for level in [3u8, u8::MAX] {
            let _ = select_formatter(level).format(&result);
        }
    }
}