token_count/output/mod.rs
1//! Output formatting for different verbosity levels
2//!
3//! This module provides output formatters that display tokenization results
4//! in different formats based on user preferences.
5//!
6//! # Formatters
7//!
8//! Three formatters are available via the [`OutputFormatter`] trait:
9//!
10//! - [`SimpleFormatter`] - Verbosity 0 (default): Just the token count
11//! - [`VerboseFormatter`] - Verbosity 1-2: Model info and context window usage
12//! - [`DebugFormatter`] - Verbosity 3+: Debug information for troubleshooting
13//!
14//! # Example
15//!
16//! ```
17//! use token_count::output::{select_formatter, OutputFormatter};
18//! use token_count::tokenizers::{TokenizationResult, ModelInfo};
19//!
20//! let result = TokenizationResult {
21//! token_count: 2,
22//! model_info: ModelInfo {
23//! name: "gpt-4".to_string(),
24//! encoding: "cl100k_base".to_string(),
25//! context_window: 128000,
26//! description: "GPT-4".to_string(),
27//! },
28//! };
29//!
30//! // Simple output (verbosity 0)
31//! let simple = select_formatter(0);
32//! assert_eq!(simple.format(&result), "2");
33//!
34//! // Verbose output (verbosity 1)
35//! let verbose = select_formatter(1);
36//! let output = verbose.format(&result);
37//! assert!(output.contains("Model: gpt-4"));
38//! assert!(output.contains("Tokens: 2"));
39//! ```
40//!
41//! # Strategy Pattern
42//!
43//! The formatters use the Strategy pattern, allowing easy extension with
44//! new output formats without modifying existing code. To add a new formatter:
45//!
46//! 1. Create a new struct (e.g., `JsonFormatter`)
47//! 2. Implement the [`OutputFormatter`] trait
48//! 3. Update [`select_formatter`] to return it for the appropriate verbosity
49//!
50//! # Verbosity Levels
51//!
52//! | Level | Flag | Formatter | Output |
53//! |-------|------|-----------|--------|
54//! | 0 | (default) | Simple | `2` |
55//! | 1-2 | `-v`, `-vv` | Verbose | Model info + context % |
56//! | 3+ | `-vvv` | Debug | Diagnostic information |
57
58pub mod debug;
59pub mod simple;
60pub mod verbose;
61
62pub use debug::DebugFormatter;
63pub use simple::SimpleFormatter;
64pub use verbose::VerboseFormatter;
65
66use crate::tokenizers::{ModelInfo, TokenizationResult};
67
68/// Trait for formatting tokenization output
69pub trait OutputFormatter {
70 /// Format the tokenization result as a string
71 fn format(&self, result: &TokenizationResult) -> String;
72}
73
74/// Select the appropriate formatter based on verbosity level
75///
76/// - 0: Simple (number only)
77/// - 1-2: Verbose (model info, context window percentage)
78/// - 3+: Debug (token IDs, decoded tokens)
79pub fn select_formatter(verbosity: u8) -> Box<dyn OutputFormatter> {
80 match verbosity {
81 0 => Box::new(SimpleFormatter),
82 1 | 2 => Box::new(VerboseFormatter),
83 _ => Box::new(DebugFormatter),
84 }
85}
86
87/// Extended tokenization result with token details for debug mode
88///
89/// Reserved for future enhancement in v0.2.0 when full token ID
90/// decoding will be implemented in the debug formatter.
91#[allow(dead_code)]
92#[derive(Debug, Clone)]
93pub struct DetailedResult {
94 pub token_count: usize,
95 pub model_info: ModelInfo,
96 pub token_ids: Vec<usize>,
97 pub sample_tokens: Vec<String>,
98}
99
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the same fixture as the module-level documentation example.
    fn sample_result() -> TokenizationResult {
        TokenizationResult {
            token_count: 2,
            model_info: ModelInfo {
                name: "gpt-4".to_string(),
                encoding: "cl100k_base".to_string(),
                context_window: 128000,
                description: "GPT-4".to_string(),
            },
        }
    }

    /// Checks that each verbosity level is routed to the expected formatter.
    ///
    /// Every call returns the same static type (`Box<dyn OutputFormatter>`),
    /// so the routing can only be observed through the formatted output.
    #[test]
    fn test_formatter_selection() {
        let result = sample_result();

        // Verbosity 0: the simple formatter prints only the token count.
        assert_eq!(select_formatter(0).format(&result), "2");

        // Verbosity 1 and 2 both map to the verbose formatter.
        for verbosity in 1..=2u8 {
            let output = select_formatter(verbosity).format(&result);
            assert!(
                output.contains("Model: gpt-4"),
                "verbosity {verbosity}: missing model line in {output:?}"
            );
            assert!(
                output.contains("Tokens: 2"),
                "verbosity {verbosity}: missing token line in {output:?}"
            );
        }

        // Verbosity 3 and above map to the debug formatter. Its exact output
        // is not pinned here; only check that selection succeeds.
        drop(select_formatter(3));
        drop(select_formatter(u8::MAX));
    }
}
117}