token_count/output/mod.rs
1//! Output formatting for different verbosity levels
2//!
3//! This module provides output formatters that display tokenization results
4//! in different formats based on user preferences.
5//!
6//! # Formatters
7//!
8//! Four formatters are available via the [`OutputFormatter`] trait:
9//!
10//! - [`SimpleFormatter`] - Verbosity 0 (default): Just the token count
11//! - [`BasicFormatter`] - Verbosity 1 (`-v`): Model info and token count
12//! - [`VerboseFormatter`] - Verbosity 2 (`-vv`): Add context window usage percentage
13//! - [`DebugFormatter`] - Verbosity 3+ (`-vvv`): Add token IDs and decoded tokens
14//!
15//! # Example
16//!
17//! ```
18//! use token_count::output::{select_formatter, OutputFormatter};
19//! use token_count::tokenizers::{TokenizationResult, ModelInfo};
20//!
21//! let result = TokenizationResult {
22//! token_count: 2,
23//! model_info: ModelInfo {
24//! name: "gpt-4".to_string(),
25//! encoding: "cl100k_base".to_string(),
26//! context_window: 128000,
27//! description: "GPT-4".to_string(),
28//! },
29//! token_details: None,
30//! };
31//!
32//! // Simple output (verbosity 0)
33//! let simple = select_formatter(0);
34//! assert_eq!(simple.format(&result), "2");
35//!
36//! // Basic output (verbosity 1)
37//! let basic = select_formatter(1);
38//! let output = basic.format(&result);
39//! assert!(output.contains("Model: gpt-4"));
40//! assert!(output.contains("Tokens: 2"));
41//! ```
42//!
43//! # Strategy Pattern
44//!
//! The formatters use the Strategy pattern, so new output formats can be added
//! without modifying the existing formatters. To add a new formatter:
47//!
48//! 1. Create a new struct (e.g., `JsonFormatter`)
49//! 2. Implement the [`OutputFormatter`] trait
50//! 3. Update [`select_formatter`] to return it for the appropriate verbosity
51//!
52//! # Verbosity Levels
53//!
54//! | Level | Flag | Formatter | Output |
55//! |-------|------|-----------|--------|
56//! | 0 | (default) | Simple | `2` |
57//! | 1 | `-v` | Basic | Model info + token count |
58//! | 2 | `-vv` | Verbose | Model info + context % |
59//! | 3+ | `-vvv` | Debug | Token IDs + decoded tokens |
60
61pub mod basic;
62pub mod debug;
63pub mod simple;
64pub mod verbose;
65
66pub use basic::BasicFormatter;
67pub use debug::DebugFormatter;
68pub use simple::SimpleFormatter;
69pub use verbose::VerboseFormatter;
70
71use crate::tokenizers::TokenizationResult;
72
73/// Trait for formatting tokenization output
74pub trait OutputFormatter {
75 /// Format the tokenization result as a string
76 fn format(&self, result: &TokenizationResult) -> String;
77}
78
79/// Select the appropriate formatter based on verbosity level
80///
81/// - 0: Simple (number only)
82/// - 1: Basic (model info and token count)
83/// - 2: Verbose (add context window percentage)
84/// - 3+: Debug (add token IDs and decoded tokens)
85pub fn select_formatter(verbosity: u8) -> Box<dyn OutputFormatter> {
86 match verbosity {
87 0 => Box::new(SimpleFormatter),
88 1 => Box::new(BasicFormatter),
89 2 => Box::new(VerboseFormatter),
90 _ => Box::new(DebugFormatter),
91 }
92}
93
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::{ModelInfo, TokenizationResult};

    /// Builds the sample result used in this module's documentation example.
    fn sample_result() -> TokenizationResult {
        TokenizationResult {
            token_count: 2,
            model_info: ModelInfo {
                name: "gpt-4".to_string(),
                encoding: "cl100k_base".to_string(),
                context_window: 128000,
                description: "GPT-4".to_string(),
            },
            token_details: None,
        }
    }

    /// Checks that `select_formatter` dispatches on the verbosity level.
    ///
    /// Levels 0 and 1 are verified through their documented output. Because
    /// the formatters are returned as `Box<dyn OutputFormatter>`, their
    /// concrete types cannot be compared directly; observable output is the
    /// only way to tell them apart.
    #[test]
    fn test_formatter_selection() {
        let result = sample_result();

        // Verbosity 0: the bare token count and nothing else.
        assert_eq!(select_formatter(0).format(&result), "2");

        // Verbosity 1: model name and token count are both present.
        let basic_output = select_formatter(1).format(&result);
        assert!(basic_output.contains("Model: gpt-4"));
        assert!(basic_output.contains("Tokens: 2"));

        // Levels 2, 3 and the `u8` maximum must all yield a formatter. Their
        // exact output is covered by the individual formatter modules, so
        // only construction is exercised here.
        for verbosity in [2, 3, u8::MAX] {
            drop(select_formatter(verbosity));
        }
    }
}