use crate::output::OutputFormatter;
use crate::tokenizers::TokenizationResult;
/// Output formatter that renders a verbose, debug-oriented view of a
/// `TokenizationResult`: model metadata, context-window usage, and
/// per-token IDs/decoded text when token details are available.
pub struct DebugFormatter;
impl OutputFormatter for DebugFormatter {
fn format(&self, result: &TokenizationResult) -> String {
let percentage =
(result.token_count as f64 / result.model_info.context_window as f64) * 100.0;
let mut output = format!(
"Model: {} ({})\nTokens: {}\nContext window: {} tokens ({:.4}% used)\n",
result.model_info.name,
result.model_info.encoding,
result.token_count,
result.model_info.context_window,
percentage
);
if let Some(details) = &result.token_details {
if details.is_empty() {
output.push_str("\nNo tokens to display (empty input)");
} else {
let ids: Vec<String> = details.iter().map(|d| d.id.to_string()).collect();
output.push_str(&format!("\nToken IDs: [{}]", ids.join(", ")));
output.push_str("\nDecoded tokens:");
for (i, detail) in details.iter().enumerate() {
output.push_str(&format!("\n [{}] {} → {:?}", i, detail.id, detail.text));
}
if result.token_count > 10 {
output.push_str(&format!(
"\n\n(Showing first 10 of {} tokens)",
result.token_count
));
}
}
} else {
output.push_str(
"\n\nNote: Token IDs not available for this model (estimation-based tokenization)",
);
}
output
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizers::ModelInfo;

    /// Without token details, the report must carry the model name, the
    /// token count, and the "IDs not available" note.
    #[test]
    fn test_debug_formatter() {
        let model_info = ModelInfo {
            name: "gpt-4".to_string(),
            encoding: "cl100k_base".to_string(),
            context_window: 128000,
            description: "GPT-4".to_string(),
        };
        let result = TokenizationResult {
            token_count: 2,
            model_info,
            token_details: None,
        };
        let rendered = DebugFormatter.format(&result);
        for expected in ["Model: gpt-4", "Tokens: 2", "Token IDs not available"] {
            assert!(rendered.contains(expected), "missing snippet: {}", expected);
        }
    }
}