use colored::*;
use crate::content::FileEntry;
/// Aggregate counters describing one run, accumulated via [`Stats::add_file`]
/// and reported on stderr by [`Stats::print_summary`].
///
/// `Default` yields all-zero counters and no token estimate.
#[derive(Debug, Default, Clone)]
pub struct Stats {
/// Number of files recorded; incremented once per `add_file` call.
pub file_count: usize,
/// Sum of `original_lines` over every recorded file entry.
pub total_lines: usize,
/// Sum of `original_bytes` over every recorded file entry.
pub total_bytes: usize,
/// Number of recorded entries whose `truncated` flag was set.
pub truncated_count: usize,
/// Count of skipped files.
/// NOTE(review): nothing in this block's `impl` writes or prints this field;
/// presumably callers maintain it directly — confirm.
pub skipped_count: usize,
/// Estimated token count, populated by `estimate_tokens`; `None` until then.
pub token_estimate: Option<usize>,
/// Elapsed time in milliseconds; the summary omits it when it is zero.
pub duration_ms: u64,
}
impl Stats {
pub fn new() -> Self {
Self::default()
}
pub fn add_file(&mut self, entry: &FileEntry) {
self.file_count += 1;
self.total_lines += entry.original_lines;
self.total_bytes += entry.original_bytes;
if entry.truncated {
self.truncated_count += 1;
}
}
#[cfg(feature = "tokens")]
pub fn estimate_tokens(&mut self, content: &str, model: &str) {
use tiktoken_rs::get_bpe_from_model;
let model_name = match model.to_lowercase().as_str() {
"gpt-4" | "gpt4" | "gpt-4o" | "gpt-4-turbo" => "gpt-4",
"gpt-3.5" | "gpt-3.5-turbo" | "gpt35" => "gpt-3.5-turbo",
"claude" | "claude-3" | "claude-3.5" => "gpt-4", _ => "gpt-4",
};
if let Ok(bpe) = get_bpe_from_model(model_name) {
self.token_estimate = Some(bpe.encode_ordinary(content).len());
} else {
self.token_estimate = Some(content.len() / 4);
}
}
#[cfg(not(feature = "tokens"))]
pub fn estimate_tokens(&mut self, content: &str, _model: &str) {
self.token_estimate = Some(content.len() / 4);
}
pub fn print_summary(&self) {
eprintln!();
eprintln!("{}", "─".repeat(40).dimmed());
eprintln!("{}", "Statistics".bold());
eprintln!("{}", "─".repeat(40).dimmed());
eprintln!(
" {} {}",
"Files:".dimmed(),
self.file_count.to_string().cyan()
);
eprintln!(
" {} {}",
"Lines:".dimmed(),
format_number(self.total_lines).cyan()
);
eprintln!(
" {} {}",
"Size:".dimmed(),
format_bytes(self.total_bytes).cyan()
);
if self.truncated_count > 0 {
eprintln!(
" {} {}",
"Truncated:".dimmed(),
self.truncated_count.to_string().yellow()
);
}
if let Some(tokens) = self.token_estimate {
eprintln!(
" {} ~{}",
"Tokens:".dimmed(),
format_number(tokens).cyan()
);
}
if self.duration_ms > 0 {
eprintln!(
" {} {}ms",
"Duration:".dimmed(),
self.duration_ms.to_string().dimmed()
);
}
eprintln!("{}", "─".repeat(40).dimmed());
}
}
/// Renders `n` in decimal with a comma between each group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
fn format_number(n: usize) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len / 3);

    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes before any digit (except the first) that has a
        // multiple of three digits remaining from its position to the end.
        let remaining = len - idx;
        if idx != 0 && remaining % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
/// Renders a byte count using the largest binary unit (1024-based) that does
/// not exceed it.
///
/// Values below 1024 are shown as a whole number of `bytes`; larger values are
/// shown in `KB`, `MB` or `GB` with two decimal places.
fn format_bytes(bytes: usize) -> String {
    const KB: usize = 1 << 10;
    const MB: usize = 1 << 20;
    const GB: usize = 1 << 30;

    if bytes < KB {
        return format!("{} bytes", bytes);
    }

    let size = bytes as f64;
    if bytes < MB {
        return format!("{:.2} KB", size / KB as f64);
    }
    if bytes < GB {
        return format!("{:.2} MB", size / MB as f64);
    }
    format!("{:.2} GB", size / GB as f64)
}