1use crate::config::SessionFile;
4use crate::models;
5use anyhow::Result;
6use colored::*;
7use indicatif::{ProgressBar, ProgressStyle};
8use rayon::prelude::*;
9use std::collections::HashMap;
10use std::io::BufRead;
11use std::sync::atomic::{AtomicU64, Ordering};
12use std::sync::Mutex;
13
/// Renders `n` in decimal with a comma between every group of three digits,
/// counted from the right (e.g. `1234567` becomes `"1,234,567"`).
pub fn format_count(n: u64) -> String {
    let digits = n.to_string();
    let len = digits.len();
    let mut out = String::with_capacity(len + len / 3);

    for (pos, digit) in digits.chars().enumerate() {
        // A separator goes in front of every digit that starts a full
        // trailing group of three, except at the very beginning.
        let remaining = len - pos;
        if pos != 0 && remaining % 3 == 0 {
            out.push(',');
        }
        out.push(digit);
    }

    out
}
26
/// Formats a byte count with a binary (1024-based) unit suffix.
///
/// Values below 1 KiB are printed as whole bytes (`"512B"`); KB and MB use one
/// decimal place and GB uses two.
pub fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * 1024;
    const GIB: u64 = 1024 * 1024 * 1024;

    let as_float = bytes as f64;
    match bytes {
        b if b < KIB => format!("{}B", b),
        b if b < MIB => format!("{:.1}KB", as_float / 1024.0),
        b if b < GIB => format!("{:.1}MB", as_float / (1024.0 * 1024.0)),
        _ => format!("{:.2}GB", as_float / (1024.0 * 1024.0 * 1024.0)),
    }
}
39
40fn make_progress_bar(len: u64) -> ProgressBar {
41 let pb = ProgressBar::new(len);
42 pb.set_style(
43 ProgressStyle::default_bar()
44 .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} files")
45 .unwrap()
46 .progress_chars("█▓░"),
47 );
48 pb
49}
50
51pub fn print_stats(files: &[SessionFile]) -> Result<()> {
53 let total_files = files.len();
54 let total_size: u64 = files.iter().map(|f| f.size_bytes).sum();
55
56 let mut projects: HashMap<String, (usize, u64)> = HashMap::new();
57 for f in files {
58 let entry = projects.entry(f.project_name.clone()).or_default();
59 entry.0 += 1;
60 entry.1 += f.size_bytes;
61 }
62
63 println!("{}", "smc Stats".bold().cyan());
64 println!("{}", "═".repeat(50));
65 println!(" Total sessions: {}", total_files.to_string().bold());
66 println!(
67 " Total size: {}",
68 format_bytes(total_size).bold()
69 );
70 println!(" Projects: {}", projects.len().to_string().bold());
71 println!();
72
73 println!("{}", "Top Projects by Size".bold());
74 println!("{}", "─".repeat(50));
75
76 let mut sorted: Vec<_> = projects.into_iter().collect();
77 sorted.sort_by(|a, b| b.1 .1.cmp(&a.1 .1));
78
79 for (name, (count, size)) in sorted.iter().take(15) {
80 println!(
81 " {:30} {:>4} sessions {:>8}",
82 name.cyan(),
83 count,
84 format_bytes(*size)
85 );
86 }
87
88 if sorted.len() > 15 {
89 println!(" ... and {} more projects", sorted.len() - 15);
90 }
91
92 Ok(())
93}
94
95pub fn print_projects(files: &[SessionFile]) -> Result<()> {
97 struct ProjectInfo {
98 sessions: usize,
99 total_size: u64,
100 earliest: Option<String>,
101 latest: Option<String>,
102 }
103
104 let mut projects: HashMap<String, ProjectInfo> = HashMap::new();
105
106 for file in files {
107 let entry = projects
108 .entry(file.project_name.clone())
109 .or_insert(ProjectInfo {
110 sessions: 0,
111 total_size: 0,
112 earliest: None,
113 latest: None,
114 });
115 entry.sessions += 1;
116 entry.total_size += file.size_bytes;
117
118 if let Ok(f) = std::fs::File::open(&file.path) {
119 let reader = std::io::BufReader::new(f);
120 for line in reader.lines().take(5) {
121 let Ok(line) = line else { continue };
122 if let Ok(record) = serde_json::from_str::<models::Record>(&line) {
123 if let Some(msg) = record.as_message_record() {
124 if let Some(ts) = &msg.timestamp {
125 let ts_date = ts.get(..10).unwrap_or(ts);
126 if entry.earliest.is_none()
127 || entry.earliest.as_deref().unwrap_or("") > ts_date
128 {
129 entry.earliest = Some(ts_date.to_string());
130 }
131 if entry.latest.is_none()
132 || entry.latest.as_deref().unwrap_or("") < ts_date
133 {
134 entry.latest = Some(ts_date.to_string());
135 }
136 break;
137 }
138 }
139 }
140 }
141 }
142 }
143
144 let mut sorted: Vec<_> = projects.into_iter().collect();
145 sorted.sort_by(|a, b| {
146 b.1.latest
147 .as_deref()
148 .unwrap_or("")
149 .cmp(a.1.latest.as_deref().unwrap_or(""))
150 });
151
152 println!(
153 "{} projects\n",
154 sorted.len().to_string().bold()
155 );
156
157 for (name, info) in &sorted {
158 let date_range = match (&info.earliest, &info.latest) {
159 (Some(e), Some(l)) if e == l => e.clone(),
160 (Some(e), Some(l)) => format!("{} → {}", e, l),
161 (Some(d), None) | (None, Some(d)) => d.clone(),
162 (None, None) => "unknown".to_string(),
163 };
164
165 println!(
166 " {:30} {:>4} sessions {:>8} {}",
167 name.cyan(),
168 info.sessions,
169 format_bytes(info.total_size),
170 date_range.dimmed()
171 );
172 }
173
174 Ok(())
175}
176
177pub fn print_freq_chars(files: &[SessionFile]) -> Result<()> {
179 let counts: Vec<AtomicU64> = (0..26).map(|_| AtomicU64::new(0)).collect();
180 let pb = make_progress_bar(files.len() as u64);
181
182 files.par_iter().for_each(|file| {
183 if let Ok(f) = std::fs::File::open(&file.path) {
184 let reader = std::io::BufReader::with_capacity(256 * 1024, f);
185 for line in reader.lines() {
186 let Ok(line) = line else { continue };
187 let Ok(record) = serde_json::from_str::<models::Record>(&line) else { continue };
188 let Some(msg) = record.as_message_record() else { continue };
189 let text = msg.text_content();
190 for b in text.bytes() {
191 let idx = match b {
192 b'a'..=b'z' => (b - b'a') as usize,
193 b'A'..=b'Z' => (b - b'A') as usize,
194 _ => continue,
195 };
196 counts[idx].fetch_add(1, Ordering::Relaxed);
197 }
198 }
199 }
200 pb.inc(1);
201 });
202
203 pb.finish_and_clear();
204 print_char_table(&counts, "parsed content", files);
205 Ok(())
206}
207
208pub fn print_freq_chars_raw(files: &[SessionFile]) -> Result<()> {
210 let counts: Vec<AtomicU64> = (0..26).map(|_| AtomicU64::new(0)).collect();
211 let pb = make_progress_bar(files.len() as u64);
212
213 files.par_iter().for_each(|file| {
214 if let Ok(data) = std::fs::read(&file.path) {
215 for &b in &data {
216 let idx = match b {
217 b'a'..=b'z' => (b - b'a') as usize,
218 b'A'..=b'Z' => (b - b'A') as usize,
219 _ => continue,
220 };
221 counts[idx].fetch_add(1, Ordering::Relaxed);
222 }
223 }
224 pb.inc(1);
225 });
226
227 pb.finish_and_clear();
228 print_char_table(&counts, "raw JSONL bytes", files);
229 Ok(())
230}
231
232fn print_char_table(counts: &[AtomicU64], label: &str, files: &[SessionFile]) {
233 let totals: Vec<u64> = counts.iter().map(|c| c.load(Ordering::Relaxed)).collect();
234 let max_count = *totals.iter().max().unwrap_or(&1);
235 let grand_total: u64 = totals.iter().sum();
236
237 println!("{}", format!("Character Frequency (a-z, case-insensitive, {})", label).bold().cyan());
238 println!("{}", "═".repeat(60));
239
240 for (i, count) in totals.iter().enumerate() {
241 let letter = (b'a' + i as u8) as char;
242 let bar_len = (*count as f64 / max_count as f64 * 40.0) as usize;
243 let bar = "█".repeat(bar_len);
244 let pct = *count as f64 / grand_total as f64 * 100.0;
245 println!(
246 " {} {:>12} ({:>5.2}%) {}",
247 letter.to_string().bold(),
248 format_count(*count),
249 pct,
250 bar.cyan()
251 );
252 }
253
254 println!("{}", "─".repeat(60));
255 println!(
256 " Total: {} across {} files ({})",
257 format_count(grand_total).bold(),
258 files.len(),
259 format_bytes(files.iter().map(|f| f.size_bytes).sum())
260 );
261}
262
263pub fn print_freq_words(files: &[SessionFile], limit: usize) -> Result<()> {
265 let word_counts: Mutex<HashMap<String, u64>> = Mutex::new(HashMap::new());
266 let pb = make_progress_bar(files.len() as u64);
267
268 files.par_iter().for_each(|file| {
269 let mut local: HashMap<String, u64> = HashMap::new();
270 if let Ok(f) = std::fs::File::open(&file.path) {
271 let reader = std::io::BufReader::with_capacity(256 * 1024, f);
272 for line in reader.lines() {
273 let Ok(line) = line else { continue };
274 let Ok(record) = serde_json::from_str::<models::Record>(&line) else { continue };
275 let Some(msg) = record.as_message_record() else { continue };
276 let text = msg.text_content();
277 for word in text.split(|c: char| !c.is_alphanumeric()) {
278 if word.len() >= 3 {
279 *local.entry(word.to_lowercase()).or_default() += 1;
280 }
281 }
282 }
283 }
284 let mut global = word_counts.lock().unwrap();
285 for (word, count) in local {
286 *global.entry(word).or_default() += count;
287 }
288 pb.inc(1);
289 });
290
291 pb.finish_and_clear();
292
293 let counts = word_counts.into_inner().unwrap();
294 let mut sorted: Vec<_> = counts.into_iter().collect();
295 sorted.sort_by(|a, b| b.1.cmp(&a.1));
296
297 let max_count = sorted.first().map(|(_, c)| *c).unwrap_or(1);
298
299 println!("{}", "Word Frequency (top words, 3+ chars)".bold().cyan());
300 println!("{}", "═".repeat(60));
301
302 for (word, count) in sorted.iter().take(limit) {
303 let bar_len = (*count as f64 / max_count as f64 * 30.0) as usize;
304 let bar = "█".repeat(bar_len);
305 println!(" {:20} {:>12} {}", word.bold(), format_count(*count), bar.cyan());
306 }
307
308 let grand_total: u64 = sorted.iter().map(|(_, c)| c).sum();
309 println!("{}", "─".repeat(60));
310 println!(" {} unique words, {} total occurrences", format_count(sorted.len() as u64), format_count(grand_total));
311
312 Ok(())
313}
314
315pub fn print_freq_tools(files: &[SessionFile], limit: usize) -> Result<()> {
317 let tool_counts: Mutex<HashMap<String, u64>> = Mutex::new(HashMap::new());
318 let pb = make_progress_bar(files.len() as u64);
319
320 files.par_iter().for_each(|file| {
321 let mut local: HashMap<String, u64> = HashMap::new();
322 if let Ok(f) = std::fs::File::open(&file.path) {
323 let reader = std::io::BufReader::with_capacity(256 * 1024, f);
324 for line in reader.lines() {
325 let Ok(line) = line else { continue };
326 let Ok(record) = serde_json::from_str::<models::Record>(&line) else { continue };
327 let Some(msg) = record.as_message_record() else { continue };
328 for tool in msg.tool_calls() {
329 *local.entry(tool.to_string()).or_default() += 1;
330 }
331 }
332 }
333 let mut global = tool_counts.lock().unwrap();
334 for (tool, count) in local {
335 *global.entry(tool).or_default() += count;
336 }
337 pb.inc(1);
338 });
339
340 pb.finish_and_clear();
341
342 let counts = tool_counts.into_inner().unwrap();
343 let mut sorted: Vec<_> = counts.into_iter().collect();
344 sorted.sort_by(|a, b| b.1.cmp(&a.1));
345
346 let max_count = sorted.first().map(|(_, c)| *c).unwrap_or(1);
347 let grand_total: u64 = sorted.iter().map(|(_, c)| c).sum();
348
349 println!("{}", "Tool Usage Frequency".bold().cyan());
350 println!("{}", "═".repeat(60));
351
352 for (tool, count) in sorted.iter().take(limit) {
353 let bar_len = (*count as f64 / max_count as f64 * 30.0) as usize;
354 let bar = "█".repeat(bar_len);
355 let pct = *count as f64 / grand_total as f64 * 100.0;
356 println!(" {:20} {:>10} ({:>5.1}%) {}", tool.bold(), format_count(*count), pct, bar.cyan());
357 }
358
359 println!("{}", "─".repeat(60));
360 println!(" {} total tool calls", format_count(grand_total));
361
362 Ok(())
363}
364
365pub fn print_freq_roles(files: &[SessionFile]) -> Result<()> {
367 let role_counts: Mutex<HashMap<String, u64>> = Mutex::new(HashMap::new());
368 let pb = make_progress_bar(files.len() as u64);
369
370 files.par_iter().for_each(|file| {
371 let mut local: HashMap<String, u64> = HashMap::new();
372 if let Ok(f) = std::fs::File::open(&file.path) {
373 let reader = std::io::BufReader::with_capacity(256 * 1024, f);
374 for line in reader.lines() {
375 let Ok(line) = line else { continue };
376 let Ok(record) = serde_json::from_str::<models::Record>(&line) else { continue };
377 if record.is_message() {
378 *local.entry(record.role_str().to_string()).or_default() += 1;
379 }
380 }
381 }
382 let mut global = role_counts.lock().unwrap();
383 for (role, count) in local {
384 *global.entry(role).or_default() += count;
385 }
386 pb.inc(1);
387 });
388
389 pb.finish_and_clear();
390
391 let counts = role_counts.into_inner().unwrap();
392 let mut sorted: Vec<_> = counts.into_iter().collect();
393 sorted.sort_by(|a, b| b.1.cmp(&a.1));
394
395 let max_count = sorted.first().map(|(_, c)| *c).unwrap_or(1);
396 let grand_total: u64 = sorted.iter().map(|(_, c)| c).sum();
397
398 println!("{}", "Message Role Frequency".bold().cyan());
399 println!("{}", "═".repeat(60));
400
401 for (role, count) in &sorted {
402 let bar_len = (*count as f64 / max_count as f64 * 40.0) as usize;
403 let bar = "█".repeat(bar_len);
404 let pct = *count as f64 / grand_total as f64 * 100.0;
405 println!(" {:20} {:>10} ({:>5.1}%) {}", role.bold(), format_count(*count), pct, bar.cyan());
406 }
407
408 println!("{}", "─".repeat(60));
409 println!(" {} total messages", format_count(grand_total));
410
411 Ok(())
412}