use spectre_parse::Document;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
/// Returns the longest prefix of `msg` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// Slicing a `str` at an arbitrary byte offset panics when the offset falls
/// inside a multi-byte character, so the cut point is moved backwards until it
/// is valid.
fn truncate_on_char_boundary(msg: &str, max_bytes: usize) -> &str {
    if msg.len() <= max_bytes {
        return msg;
    }
    let mut end = max_bytes;
    // Offset 0 is always a char boundary, so this loop terminates.
    while !msg.is_char_boundary(end) {
        end -= 1;
    }
    &msg[..end]
}

/// Resolves the command-line target into the list of files to examine.
///
/// If `target` is a directory, returns its entries whose extension is exactly
/// `pdf`, sorted by path; directory entries that fail to be read are skipped.
/// Otherwise `target` itself is returned as the only element.
///
/// # Errors
///
/// Returns the I/O error from `fs::read_dir` if the directory cannot be listed.
fn collect_pdf_paths(target: &Path) -> std::io::Result<Vec<PathBuf>> {
    if !target.is_dir() {
        return Ok(vec![target.to_path_buf()]);
    }
    let mut paths: Vec<PathBuf> = fs::read_dir(target)?
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.path())
        .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("pdf"))
        .collect();
    paths.sort();
    Ok(paths)
}

/// Opens every PDF under the path given as the first command-line argument
/// with `Document::open`, groups the failures by (truncated) error message and
/// prints a summary: one `[count] message` line per group, most frequent
/// first, followed by up to three example file names.
///
/// Files that cannot be read are skipped with a warning on stderr. A missing
/// argument or an unlistable directory terminates the process with a non-zero
/// exit status.
fn main() {
    // Error messages are grouped by their first `MAX_KEY_BYTES` bytes.
    const MAX_KEY_BYTES: usize = 70;
    // Number of example file names printed per group.
    const MAX_EXAMPLES: usize = 3;

    let arg = match std::env::args().nth(1) {
        Some(a) => a,
        None => {
            eprintln!("usage: diagnose_errors <dir>");
            std::process::exit(2);
        }
    };
    let dir = Path::new(&arg);

    let pdfs = match collect_pdf_paths(dir) {
        Ok(paths) => paths,
        Err(err) => {
            eprintln!("cannot read directory {}: {err}", dir.display());
            std::process::exit(1);
        }
    };

    let mut buckets: HashMap<String, Vec<String>> = HashMap::new();
    for p in &pdfs {
        let bytes = match fs::read(p) {
            Ok(b) => b,
            Err(err) => {
                // Keep going, but do not hide the fact that a file was skipped.
                eprintln!("skipping {}: {err}", p.display());
                continue;
            }
        };
        if let Err(e) = Document::open(&bytes) {
            let message = e.to_string();
            let bucket = truncate_on_char_boundary(&message, MAX_KEY_BYTES).to_string();
            // Fall back to the full path when there is no final component.
            let name = p
                .file_name()
                .map_or_else(|| p.to_string_lossy(), |n| n.to_string_lossy())
                .into_owned();
            buckets.entry(bucket).or_default().push(name);
        }
    }

    let mut summary: Vec<_> = buckets.into_iter().collect();
    // Largest group first; ties are broken by message so the output does not
    // depend on HashMap iteration order.
    summary.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(&b.0)));

    for (msg, files) in summary {
        println!("[{}] {}", files.len(), msg);
        for f in files.iter().take(MAX_EXAMPLES) {
            println!(" {f}");
        }
        if files.len() > MAX_EXAMPLES {
            println!(" ...");
        }
    }
}