use forge::signal::compactor;
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;
/// Matches one flake8 diagnostic line of the form `path.py:LINE:COL: CODE message`.
///
/// Capture groups: 1 = file path (must end in `.py`), 2 = line, 3 = column,
/// 4 = code (one uppercase letter followed by digits), 5 = message text up to
/// the end of the line.
///
/// The path class excludes `\n` as well as `:`. A negated class matches
/// newlines by default, so without that exclusion a match could begin on an
/// earlier non-diagnostic line and pull that whole line into the file name.
static DIAG_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?m)^([^:\n]+\.py):(\d+):(\d+):\s+([A-Z]\d+)\s+(.+)$")
        .expect("DIAG_RE is a hard-coded, valid pattern")
});
/// Condenses raw flake8 output into a per-file digest plus a summary line.
///
/// `raw` is first passed through `compactor::normalise`; every match of
/// `DIAG_RE` in the result is then grouped by file path.
///
/// The returned text contains, for each file in ascending path order:
/// * the file path on its own line;
/// * up to 15 diagnostics, ordered by `(line, column)`, each rendered as
///   ` LINE:COL CODE message`;
/// * a ` … N more issues in this file` line when the file has more than 15.
///
/// A final `Total: N violations [CODE: count, ...]` line lists the five most
/// frequent codes. Codes with equal counts are ordered alphabetically so the
/// output is identical from run to run.
///
/// If nothing matches `DIAG_RE`, the normalised input is returned via
/// `compactor::collapse_blanks` instead.
///
/// Line or column numbers that do not fit in a `u32` are reported as `0`.
pub fn compress_flake8(raw: &str) -> String {
    // Maximum number of diagnostics listed per file before eliding the rest.
    const MAX_PER_FILE: usize = 15;
    // Number of most frequent codes shown in the summary line.
    const TOP_CODES: usize = 5;

    let cleaned = compactor::normalise(raw);

    let mut by_file: HashMap<&str, Vec<(u32, u32, &str, &str)>> = HashMap::new();
    let mut code_counts: HashMap<&str, usize> = HashMap::new();
    let mut total = 0usize;

    for caps in DIAG_RE.captures_iter(&cleaned) {
        let file = caps.get(1).map_or("", |m| m.as_str());
        let line: u32 = caps
            .get(2)
            .and_then(|m| m.as_str().parse().ok())
            .unwrap_or(0);
        let col: u32 = caps
            .get(3)
            .and_then(|m| m.as_str().parse().ok())
            .unwrap_or(0);
        let code = caps.get(4).map_or("", |m| m.as_str());
        let msg = caps.get(5).map_or("", |m| m.as_str()).trim();

        by_file
            .entry(file)
            .or_default()
            .push((line, col, code, msg));
        *code_counts.entry(code).or_insert(0) += 1;
        total += 1;
    }

    // Nothing recognisable as a diagnostic: hand back the tidied input.
    if by_file.is_empty() {
        return compactor::collapse_blanks(&cleaned);
    }

    // Take ownership of the groups so each one can be sorted in place
    // instead of cloning it first. Keys are unique, so an unstable sort
    // cannot reorder anything observable.
    let mut files: Vec<(&str, Vec<(u32, u32, &str, &str)>)> = by_file.into_iter().collect();
    files.sort_unstable_by(|a, b| a.0.cmp(b.0));

    let mut out_lines: Vec<String> = Vec::new();
    for (file, mut diags) in files {
        // Stable sort: diagnostics sharing a position keep their input order.
        diags.sort_by_key(|&(line, col, _, _)| (line, col));

        out_lines.push(file.to_string());
        out_lines.extend(
            diags
                .iter()
                .take(MAX_PER_FILE)
                .map(|(line, col, code, msg)| format!(" {line}:{col} {code} {msg}")),
        );
        if diags.len() > MAX_PER_FILE {
            out_lines.push(format!(
                " … {} more issues in this file",
                diags.len() - MAX_PER_FILE
            ));
        }
    }

    // Highest count first; break ties on the code itself because HashMap
    // iteration order is unspecified and would otherwise leak into the output.
    let mut ranked: Vec<(&str, usize)> = code_counts.into_iter().collect();
    ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
    let top: Vec<String> = ranked
        .iter()
        .take(TOP_CODES)
        .map(|(code, count)| format!("{code}: {count}"))
        .collect();

    out_lines.push(format!(
        "\nTotal: {} violations [{}]",
        total,
        top.join(", ")
    ));
    out_lines.join("\n")
}
/// Unit tests for `compress_flake8`, driven by literal flake8-style input.
#[cfg(test)]
mod tests {
    use super::*;

    /// Files appear as headers and each file's diagnostics are ordered by line.
    #[test]
    fn groups_by_file_and_sorts_by_line() {
        let raw = "src/main.py:10:5: E302 expected 2 blank lines, found 1\nsrc/main.py:3:1: F401 'os' imported but unused\nsrc/utils.py:7:1: W291 trailing whitespace\n";
        let out = compress_flake8(raw);
        assert!(out.contains("W291"), "{out}");

        // `expect` rather than `unwrap_or(0)`: a missing marker must fail the
        // test instead of silently satisfying the ordering checks below.
        let main_start = out.find("src/main.py").expect("main.py header missing");
        let utils_start = out.find("src/utils.py").expect("utils.py header missing");
        let f401_pos = out.find("F401").expect("F401 missing");
        let e302_pos = out.find("E302").expect("E302 missing");

        assert!(main_start < f401_pos, "{out}");
        // Line 3 (F401) must be listed before line 10 (E302).
        assert!(f401_pos < e302_pos, "{out}");
        // The main.py group is complete before the utils.py group starts.
        assert!(e302_pos < utils_start, "{out}");
    }

    /// The trailing summary reports the total and per-code counts.
    #[test]
    fn shows_code_frequency_summary() {
        let raw = "a.py:1:1: E501 line too long\na.py:2:1: E501 line too long\na.py:3:1: F401 imported but unused\n";
        let out = compress_flake8(raw);
        assert!(out.contains("E501: 2"), "{out}");
        assert!(out.contains("F401: 1"), "{out}");
        assert!(out.contains("Total: 3 violations"), "{out}");
    }

    /// Only the first 15 diagnostics of a file are listed; the rest are counted.
    #[test]
    fn caps_per_file_at_15() {
        let lines: Vec<String> = (1..=20)
            .map(|i| format!("app.py:{i}:1: E501 line too long (100 > 79 characters)"))
            .collect();
        let out = compress_flake8(&lines.join("\n"));
        assert!(out.contains("5 more issues in this file"), "{out}");
        assert!(out.contains("15:1 E501"), "{out}");
        assert!(!out.contains("16:1 E501"), "{out}");
    }

    /// Input without any diagnostics is passed through rather than summarised.
    #[test]
    fn passthrough_on_no_diagnostics() {
        let raw = "All checks passed.\n";
        let out = compress_flake8(raw);
        assert!(out.contains("All checks passed") || out.is_empty(), "{out}");
    }
}