use forge::signal::compactor;
use once_cell::sync::Lazy;
use regex::Regex;
/// Matches whole progress lines so they can be deleted from the output.
///
/// A line matches when it begins with one of the words `Scanning`, `Running`,
/// `Skipping`, `Loading`, `Fetching` or `Initializing`, followed by a single
/// space and at least one further character. The optional trailing newline is
/// part of the match, so replacing a match with `""` removes the entire line.
static PROGRESS_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^(Scanning|Running|Skipping|Loading|Fetching|Initializing) [^\n]+\n?").unwrap()
});
/// Matches code-context lines so they can be deleted from the output.
///
/// After optional leading whitespace the line must start with `>`, `|`,
/// digits followed by ` |` (a numbered gutter), or a run of `^` / `~`
/// underline markers. The rest of the line and its optional trailing newline
/// are included in the match.
///
/// Because `\s` also matches `\n`, the leading `[\s]*` can absorb blank lines
/// that immediately precede a context line.
static CONTEXT_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^[\s]*(>|\||\d+ \||[\^~]+)[^\n]*\n?").unwrap());
/// Compresses raw `semgrep` output into a short, per-rule summary.
///
/// `_subcmd` is accepted for interface compatibility and is not used.
///
/// Processing steps:
/// 1. `raw` is passed through `compactor::normalise`.
/// 2. If the result starts with `{` and contains a `"results"` key it is
///    treated as a JSON report and handed to `compress_semgrep_json`.
/// 3. Lines matched by `PROGRESS_RE` and `CONTEXT_RE` are removed.
/// 4. If the remaining text says `No findings.` or reports a count of exactly
///    zero findings, the first line mentioning findings is returned on its own.
/// 5. Otherwise entries are grouped by rule id (sorted by rule), at most three
///    entries are listed per rule, and a total line is appended.
/// 6. If no rule could be recognised, up to 30 non-blank lines are returned
///    followed by a count of the omitted lines.
///
/// NOTE(review): a free-text line that follows a rule is stored as an entry of
/// that rule, so message lines contribute to the per-rule and total counts.
/// Confirm whether that is intended.
pub fn compress_semgrep(_subcmd: &str, raw: &str) -> String {
    let cleaned = compactor::normalise(raw);
    if cleaned.trim_start().starts_with('{') && cleaned.contains("\"results\"") {
        return compress_semgrep_json(&cleaned);
    }

    let s = PROGRESS_RE.replace_all(&cleaned, "");
    let s = CONTEXT_RE.replace_all(&s, "");

    // A plain `contains("0 findings")` would also fire on "10 findings" or
    // "100 findings" and discard every detail of a real report, so the zero
    // has to stand on its own.
    if s.contains("No findings.") || reports_zero_findings(&s) {
        let summary = s
            .lines()
            .find(|l| l.contains("findings") || l.contains("Findings"))
            .unwrap_or("semgrep: no findings");
        return summary.trim().to_string();
    }

    // Ordered map: iterating it yields rules in sorted order for the report.
    let mut by_rule: std::collections::BTreeMap<String, Vec<String>> =
        std::collections::BTreeMap::new();
    let mut current_rule = String::new();
    let mut current_file = String::new();

    for line in s.lines() {
        let t = line.trim();
        if t.is_empty() {
            continue;
        }
        // Separators and the trailing "Findings: N" footer carry no detail.
        if t.starts_with("--") || t.starts_with("==") || t.starts_with("Findings:") {
            continue;
        }

        // A bare `path:LINE` line: remember it and attach it to the next
        // message line.
        if !t.starts_with('/') {
            if let Some((_, line_no)) = t.split_once(':') {
                if line_no.parse::<u32>().is_ok() {
                    current_file = t.to_string();
                    continue;
                }
            }
        }

        // `path:LINE: rule-id` on a single line: the text after the last
        // colon is the rule, everything before it is the location.
        if t.starts_with('/') || t.starts_with("./") {
            if let Some(rule_pos) = t.rfind(':') {
                let rule = t[rule_pos + 1..].trim();
                if !rule.is_empty() {
                    current_rule = rule.to_string();
                    by_rule
                        .entry(rule.to_string())
                        .or_default()
                        .push(t[..rule_pos].to_string());
                    continue;
                }
            }
        }

        // A single token containing `.` or `/` (and not ending in `/`) is
        // taken to be a rule id printed on its own line.
        if !t.contains(' ') && (t.contains('.') || t.contains('/')) && !t.ends_with('/') {
            current_rule = t.to_string();
            continue;
        }

        // Anything else is recorded under the current rule, prefixed with the
        // pending location when there is one. Lines seen before any rule are
        // dropped.
        if !current_rule.is_empty() {
            let entry = if current_file.is_empty() {
                t.to_string()
            } else {
                format!("{current_file}: {t}")
            };
            by_rule.entry(current_rule.clone()).or_default().push(entry);
            current_file.clear();
        }
    }

    // Nothing recognisable: fall back to a truncated copy of the text.
    if by_rule.is_empty() {
        let kept: Vec<&str> = s.lines().filter(|l| !l.trim().is_empty()).collect();
        return if kept.len() > 30 {
            format!(
                "{}\n… [{} more lines]",
                kept[..30].join("\n"),
                kept.len() - 30
            )
        } else {
            kept.join("\n")
        };
    }

    let total: usize = by_rule.values().map(Vec::len).sum();
    let mut result: Vec<String> = Vec::new();
    for (rule, locs) in &by_rule {
        result.push(format!("{rule} — {} finding(s)", locs.len()));
        result.extend(locs.iter().take(3).map(|loc| format!(" {loc}")));
        if locs.len() > 3 {
            result.push(format!(" … {} more", locs.len() - 3));
        }
    }
    result.push(format!(
        "semgrep: {total} findings across {} rules",
        by_rule.len()
    ));
    result.join("\n")
}

/// Returns `true` when `text` contains `0 findings` where the `0` is not the
/// last digit of a larger number such as `10` or `100`.
fn reports_zero_findings(text: &str) -> bool {
    text.match_indices("0 findings").any(|(idx, _)| {
        text[..idx]
            .chars()
            .next_back()
            .map_or(true, |prev| !prev.is_ascii_digit())
    })
}
/// Summarises a semgrep JSON report on one line.
///
/// The text is not parsed as JSON. Every `"check_id": "..."` value is counted
/// as one finding and every distinct `"path": "..."` value as one file. The
/// result lists the total number of findings, the number of distinct paths,
/// and the sorted rule labels; a rule seen more than once is suffixed with
/// `(×N)`. When no `check_id` is present a fixed "no findings" line is
/// returned.
fn compress_semgrep_json(raw: &str) -> String {
    use once_cell::sync::Lazy;
    use regex::Regex;
    use std::collections::{HashMap, HashSet};

    static CHECK_ID_RE: Lazy<Regex> =
        Lazy::new(|| Regex::new(r#""check_id"\s*:\s*"([^"]+)""#).unwrap());
    static FILE_PATH_RE: Lazy<Regex> =
        Lazy::new(|| Regex::new(r#""path"\s*:\s*"([^"]+)""#).unwrap());

    // Tally rule ids in a single pass over the matches.
    let mut occurrences: HashMap<&str, usize> = HashMap::new();
    let mut total = 0usize;
    for caps in CHECK_ID_RE.captures_iter(raw) {
        if let Some(id) = caps.get(1) {
            *occurrences.entry(id.as_str()).or_insert(0) += 1;
            total += 1;
        }
    }
    if total == 0 {
        return "semgrep [json]: no findings".to_string();
    }

    let files: HashSet<&str> = FILE_PATH_RE
        .captures_iter(raw)
        .filter_map(|caps| caps.get(1))
        .map(|m| m.as_str())
        .collect();

    // Labels are sorted after formatting so the output order is stable.
    let mut labels: Vec<String> = Vec::with_capacity(occurrences.len());
    for (rule, n) in occurrences {
        let label = if n > 1 {
            format!("{rule}(×{n})")
        } else {
            rule.to_string()
        };
        labels.push(label);
    }
    labels.sort();

    let file_count = files.len();
    let joined = labels.join(", ");
    format!("semgrep [json]: {total} findings in {file_count} files — {joined}")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Progress banners must not appear in the compressed output.
    #[test]
    fn strips_progress_lines() {
        let input = "Scanning 100 files...\nRunning 42 rules...\n/app/main.py:10: python.lang.security.audit.eval-detected\n Use of eval() detected\nFindings: 1\n";
        let summary = compress_semgrep("scan", input);
        for banned in ["Scanning", "Running 42"] {
            assert!(!summary.contains(banned), "{summary}");
        }
    }

    /// A clean run yields a "no findings" line without the progress banner.
    #[test]
    fn no_findings_clean() {
        let input = "Scanning 50 files...\nNo findings.\n";
        let summary = compress_semgrep("scan", input);
        let mentions_clean = ["no findings", "No findings"]
            .iter()
            .any(|needle| summary.contains(needle));
        assert!(mentions_clean, "{summary}");
        assert!(!summary.contains("Scanning"), "{summary}");
    }

    /// JSON input is routed to the JSON summariser and reports rule counts.
    #[test]
    fn json_mode_extracts_rule_counts() {
        let input = r#"{"results":[{"check_id":"python.lang.eval","path":"app/main.py"},{"check_id":"python.lang.eval","path":"app/utils.py"},{"check_id":"python.sqli","path":"app/db.py"}],"errors":[]}"#;
        let summary = compress_semgrep("scan", input);
        let names_source = summary.contains("json") || summary.contains("python");
        assert!(names_source, "{summary}");
        let shows_count = summary.contains("×2") || summary.contains("2");
        assert!(shows_count, "{summary}");
    }

    /// Quoted source lines and caret underlines are removed.
    #[test]
    fn strips_code_context_lines() {
        let input = "/app/foo.py:10: python.eval\n> eval(user_input)\n ^^^^^^^^^^^^^^^^\n| context line\nFindings: 1\n";
        let summary = compress_semgrep("scan", input);
        for banned in ["> eval", "^^^^^^^^"] {
            assert!(!summary.contains(banned), "{summary}");
        }
    }
}