bctx-weave 0.1.11

bctx-weave — FilterMesh lens pipeline, CLI interception, domain compression
Documentation
use forge::signal::compactor;
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashMap;

// flake8 default format, one diagnostic per line:
//   "path/to/file.py:line:col: CODE message"
// where CODE is matched as one uppercase letter followed by digits (E302, W291, F401, ...).
// The path class excludes '\n' as well as ':' so that a match can never begin on an
// earlier colon-free line (blank line, banner text, source excerpt) and swallow that
// line into the captured file name.
static DIAG_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?m)^([^:\n]+\.py):(\d+):(\d+):\s+([A-Z]\d+)\s+(.+)$").unwrap());

// ── compress flake8 output ────────────────────────────────────────────────────

/// Compresses raw flake8 output into a grouped, size-capped report.
///
/// The input is first passed through `compactor::normalise`. Every match of
/// `DIAG_RE` in the result is grouped under its file path. The report then contains:
///
/// * one header line per file, files in ascending path order;
/// * that file's diagnostics sorted by `(line, column)`, rendered as
///   `  line:col  code  message`, at most 15 per file, with any remainder
///   summarised as `N more issues in this file`;
/// * a trailing `Total:` line with the overall count and the five most frequent
///   codes. Codes with equal counts are ordered alphabetically so the summary is
///   identical across runs for identical input.
///
/// If no diagnostic matches at all, the normalised text is returned via
/// `compactor::collapse_blanks` instead of a report.
pub fn compress_flake8(raw: &str) -> String {
    // Maximum number of diagnostics listed per file before truncating.
    const MAX_PER_FILE: usize = 15;
    // Number of most-frequent codes shown in the summary line.
    const TOP_CODES: usize = 5;

    let cleaned = compactor::normalise(raw);

    // Collect all diagnostics, borrowing every string slice from `cleaned`.
    let mut by_file: HashMap<&str, Vec<(u32, u32, &str, &str)>> = HashMap::new();
    let mut code_counts: HashMap<&str, usize> = HashMap::new();
    let mut total = 0usize;

    for caps in DIAG_RE.captures_iter(&cleaned) {
        let file = caps.get(1).map(|m| m.as_str()).unwrap_or("");
        // The regex guarantees digits; parsing can only fail on overflow, in
        // which case the position falls back to 0.
        let line: u32 = caps
            .get(2)
            .and_then(|m| m.as_str().parse().ok())
            .unwrap_or(0);
        let col: u32 = caps
            .get(3)
            .and_then(|m| m.as_str().parse().ok())
            .unwrap_or(0);
        let code = caps.get(4).map(|m| m.as_str()).unwrap_or("");
        let msg = caps.get(5).map(|m| m.as_str()).unwrap_or("").trim();
        by_file
            .entry(file)
            .or_default()
            .push((line, col, code, msg));
        *code_counts.entry(code).or_insert(0) += 1;
        total += 1;
    }

    if by_file.is_empty() {
        // No matches — passthrough (might be a syntax error or empty)
        return compactor::collapse_blanks(&cleaned);
    }

    let mut out_lines: Vec<String> = Vec::new();

    // Per-file output. The map is consumed so each file's diagnostics can be
    // sorted in place rather than cloned first.
    let mut files: Vec<(&str, Vec<(u32, u32, &str, &str)>)> = by_file.into_iter().collect();
    files.sort_unstable_by(|a, b| a.0.cmp(b.0));
    for (file, mut diags) in files {
        // Stable sort: diagnostics at the same position keep their input order.
        diags.sort_by_key(|&(line, col, _, _)| (line, col));
        out_lines.push(file.to_string());
        out_lines.extend(
            diags
                .iter()
                .take(MAX_PER_FILE)
                .map(|(line, col, code, msg)| format!("  {line}:{col}  {code}  {msg}")),
        );
        if diags.len() > MAX_PER_FILE {
            out_lines.push(format!(
                "{} more issues in this file",
                diags.len() - MAX_PER_FILE
            ));
        }
    }

    // Code frequency summary: highest count first, ties broken by code so the
    // result does not depend on HashMap iteration order.
    let mut code_vec: Vec<(&str, usize)> = code_counts.into_iter().collect();
    code_vec.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
    let top: Vec<String> = code_vec
        .iter()
        .take(TOP_CODES)
        .map(|(code, count)| format!("{code}: {count}"))
        .collect();

    out_lines.push(format!(
        "\nTotal: {} violations  [{}]",
        total,
        top.join(", ")
    ));
    out_lines.join("\n")
}

// Unit tests for `compress_flake8`, driven by small hand-written flake8 outputs.
#[cfg(test)]
mod tests {
    use super::*;

    /// Diagnostics are grouped under their file and ordered by line within it.
    #[test]
    fn groups_by_file_and_sorts_by_line() {
        let raw = "src/main.py:10:5: E302 expected 2 blank lines, found 1\nsrc/main.py:3:1: F401 'os' imported but unused\nsrc/utils.py:7:1: W291 trailing whitespace\n";
        let out = compress_flake8(raw);
        assert!(out.contains("src/main.py"), "{out}");
        assert!(out.contains("src/utils.py"), "{out}");
        assert!(out.contains("F401"), "{out}");
        assert!(out.contains("W291"), "{out}");
        // The input lists line 10 before line 3; the output must reverse that.
        // `expect` (rather than a 0 fallback) keeps a missing code from being
        // mistaken for "found at the very start".
        let f401_pos = out.find("F401").expect("F401 missing from output");
        let e302_pos = out.find("E302").expect("E302 missing from output");
        assert!(f401_pos < e302_pos, "{out}");
        // Files are emitted in ascending path order.
        let main_pos = out.find("src/main.py").expect("src/main.py missing from output");
        let utils_pos = out.find("src/utils.py").expect("src/utils.py missing from output");
        assert!(main_pos < utils_pos, "{out}");
    }

    /// The trailing summary reports per-code counts and a total.
    #[test]
    fn shows_code_frequency_summary() {
        let raw = "a.py:1:1: E501 line too long\na.py:2:1: E501 line too long\na.py:3:1: F401 imported but unused\n";
        let out = compress_flake8(raw);
        assert!(out.contains("E501: 2"), "{out}");
        assert!(out.contains("Total:"), "{out}");
    }

    /// More than 15 diagnostics in one file are truncated with a remainder note.
    #[test]
    fn caps_per_file_at_15() {
        let lines: Vec<String> = (1..=20)
            .map(|i| format!("app.py:{i}:1: E501 line too long (100 > 79 characters)"))
            .collect();
        let out = compress_flake8(&lines.join("\n"));
        assert!(out.contains("more issues"), "{out}");
    }

    /// Input without any diagnostic line is passed through rather than reported on.
    #[test]
    fn passthrough_on_no_diagnostics() {
        let raw = "All checks passed.\n";
        let out = compress_flake8(raw);
        assert!(out.contains("All checks passed") || out.is_empty(), "{out}");
    }
}