Skip to main content

aft/compress/
ruff.rs

1use std::collections::BTreeMap;
2
3use serde_json::Value;
4
5use crate::compress::generic::{dedup_consecutive, middle_truncate, strip_ansi, GenericCompressor};
6use crate::compress::Compressor;
7
/// Maximum number of lines kept by `finish` before the remainder is replaced
/// with a truncation note.
const MAX_LINES: usize = 250;
/// Maximum number of `file:row` locations listed for one rule in the JSON
/// summary; anything beyond this is reported as a `(+N more)` count.
const MAX_LOCATIONS_PER_RULE: usize = 25;
10
11pub struct RuffCompressor;
12
13impl Compressor for RuffCompressor {
14    fn matches(&self, command: &str) -> bool {
15        command_tokens(command).any(|token| token == "ruff")
16    }
17
18    fn compress(&self, _command: &str, output: &str) -> String {
19        compress_ruff(output)
20    }
21
22    fn matches_output(&self, output: &str) -> bool {
23        looks_like_ruff_clean_output(output)
24            || looks_like_ruff_text_output(output)
25            || looks_like_ruff_json_output(output)
26    }
27}
28
/// Returns `true` when some line of `output`, ignoring surrounding
/// whitespace, is exactly ruff's `All checks passed!` banner.
fn looks_like_ruff_clean_output(output: &str) -> bool {
    for line in output.lines() {
        if line.trim() == "All checks passed!" {
            return true;
        }
    }
    false
}
34
35fn looks_like_ruff_text_output(output: &str) -> bool {
36    let mut has_violation = false;
37    let mut has_summary = false;
38    for line in output.lines() {
39        let trimmed = line.trim();
40        has_violation |= is_violation_line(trimmed);
41        has_summary |= is_ruff_error_summary_line(trimmed);
42    }
43    has_violation && has_summary
44}
45
46fn looks_like_ruff_json_output(output: &str) -> bool {
47    let trimmed = output.trim_start();
48    if !trimmed.starts_with('[') {
49        return false;
50    }
51
52    serde_json::from_str::<Value>(trimmed)
53        .ok()
54        .is_some_and(|value| {
55            value.as_array().is_some_and(|diagnostics| {
56                !diagnostics.is_empty()
57                    && diagnostics.iter().any(|diagnostic| {
58                        diagnostic.get("code").is_some()
59                            && diagnostic.get("filename").is_some()
60                            && diagnostic.get("location").is_some()
61                    })
62            })
63        })
64}
65
66fn compress_ruff(output: &str) -> String {
67    let trimmed = output.trim();
68    if trimmed.is_empty() || trimmed == "All checks passed!" {
69        return "ruff: clean".to_string();
70    }
71
72    if trimmed.starts_with('[') && trimmed.ends_with(']') {
73        if let Some(compressed) = compress_json(trimmed) {
74            return finish(&compressed);
75        }
76        return GenericCompressor::compress_output(output);
77    }
78
79    let mut kept = Vec::new();
80    for line in output.lines() {
81        let trimmed = line.trim();
82        if is_violation_line(trimmed) || is_summary_line(trimmed) || trimmed.starts_with("[*]") {
83            kept.push(line.to_string());
84        }
85    }
86
87    if kept.is_empty() {
88        return GenericCompressor::compress_output(output);
89    }
90
91    finish(&kept.join("\n"))
92}
93
/// Splits `command` on whitespace and yields normalized program-name
/// candidates.
///
/// Each token has surrounding single/double quotes removed, launcher words
/// (`npx`, `pnpm`, `yarn`, `bun`, `bunx`, `exec`, `-m`) are dropped, and the
/// remaining tokens are reduced to their final path component (both `/` and
/// `\` count as separators) with any trailing `.cmd` or `.exe` removed, so
/// `.venv\Scripts\ruff.exe` and `node_modules/.bin/ruff.cmd` both yield
/// `ruff`.
fn command_tokens(command: &str) -> impl Iterator<Item = String> + '_ {
    command
        .split_whitespace()
        .map(|token| token.trim_matches(|ch| matches!(ch, '\'' | '"')))
        .filter(|token| {
            !matches!(
                *token,
                "npx" | "pnpm" | "yarn" | "bun" | "bunx" | "exec" | "-m"
            )
        })
        .map(|token| {
            let file_name = token.rsplit(['/', '\\']).next().unwrap_or(token);
            file_name
                .trim_end_matches(".cmd")
                // Windows installs expose the binary as `ruff.exe`; strip the
                // extension so it matches the same way `ruff.cmd` does.
                .trim_end_matches(".exe")
                .to_string()
        })
}
113
114fn compress_json(input: &str) -> Option<String> {
115    let diagnostics: Vec<Value> = serde_json::from_str(input).ok()?;
116    if diagnostics.is_empty() {
117        return Some("ruff: clean".to_string());
118    }
119
120    let mut by_rule: BTreeMap<String, Vec<String>> = BTreeMap::new();
121    for diagnostic in diagnostics {
122        let code = string_field(&diagnostic, "code").unwrap_or("RUF");
123        let filename = string_field(&diagnostic, "filename").unwrap_or("<unknown>");
124        let row = diagnostic
125            .pointer("/location/row")
126            .and_then(Value::as_u64)
127            .unwrap_or(0);
128        by_rule
129            .entry(code.to_string())
130            .or_default()
131            .push(format!("{filename}:{row}"));
132    }
133
134    let total = by_rule.values().map(Vec::len).sum::<usize>();
135    let mut lines = Vec::new();
136    for (rule, locations) in &by_rule {
137        let shown = locations
138            .iter()
139            .take(MAX_LOCATIONS_PER_RULE)
140            .cloned()
141            .collect::<Vec<_>>()
142            .join(", ");
143        if locations.len() > MAX_LOCATIONS_PER_RULE {
144            lines.push(format!(
145                "{rule}: {shown}, ... (+{} more)",
146                locations.len() - MAX_LOCATIONS_PER_RULE
147            ));
148        } else {
149            lines.push(format!("{rule}: {shown}"));
150        }
151    }
152    lines.push(format!(
153        "ruff: {total} violations across {} rules",
154        by_rule.len()
155    ));
156    for (rule, locations) in by_rule {
157        lines.push(format!("{rule}: {}", locations.len()));
158    }
159
160    Some(lines.join("\n"))
161}
162
/// Returns `true` when `trimmed` has the shape `path:row:col: CODE ...`.
///
/// `row` and `col` must parse as unsigned integers and the first
/// whitespace-separated word after the third colon must satisfy
/// `is_rule_code`. The message may itself contain colons.
///
/// A leading Windows drive prefix (`C:\` or `C:/`) is treated as part of the
/// path, so absolute Windows paths are recognised too.
fn is_violation_line(trimmed: &str) -> bool {
    // The colon in a drive prefix is not a field separator; skip past it
    // before splitting. Both skipped bytes are ASCII, so the slice below
    // always falls on a character boundary.
    let drive_len = match trimmed.as_bytes() {
        [letter, b':', b'\\' | b'/', ..] if letter.is_ascii_alphabetic() => 2,
        _ => 0,
    };

    let mut fields = trimmed[drive_len..].splitn(4, ':');
    let (Some(path), Some(row), Some(column), Some(rest)) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return false;
    };

    if path.is_empty() || row.parse::<usize>().is_err() || column.parse::<usize>().is_err() {
        return false;
    }
    rest.split_whitespace().next().is_some_and(is_rule_code)
}

/// Returns `true` when `token` starts with an ASCII uppercase letter and has
/// at least one ASCII digit somewhere after that first character.
fn is_rule_code(token: &str) -> bool {
    let mut chars = token.chars();
    match chars.next() {
        Some(first) if first.is_ascii_uppercase() => chars.any(|ch| ch.is_ascii_digit()),
        _ => false,
    }
}
181
/// Returns `true` when `trimmed` is a ruff error-count summary line.
///
/// Accepted shapes are `Found N error.` / `Found N errors.` and the variant
/// printed after applying fixes, `Found N error(s) (X fixed, Y remaining).`.
/// `N` must be a non-empty run of ASCII digits.
fn is_ruff_error_summary_line(trimmed: &str) -> bool {
    let Some(rest) = trimmed.strip_prefix("Found ") else {
        return false;
    };
    let Some((count, rest)) = rest.split_once(' ') else {
        return false;
    };
    if count.is_empty() || !count.chars().all(|ch| ch.is_ascii_digit()) {
        return false;
    }

    // Try the plural first so that `errors` is not consumed as `error` + `s`.
    let Some(tail) = rest
        .strip_prefix("errors")
        .or_else(|| rest.strip_prefix("error"))
    else {
        return false;
    };
    // `.` ends the plain summary; ` (` opens the fixed/remaining breakdown.
    tail.starts_with('.') || tail.starts_with(" (")
}
193
194fn is_summary_line(trimmed: &str) -> bool {
195    is_ruff_error_summary_line(trimmed)
196}
197
198fn string_field<'a>(value: &'a Value, key: &str) -> Option<&'a str> {
199    value.get(key).and_then(Value::as_str)
200}
201
202fn finish(input: &str) -> String {
203    let stripped = strip_ansi(input);
204    let deduped = dedup_consecutive(&stripped);
205    cap_lines(
206        &middle_truncate(&deduped, 32 * 1024, 16 * 1024, 16 * 1024),
207        MAX_LINES,
208    )
209}
210
/// Limits `input` to at most `max_lines` lines.
///
/// When the input already fits, it is returned with trailing whitespace
/// removed. Otherwise the first `max_lines` lines are kept, joined with `\n`,
/// followed by a `... truncated N lines` note on its own line.
fn cap_lines(input: &str, max_lines: usize) -> String {
    let total = input.lines().count();
    if total <= max_lines {
        return input.trim_end().to_string();
    }

    let mut capped = String::new();
    for (index, line) in input.lines().take(max_lines).enumerate() {
        if index > 0 {
            capped.push('\n');
        }
        capped.push_str(line);
    }
    capped.push_str("\n... truncated ");
    capped.push_str(&(total - max_lines).to_string());
    capped.push_str(" lines");
    capped
}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// Commands that invoke ruff directly or through a launcher must match;
    /// unrelated commands must not.
    #[test]
    fn matches_ruff_invocations() {
        let accepted = [
            "ruff check .",
            "python -m ruff format",
            "python3 -m ruff check",
            "pnpm exec ruff check",
        ];
        for command in accepted {
            assert!(
                RuffCompressor.matches(command),
                "should match: {command:?}"
            );
        }

        let rejected = ["cargo build", "ls"];
        for command in rejected {
            assert!(
                !RuffCompressor.matches(command),
                "should not match: {command:?}"
            );
        }
    }

    /// A clean run collapses to the short `ruff: clean` marker.
    #[test]
    fn compresses_real_clean_text_pass() {
        let raw = "All checks passed!\n";
        let result = compress_ruff(raw);
        assert_eq!(result, "ruff: clean");
        assert!(result.len() < raw.len());
    }

    /// Violation lines and the error summary survive text compression as-is.
    #[test]
    fn preserves_text_errors_verbatim() {
        let violations = [
            "src/a.py:10:5: E501 Line too long (88 > 79 characters)",
            "src/a.py:25:1: F401 `os` imported but unused",
            "src/b.py:3:8: E711 Comparison to None should be 'cond is None'",
        ];
        let mut raw = violations.join("\n");
        raw.push_str("\nFound 3 errors.\n[*] 1 fixable with the `--fix` option.\n");

        let result = compress_ruff(&raw);
        for expected in violations {
            assert!(result.contains(expected));
        }
        assert!(result.contains("Found 3 errors."));
    }

    /// JSON diagnostics are grouped per rule with locations and counts.
    #[test]
    fn groups_json_output_by_rule() {
        let raw = r#"[{"code":"E501","filename":"src/a.py","location":{"row":10,"column":5},"message":"Line too long"},{"code":"E501","filename":"src/b.py","location":{"row":5,"column":1},"message":"Line too long"},{"code":"F401","filename":"src/c.py","location":{"row":1,"column":8},"message":"unused"}]"#;
        let result = compress_ruff(raw);

        let expected_fragments = [
            "E501: src/a.py:10, src/b.py:5",
            "F401: src/c.py:1",
            "ruff: 3 violations across 2 rules",
            "E501: 2",
        ];
        for fragment in expected_fragments {
            assert!(result.contains(fragment));
        }
    }

    /// A large JSON payload is summarised to well under half its size.
    #[test]
    fn compresses_large_json_input() {
        let items: Vec<String> = (0..500)
            .map(|index| {
                format!(
                    r#"{{"code":"E501","filename":"src/file{index}.py","location":{{"row":{},"column":5}},"message":"Line too long"}}"#,
                    index + 1
                )
            })
            .collect();
        let raw = format!("[{}]", items.join(","));

        let result = compress_ruff(&raw);
        assert!(result.contains("ruff: 500 violations across 1 rules"));
        assert!(result.contains("E501: 500"));
        assert!(result.len() < raw.len() / 2);
    }
}
289}