Skip to main content

aft/compress/
ruff.rs

1use std::collections::BTreeMap;
2
3use serde_json::Value;
4
5use crate::compress::caps::{cap_classified_blocks, ClassifiedBlock, DropClass};
6use crate::compress::generic::{dedup_consecutive, strip_ansi, GenericCompressor};
7use crate::compress::{CompressionResult, Compressor};
8
9pub struct RuffCompressor;
10
11impl Compressor for RuffCompressor {
12    fn matches(&self, command: &str) -> bool {
13        command_tokens(command).any(|token| token == "ruff")
14    }
15
16    fn compress(&self, _command: &str, output: &str) -> CompressionResult {
17        compress_ruff(output)
18    }
19
20    fn matches_output(&self, output: &str) -> bool {
21        looks_like_ruff_clean_output(output)
22            || looks_like_ruff_text_output(output)
23            || looks_like_ruff_json_output(output)
24    }
25}
26
/// Returns `true` when some line of `output`, ignoring surrounding
/// whitespace, is exactly ruff's success message.
fn looks_like_ruff_clean_output(output: &str) -> bool {
    const CLEAN_MESSAGE: &str = "All checks passed!";
    output
        .lines()
        .map(str::trim)
        .any(|candidate| candidate == CLEAN_MESSAGE)
}
32
33fn looks_like_ruff_text_output(output: &str) -> bool {
34    let mut has_violation = false;
35    let mut has_summary = false;
36    for line in output.lines() {
37        let trimmed = line.trim();
38        has_violation |= is_violation_line(trimmed);
39        has_summary |= is_ruff_error_summary_line(trimmed);
40    }
41    has_violation && has_summary
42}
43
44fn looks_like_ruff_json_output(output: &str) -> bool {
45    let trimmed = output.trim_start();
46    if !trimmed.starts_with('[') {
47        return false;
48    }
49
50    serde_json::from_str::<Value>(trimmed)
51        .ok()
52        .is_some_and(|value| {
53            value.as_array().is_some_and(|diagnostics| {
54                !diagnostics.is_empty()
55                    && diagnostics.iter().any(|diagnostic| {
56                        diagnostic.get("code").is_some()
57                            && diagnostic.get("filename").is_some()
58                            && diagnostic.get("location").is_some()
59                    })
60            })
61        })
62}
63
64fn compress_ruff(output: &str) -> CompressionResult {
65    let trimmed = output.trim();
66    if trimmed.is_empty() || trimmed == "All checks passed!" {
67        return CompressionResult::new("ruff: clean");
68    }
69
70    if trimmed.starts_with('[') && trimmed.ends_with(']') {
71        if let Some(compressed) = compress_json(trimmed) {
72            return finish(compressed);
73        }
74        return GenericCompressor::compress_output(output).into();
75    }
76
77    let mut blocks = Vec::new();
78    for line in output.lines() {
79        let trimmed = line.trim();
80        if is_violation_line(trimmed) {
81            blocks.push(ClassifiedBlock::new(DropClass::Error, line.to_string()));
82        } else if is_summary_line(trimmed) || trimmed.starts_with("[*]") {
83            blocks.push(ClassifiedBlock::unclassified(line.to_string()));
84        }
85    }
86
87    if blocks.is_empty() {
88        return GenericCompressor::compress_output(output).into();
89    }
90
91    let capped = cap_classified_blocks(blocks);
92    finish(CompressionResult::with_class_drops(
93        capped.text,
94        capped.dropped_by_class,
95    ))
96}
97
/// Splits `command` into normalised tokens suitable for tool-name matching.
///
/// Each whitespace-separated token is processed as follows:
/// 1. surrounding single/double quotes are removed;
/// 2. launcher words (`npx`, `pnpm`, `yarn`, `bun`, `bunx`, `exec`, `-m`)
///    are dropped entirely;
/// 3. only the final path component is kept (both `/` and `\` separate
///    components);
/// 4. trailing Windows launcher suffixes `.cmd` and `.exe` are removed, so
///    `C:\tools\ruff.exe` and `node_modules/.bin/ruff.cmd` both yield `ruff`.
fn command_tokens(command: &str) -> impl Iterator<Item = String> + '_ {
    command
        .split_whitespace()
        .map(|token| token.trim_matches(|ch| matches!(ch, '\'' | '"')))
        .filter(|token| {
            !matches!(
                *token,
                "npx" | "pnpm" | "yarn" | "bun" | "bunx" | "exec" | "-m"
            )
        })
        .map(|token| {
            let name = token.rsplit(['/', '\\']).next().unwrap_or(token);
            name.trim_end_matches(".cmd")
                .trim_end_matches(".exe")
                .to_string()
        })
}
117
118fn compress_json(input: &str) -> Option<CompressionResult> {
119    let diagnostics: Vec<Value> = serde_json::from_str(input).ok()?;
120    if diagnostics.is_empty() {
121        return Some(CompressionResult::new("ruff: clean"));
122    }
123
124    let mut by_rule: BTreeMap<String, Vec<String>> = BTreeMap::new();
125    let mut blocks = Vec::new();
126    for diagnostic in diagnostics {
127        let code = string_field(&diagnostic, "code").unwrap_or("RUF");
128        let filename = string_field(&diagnostic, "filename").unwrap_or("<unknown>");
129        let row = diagnostic
130            .pointer("/location/row")
131            .and_then(Value::as_u64)
132            .unwrap_or(0);
133        let location = format!("{filename}:{row}");
134        by_rule
135            .entry(code.to_string())
136            .or_default()
137            .push(location.clone());
138        blocks.push(ClassifiedBlock::new(
139            DropClass::Error,
140            format!("{code}: {location}"),
141        ));
142    }
143
144    let total = by_rule.values().map(Vec::len).sum::<usize>();
145    blocks.push(ClassifiedBlock::unclassified(format!(
146        "ruff: {total} violations across {} rules",
147        by_rule.len()
148    )));
149    for (rule, locations) in by_rule {
150        blocks.push(ClassifiedBlock::unclassified(format!(
151            "{rule}: {}",
152            locations.len()
153        )));
154    }
155
156    let capped = cap_classified_blocks(blocks);
157    Some(CompressionResult::with_class_drops(
158        capped.text,
159        capped.dropped_by_class,
160    ))
161}
162
/// Returns `true` when `trimmed` has the shape
/// `<path>:<row>:<column>: <RULE> ...` used by ruff's text report.
///
/// `<path>` must be non-empty, `<row>` and `<column>` must parse as
/// `usize`, and the first whitespace-separated word after the third colon
/// must look like a rule code (see [`is_rule_code`]).
///
/// A leading Windows drive prefix (`C:\` or `C:/`) is treated as part of
/// the path, so its colon is not mistaken for the path/row separator.
fn is_violation_line(trimmed: &str) -> bool {
    // Skip over "X:" when it is followed by a path separator. The two
    // skipped bytes are ASCII, so slicing at index 2 is on a char boundary.
    let location = match trimmed.as_bytes() {
        [drive, b':', b'\\' | b'/', ..] if drive.is_ascii_alphabetic() => &trimmed[2..],
        _ => trimmed,
    };

    // At most four fields: the message itself may contain further colons.
    let mut fields = location.splitn(4, ':');
    let (Some(path), Some(row), Some(column), Some(message)) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return false;
    };

    if path.is_empty() || row.parse::<usize>().is_err() || column.parse::<usize>().is_err() {
        return false;
    }
    message.split_whitespace().next().is_some_and(is_rule_code)
}

/// Returns `true` when `token` starts with an ASCII uppercase letter and
/// contains at least one ASCII digit somewhere after that first character.
fn is_rule_code(token: &str) -> bool {
    let mut rest = token.chars();
    match rest.next() {
        Some(first) if first.is_ascii_uppercase() => rest.any(|ch| ch.is_ascii_digit()),
        _ => false,
    }
}
181
/// Returns `true` when `trimmed` is one of ruff's error-count summaries.
///
/// Two shapes are accepted, each with `error` or `errors`:
/// * `Found <N> errors.` — the plain summary;
/// * `Found <N> errors (<X> fixed, <Y> remaining).` — the summary printed
///   when fixes were applied.
///
/// `<N>` must be a non-empty run of ASCII digits. Anything else following
/// the noun (for example `errors in 2 files`) is rejected so that summaries
/// from other tools are not mistaken for ruff's.
fn is_ruff_error_summary_line(trimmed: &str) -> bool {
    let Some(rest) = trimmed.strip_prefix("Found ") else {
        return false;
    };
    let Some((count, rest)) = rest.split_once(' ') else {
        return false;
    };
    if count.is_empty() || !count.chars().all(|ch| ch.is_ascii_digit()) {
        return false;
    }

    let Some(tail) = rest.strip_prefix("error") else {
        return false;
    };
    // Optional plural "s".
    let tail = tail.strip_prefix('s').unwrap_or(tail);

    tail.starts_with('.')
        || tail
            .strip_prefix(" (")
            .is_some_and(|fix_stats| fix_stats.contains(" fixed, "))
}
193
194fn is_summary_line(trimmed: &str) -> bool {
195    is_ruff_error_summary_line(trimmed)
196}
197
198fn string_field<'a>(value: &'a Value, key: &str) -> Option<&'a str> {
199    value.get(key).and_then(Value::as_str)
200}
201
202fn finish(input: CompressionResult) -> CompressionResult {
203    input.map_text(|text| {
204        let stripped = strip_ansi(text);
205        dedup_consecutive(&stripped).trim_end().to_string()
206    })
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Text report with three violations, a summary and a fix hint.
    const TEXT_REPORT: &str = "src/a.py:10:5: E501 Line too long (88 > 79 characters)\nsrc/a.py:25:1: F401 `os` imported but unused\nsrc/b.py:3:8: E711 Comparison to None should be 'cond is None'\nFound 3 errors.\n[*] 1 fixable with the `--fix` option.\n";

    /// JSON report with two `E501` hits and one `F401` hit.
    const JSON_REPORT: &str = r#"[{"code":"E501","filename":"src/a.py","location":{"row":10,"column":5},"message":"Line too long"},{"code":"E501","filename":"src/b.py","location":{"row":5,"column":1},"message":"Line too long"},{"code":"F401","filename":"src/c.py","location":{"row":1,"column":8},"message":"unused"}]"#;

    #[test]
    fn matches_ruff_invocations() {
        let compressor = RuffCompressor;

        for command in [
            "ruff check .",
            "python -m ruff format",
            "python3 -m ruff check",
            "pnpm exec ruff check",
        ] {
            assert!(
                compressor.matches(command),
                "expected a match for {command:?}"
            );
        }

        for command in ["cargo build", "ls"] {
            assert!(
                !compressor.matches(command),
                "unexpected match for {command:?}"
            );
        }
    }

    #[test]
    fn compresses_real_clean_text_pass() {
        let raw = "All checks passed!\n";
        let compressed = compress_ruff(raw).text;
        assert_eq!(compressed, "ruff: clean");
        assert!(compressed.len() < raw.len());
    }

    #[test]
    fn preserves_text_errors_verbatim() {
        let compressed = compress_ruff(TEXT_REPORT).text;

        for expected in [
            "src/a.py:10:5: E501 Line too long (88 > 79 characters)",
            "src/a.py:25:1: F401 `os` imported but unused",
            "src/b.py:3:8: E711 Comparison to None should be 'cond is None'",
            "Found 3 errors.",
        ] {
            assert!(compressed.contains(expected), "missing {expected:?}");
        }
    }

    #[test]
    fn groups_json_output_by_rule() {
        let compressed = compress_ruff(JSON_REPORT).text;

        for expected in [
            "E501: src/a.py:10",
            "E501: src/b.py:5",
            "F401: src/c.py:1",
            "ruff: 3 violations across 2 rules",
            "E501: 2",
        ] {
            assert!(compressed.contains(expected), "missing {expected:?}");
        }
    }

    #[test]
    fn compresses_large_json_input() {
        let items: Vec<String> = (0..500)
            .map(|index| {
                format!(
                    r#"{{"code":"E501","filename":"src/file{index}.py","location":{{"row":{},"column":5}},"message":"Line too long"}}"#,
                    index + 1
                )
            })
            .collect();
        let payload = format!("[{}]", items.join(","));

        let report = compress_ruff(&payload);
        assert_eq!(report.dropped_by_class.get(&DropClass::Error), Some(&480));

        let compressed = &report.text;
        assert!(compressed.contains("ruff: 500 violations across 1 rules"));
        assert!(compressed.contains("E501: 500"));
        assert!(compressed.len() < payload.len() / 2);
    }
}