1use std::collections::BTreeMap;
2
3use serde_json::Value;
4
5use crate::compress::caps::{cap_classified_blocks, ClassifiedBlock, DropClass};
6use crate::compress::generic::{dedup_consecutive, strip_ansi, GenericCompressor};
7use crate::compress::{CompressionResult, Compressor};
8
/// Compressor for the output of `ruff` invocations.
///
/// The type carries no state; its behaviour is provided by the `Compressor`
/// implementation in this file.
pub struct RuffCompressor;
10
11impl Compressor for RuffCompressor {
12 fn matches(&self, command: &str) -> bool {
13 command_tokens(command).any(|token| token == "ruff")
14 }
15
16 fn compress(&self, _command: &str, output: &str) -> CompressionResult {
17 compress_ruff(output)
18 }
19
20 fn matches_output(&self, output: &str) -> bool {
21 looks_like_ruff_clean_output(output)
22 || looks_like_ruff_text_output(output)
23 || looks_like_ruff_json_output(output)
24 }
25}
26
/// Returns `true` when some line of `output`, ignoring surrounding whitespace,
/// is exactly `All checks passed!`.
fn looks_like_ruff_clean_output(output: &str) -> bool {
    const CLEAN_BANNER: &str = "All checks passed!";
    output.lines().map(str::trim).any(|line| line == CLEAN_BANNER)
}
32
33fn looks_like_ruff_text_output(output: &str) -> bool {
34 let mut has_violation = false;
35 let mut has_summary = false;
36 for line in output.lines() {
37 let trimmed = line.trim();
38 has_violation |= is_violation_line(trimmed);
39 has_summary |= is_ruff_error_summary_line(trimmed);
40 }
41 has_violation && has_summary
42}
43
44fn looks_like_ruff_json_output(output: &str) -> bool {
45 let trimmed = output.trim_start();
46 if !trimmed.starts_with('[') {
47 return false;
48 }
49
50 serde_json::from_str::<Value>(trimmed)
51 .ok()
52 .is_some_and(|value| {
53 value.as_array().is_some_and(|diagnostics| {
54 !diagnostics.is_empty()
55 && diagnostics.iter().any(|diagnostic| {
56 diagnostic.get("code").is_some()
57 && diagnostic.get("filename").is_some()
58 && diagnostic.get("location").is_some()
59 })
60 })
61 })
62}
63
64fn compress_ruff(output: &str) -> CompressionResult {
65 let trimmed = output.trim();
66 if trimmed.is_empty() || trimmed == "All checks passed!" {
67 return CompressionResult::new("ruff: clean");
68 }
69
70 if trimmed.starts_with('[') && trimmed.ends_with(']') {
71 if let Some(compressed) = compress_json(trimmed) {
72 return finish(compressed);
73 }
74 return GenericCompressor::compress_output(output).into();
75 }
76
77 let mut blocks = Vec::new();
78 for line in output.lines() {
79 let trimmed = line.trim();
80 if is_violation_line(trimmed) {
81 blocks.push(ClassifiedBlock::new(DropClass::Error, line.to_string()));
82 } else if is_summary_line(trimmed) || trimmed.starts_with("[*]") {
83 blocks.push(ClassifiedBlock::unclassified(line.to_string()));
84 }
85 }
86
87 if blocks.is_empty() {
88 return GenericCompressor::compress_output(output).into();
89 }
90
91 let capped = cap_classified_blocks(blocks);
92 finish(CompressionResult::with_class_drops(
93 capped.text,
94 capped.dropped_by_class,
95 ))
96}
97
/// Splits `command` on whitespace and normalises each token to a bare program name.
///
/// For every token this:
/// 1. strips surrounding single or double quotes,
/// 2. drops launcher words (`npx`, `pnpm`, `yarn`, `bun`, `bunx`, `exec`, `-m`),
/// 3. keeps only the last path component (both `/` and `\` separate components),
/// 4. removes trailing `.cmd` and `.exe` suffixes, so Windows invocations such as
///    `ruff.exe` or `ruff.cmd` normalise to `ruff`.
fn command_tokens(command: &str) -> impl Iterator<Item = String> + '_ {
    command
        .split_whitespace()
        .map(|token| token.trim_matches(|ch| matches!(ch, '\'' | '"')))
        .filter(|token| {
            !matches!(
                *token,
                "npx" | "pnpm" | "yarn" | "bun" | "bunx" | "exec" | "-m"
            )
        })
        .map(|token| {
            let program = token.rsplit(['/', '\\']).next().unwrap_or(token);
            program
                .trim_end_matches(".cmd")
                .trim_end_matches(".exe")
                .to_string()
        })
}
117
118fn compress_json(input: &str) -> Option<CompressionResult> {
119 let diagnostics: Vec<Value> = serde_json::from_str(input).ok()?;
120 if diagnostics.is_empty() {
121 return Some(CompressionResult::new("ruff: clean"));
122 }
123
124 let mut by_rule: BTreeMap<String, Vec<String>> = BTreeMap::new();
125 let mut blocks = Vec::new();
126 for diagnostic in diagnostics {
127 let code = string_field(&diagnostic, "code").unwrap_or("RUF");
128 let filename = string_field(&diagnostic, "filename").unwrap_or("<unknown>");
129 let row = diagnostic
130 .pointer("/location/row")
131 .and_then(Value::as_u64)
132 .unwrap_or(0);
133 let location = format!("{filename}:{row}");
134 by_rule
135 .entry(code.to_string())
136 .or_default()
137 .push(location.clone());
138 blocks.push(ClassifiedBlock::new(
139 DropClass::Error,
140 format!("{code}: {location}"),
141 ));
142 }
143
144 let total = by_rule.values().map(Vec::len).sum::<usize>();
145 blocks.push(ClassifiedBlock::unclassified(format!(
146 "ruff: {total} violations across {} rules",
147 by_rule.len()
148 )));
149 for (rule, locations) in by_rule {
150 blocks.push(ClassifiedBlock::unclassified(format!(
151 "{rule}: {}",
152 locations.len()
153 )));
154 }
155
156 let capped = cap_classified_blocks(blocks);
157 Some(CompressionResult::with_class_drops(
158 capped.text,
159 capped.dropped_by_class,
160 ))
161}
162
/// Returns `true` when `trimmed` has the shape `<path>:<row>:<column>: <CODE> …`.
///
/// `<path>` must be non-empty, `<row>` and `<column>` must parse as `usize`, and
/// the first whitespace-separated word after the third separator must satisfy
/// [`is_rule_code`]. A leading Windows drive prefix (`C:\` or `C:/`) is treated
/// as part of the path, so absolute Windows paths are recognised too.
fn is_violation_line(trimmed: &str) -> bool {
    // The colon of a drive prefix would otherwise be taken for the path/row
    // separator and shift every field by one.
    let has_drive_prefix = matches!(
        trimmed.as_bytes(),
        [drive, b':', b'\\' | b'/', ..] if drive.is_ascii_alphabetic()
    );
    // The two skipped bytes are ASCII, so the slice starts on a char boundary.
    let body = if has_drive_prefix {
        &trimmed[2..]
    } else {
        trimmed
    };

    let mut fields = body.splitn(4, ':');
    let (Some(path), Some(row), Some(column), Some(message)) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return false;
    };

    !path.is_empty()
        && row.parse::<usize>().is_ok()
        && column.parse::<usize>().is_ok()
        && message.split_whitespace().next().is_some_and(is_rule_code)
}

/// Returns `true` when `token` starts with an ASCII uppercase letter and has at
/// least one ASCII digit somewhere after that first character.
fn is_rule_code(token: &str) -> bool {
    let mut chars = token.chars();
    let starts_uppercase = chars.next().is_some_and(|ch| ch.is_ascii_uppercase());
    // `chars` now points past the first character, so only the rest is scanned.
    starts_uppercase && chars.any(|ch| ch.is_ascii_digit())
}
181
/// Returns `true` when `trimmed` is a ruff error-count summary line.
///
/// Two shapes are accepted, both starting with `Found <digits> error` or
/// `Found <digits> errors`:
///
/// * the plain form, where the noun is followed by `.` (`Found 3 errors.`);
/// * the form printed when fixes were applied, where the noun is followed by
///   ` (` (`Found 3 errors (2 fixed, 1 remaining).`).
fn is_ruff_error_summary_line(trimmed: &str) -> bool {
    let Some(rest) = trimmed.strip_prefix("Found ") else {
        return false;
    };
    let Some((count, rest)) = rest.split_once(' ') else {
        return false;
    };
    if count.is_empty() || !count.chars().all(|ch| ch.is_ascii_digit()) {
        return false;
    }

    // Try the plural first so that the `s` is not left at the start of `tail`.
    let Some(tail) = rest
        .strip_prefix("errors")
        .or_else(|| rest.strip_prefix("error"))
    else {
        return false;
    };
    tail.starts_with('.') || tail.starts_with(" (")
}
193
/// Returns `true` when `trimmed` is a summary line that should be kept in the
/// compressed text output.
///
/// Currently this delegates entirely to `is_ruff_error_summary_line`.
fn is_summary_line(trimmed: &str) -> bool {
    is_ruff_error_summary_line(trimmed)
}
197
198fn string_field<'a>(value: &'a Value, key: &str) -> Option<&'a str> {
199 value.get(key).and_then(Value::as_str)
200}
201
202fn finish(input: CompressionResult) -> CompressionResult {
203 input.map_text(|text| {
204 let stripped = strip_ansi(text);
205 dedup_consecutive(&stripped).trim_end().to_string()
206 })
207}
208
// Unit tests for command matching and for the text and JSON compression paths.
#[cfg(test)]
mod tests {
    use super::*;

    // `matches` accepts direct, `python -m` and `pnpm exec` invocations of ruff
    // and rejects unrelated commands.
    #[test]
    fn matches_ruff_invocations() {
        let compressor = RuffCompressor;
        assert!(compressor.matches("ruff check ."));
        assert!(compressor.matches("python -m ruff format"));
        assert!(compressor.matches("python3 -m ruff check"));
        assert!(compressor.matches("pnpm exec ruff check"));
        assert!(!compressor.matches("cargo build"));
        assert!(!compressor.matches("ls"));
    }

    // A clean run collapses to the short `ruff: clean` marker.
    #[test]
    fn compresses_real_clean_text_pass() {
        let output = "All checks passed!\n";
        let compressed = compress_ruff(output).text;
        assert_eq!(compressed, "ruff: clean");
        assert!(compressed.len() < output.len());
    }

    // Violation lines and the `Found N errors.` summary survive unchanged in text mode.
    #[test]
    fn preserves_text_errors_verbatim() {
        let output = "src/a.py:10:5: E501 Line too long (88 > 79 characters)\nsrc/a.py:25:1: F401 `os` imported but unused\nsrc/b.py:3:8: E711 Comparison to None should be 'cond is None'\nFound 3 errors.\n[*] 1 fixable with the `--fix` option.\n";
        let compressed = compress_ruff(output).text;
        assert!(compressed.contains("src/a.py:10:5: E501 Line too long (88 > 79 characters)"));
        assert!(compressed.contains("src/a.py:25:1: F401 `os` imported but unused"));
        assert!(
            compressed.contains("src/b.py:3:8: E711 Comparison to None should be 'cond is None'")
        );
        assert!(compressed.contains("Found 3 errors."));
    }

    // JSON diagnostics produce one `CODE: file:row` line each, plus overall and
    // per-rule counts.
    #[test]
    fn groups_json_output_by_rule() {
        let output = r#"[{"code":"E501","filename":"src/a.py","location":{"row":10,"column":5},"message":"Line too long"},{"code":"E501","filename":"src/b.py","location":{"row":5,"column":1},"message":"Line too long"},{"code":"F401","filename":"src/c.py","location":{"row":1,"column":8},"message":"unused"}]"#;
        let compressed = compress_ruff(output).text;
        assert!(compressed.contains("E501: src/a.py:10"));
        assert!(compressed.contains("E501: src/b.py:5"));
        assert!(compressed.contains("F401: src/c.py:1"));
        assert!(compressed.contains("ruff: 3 violations across 2 rules"));
        assert!(compressed.contains("E501: 2"));
    }

    // 500 diagnostics of one rule: the summary lines must still be present, 480 of
    // the Error blocks are expected to be reported as dropped, and the result must
    // be under half the input size.
    #[test]
    fn compresses_large_json_input() {
        let mut items = Vec::new();
        for index in 0..500 {
            items.push(format!(
                r#"{{"code":"E501","filename":"src/file{index}.py","location":{{"row":{},"column":5}},"message":"Line too long"}}"#,
                index + 1
            ));
        }
        let output = format!("[{}]", items.join(","));
        let result = compress_ruff(&output);
        let compressed = result.text;
        assert!(compressed.contains("ruff: 500 violations across 1 rules"));
        assert!(compressed.contains("E501: 500"));
        assert_eq!(result.dropped_by_class.get(&DropClass::Error), Some(&480));
        assert!(compressed.len() < output.len() / 2);
    }
}