1use std::collections::BTreeMap;
2
3use serde_json::Value;
4
5use crate::compress::caps::{cap_classified_blocks, ClassifiedBlock, DropClass};
6use crate::compress::generic::{dedup_consecutive, strip_ansi, GenericCompressor};
7use crate::compress::{CompressionResult, Compressor};
8
/// Compressor for the output of the `ruff` Python linter.
///
/// The type carries no state; all behaviour lives in its [`Compressor`]
/// implementation. The derives keep this zero-sized value printable in
/// diagnostics and trivially copyable/constructible.
#[derive(Debug, Clone, Copy, Default)]
pub struct RuffCompressor;
10
11impl Compressor for RuffCompressor {
12 fn matches(&self, command: &str) -> bool {
13 command_tokens(command).any(|token| token == "ruff")
14 }
15
16 fn compress_with_exit_code(
17 &self,
18 _command: &str,
19 output: &str,
20 exit_code: Option<i32>,
21 ) -> CompressionResult {
22 let compressed = compress_ruff(output);
23 if matches!(exit_code, Some(code) if code != 0) && compressed.text.trim() == "ruff: clean" {
24 GenericCompressor::compress_output(output).into()
25 } else {
26 compressed
27 }
28 }
29
30 fn matches_output(&self, output: &str) -> bool {
31 looks_like_ruff_clean_output(output)
32 || looks_like_ruff_text_output(output)
33 || looks_like_ruff_json_output(output)
34 }
35}
36
/// Returns `true` when some line of `output`, ignoring surrounding whitespace,
/// is exactly ruff's "All checks passed!" banner.
fn looks_like_ruff_clean_output(output: &str) -> bool {
    const CLEAN_BANNER: &str = "All checks passed!";
    output
        .lines()
        .map(str::trim)
        .any(|line| line == CLEAN_BANNER)
}
42
43fn looks_like_ruff_text_output(output: &str) -> bool {
44 let mut has_violation = false;
45 let mut has_summary = false;
46 for line in output.lines() {
47 let trimmed = line.trim();
48 has_violation |= is_violation_line(trimmed);
49 has_summary |= is_ruff_error_summary_line(trimmed);
50 }
51 has_violation && has_summary
52}
53
54fn looks_like_ruff_json_output(output: &str) -> bool {
55 let trimmed = output.trim_start();
56 if !trimmed.starts_with('[') {
57 return false;
58 }
59
60 serde_json::from_str::<Value>(trimmed)
61 .ok()
62 .is_some_and(|value| {
63 value.as_array().is_some_and(|diagnostics| {
64 !diagnostics.is_empty()
65 && diagnostics.iter().any(|diagnostic| {
66 diagnostic.get("code").is_some()
67 && diagnostic.get("filename").is_some()
68 && diagnostic.get("location").is_some()
69 })
70 })
71 })
72}
73
74fn compress_ruff(output: &str) -> CompressionResult {
75 let trimmed = output.trim();
76 if trimmed.is_empty() || trimmed == "All checks passed!" {
77 return CompressionResult::new("ruff: clean");
78 }
79
80 if trimmed.starts_with('[') && trimmed.ends_with(']') {
81 if let Some(compressed) = compress_json(trimmed) {
82 return finish(compressed);
83 }
84 return GenericCompressor::compress_output(output).into();
85 }
86
87 let mut blocks = Vec::new();
88 for line in output.lines() {
89 let trimmed = line.trim();
90 if is_violation_line(trimmed) {
91 blocks.push(ClassifiedBlock::new(DropClass::Error, line.to_string()));
92 } else if is_summary_line(trimmed) || trimmed.starts_with("[*]") {
93 blocks.push(ClassifiedBlock::unclassified(line.to_string()));
94 }
95 }
96
97 if blocks.is_empty() {
98 return GenericCompressor::compress_output(output).into();
99 }
100
101 let capped = cap_classified_blocks(blocks);
102 finish(CompressionResult::with_class_drops(
103 capped.text,
104 capped.dropped_by_class,
105 ))
106}
107
/// Splits `command` into normalised tokens for executable-name matching.
///
/// Each whitespace-separated token is stripped of surrounding single or double
/// quotes; launcher words (`npx`, `pnpm`, `yarn`, `bun`, `bunx`, `exec`, `-m`)
/// are dropped; the remaining tokens are reduced to their final path component
/// (both `/` and `\` separators) with a trailing `.cmd` or `.exe` removed, so
/// `C:\tools\ruff.exe` and `./node_modules/.bin/ruff.cmd` both yield `ruff`.
fn command_tokens(command: &str) -> impl Iterator<Item = String> + '_ {
    command
        .split_whitespace()
        .map(|token| token.trim_matches(|ch| matches!(ch, '\'' | '"')))
        .filter(|token| {
            !matches!(
                *token,
                "npx" | "pnpm" | "yarn" | "bun" | "bunx" | "exec" | "-m"
            )
        })
        .map(|token| {
            let file_name = token.rsplit(['/', '\\']).next().unwrap_or(token);
            // Windows launchers: npm-style `.cmd` shims and native `.exe` binaries.
            file_name
                .trim_end_matches(".cmd")
                .trim_end_matches(".exe")
                .to_string()
        })
}
127
128fn compress_json(input: &str) -> Option<CompressionResult> {
129 let diagnostics: Vec<Value> = serde_json::from_str(input).ok()?;
130 if diagnostics.is_empty() {
131 return Some(CompressionResult::new("ruff: clean"));
132 }
133
134 let mut by_rule: BTreeMap<String, Vec<String>> = BTreeMap::new();
135 let mut blocks = Vec::new();
136 for diagnostic in diagnostics {
137 let code = string_field(&diagnostic, "code").unwrap_or("RUF");
138 let filename = string_field(&diagnostic, "filename").unwrap_or("<unknown>");
139 let row = diagnostic
140 .pointer("/location/row")
141 .and_then(Value::as_u64)
142 .unwrap_or(0);
143 let location = format!("{filename}:{row}");
144 by_rule
145 .entry(code.to_string())
146 .or_default()
147 .push(location.clone());
148 blocks.push(ClassifiedBlock::new(
149 DropClass::Error,
150 format!("{code}: {location}"),
151 ));
152 }
153
154 let total = by_rule.values().map(Vec::len).sum::<usize>();
155 blocks.push(ClassifiedBlock::unclassified(format!(
156 "ruff: {total} violations across {} rules",
157 by_rule.len()
158 )));
159 for (rule, locations) in by_rule {
160 blocks.push(ClassifiedBlock::unclassified(format!(
161 "{rule}: {}",
162 locations.len()
163 )));
164 }
165
166 let capped = cap_classified_blocks(blocks);
167 Some(CompressionResult::with_class_drops(
168 capped.text,
169 capped.dropped_by_class,
170 ))
171}
172
/// Returns `true` when `trimmed` has the shape `path:row:col: CODE ...`.
///
/// `row` and `col` must parse as unsigned integers and the first
/// whitespace-separated word after the third colon must look like a rule code
/// (see [`is_rule_code`]). A leading Windows drive prefix such as `C:\` or `C:/`
/// is treated as part of the path, so its colon is not mistaken for the
/// path/row separator.
fn is_violation_line(trimmed: &str) -> bool {
    // Both prefix bytes are ASCII, so splitting at 2 is always a char boundary.
    let drive_len = match trimmed.as_bytes() {
        [drive, b':', b'/' | b'\\', ..] if drive.is_ascii_alphabetic() => 2,
        _ => 0,
    };
    let (_drive, rest) = trimmed.split_at(drive_len);

    let mut fields = rest.splitn(4, ':');
    let (Some(path), Some(row), Some(column), Some(message)) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return false;
    };

    if path.is_empty() || row.parse::<usize>().is_err() || column.parse::<usize>().is_err() {
        return false;
    }
    message.split_whitespace().next().is_some_and(is_rule_code)
}

/// Returns `true` when `token` starts with an ASCII uppercase letter and has at
/// least one ASCII digit somewhere after that first character (e.g. `E501`).
fn is_rule_code(token: &str) -> bool {
    let mut rest = token.chars();
    let starts_uppercase = rest.next().is_some_and(|first| first.is_ascii_uppercase());
    starts_uppercase && rest.any(|ch| ch.is_ascii_digit())
}
191
/// Returns `true` for ruff's error summary line.
///
/// Accepts both the plain form (`Found 3 errors.`, `Found 1 error.`) and the
/// form printed after fixes were applied
/// (`Found 3 errors (2 fixed, 1 remaining).`). The count must be a non-empty
/// run of ASCII digits.
fn is_ruff_error_summary_line(trimmed: &str) -> bool {
    let Some(rest) = trimmed.strip_prefix("Found ") else {
        return false;
    };
    let Some((count, rest)) = rest.split_once(' ') else {
        return false;
    };
    if count.is_empty() || !count.bytes().all(|byte| byte.is_ascii_digit()) {
        return false;
    }

    // Try the plural first so the trailing `s` is not left in `tail`.
    let Some(tail) = rest
        .strip_prefix("errors")
        .or_else(|| rest.strip_prefix("error"))
    else {
        return false;
    };
    tail.starts_with('.') || tail.starts_with(" (")
}

/// Returns `true` when `trimmed` is a summary line worth keeping in the
/// compressed text output; currently this is exactly the ruff error summary.
fn is_summary_line(trimmed: &str) -> bool {
    is_ruff_error_summary_line(trimmed)
}
207
208fn string_field<'a>(value: &'a Value, key: &str) -> Option<&'a str> {
209 value.get(key).and_then(Value::as_str)
210}
211
212fn finish(input: CompressionResult) -> CompressionResult {
213 input.map_text(|text| {
214 let stripped = strip_ansi(text);
215 dedup_consecutive(&stripped).trim_end().to_string()
216 })
217}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Command-line detection accepts ruff launched directly or via a wrapper.
    #[test]
    fn matches_ruff_invocations() {
        let compressor = RuffCompressor;
        for command in [
            "ruff check .",
            "python -m ruff format",
            "python3 -m ruff check",
            "pnpm exec ruff check",
        ] {
            assert!(compressor.matches(command), "expected a match for {command:?}");
        }
        for command in ["cargo build", "ls"] {
            assert!(!compressor.matches(command), "unexpected match for {command:?}");
        }
    }

    /// The clean banner collapses to the short marker.
    #[test]
    fn compresses_real_clean_text_pass() {
        let output = "All checks passed!\n";
        let compressed = compress_ruff(output).text;
        assert_eq!(compressed, "ruff: clean");
        assert!(compressed.len() < output.len());
    }

    /// Violation lines and the summary survive text compression unchanged.
    #[test]
    fn preserves_text_errors_verbatim() {
        let output = "src/a.py:10:5: E501 Line too long (88 > 79 characters)\nsrc/a.py:25:1: F401 `os` imported but unused\nsrc/b.py:3:8: E711 Comparison to None should be 'cond is None'\nFound 3 errors.\n[*] 1 fixable with the `--fix` option.\n";
        let compressed = compress_ruff(output).text;
        let expected_lines = [
            "src/a.py:10:5: E501 Line too long (88 > 79 characters)",
            "src/a.py:25:1: F401 `os` imported but unused",
            "src/b.py:3:8: E711 Comparison to None should be 'cond is None'",
            "Found 3 errors.",
        ];
        for expected in expected_lines {
            assert!(compressed.contains(expected), "missing line: {expected}");
        }
    }

    /// JSON diagnostics are listed per location and counted per rule.
    #[test]
    fn groups_json_output_by_rule() {
        let output = r#"[{"code":"E501","filename":"src/a.py","location":{"row":10,"column":5},"message":"Line too long"},{"code":"E501","filename":"src/b.py","location":{"row":5,"column":1},"message":"Line too long"},{"code":"F401","filename":"src/c.py","location":{"row":1,"column":8},"message":"unused"}]"#;
        let compressed = compress_ruff(output).text;
        let expected_fragments = [
            "E501: src/a.py:10",
            "E501: src/b.py:5",
            "F401: src/c.py:1",
            "ruff: 3 violations across 2 rules",
            "E501: 2",
        ];
        for expected in expected_fragments {
            assert!(compressed.contains(expected), "missing fragment: {expected}");
        }
    }

    /// A large JSON payload keeps its summary while most entries are dropped.
    #[test]
    fn compresses_large_json_input() {
        let items: Vec<String> = (0..500)
            .map(|index| {
                format!(
                    r#"{{"code":"E501","filename":"src/file{index}.py","location":{{"row":{},"column":5}},"message":"Line too long"}}"#,
                    index + 1
                )
            })
            .collect();
        let output = format!("[{}]", items.join(","));
        let result = compress_ruff(&output);
        assert_eq!(result.dropped_by_class.get(&DropClass::Error), Some(&480));
        let compressed = result.text;
        assert!(compressed.contains("ruff: 500 violations across 1 rules"));
        assert!(compressed.contains("E501: 500"));
        assert!(compressed.len() < output.len() / 2);
    }
}