1use std::collections::BTreeMap;
2
3use serde_json::Value;
4
5use crate::compress::generic::{dedup_consecutive, middle_truncate, strip_ansi, GenericCompressor};
6use crate::compress::Compressor;
7
/// Upper bound on the number of lines kept in the final compressed output
/// (passed to `cap_lines` by `finish`).
const MAX_LINES: usize = 250;
/// Upper bound on the number of `file:row` locations listed per rule when
/// summarising JSON diagnostics; the remainder is reported as a "+N more" count.
const MAX_LOCATIONS_PER_RULE: usize = 25;
10
/// Compressor that recognises `ruff` invocations (or Ruff-looking output) and
/// condenses the output down to violations and summary lines.
pub struct RuffCompressor;
12
13impl Compressor for RuffCompressor {
14 fn matches(&self, command: &str) -> bool {
15 command_tokens(command).any(|token| token == "ruff")
16 }
17
18 fn compress(&self, _command: &str, output: &str) -> String {
19 compress_ruff(output)
20 }
21
22 fn matches_output(&self, output: &str) -> bool {
23 looks_like_ruff_clean_output(output)
24 || looks_like_ruff_text_output(output)
25 || looks_like_ruff_json_output(output)
26 }
27}
28
/// Returns `true` when some line of `output`, ignoring surrounding whitespace,
/// is exactly the banner `All checks passed!`.
fn looks_like_ruff_clean_output(output: &str) -> bool {
    for line in output.lines() {
        if line.trim() == "All checks passed!" {
            return true;
        }
    }
    false
}
34
35fn looks_like_ruff_text_output(output: &str) -> bool {
36 let mut has_violation = false;
37 let mut has_summary = false;
38 for line in output.lines() {
39 let trimmed = line.trim();
40 has_violation |= is_violation_line(trimmed);
41 has_summary |= is_ruff_error_summary_line(trimmed);
42 }
43 has_violation && has_summary
44}
45
46fn looks_like_ruff_json_output(output: &str) -> bool {
47 let trimmed = output.trim_start();
48 if !trimmed.starts_with('[') {
49 return false;
50 }
51
52 serde_json::from_str::<Value>(trimmed)
53 .ok()
54 .is_some_and(|value| {
55 value.as_array().is_some_and(|diagnostics| {
56 !diagnostics.is_empty()
57 && diagnostics.iter().any(|diagnostic| {
58 diagnostic.get("code").is_some()
59 && diagnostic.get("filename").is_some()
60 && diagnostic.get("location").is_some()
61 })
62 })
63 })
64}
65
66fn compress_ruff(output: &str) -> String {
67 let trimmed = output.trim();
68 if trimmed.is_empty() || trimmed == "All checks passed!" {
69 return "ruff: clean".to_string();
70 }
71
72 if trimmed.starts_with('[') && trimmed.ends_with(']') {
73 if let Some(compressed) = compress_json(trimmed) {
74 return finish(&compressed);
75 }
76 return GenericCompressor::compress_output(output);
77 }
78
79 let mut kept = Vec::new();
80 for line in output.lines() {
81 let trimmed = line.trim();
82 if is_violation_line(trimmed) || is_summary_line(trimmed) || trimmed.starts_with("[*]") {
83 kept.push(line.to_string());
84 }
85 }
86
87 if kept.is_empty() {
88 return GenericCompressor::compress_output(output);
89 }
90
91 finish(&kept.join("\n"))
92}
93
/// Splits `command` into normalised tokens suitable for tool-name matching.
///
/// Each whitespace-separated token has surrounding single/double quotes
/// removed; launcher words (`npx`, `pnpm`, `yarn`, `bun`, `bunx`, `exec`, `-m`)
/// are dropped; the remaining tokens are reduced to their final path component
/// (both `/` and `\` are treated as separators) and stripped of trailing
/// Windows launcher suffixes (`.cmd`, `.exe`, `.bat`), so that
/// `C:\Scripts\ruff.exe` and `node_modules/.bin/ruff.cmd` both yield `ruff`.
fn command_tokens(command: &str) -> impl Iterator<Item = String> + '_ {
    // Suffixes under which a tool can appear on Windows command lines.
    const LAUNCHER_SUFFIXES: [&str; 3] = [".cmd", ".exe", ".bat"];

    command
        .split_whitespace()
        .map(|token| token.trim_matches(|ch| matches!(ch, '\'' | '"')))
        .filter(|token| {
            !matches!(
                *token,
                "npx" | "pnpm" | "yarn" | "bun" | "bunx" | "exec" | "-m"
            )
        })
        .map(|token| {
            let basename = token.rsplit(['/', '\\']).next().unwrap_or(token);
            LAUNCHER_SUFFIXES
                .iter()
                .fold(basename, |name, suffix| name.trim_end_matches(*suffix))
                .to_string()
        })
}
113
114fn compress_json(input: &str) -> Option<String> {
115 let diagnostics: Vec<Value> = serde_json::from_str(input).ok()?;
116 if diagnostics.is_empty() {
117 return Some("ruff: clean".to_string());
118 }
119
120 let mut by_rule: BTreeMap<String, Vec<String>> = BTreeMap::new();
121 for diagnostic in diagnostics {
122 let code = string_field(&diagnostic, "code").unwrap_or("RUF");
123 let filename = string_field(&diagnostic, "filename").unwrap_or("<unknown>");
124 let row = diagnostic
125 .pointer("/location/row")
126 .and_then(Value::as_u64)
127 .unwrap_or(0);
128 by_rule
129 .entry(code.to_string())
130 .or_default()
131 .push(format!("{filename}:{row}"));
132 }
133
134 let total = by_rule.values().map(Vec::len).sum::<usize>();
135 let mut lines = Vec::new();
136 for (rule, locations) in &by_rule {
137 let shown = locations
138 .iter()
139 .take(MAX_LOCATIONS_PER_RULE)
140 .cloned()
141 .collect::<Vec<_>>()
142 .join(", ");
143 if locations.len() > MAX_LOCATIONS_PER_RULE {
144 lines.push(format!(
145 "{rule}: {shown}, ... (+{} more)",
146 locations.len() - MAX_LOCATIONS_PER_RULE
147 ));
148 } else {
149 lines.push(format!("{rule}: {shown}"));
150 }
151 }
152 lines.push(format!(
153 "ruff: {total} violations across {} rules",
154 by_rule.len()
155 ));
156 for (rule, locations) in by_rule {
157 lines.push(format!("{rule}: {}", locations.len()));
158 }
159
160 Some(lines.join("\n"))
161}
162
/// Reports whether `trimmed` has the shape `path:row:column: CODE message`.
///
/// The path must be non-empty, `row` and `column` must parse as unsigned
/// integers, and the first whitespace-separated word after the third colon
/// must look like a rule code (see [`is_rule_code`]).
///
/// A leading Windows drive prefix (`C:\` or `C:/`) is treated as part of the
/// path, so absolute Windows paths are recognised rather than having the drive
/// colon mistaken for the path/row separator.
fn is_violation_line(trimmed: &str) -> bool {
    // Bytes 0 and 1 are ASCII when the pattern matches, so byte offset 2 is
    // always a valid `str` boundary.
    let path_start = match trimmed.as_bytes() {
        [drive, b':', b'\\' | b'/', ..] if drive.is_ascii_alphabetic() => 2,
        _ => 0,
    };

    // At most four fields: the message itself may contain further colons.
    let mut fields = trimmed[path_start..].splitn(4, ':');
    let (Some(path), Some(row), Some(column), Some(message)) =
        (fields.next(), fields.next(), fields.next(), fields.next())
    else {
        return false;
    };

    if path.is_empty() || row.parse::<usize>().is_err() || column.parse::<usize>().is_err() {
        return false;
    }
    message.split_whitespace().next().is_some_and(is_rule_code)
}

/// Reports whether `token` looks like a rule code: it starts with an ASCII
/// uppercase letter and contains an ASCII digit somewhere after that first
/// character (e.g. `E501`, `F401`).
fn is_rule_code(token: &str) -> bool {
    let mut chars = token.chars();
    chars.next().is_some_and(|ch| ch.is_ascii_uppercase()) && chars.any(|ch| ch.is_ascii_digit())
}
181
/// Reports whether `trimmed` is a Ruff error-count summary line.
///
/// Accepted shapes, where `N` is one or more ASCII digits:
///
/// * `Found N error.` / `Found N errors.`
/// * `Found N error (X fixed, Y remaining).` and its plural form, as printed
///   when fixes were applied.
///
/// The noun must be followed directly by `.` or by the ` (… fixed, …`
/// parenthetical, so summaries from other tools such as
/// `Found 3 errors in 2 files (...)` are still rejected.
fn is_ruff_error_summary_line(trimmed: &str) -> bool {
    let Some(rest) = trimmed.strip_prefix("Found ") else {
        return false;
    };
    let Some((count, rest)) = rest.split_once(' ') else {
        return false;
    };
    if count.is_empty() || !count.chars().all(|ch| ch.is_ascii_digit()) {
        return false;
    }

    // Try the plural first; if it matches, the singular could not produce a
    // different verdict, so nothing is lost by not retrying.
    let Some(tail) = rest
        .strip_prefix("errors")
        .or_else(|| rest.strip_prefix("error"))
    else {
        return false;
    };

    tail.starts_with('.') || (tail.starts_with(" (") && tail.contains(" fixed, "))
}
193
194fn is_summary_line(trimmed: &str) -> bool {
195 is_ruff_error_summary_line(trimmed)
196}
197
198fn string_field<'a>(value: &'a Value, key: &str) -> Option<&'a str> {
199 value.get(key).and_then(Value::as_str)
200}
201
202fn finish(input: &str) -> String {
203 let stripped = strip_ansi(input);
204 let deduped = dedup_consecutive(&stripped);
205 cap_lines(
206 &middle_truncate(&deduped, 32 * 1024, 16 * 1024, 16 * 1024),
207 MAX_LINES,
208 )
209}
210
/// Limits `input` to at most `max_lines` lines.
///
/// When the input already fits, it is returned with trailing whitespace
/// removed. Otherwise the first `max_lines` lines are kept and a final
/// `... truncated N lines` marker reports how many lines were dropped.
fn cap_lines(input: &str, max_lines: usize) -> String {
    let total = input.lines().count();
    if total <= max_lines {
        return input.trim_end().to_string();
    }

    let mut capped = String::new();
    for (index, line) in input.lines().take(max_lines).enumerate() {
        // Separator goes between lines only, never before the first one.
        if index > 0 {
            capped.push('\n');
        }
        capped.push_str(line);
    }
    capped.push_str(&format!("\n... truncated {} lines", total - max_lines));
    capped
}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// Command matching accepts direct and launcher-wrapped `ruff` invocations
    /// and rejects unrelated commands.
    #[test]
    fn matches_ruff_invocations() {
        let compressor = RuffCompressor;
        let accepted = [
            "ruff check .",
            "python -m ruff format",
            "python3 -m ruff check",
            "pnpm exec ruff check",
        ];
        for command in accepted {
            assert!(compressor.matches(command));
        }
        for command in ["cargo build", "ls"] {
            assert!(!compressor.matches(command));
        }
    }

    /// The clean banner collapses to the short `ruff: clean` marker.
    #[test]
    fn compresses_real_clean_text_pass() {
        let raw = "All checks passed!\n";
        let compressed = compress_ruff(raw);
        assert_eq!(compressed, "ruff: clean");
        assert!(compressed.len() < raw.len());
    }

    /// Violation lines and the summary line survive text compression unchanged.
    #[test]
    fn preserves_text_errors_verbatim() {
        let raw = "src/a.py:10:5: E501 Line too long (88 > 79 characters)\nsrc/a.py:25:1: F401 `os` imported but unused\nsrc/b.py:3:8: E711 Comparison to None should be 'cond is None'\nFound 3 errors.\n[*] 1 fixable with the `--fix` option.\n";
        let compressed = compress_ruff(raw);
        let expected = [
            "src/a.py:10:5: E501 Line too long (88 > 79 characters)",
            "src/a.py:25:1: F401 `os` imported but unused",
            "src/b.py:3:8: E711 Comparison to None should be 'cond is None'",
            "Found 3 errors.",
        ];
        for needle in expected {
            assert!(compressed.contains(needle));
        }
    }

    /// JSON diagnostics are grouped per rule and followed by totals.
    #[test]
    fn groups_json_output_by_rule() {
        let raw = r#"[{"code":"E501","filename":"src/a.py","location":{"row":10,"column":5},"message":"Line too long"},{"code":"E501","filename":"src/b.py","location":{"row":5,"column":1},"message":"Line too long"},{"code":"F401","filename":"src/c.py","location":{"row":1,"column":8},"message":"unused"}]"#;
        let compressed = compress_ruff(raw);
        let expected = [
            "E501: src/a.py:10, src/b.py:5",
            "F401: src/c.py:1",
            "ruff: 3 violations across 2 rules",
            "E501: 2",
        ];
        for needle in expected {
            assert!(compressed.contains(needle));
        }
    }

    /// A large JSON payload shrinks to less than half its size while keeping totals.
    #[test]
    fn compresses_large_json_input() {
        let items: Vec<String> = (0..500)
            .map(|index| {
                format!(
                    r#"{{"code":"E501","filename":"src/file{index}.py","location":{{"row":{},"column":5}},"message":"Line too long"}}"#,
                    index + 1
                )
            })
            .collect();
        let raw = format!("[{}]", items.join(","));
        let compressed = compress_ruff(&raw);
        assert!(compressed.contains("ruff: 500 violations across 1 rules"));
        assert!(compressed.contains("E501: 500"));
        assert!(compressed.len() < raw.len() / 2);
    }
}