1use std::collections::BTreeMap;
2
3use serde_json::Value;
4
5use crate::compress::generic::{dedup_consecutive, middle_truncate, strip_ansi, GenericCompressor};
6use crate::compress::Compressor;
7
/// Upper bound on the number of lines `finish` lets through; anything beyond
/// it is replaced by a single `... truncated N lines` note.
const MAX_LINES: usize = 250;
/// Upper bound on the `file:row` locations listed for one rule in the JSON
/// summary; the remainder is reported as a `(+N more)` count.
const MAX_LOCATIONS_PER_RULE: usize = 25;
10
/// Stateless compressor for the output of commands that invoke `ruff`.
pub struct RuffCompressor;
12
13impl Compressor for RuffCompressor {
14 fn matches(&self, command: &str) -> bool {
15 command_tokens(command).any(|token| token == "ruff")
16 }
17
18 fn compress(&self, _command: &str, output: &str) -> String {
19 compress_ruff(output)
20 }
21}
22
23fn compress_ruff(output: &str) -> String {
24 let trimmed = output.trim();
25 if trimmed.is_empty() || trimmed == "All checks passed!" {
26 return "ruff: clean".to_string();
27 }
28
29 if trimmed.starts_with('[') && trimmed.ends_with(']') {
30 if let Some(compressed) = compress_json(trimmed) {
31 return finish(&compressed);
32 }
33 return GenericCompressor::compress_output(output);
34 }
35
36 let mut kept = Vec::new();
37 for line in output.lines() {
38 let trimmed = line.trim();
39 if is_violation_line(trimmed) || is_summary_line(trimmed) || trimmed.starts_with("[*]") {
40 kept.push(line.to_string());
41 }
42 }
43
44 if kept.is_empty() {
45 return GenericCompressor::compress_output(output);
46 }
47
48 finish(&kept.join("\n"))
49}
50
/// Splits `command` on whitespace and normalizes every word to a bare
/// program-like name.
///
/// For each word, in order:
/// 1. surrounding single and double quotes are trimmed;
/// 2. the runner words `npx`, `pnpm`, `yarn`, `bun`, `bunx`, `exec` and the
///    `-m` flag are dropped;
/// 3. everything up to the last `/` or `\` is removed;
/// 4. trailing `.cmd` suffixes and one trailing `.exe` suffix are removed, so
///    Windows launchers such as `ruff.cmd` and `ruff.exe` both yield `ruff`.
///
/// The returned iterator borrows `command` and yields owned strings.
fn command_tokens(command: &str) -> impl Iterator<Item = String> + '_ {
    command
        .split_whitespace()
        .map(|word| word.trim_matches(|ch: char| matches!(ch, '\'' | '"')))
        .filter(|word| {
            !matches!(
                *word,
                "npx" | "pnpm" | "yarn" | "bun" | "bunx" | "exec" | "-m"
            )
        })
        .map(|word| {
            // `rsplit` always yields at least one piece; the fallback is only
            // there to avoid an unwrap.
            let base = word.rsplit(['/', '\\']).next().unwrap_or(word);
            let base = base.trim_end_matches(".cmd");
            base.strip_suffix(".exe").unwrap_or(base).to_string()
        })
}
70
71fn compress_json(input: &str) -> Option<String> {
72 let diagnostics: Vec<Value> = serde_json::from_str(input).ok()?;
73 if diagnostics.is_empty() {
74 return Some("ruff: clean".to_string());
75 }
76
77 let mut by_rule: BTreeMap<String, Vec<String>> = BTreeMap::new();
78 for diagnostic in diagnostics {
79 let code = string_field(&diagnostic, "code").unwrap_or("RUF");
80 let filename = string_field(&diagnostic, "filename").unwrap_or("<unknown>");
81 let row = diagnostic
82 .pointer("/location/row")
83 .and_then(Value::as_u64)
84 .unwrap_or(0);
85 by_rule
86 .entry(code.to_string())
87 .or_default()
88 .push(format!("{filename}:{row}"));
89 }
90
91 let total = by_rule.values().map(Vec::len).sum::<usize>();
92 let mut lines = Vec::new();
93 for (rule, locations) in &by_rule {
94 let shown = locations
95 .iter()
96 .take(MAX_LOCATIONS_PER_RULE)
97 .cloned()
98 .collect::<Vec<_>>()
99 .join(", ");
100 if locations.len() > MAX_LOCATIONS_PER_RULE {
101 lines.push(format!(
102 "{rule}: {shown}, ... (+{} more)",
103 locations.len() - MAX_LOCATIONS_PER_RULE
104 ));
105 } else {
106 lines.push(format!("{rule}: {shown}"));
107 }
108 }
109 lines.push(format!(
110 "ruff: {total} violations across {} rules",
111 by_rule.len()
112 ));
113 for (rule, locations) in by_rule {
114 lines.push(format!("{rule}: {}", locations.len()));
115 }
116
117 Some(lines.join("\n"))
118}
119
120fn is_violation_line(trimmed: &str) -> bool {
121 let parts: Vec<&str> = trimmed.splitn(4, ':').collect();
122 if parts.len() != 4 {
123 return false;
124 }
125 if parts[0].is_empty()
126 || parts[1].parse::<usize>().is_err()
127 || parts[2].parse::<usize>().is_err()
128 {
129 return false;
130 }
131 parts[3].split_whitespace().next().is_some_and(is_rule_code)
132}
133
/// Reports whether `token` looks like a rule code: it starts with an ASCII
/// uppercase letter and has at least one ASCII digit somewhere after it.
fn is_rule_code(token: &str) -> bool {
    // Both checks concern ASCII only, so inspecting raw bytes is equivalent to
    // inspecting chars: bytes of multi-byte characters are never ASCII.
    let Some((head, tail)) = token.as_bytes().split_first() else {
        return false;
    };
    head.is_ascii_uppercase() && tail.iter().any(u8::is_ascii_digit)
}
138
/// Reports whether `trimmed` begins with `Found ` and contains ` error`
/// anywhere in the line.
fn is_summary_line(trimmed: &str) -> bool {
    if !trimmed.starts_with("Found ") {
        return false;
    }
    // Searched over the whole line, so the space that ends `Found ` may also
    // be the one that starts ` error`.
    trimmed.find(" error").is_some()
}
142
143fn string_field<'a>(value: &'a Value, key: &str) -> Option<&'a str> {
144 value.get(key).and_then(Value::as_str)
145}
146
147fn finish(input: &str) -> String {
148 let stripped = strip_ansi(input);
149 let deduped = dedup_consecutive(&stripped);
150 cap_lines(
151 &middle_truncate(&deduped, 32 * 1024, 16 * 1024, 16 * 1024),
152 MAX_LINES,
153 )
154}
155
/// Limits `input` to its first `max_lines` lines.
///
/// When the input fits, it is returned with trailing whitespace trimmed.
/// Otherwise the first `max_lines` lines are joined with `\n` and followed by
/// a `... truncated N lines` note, where `N` is the number of dropped lines.
fn cap_lines(input: &str, max_lines: usize) -> String {
    let all: Vec<&str> = input.lines().collect();
    match all.len().checked_sub(max_lines) {
        Some(dropped) if dropped > 0 => {
            let head = all[..max_lines].join("\n");
            format!("{head}\n... truncated {dropped} lines")
        }
        // Fewer lines than the cap, or exactly at it: nothing to cut.
        _ => input.trim_end().to_string(),
    }
}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// `matches` accepts direct, `python -m` and `pnpm exec` invocations of
    /// ruff and rejects unrelated commands.
    #[test]
    fn matches_ruff_invocations() {
        let compressor = RuffCompressor;
        assert!(compressor.matches("ruff check ."));
        assert!(compressor.matches("python -m ruff format"));
        assert!(compressor.matches("python3 -m ruff check"));
        assert!(compressor.matches("pnpm exec ruff check"));
        assert!(!compressor.matches("cargo build"));
        assert!(!compressor.matches("ls"));
    }

    /// The success banner collapses to `ruff: clean`, which is shorter than
    /// the input.
    #[test]
    fn compresses_real_clean_text_pass() {
        let output = "All checks passed!\n";
        let compressed = compress_ruff(output);
        assert_eq!(compressed, "ruff: clean");
        assert!(compressed.len() < output.len());
    }

    /// Text-format violation lines and the `Found N errors.` summary pass
    /// through unchanged.
    #[test]
    fn preserves_text_errors_verbatim() {
        let output = "src/a.py:10:5: E501 Line too long (88 > 79 characters)\nsrc/a.py:25:1: F401 `os` imported but unused\nsrc/b.py:3:8: E711 Comparison to None should be 'cond is None'\nFound 3 errors.\n[*] 1 fixable with the `--fix` option.\n";
        let compressed = compress_ruff(output);
        assert!(compressed.contains("src/a.py:10:5: E501 Line too long (88 > 79 characters)"));
        assert!(compressed.contains("src/a.py:25:1: F401 `os` imported but unused"));
        assert!(
            compressed.contains("src/b.py:3:8: E711 Comparison to None should be 'cond is None'")
        );
        assert!(compressed.contains("Found 3 errors."));
    }

    /// JSON diagnostics are grouped per rule with `file:row` locations,
    /// followed by the totals line and per-rule counts.
    #[test]
    fn groups_json_output_by_rule() {
        let output = r#"[{"code":"E501","filename":"src/a.py","location":{"row":10,"column":5},"message":"Line too long"},{"code":"E501","filename":"src/b.py","location":{"row":5,"column":1},"message":"Line too long"},{"code":"F401","filename":"src/c.py","location":{"row":1,"column":8},"message":"unused"}]"#;
        let compressed = compress_ruff(output);
        assert!(compressed.contains("E501: src/a.py:10, src/b.py:5"));
        assert!(compressed.contains("F401: src/c.py:1"));
        assert!(compressed.contains("ruff: 3 violations across 2 rules"));
        assert!(compressed.contains("E501: 2"));
    }

    /// 500 diagnostics for a single rule still report the full count while the
    /// output shrinks to less than half of the input size.
    #[test]
    fn compresses_large_json_input() {
        let mut items = Vec::new();
        for index in 0..500 {
            items.push(format!(
                r#"{{"code":"E501","filename":"src/file{index}.py","location":{{"row":{},"column":5}},"message":"Line too long"}}"#,
                index + 1
            ));
        }
        let output = format!("[{}]", items.join(","));
        let compressed = compress_ruff(&output);
        assert!(compressed.contains("ruff: 500 violations across 1 rules"));
        assert!(compressed.contains("E501: 500"));
        assert!(compressed.len() < output.len() / 2);
    }
}