// lean_ctx/core/patterns/log_dedup.rs

/// Expands to an expression of type `&'static regex::Regex` for the given pattern.
///
/// Each expansion owns its own `OnceLock`, so the pattern is compiled the first
/// time that call site is evaluated and the compiled value is reused afterwards.
///
/// The pattern must be a string literal because it is also spliced into the
/// panic message with `concat!`.
///
/// # Panics
///
/// Panics on first use if the pattern is not a valid regular expression.
macro_rules! static_regex {
    ($source:expr) => {{
        use std::sync::OnceLock;

        static COMPILED: OnceLock<regex::Regex> = OnceLock::new();
        COMPILED.get_or_init(|| {
            regex::Regex::new($source).expect(concat!("BUG: invalid static regex: ", $source))
        })
    }};
}
9
10fn timestamp_re() -> &'static regex::Regex {
11 static_regex!(r"^\[?\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[^\]\s]*\]?\s*")
12}
13
/// Returns `true` when `line` looks like a header that starts a new logical block.
///
/// Surrounding whitespace is ignored. A line counts as a separator when it is:
///
/// * a rule of three or more characters made up only of `=` and `-`;
/// * prefixed with `===`, `---`, `diff --git `, `##`, `Step ` or `STEP `;
/// * a commit header: `commit ` followed directly by a token of at least five
///   hexadecimal digits (anything after the token, such as ref decorations,
///   is ignored).
///
/// The whole hash token must be hexadecimal, so ordinary sentences such as
/// `commit changes to the database` are not mistaken for commit headers merely
/// because their first letter happens to be a hex digit.
///
/// Blank lines are never separators.
fn is_block_separator(line: &str) -> bool {
    // Fewest hex digits accepted as a commit hash. Together with the 7-byte
    // `"commit "` prefix this makes 12 bytes the shortest accepted header.
    const MIN_COMMIT_HASH_LEN: usize = 5;
    // Prefixes that mark a block header regardless of what follows them.
    const MARKER_PREFIXES: [&str; 6] = ["===", "---", "diff --git ", "##", "Step ", "STEP "];

    let t = line.trim();
    if t.is_empty() {
        return false;
    }

    // Covers mixed rules such as `=-=-=` that the prefix table does not.
    let is_rule = t.len() >= 3 && t.chars().all(|c| matches!(c, '=' | '-'));
    if is_rule || MARKER_PREFIXES.iter().any(|prefix| t.starts_with(*prefix)) {
        return true;
    }

    t.strip_prefix("commit ").is_some_and(|rest| {
        // Splitting on single whitespace characters (rather than runs) keeps a
        // doubled space after `commit` from being accepted: the token is then empty.
        let hash = rest.split(char::is_whitespace).next().unwrap_or_default();
        hash.len() >= MIN_COMMIT_HASH_LEN && hash.chars().all(|c| c.is_ascii_hexdigit())
    })
}
39
/// A run of log lines that belong together, optionally introduced by a separator line.
struct Block {
    /// The separator line that opened this block; `None` for lines seen before
    /// the first separator.
    separator: Option<String>,
    /// Distinct consecutive lines in order of appearance, each paired with the
    /// number of times it repeated back to back.
    entries: Vec<(String, u32)>,
}
44
45pub fn compress(output: &str) -> Option<String> {
46 let lines: Vec<&str> = output.lines().collect();
47 if lines.len() <= 10 {
48 return None;
49 }
50
51 let mut blocks: Vec<Block> = Vec::new();
52 let mut current = Block {
53 separator: None,
54 entries: Vec::new(),
55 };
56 let mut error_lines = Vec::new();
57 let total_lines = lines.len();
58
59 for line in &lines {
60 let stripped = timestamp_re().replace(line, "").trim().to_string();
61 if stripped.is_empty() {
62 continue;
63 }
64
65 if is_block_separator(&stripped) {
66 if !current.entries.is_empty() || current.separator.is_some() {
67 blocks.push(current);
68 }
69 current = Block {
70 separator: Some(stripped.clone()),
71 entries: Vec::new(),
72 };
73 continue;
74 }
75
76 let lower = stripped.to_lowercase();
77 if lower.contains("error")
78 || lower.contains("critical")
79 || lower.contains("fatal")
80 || lower.contains("panic")
81 || lower.contains("exception")
82 {
83 error_lines.push(stripped.clone());
84 }
85
86 if let Some(last) = current.entries.last_mut() {
87 if last.0 == stripped {
88 last.1 += 1;
89 continue;
90 }
91 }
92 current.entries.push((stripped, 1));
93 }
94 if !current.entries.is_empty() || current.separator.is_some() {
95 blocks.push(current);
96 }
97
98 let total_unique: usize = blocks.iter().map(|b| b.entries.len()).sum();
99
100 let mut parts = Vec::new();
101 parts.push(format!("{total_lines} lines → {total_unique} unique"));
102
103 if !error_lines.is_empty() {
104 parts.push(format!("{} errors:", error_lines.len()));
105 for e in error_lines.iter().take(5) {
106 parts.push(format!(" {e}"));
107 }
108 if error_lines.len() > 5 {
109 parts.push(format!(" ... +{} more errors", error_lines.len() - 5));
110 }
111 }
112
113 let has_multiple_blocks = blocks.len() > 1;
114
115 for block in &blocks {
116 if let Some(sep) = &block.separator {
117 parts.push(sep.clone());
118 }
119
120 let formatted: Vec<String> = block
121 .entries
122 .iter()
123 .map(|(line, count)| {
124 if *count > 1 {
125 format!("{line} (x{count})")
126 } else {
127 line.clone()
128 }
129 })
130 .collect();
131
132 if !has_multiple_blocks && formatted.len() > 30 {
133 let tail = &formatted[formatted.len() - 15..];
134 parts.push(format!("last 15 unique lines:\n{}", tail.join("\n")));
135 } else if has_multiple_blocks && formatted.len() > 20 {
136 for line in formatted.iter().take(5) {
137 parts.push(line.clone());
138 }
139 let omitted = formatted.len() - 10;
140 parts.push(format!("[{omitted} lines omitted]"));
141 for line in formatted.iter().skip(formatted.len() - 5) {
142 parts.push(line.clone());
143 }
144 } else {
145 for line in &formatted {
146 parts.push(line.clone());
147 }
148 }
149 }
150
151 Some(parts.join("\n"))
152}
153
#[cfg(test)]
mod tests {
    use super::*;

    /// Joins `lines` with newlines and compresses the result, panicking if
    /// `compress` declines to produce a summary.
    fn compress_lines<S: std::borrow::Borrow<str>>(lines: &[S]) -> String {
        compress(&lines.join("\n")).unwrap()
    }

    #[test]
    fn short_output_returns_none() {
        assert!(compress("line1\nline2\nline3").is_none());
    }

    #[test]
    fn deduplicates_consecutive_lines() {
        let result = compress_lines(&["INFO Processing request"; 15]);
        assert!(result.contains("(x15)"), "must show repeat count: {result}");
        assert!(result.contains("15 lines"), "must show total lines: {result}");
    }

    #[test]
    fn respects_block_separators_equals() {
        let mut lines = vec!["=== commit aaaa001 ==="];
        lines.extend(["file_a.rs | 10 +++++"; 5]);
        lines.push("=== commit aaaa002 ===");
        lines.extend(["file_b.rs | 20 ++++++++++"; 5]);

        let result = compress_lines(&lines);

        let expectations = [
            (
                "=== commit aaaa001 ===",
                "first block separator must be preserved",
            ),
            (
                "=== commit aaaa002 ===",
                "second block separator must be preserved",
            ),
            ("file_a.rs", "first block content must be preserved"),
            ("file_b.rs", "second block content must be preserved"),
        ];
        for (needle, why) in expectations {
            assert!(result.contains(needle), "{why}: {result}");
        }
    }

    #[test]
    fn does_not_merge_across_blocks() {
        let mut lines = vec!["=== block 1 ==="];
        lines.extend(["same line"; 3]);
        lines.push("=== block 2 ===");
        lines.extend(["same line"; 2]);
        lines.push("=== block 3 ===");
        lines.extend(["same line"; 2]);
        lines.push("different line here");

        let result = compress_lines(&lines);

        for n in 1..=3 {
            let separator = format!("=== block {n} ===");
            assert!(
                result.contains(separator.as_str()),
                "block {n} must exist: {result}"
            );
        }
        let count_same = result.matches("same line").count();
        assert!(
            count_same >= 3,
            "each block must have its own 'same line' entry, got {count_same}: {result}"
        );
    }

    #[test]
    fn git_commit_separator_detected() {
        for header in ["commit abc1234def5678", "commit 1a2b3c4d5e6f7890"] {
            assert!(is_block_separator(header));
        }
        assert!(!is_block_separator("committed to fixing"));
    }

    #[test]
    fn diff_separator_detected() {
        assert!(is_block_separator("diff --git a/file.rs b/file.rs"));
        assert!(!is_block_separator("different approach"));
    }

    #[test]
    fn triple_equals_dashes_detected() {
        let separators = [
            "===",
            "==========",
            "---",
            "-----------",
            "=== test block ===",
            "--- a/file.rs",
        ];
        for candidate in separators {
            assert!(is_block_separator(candidate));
        }
    }

    #[test]
    fn error_lines_preserved_across_blocks() {
        let mut lines = vec!["=== step 1 ==="];
        lines.extend(["ok line"; 3]);
        lines.push("ERROR: something failed");
        lines.extend(["ok line"; 3]);
        lines.push("=== step 2 ===");
        lines.extend(["ok line 2"; 6]);

        let result = compress_lines(&lines);

        assert!(
            result.contains("1 errors:"),
            "error count must be shown: {result}"
        );
        assert!(
            result.contains("ERROR: something failed"),
            "error line must be preserved: {result}"
        );
    }

    #[test]
    fn git_show_loop_not_deduplicated() {
        let commits = [
            (
                "aaaa001",
                "accounts_test.exs | 70 ++",
                "schema_test.exs | 30 ++",
            ),
            ("aaaa002", "query_test.exs | 45 ++", "api_test.exs | 12 ++"),
            ("aaaa003", "main_test.exs | 55 ++", "helper_test.exs | 8 ++"),
        ];
        // Each commit contributes a header, two file lines, a summary and a blank line.
        let lines: Vec<String> = commits
            .iter()
            .flat_map(|(sha, first, second)| {
                [
                    format!("=== {sha} ==="),
                    first.to_string(),
                    second.to_string(),
                    "2 files changed".to_string(),
                    String::new(),
                ]
            })
            .collect();

        let result = compress_lines(&lines);

        assert!(
            commits.iter().all(|(sha, _, _)| result.contains(*sha)),
            "all commit separators must be preserved: {result}"
        );
        let file_expectations = [
            ("accounts_test.exs", "first commit files must be present"),
            ("query_test.exs", "second commit files must be present"),
            ("main_test.exs", "third commit files must be present"),
        ];
        for (needle, why) in file_expectations {
            assert!(result.contains(needle), "{why}: {result}");
        }
    }
}