Skip to main content

lean_ctx/core/patterns/
log_dedup.rs

/// Expands to an expression that evaluates to a `&'static regex::Regex`.
///
/// The regex is compiled from `$pattern` the first time the expansion site is
/// evaluated and cached in a `OnceLock` private to that site; later
/// evaluations return the cached value.
///
/// `$pattern` is also forwarded to `concat!` to build the panic message, so it
/// has to be a string literal.
///
/// # Panics
///
/// Panics on first use if `$pattern` is not a valid regular expression.
macro_rules! static_regex {
    ($pattern:expr) => {{
        use std::sync::OnceLock;

        static COMPILED: OnceLock<regex::Regex> = OnceLock::new();
        COMPILED.get_or_init(|| {
            let built = regex::Regex::new($pattern);
            built.expect(concat!("BUG: invalid static regex: ", $pattern))
        })
    }};
}
9
10fn timestamp_re() -> &'static regex::Regex {
11    static_regex!(r"^\[?\d{4}[-/]\d{2}[-/]\d{2}[T ]\d{2}:\d{2}:\d{2}[^\]\s]*\]?\s*")
12}
13
/// Reports whether `line` looks like a boundary between logical sections of
/// command output.
///
/// Surrounding whitespace is ignored. A line counts as a separator when it is
/// any of the following:
/// - three or more characters consisting only of `=` and `-`,
/// - anything starting with `===` or `---` (banners, unified-diff headers),
/// - a git commit header: `commit ` followed by a hash token of at least
///   seven hexadecimal digits (anything after the hash, such as ref
///   decorations, is ignored),
/// - a `diff --git ` header,
/// - a line starting with `##`, `Step ` or `STEP `.
///
/// Blank lines are never separators.
fn is_block_separator(line: &str) -> bool {
    // Git's default abbreviated hash length. Requiring a full hex token of at
    // least this size keeps ordinary prose such as "commit a change" or
    // "commit best effort" from being mistaken for a commit header.
    const MIN_COMMIT_HASH_LEN: usize = 7;

    let t = line.trim();
    if t.is_empty() {
        return false;
    }

    // Horizontal rules made of any mix of '=' and '-'.
    if t.len() >= 3 && t.chars().all(|c| c == '=' || c == '-') {
        return true;
    }
    if t.starts_with("===") || t.starts_with("---") {
        return true;
    }

    if let Some(rest) = t.strip_prefix("commit ") {
        // Only the first whitespace-delimited token is the candidate hash; a
        // second space right after the prefix yields an empty token and fails.
        let hash = rest.split(char::is_whitespace).next().unwrap_or("");
        if hash.len() >= MIN_COMMIT_HASH_LEN && hash.bytes().all(|b| b.is_ascii_hexdigit()) {
            return true;
        }
    }

    t.starts_with("diff --git ")
        || t.starts_with("##")
        || t.starts_with("Step ")
        || t.starts_with("STEP ")
}
39
/// A run of log lines that belong together, optionally introduced by a
/// separator line.
struct Block {
    /// The separator line that opened this block, or `None` for lines that
    /// appeared before any separator.
    separator: Option<String>,
    /// Lines of the block in order, each paired with the number of times it
    /// occurred consecutively.
    entries: Vec<(String, u32)>,
}
44
45pub fn compress(output: &str) -> Option<String> {
46    let lines: Vec<&str> = output.lines().collect();
47    if lines.len() <= 10 {
48        return None;
49    }
50
51    let mut blocks: Vec<Block> = Vec::new();
52    let mut current = Block {
53        separator: None,
54        entries: Vec::new(),
55    };
56    let mut error_lines = Vec::new();
57    let total_lines = lines.len();
58
59    for line in &lines {
60        let stripped = timestamp_re().replace(line, "").trim().to_string();
61        if stripped.is_empty() {
62            continue;
63        }
64
65        if is_block_separator(&stripped) {
66            if !current.entries.is_empty() || current.separator.is_some() {
67                blocks.push(current);
68            }
69            current = Block {
70                separator: Some(stripped.clone()),
71                entries: Vec::new(),
72            };
73            continue;
74        }
75
76        let lower = stripped.to_lowercase();
77        if lower.contains("error")
78            || lower.contains("critical")
79            || lower.contains("fatal")
80            || lower.contains("panic")
81            || lower.contains("exception")
82        {
83            error_lines.push(stripped.clone());
84        }
85
86        if let Some(last) = current.entries.last_mut() {
87            if last.0 == stripped {
88                last.1 += 1;
89                continue;
90            }
91        }
92        current.entries.push((stripped, 1));
93    }
94    if !current.entries.is_empty() || current.separator.is_some() {
95        blocks.push(current);
96    }
97
98    let total_unique: usize = blocks.iter().map(|b| b.entries.len()).sum();
99
100    let mut parts = Vec::new();
101    parts.push(format!("{total_lines} lines → {total_unique} unique"));
102
103    if !error_lines.is_empty() {
104        parts.push(format!("{} errors:", error_lines.len()));
105        for e in error_lines.iter().take(5) {
106            parts.push(format!("  {e}"));
107        }
108        if error_lines.len() > 5 {
109            parts.push(format!("  ... +{} more errors", error_lines.len() - 5));
110        }
111    }
112
113    let has_multiple_blocks = blocks.len() > 1;
114
115    for block in &blocks {
116        if let Some(sep) = &block.separator {
117            parts.push(sep.clone());
118        }
119
120        let formatted: Vec<String> = block
121            .entries
122            .iter()
123            .map(|(line, count)| {
124                if *count > 1 {
125                    format!("{line} (x{count})")
126                } else {
127                    line.clone()
128                }
129            })
130            .collect();
131
132        if !has_multiple_blocks && formatted.len() > 30 {
133            let tail = &formatted[formatted.len() - 15..];
134            parts.push(format!("last 15 unique lines:\n{}", tail.join("\n")));
135        } else if has_multiple_blocks && formatted.len() > 20 {
136            for line in formatted.iter().take(5) {
137                parts.push(line.clone());
138            }
139            let omitted = formatted.len() - 10;
140            parts.push(format!("[{omitted} lines omitted]"));
141            for line in formatted.iter().skip(formatted.len() - 5) {
142                parts.push(line.clone());
143            }
144        } else {
145            for line in &formatted {
146                parts.push(line.clone());
147            }
148        }
149    }
150
151    Some(parts.join("\n"))
152}
153
#[cfg(test)]
mod tests {
    use super::*;
    use std::iter::{once, repeat};

    /// Joins `lines` with newlines and compresses them, panicking if
    /// `compress` declines to produce a summary.
    fn compress_lines<S: AsRef<str>>(lines: &[S]) -> String {
        let joined = lines
            .iter()
            .map(|l| l.as_ref())
            .collect::<Vec<&str>>()
            .join("\n");
        compress(&joined).unwrap()
    }

    // Inputs of ten lines or fewer are left alone.
    #[test]
    fn short_output_returns_none() {
        assert!(compress("line1\nline2\nline3").is_none());
    }

    // Fifteen identical lines collapse into a single counted entry.
    #[test]
    fn deduplicates_consecutive_lines() {
        let result = compress_lines(&["INFO Processing request"; 15]);
        assert!(result.contains("(x15)"), "must show repeat count: {result}");
        assert!(
            result.contains("15 lines"),
            "must show total lines: {result}"
        );
    }

    // `===` banners survive and keep their own content.
    #[test]
    fn respects_block_separators_equals() {
        let input: Vec<&str> = once("=== commit aaaa001 ===")
            .chain(repeat("file_a.rs | 10 +++++").take(5))
            .chain(once("=== commit aaaa002 ==="))
            .chain(repeat("file_b.rs | 20 ++++++++++").take(5))
            .collect();
        let result = compress_lines(&input);

        let expectations = [
            (
                "=== commit aaaa001 ===",
                "first block separator must be preserved",
            ),
            (
                "=== commit aaaa002 ===",
                "second block separator must be preserved",
            ),
            ("file_a.rs", "first block content must be preserved"),
            ("file_b.rs", "second block content must be preserved"),
        ];
        for (needle, why) in expectations {
            assert!(result.contains(needle), "{why}: {result}");
        }
    }

    // The same line in different blocks must not be folded into one entry.
    #[test]
    fn does_not_merge_across_blocks() {
        let result = compress_lines(&[
            "=== block 1 ===",
            "same line",
            "same line",
            "same line",
            "=== block 2 ===",
            "same line",
            "same line",
            "=== block 3 ===",
            "same line",
            "same line",
            "different line here",
        ]);

        for n in 1..=3 {
            let header = format!("=== block {n} ===");
            assert!(result.contains(&header), "block {n} must exist: {result}");
        }

        let count_same = result.matches("same line").count();
        assert!(
            count_same >= 3,
            "each block must have its own 'same line' entry, got {count_same}: {result}"
        );
    }

    #[test]
    fn git_commit_separator_detected() {
        for header in ["commit abc1234def5678", "commit 1a2b3c4d5e6f7890"] {
            assert!(is_block_separator(header));
        }
        assert!(!is_block_separator("committed to fixing"));
    }

    #[test]
    fn diff_separator_detected() {
        assert!(is_block_separator("diff --git a/file.rs b/file.rs"));
        assert!(!is_block_separator("different approach"));
    }

    #[test]
    fn triple_equals_dashes_detected() {
        let separators = [
            "===",
            "==========",
            "---",
            "-----------",
            "=== test block ===",
            "--- a/file.rs",
        ];
        for candidate in separators {
            assert!(is_block_separator(candidate));
        }
    }

    // Error lines are reported in the summary even when blocks are present.
    #[test]
    fn error_lines_preserved_across_blocks() {
        let input: Vec<&str> = once("=== step 1 ===")
            .chain(repeat("ok line").take(3))
            .chain(once("ERROR: something failed"))
            .chain(repeat("ok line").take(3))
            .chain(once("=== step 2 ==="))
            .chain(repeat("ok line 2").take(6))
            .collect();
        let result = compress_lines(&input);

        assert!(
            result.contains("1 errors:"),
            "error count must be shown: {result}"
        );
        assert!(
            result.contains("ERROR: something failed"),
            "error line must be preserved: {result}"
        );
    }

    // Output shaped like a `git show --stat` loop keeps every commit's files.
    #[test]
    fn git_show_loop_not_deduplicated() {
        let commits = [
            (
                "aaaa001",
                "accounts_test.exs | 70 ++",
                "schema_test.exs | 30 ++",
            ),
            ("aaaa002", "query_test.exs | 45 ++", "api_test.exs | 12 ++"),
            ("aaaa003", "main_test.exs | 55 ++", "helper_test.exs | 8 ++"),
        ];
        let input: Vec<String> = commits
            .iter()
            .flat_map(|(sha, first_file, second_file)| {
                vec![
                    format!("=== {sha} ==="),
                    first_file.to_string(),
                    second_file.to_string(),
                    "2 files changed".to_string(),
                    String::new(),
                ]
            })
            .collect();
        let result = compress_lines(&input);

        assert!(
            result.contains("aaaa001") && result.contains("aaaa002") && result.contains("aaaa003"),
            "all commit separators must be preserved: {result}"
        );
        assert!(
            result.contains("accounts_test.exs"),
            "first commit files must be present: {result}"
        );
        assert!(
            result.contains("query_test.exs"),
            "second commit files must be present: {result}"
        );
        assert!(
            result.contains("main_test.exs"),
            "third commit files must be present: {result}"
        );
    }
}