Skip to main content

cli_denoiser/filters/
dedup.rs

1use super::{Filter, FilterResult};
2
/// Minimum length a run of identical consecutive lines must reach before it
/// is collapsed into a single line followed by a `[repeated Nx]` marker.
/// Shorter runs are emitted unchanged.
const DEDUP_THRESHOLD: usize = 3;
4
/// Collapses consecutive duplicate lines.
/// e.g., 50 identical "npm warn deprecated" lines become one + "[repeated 50x]"
/// (the marker reports the total length of the run, first line included).
///
/// Uses exact-match deduplication only (zero false positives).
/// Near-duplicate detection is deliberately excluded from v1
/// to avoid collapsing lines that look similar but carry different info.
#[derive(Debug, Clone, Copy)]
pub struct DedupFilter;
12
13impl DedupFilter {
14    #[must_use]
15    pub fn new() -> Self {
16        Self
17    }
18}
19
20impl Default for DedupFilter {
21    fn default() -> Self {
22        Self::new()
23    }
24}
25
26impl Filter for DedupFilter {
27    fn name(&self) -> &'static str {
28        "dedup"
29    }
30
31    fn filter_line(&self, _line: &str) -> FilterResult {
32        // Dedup operates at block level, not line level
33        FilterResult::Keep
34    }
35
36    fn filter_block(&self, lines: &[String]) -> Vec<String> {
37        collapse_duplicates(lines)
38    }
39}
40
41fn collapse_duplicates(lines: &[String]) -> Vec<String> {
42    let mut result = Vec::with_capacity(lines.len());
43    let mut run_count: usize = 0;
44    let mut run_line: Option<&str> = None;
45
46    for line in lines {
47        let trimmed = line.trim();
48        if let Some(prev) = run_line {
49            if trimmed == prev {
50                run_count += 1;
51                continue;
52            }
53            flush_run(&mut result, prev, run_count);
54        }
55        run_line = Some(trimmed);
56        run_count = 1;
57    }
58
59    if let Some(prev) = run_line {
60        flush_run(&mut result, prev, run_count);
61    }
62
63    result
64}
65
66fn flush_run(result: &mut Vec<String>, line: &str, count: usize) {
67    if count >= DEDUP_THRESHOLD {
68        result.push(line.to_string());
69        result.push(format!("[repeated {count}x]"));
70    } else {
71        for _ in 0..count {
72            result.push(line.to_string());
73        }
74    }
75}
76
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds owned input lines from string literals.
    fn owned(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|line| (*line).to_string()).collect()
    }

    #[test]
    fn collapses_exact_duplicates() {
        let lines: Vec<String> = (0..10)
            .map(|_| "npm warn deprecated glob@7.2.3".to_string())
            .collect();
        let result = collapse_duplicates(&lines);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0], "npm warn deprecated glob@7.2.3");
        assert_eq!(result[1], "[repeated 10x]");
    }

    #[test]
    fn preserves_unique_lines() {
        let lines = owned(&["line one", "line two", "line three"]);
        let result = collapse_duplicates(&lines);
        assert_eq!(result, lines);
    }

    #[test]
    fn below_threshold_not_collapsed() {
        let lines = owned(&["same", "same"]);
        let result = collapse_duplicates(&lines);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0], "same");
        assert_eq!(result[1], "same");
    }

    #[test]
    fn mixed_runs() {
        let lines = owned(&["start", "warn: x", "warn: x", "warn: x", "warn: x", "end"]);
        let result = collapse_duplicates(&lines);
        assert_eq!(result.len(), 4);
        assert_eq!(result[0], "start");
        assert_eq!(result[1], "warn: x");
        assert_eq!(result[2], "[repeated 4x]");
        assert_eq!(result[3], "end");
    }

    #[test]
    fn empty_input_yields_empty_output() {
        assert!(collapse_duplicates(&[]).is_empty());
    }

    #[test]
    fn run_of_exactly_threshold_is_collapsed() {
        // Boundary case: the threshold itself is inclusive.
        let lines = vec!["same".to_string(); DEDUP_THRESHOLD];
        let result = collapse_duplicates(&lines);
        assert_eq!(
            result,
            vec![
                "same".to_string(),
                format!("[repeated {DEDUP_THRESHOLD}x]"),
            ]
        );
    }

    #[test]
    fn non_adjacent_duplicates_are_kept() {
        // Only consecutive repeats count as a run.
        let lines = owned(&["a", "b", "a", "b", "a"]);
        let result = collapse_duplicates(&lines);
        assert_eq!(result, lines);
    }

    #[test]
    fn back_to_back_runs_collapse_independently() {
        let lines = owned(&["a", "a", "a", "b", "b", "b", "b"]);
        let result = collapse_duplicates(&lines);
        assert_eq!(
            result,
            owned(&["a", "[repeated 3x]", "b", "[repeated 4x]"])
        );
    }
}