Skip to main content

cli_denoiser/filters/
git.rs

1use regex::Regex;
2use std::sync::LazyLock;
3
4use super::{Filter, FilterResult};
5
6static GIT_PUSH_NOISE: LazyLock<Regex> = LazyLock::new(|| {
7    Regex::new(
8        r"(?x)
9        ^(?:
10            Enumerating\sobjects     |
11            Counting\sobjects        |
12            Compressing\sobjects     |
13            Delta\s(?:resolution|compression) |
14            Writing\sobjects         |
15            Total\s\d+               |
16            remote:\s*$              |
17            remote:\sCompressing     |
18            remote:\sCounting        |
19            remote:\sResolving       |
20            remote:\sTotal           |
21            \s*\(delta\s\d+\)
22        )",
23    )
24    .expect("git push noise regex valid")
25});
26
27static GIT_FETCH_NOISE: LazyLock<Regex> = LazyLock::new(|| {
28    Regex::new(
29        r"(?x)
30        ^(?:
31            remote:\sEnumerating   |
32            remote:\sCounting      |
33            remote:\sCompressing   |
34            Receiving\sobjects     |
35            Resolving\sdeltas      |
36            Unpacking\sobjects     |
37            From\s                 |
38            POST\sgit-upload-pack
39        )",
40    )
41    .expect("git fetch noise regex valid")
42});
43
44static GIT_CLONE_PROGRESS: LazyLock<Regex> = LazyLock::new(|| {
45    Regex::new(r"^(?:Cloning into|Receiving|Resolving|Updating files).*\d+%")
46        .expect("git clone progress regex valid")
47});
48
49/// Git-specific noise filter.
50/// Strips transfer stats, pack compression, and delta resolution lines
51/// that carry zero information for an LLM.
52///
53/// Preserves: branch names, commit hashes, conflict markers,
54/// error messages, diff output, status output.
55pub struct GitFilter;
56
57impl Filter for GitFilter {
58    fn name(&self) -> &'static str {
59        "git"
60    }
61
62    fn filter_line(&self, line: &str) -> FilterResult {
63        let trimmed = line.trim();
64
65        // Never filter empty lines (context-dependent)
66        if trimmed.is_empty() {
67            return FilterResult::Keep;
68        }
69
70        // git push/pull transfer noise
71        if GIT_PUSH_NOISE.is_match(trimmed) {
72            return FilterResult::Drop;
73        }
74
75        // git fetch/clone transfer noise
76        if GIT_FETCH_NOISE.is_match(trimmed) {
77            return FilterResult::Drop;
78        }
79
80        // git clone progress with percentages
81        if GIT_CLONE_PROGRESS.is_match(trimmed) {
82            return FilterResult::Drop;
83        }
84
85        // "remote:" lines that are just whitespace
86        if trimmed == "remote:" {
87            return FilterResult::Drop;
88        }
89
90        FilterResult::Keep
91    }
92
93    fn filter_block(&self, lines: &[String]) -> Vec<String> {
94        let mut result = Vec::with_capacity(lines.len());
95        let mut dropped_transfer = false;
96
97        for line in lines {
98            let trimmed = line.trim();
99            let is_noise = GIT_PUSH_NOISE.is_match(trimmed)
100                || GIT_FETCH_NOISE.is_match(trimmed)
101                || GIT_CLONE_PROGRESS.is_match(trimmed)
102                || trimmed == "remote:";
103
104            if is_noise {
105                if !dropped_transfer {
106                    dropped_transfer = true;
107                }
108            } else {
109                if dropped_transfer {
110                    result.push("[git transfer stats collapsed]".to_string());
111                    dropped_transfer = false;
112                }
113                result.push(line.clone());
114            }
115        }
116
117        if dropped_transfer {
118            result.push("[git transfer stats collapsed]".to_string());
119        }
120
121        result
122    }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Push-side counting/writing reports are dropped.
    #[test]
    fn drops_push_stats() {
        let filter = GitFilter;
        assert_eq!(
            filter.filter_line("Enumerating objects: 15, done."),
            FilterResult::Drop
        );
        assert_eq!(
            filter.filter_line("Counting objects: 100% (15/15), done."),
            FilterResult::Drop
        );
        assert_eq!(
            filter.filter_line("Writing objects: 100% (8/8), 2.51 KiB | 2.51 MiB/s, done."),
            FilterResult::Drop
        );
    }

    /// Fetch-side reports (remote enumeration, receiving, resolving, unpacking) are dropped.
    #[test]
    fn drops_fetch_stats() {
        let filter = GitFilter;
        for line in [
            "remote: Enumerating objects: 5, done.",
            "Receiving objects: 100% (5/5), 1.20 KiB | 1.20 MiB/s, done.",
            "Resolving deltas: 100% (2/2), done.",
            "Unpacking objects: 100% (3/3), done.",
        ] {
            assert_eq!(filter.filter_line(line), FilterResult::Drop, "line: {line}");
        }
    }

    /// A `remote:` prefix with nothing (or only whitespace) after it is dropped.
    #[test]
    fn drops_bare_remote_prefix() {
        let filter = GitFilter;
        assert_eq!(filter.filter_line("remote:"), FilterResult::Drop);
        assert_eq!(filter.filter_line("remote: "), FilterResult::Drop);
    }

    /// Ref-update lines carry the branch and commit range and must survive.
    #[test]
    fn keeps_branch_info() {
        let filter = GitFilter;
        assert_eq!(
            filter.filter_line("   abc1234..def5678  main -> main"),
            FilterResult::Keep
        );
    }

    /// Error output must survive.
    #[test]
    fn keeps_error_messages() {
        let filter = GitFilter;
        assert_eq!(
            filter.filter_line("error: failed to push some refs"),
            FilterResult::Keep
        );
    }

    /// `git status --short` style lines must survive.
    #[test]
    fn keeps_status_output() {
        let filter = GitFilter;
        assert_eq!(filter.filter_line("M  src/main.rs"), FilterResult::Keep);
        assert_eq!(filter.filter_line("?? new_file.txt"), FilterResult::Keep);
    }

    /// Blank lines and the percentage-free `Cloning into` line are kept.
    #[test]
    fn keeps_blank_and_clone_target_lines() {
        let filter = GitFilter;
        assert_eq!(filter.filter_line(""), FilterResult::Keep);
        assert_eq!(
            filter.filter_line("Cloning into 'repo'..."),
            FilterResult::Keep
        );
    }

    /// A leading run of noise collapses into one marker ahead of the kept line.
    #[test]
    fn block_collapses_transfer() {
        let filter = GitFilter;
        let lines = vec![
            "Enumerating objects: 15, done.".to_string(),
            "Counting objects: 100% (15/15), done.".to_string(),
            "Delta compression using up to 8 threads".to_string(),
            "Compressing objects: 100% (8/8), done.".to_string(),
            "Writing objects: 100% (8/8), 2.51 KiB, done.".to_string(),
            "   abc1234..def5678  main -> main".to_string(),
        ];
        let result = filter.filter_block(&lines);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0], "[git transfer stats collapsed]");
        assert!(result[1].contains("main -> main"));
    }

    /// Separate runs each get their own marker, including a run that ends the block.
    #[test]
    fn block_collapses_separate_and_trailing_runs() {
        let filter = GitFilter;
        let lines = vec![
            "remote: Counting objects: 100% (3/3), done.".to_string(),
            "To github.com:user/repo.git".to_string(),
            "Total 8 (delta 3), reused 0 (delta 0), pack-reused 0".to_string(),
        ];
        let result = filter.filter_block(&lines);
        assert_eq!(
            result,
            vec![
                "[git transfer stats collapsed]".to_string(),
                "To github.com:user/repo.git".to_string(),
                "[git transfer stats collapsed]".to_string(),
            ]
        );
    }

    /// An empty block yields an empty result.
    #[test]
    fn block_handles_empty_input() {
        let filter = GitFilter;
        assert!(filter.filter_block(&[]).is_empty());
    }
}