Skip to main content

lean_ctx/core/patterns/
grep.rs

1use std::collections::HashMap;
2
3use crate::core::tokens::count_tokens;
4
/// Collapses every run of whitespace in `text` into a single space and drops
/// leading/trailing whitespace, so two strings can be compared independently
/// of their layout.
fn normalize_shell_tokens(text: &str) -> String {
    let mut joined = String::with_capacity(text.len());
    for token in text.split_whitespace() {
        // Tokens are never empty, so an empty buffer means "first token".
        if !joined.is_empty() {
            joined.push(' ');
        }
        joined.push_str(token);
    }
    joined
}
8
9pub fn compress(output: &str) -> Option<String> {
10    let lines: Vec<&str> = output.lines().collect();
11    if lines.len() < 3 {
12        return None;
13    }
14
15    let mut by_file: HashMap<&str, Vec<(usize, &str)>> = HashMap::new();
16    let mut total_matches = 0usize;
17
18    for line in &lines {
19        if let Some((file, rest)) = parse_grep_line(line) {
20            total_matches += 1;
21            let line_num = extract_line_num(rest);
22            let content = strip_line_num(rest);
23            by_file.entry(file).or_default().push((line_num, content));
24        }
25    }
26
27    if total_matches == 0 {
28        return None;
29    }
30
31    let max_matches_per_file = if total_matches > 200 { 5 } else { 10 };
32
33    let mut result = format!("{total_matches} matches in {}F:\n", by_file.len());
34    let mut sorted_files: Vec<_> = by_file.iter().collect();
35    sorted_files.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(b.0)));
36
37    for (file, matches) in &sorted_files {
38        let short = shorten_path(file);
39        result.push_str(&format!("\n{short} ({}):", matches.len()));
40        let show = matches.iter().take(max_matches_per_file);
41        for (ln, content) in show {
42            let trimmed = content.trim();
43            let short_content = if trimmed.len() > 120 {
44                let truncated: String = trimmed.chars().take(119).collect();
45                format!("{truncated}…")
46            } else {
47                trimmed.to_string()
48            };
49            if *ln > 0 {
50                result.push_str(&format!("\n  {ln}: {short_content}"));
51            } else {
52                result.push_str(&format!("\n  {short_content}"));
53            }
54        }
55        if matches.len() > max_matches_per_file {
56            result.push_str(&format!(
57                "\n  ... +{} more",
58                matches.len() - max_matches_per_file
59            ));
60        }
61    }
62
63    let out_n = normalize_shell_tokens(output);
64    let res_n = normalize_shell_tokens(&result);
65    let ct_r = count_tokens(&res_n);
66    let ct_o = count_tokens(&out_n);
67    if ct_r >= ct_o && !(ct_r == ct_o && res_n.len() < out_n.len()) {
68        return None;
69    }
70
71    Some(result)
72}
73
/// Splits a grep output line into `(file, rest)` at the colon that ends the
/// file name.
///
/// A leading Windows drive prefix (ASCII letter followed by `:`) is skipped
/// when looking for that colon, so it stays part of the file name. The line
/// is only accepted when the candidate file name contains `/`, `\` or `.`;
/// otherwise `None` is returned.
fn parse_grep_line(line: &str) -> Option<(&str, &str)> {
    // Both prefix bytes are ASCII when this matches, so offset 2 is a valid
    // char boundary.
    let offset = match line.as_bytes() {
        [drive, b':', ..] if drive.is_ascii_alphabetic() => 2,
        _ => 0,
    };

    let colon = offset + line[offset..].find(':')?;
    let file = &line[..colon];
    let tail = &line[colon + 1..];

    let looks_like_path = file.contains(|c: char| matches!(c, '/' | '\\' | '.'));
    looks_like_path.then_some((file, tail))
}
94
/// Returns the line number at the start of `rest` (the text following
/// `file:` in a grep line), or `0` when there is none.
///
/// The text before the first `:` counts as a line number only if it is a
/// non-empty run of ASCII digits that fits in `usize`. Requiring digits
/// rejects prefixes such as `+5`, which integer parsing would otherwise
/// accept even though they are not stripped from the content.
fn extract_line_num(rest: &str) -> usize {
    rest.split_once(':')
        .map(|(prefix, _)| prefix)
        .filter(|prefix| !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()))
        .and_then(|prefix| prefix.parse().ok())
        .unwrap_or(0)
}
102
/// Removes a leading `<digits>:` line-number prefix from `rest` and returns
/// the remaining match content.
///
/// `rest` is returned unchanged when it has no `:` or when the text before
/// the first `:` is not a non-empty run of ASCII digits. The non-empty
/// requirement keeps content that itself starts with `:` intact.
fn strip_line_num(rest: &str) -> &str {
    match rest.split_once(':') {
        Some((prefix, content))
            if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) =>
        {
            content
        }
        _ => rest,
    }
}
111
/// Drops a single leading `./` from `path`; any other path is returned as is.
fn shorten_path(path: &str) -> &str {
    if let Some(relative) = path.strip_prefix("./") {
        return relative;
    }
    path
}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain lines are below the three-line minimum, so nothing is claimed.
    #[test]
    fn small_grep_output_is_not_claimed_without_matches() {
        assert_eq!(compress("hello\nworld"), None);
    }

    /// Twenty matches in one file are grouped and use fewer tokens.
    #[test]
    fn small_grep_output_still_compresses() {
        let mut rows = Vec::with_capacity(20);
        for i in 0..20 {
            rows.push(format!("src/main.rs:{i}: let x = {i};"));
        }
        let output = rows.join("\n");

        let outcome = compress(&output);
        assert!(outcome.is_some());
        let compressed = outcome.unwrap();

        assert!(
            compressed.contains("20 matches in 1F:"),
            "should group by file: {compressed}"
        );
        let (after, before) = (count_tokens(&compressed), count_tokens(&output));
        assert!(after < before, "should compress: {} vs {}", after, before);
    }

    /// Above 200 matches only five lines per file are shown.
    #[test]
    fn large_output_reduces_per_file_lines() {
        let output = (0..250)
            .map(|i| format!("src/a.rs:{i}: line content {i}"))
            .collect::<Vec<_>>()
            .join("\n");
        let result = compress(&output).unwrap();
        assert!(
            result.contains("... +245 more"),
            "should show +more for large output: {result}"
        );
    }

    /// Text without a path-like prefix before a colon is not grep output.
    #[test]
    fn non_grep_output_returns_none() {
        let output = "no file:line pattern here\njust regular text\nmore text\nand more";
        assert_eq!(compress(output), None);
    }

    /// A tiny input may be rejected; if it is claimed it must shrink.
    #[test]
    fn tiny_grep_output_returns_none_if_inflation() {
        let output = "a.rs:1:x\nb.rs:2:y\nc.rs:3:z\n";
        if let Some(compressed) = compress(output) {
            let (after, before) = (count_tokens(&compressed), count_tokens(output));
            assert!(
                after < before,
                "must never inflate: compressed={} vs original={}",
                after,
                before
            );
        }
    }

    /// Two files with many matches each are summarized per file.
    #[test]
    fn multi_file_many_matches_compresses_well() {
        let user_hits = (0..50).map(|i| {
            format!(
                "src/models/user.rs:{}: pub fn method_{i}() {{}}",
                i + 1
            )
        });
        let auth_hits = (0..30).map(|i| {
            format!(
                "src/controllers/auth.rs:{}: let val = method_{i}();",
                i + 1
            )
        });
        let output = user_hits.chain(auth_hits).collect::<Vec<_>>().join("\n");

        let result = compress(&output).expect("80 matches should compress");
        let (after, before) = (count_tokens(&result), count_tokens(&output));
        assert!(after < before, "must compress: {} vs {}", after, before);

        let expected_fragments = [
            "80 matches in 2F:",
            "src/models/user.rs (50):",
            "src/controllers/auth.rs (30):",
        ];
        for fragment in expected_fragments.iter() {
            assert!(result.contains(*fragment));
        }
    }

    /// Thirty files with one match each: claiming is optional, inflating is not.
    #[test]
    fn many_single_match_files_falls_back_to_none() {
        let mut rows = Vec::new();
        for i in 1..=30 {
            rows.push(format!("src/file{i}.rs:42: fn search_result()"));
        }
        let output = rows.join("\n");

        if let Some(c) = compress(&output) {
            let (after, before) = (count_tokens(&c), count_tokens(&output));
            assert!(
                after < before,
                "if claimed, must be shorter in tokens: {} vs {}",
                after,
                before
            );
        }
    }

    /// Whatever the input size, a returned summary has fewer tokens.
    #[test]
    fn never_returns_inflated_output() {
        for count in [3, 5, 10, 15, 25, 50] {
            let output = (0..count)
                .map(|i| format!("f{i}.rs:{i}:x"))
                .collect::<Vec<_>>()
                .join("\n");
            if let Some(c) = compress(&output) {
                let (after, before) = (count_tokens(&c), count_tokens(&output));
                assert!(
                    after < before,
                    "count={count}: inflated {} vs {}",
                    after,
                    before
                );
            }
        }
    }
}