// lean_ctx/core/patterns/grep.rs

1use std::collections::HashMap;
2
3use crate::core::tokens::count_tokens;
4
/// Collapses every run of whitespace in `text` into a single space and drops
/// leading/trailing whitespace, so two texts can be compared token-for-token.
fn normalize_shell_tokens(text: &str) -> String {
    let mut normalized = String::new();
    for word in text.split_whitespace() {
        // Separator goes *between* words only, never before the first one.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
8
9pub fn compress(output: &str) -> Option<String> {
10    let lines: Vec<&str> = output.lines().collect();
11    if lines.len() < 3 {
12        return None;
13    }
14
15    let mut by_file: HashMap<&str, Vec<(usize, &str)>> = HashMap::new();
16    let mut total_matches = 0usize;
17
18    for line in &lines {
19        if let Some((file, rest)) = parse_grep_line(line) {
20            total_matches += 1;
21            let line_num = extract_line_num(rest);
22            let content = strip_line_num(rest);
23            by_file.entry(file).or_default().push((line_num, content));
24        }
25    }
26
27    if total_matches == 0 {
28        return None;
29    }
30
31    let max_matches_per_file = if total_matches > 200 { 5 } else { 10 };
32
33    let mut result = format!("{total_matches} matches in {}F:\n", by_file.len());
34    let mut sorted_files: Vec<_> = by_file.iter().collect();
35    sorted_files.sort_by_key(|(_, matches)| std::cmp::Reverse(matches.len()));
36
37    for (file, matches) in &sorted_files {
38        let short = shorten_path(file);
39        result.push_str(&format!("\n{short} ({}):", matches.len()));
40        let show = matches.iter().take(max_matches_per_file);
41        for (ln, content) in show {
42            let trimmed = content.trim();
43            let short_content = if trimmed.len() > 120 {
44                let truncated: String = trimmed.chars().take(119).collect();
45                format!("{truncated}…")
46            } else {
47                trimmed.to_string()
48            };
49            if *ln > 0 {
50                result.push_str(&format!("\n  {ln}: {short_content}"));
51            } else {
52                result.push_str(&format!("\n  {short_content}"));
53            }
54        }
55        if matches.len() > max_matches_per_file {
56            result.push_str(&format!(
57                "\n  ... +{} more",
58                matches.len() - max_matches_per_file
59            ));
60        }
61    }
62
63    let out_n = normalize_shell_tokens(output);
64    let res_n = normalize_shell_tokens(&result);
65    let ct_r = count_tokens(&res_n);
66    let ct_o = count_tokens(&out_n);
67    if ct_r >= ct_o && !(ct_r == ct_o && res_n.len() < out_n.len()) {
68        return None;
69    }
70
71    Some(result)
72}
73
/// Splits a grep output line at its first `:` into `(file, rest)`.
///
/// The part before the colon is accepted as a file name only if it contains a
/// `/` or a `.`; otherwise, or when the line has no colon, `None` is returned.
fn parse_grep_line(line: &str) -> Option<(&str, &str)> {
    let (file, rest) = line.split_once(':')?;
    let looks_like_path = file.chars().any(|c| c == '/' || c == '.');
    if !looks_like_path {
        return None;
    }
    Some((file, rest))
}
84
/// Returns the line number at the start of `rest` (the text before its first
/// `:`), or `0` when there is none.
///
/// The prefix must consist solely of ASCII digits, which is the same rule
/// `strip_line_num` uses to decide whether to remove it. Plain integer parsing
/// also accepts a leading `+`, which would report a line number for a prefix
/// that is then left in the content. A prefix too large for `usize` also
/// yields `0`.
fn extract_line_num(rest: &str) -> usize {
    rest.split_once(':')
        .map(|(prefix, _)| prefix)
        .filter(|prefix| prefix.bytes().all(|b| b.is_ascii_digit()))
        // An empty prefix passes the digit check vacuously; parsing rejects it.
        .and_then(|prefix| prefix.parse().ok())
        .unwrap_or(0)
}
92
/// Removes a leading `<digits>:` line-number prefix from `rest`.
///
/// Returns the text after the first `:` when everything before it is a
/// non-empty run of ASCII digits; otherwise returns `rest` unchanged. The
/// non-empty requirement keeps content that itself starts with `:` (for
/// example `::std::mem`) from losing its first character.
fn strip_line_num(rest: &str) -> &str {
    match rest.split_once(':') {
        Some((prefix, content))
            if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) =>
        {
            content
        }
        _ => rest,
    }
}
101
/// Drops a single leading `./` from `path`, if present.
fn shorten_path(path: &str) -> &str {
    match path.strip_prefix("./") {
        Some(relative) => relative,
        None => path,
    }
}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Joins generated grep lines with newlines (no trailing newline).
    fn joined(lines: impl IntoIterator<Item = String>) -> String {
        lines.into_iter().collect::<Vec<_>>().join("\n")
    }

    // Two lines is below the three-line minimum, so nothing is claimed.
    #[test]
    fn small_grep_output_is_not_claimed_without_matches() {
        assert_eq!(compress("hello\nworld"), None);
    }

    // Twenty matches in one file must be grouped and end up smaller.
    #[test]
    fn small_grep_output_still_compresses() {
        let output = joined((0..20).map(|i| format!("src/main.rs:{i}: let x = {i};")));
        let compressed = compress(&output).unwrap();
        assert!(
            compressed.contains("20 matches in 1F:"),
            "should group by file: {compressed}"
        );
        let after = count_tokens(&compressed);
        let before = count_tokens(&output);
        assert!(after < before, "should compress: {} vs {}", after, before);
    }

    // More than 200 matches drops the per-file budget to 5 shown lines.
    #[test]
    fn large_output_reduces_per_file_lines() {
        let output = joined((0..250).map(|i| format!("src/a.rs:{i}: line content {i}")));
        let result = compress(&output).unwrap();
        assert!(
            result.contains("... +245 more"),
            "should show +more for large output: {result}"
        );
    }

    // Text before the first colon has neither '/' nor '.', so no line matches.
    #[test]
    fn non_grep_output_returns_none() {
        let output = "no file:line pattern here\njust regular text\nmore text\nand more";
        assert!(compress(output).is_none());
    }

    // Either outcome is accepted, but a claimed result must not inflate.
    #[test]
    fn tiny_grep_output_returns_none_if_inflation() {
        let output = "a.rs:1:x\nb.rs:2:y\nc.rs:3:z\n";
        if let Some(compressed) = compress(output) {
            let after = count_tokens(&compressed);
            let before = count_tokens(output);
            assert!(
                after < before,
                "must never inflate: compressed={} vs original={}",
                after,
                before
            );
        }
    }

    // Two files with many matches each: header and both sections are checked.
    #[test]
    fn multi_file_many_matches_compresses_well() {
        let user_hits = (0..50).map(|i| {
            format!(
                "src/models/user.rs:{}: pub fn method_{i}() {{}}",
                i + 1
            )
        });
        let auth_hits = (0..30).map(|i| {
            format!(
                "src/controllers/auth.rs:{}: let val = method_{i}();",
                i + 1
            )
        });
        let output = joined(user_hits.chain(auth_hits));

        let result = compress(&output).expect("80 matches should compress");
        let after = count_tokens(&result);
        let before = count_tokens(&output);
        assert!(after < before, "must compress: {} vs {}", after, before);
        assert!(result.contains("80 matches in 2F:"));
        assert!(result.contains("src/models/user.rs (50):"));
        assert!(result.contains("src/controllers/auth.rs (30):"));
    }

    // Despite the name, `None` is not required here: the test only checks
    // that a claimed result is smaller in tokens.
    #[test]
    fn many_single_match_files_falls_back_to_none() {
        let output = joined((1..=30).map(|i| format!("src/file{i}.rs:42: fn search_result()")));
        if let Some(c) = compress(&output) {
            let after = count_tokens(&c);
            let before = count_tokens(&output);
            assert!(
                after < before,
                "if claimed, must be shorter in tokens: {} vs {}",
                after,
                before
            );
        }
    }

    // Sweep over several input sizes of one-match-per-file output.
    #[test]
    fn never_returns_inflated_output() {
        for count in [3, 5, 10, 15, 25, 50] {
            let output = joined((0..count).map(|i| format!("f{i}.rs:{i}:x")));
            if let Some(c) = compress(&output) {
                let after = count_tokens(&c);
                let before = count_tokens(&output);
                assert!(
                    after < before,
                    "count={count}: inflated {} vs {}",
                    after,
                    before
                );
            }
        }
    }
}