// lean_ctx/core/patterns/grep.rs
use std::collections::HashMap;

use crate::core::tokens::count_tokens;
4
/// Collapses every run of whitespace in `text` into a single space and drops
/// leading/trailing whitespace.
///
/// The text is split on Unicode whitespace and the resulting words are joined
/// with exactly one ASCII space, so inputs that differ only in spacing or line
/// breaks normalize to the same string.
fn normalize_shell_tokens(text: &str) -> String {
    let mut normalized = String::new();
    for word in text.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
8
9pub fn compress(output: &str) -> Option<String> {
10 let lines: Vec<&str> = output.lines().collect();
11 if lines.len() < 3 {
12 return None;
13 }
14
15 let mut by_file: HashMap<&str, Vec<(usize, &str)>> = HashMap::new();
16 let mut total_matches = 0usize;
17
18 for line in &lines {
19 if let Some((file, rest)) = parse_grep_line(line) {
20 total_matches += 1;
21 let line_num = extract_line_num(rest);
22 let content = strip_line_num(rest);
23 by_file.entry(file).or_default().push((line_num, content));
24 }
25 }
26
27 if total_matches == 0 {
28 return None;
29 }
30
31 let max_matches_per_file = if total_matches > 200 { 5 } else { 10 };
32
33 let mut result = format!("{total_matches} matches in {}F:\n", by_file.len());
34 let mut sorted_files: Vec<_> = by_file.iter().collect();
35 sorted_files.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(b.0)));
36
37 for (file, matches) in &sorted_files {
38 let short = shorten_path(file);
39 result.push_str(&format!("\n{short} ({}):", matches.len()));
40 let show = matches.iter().take(max_matches_per_file);
41 for (ln, content) in show {
42 let trimmed = content.trim();
43 let short_content = if trimmed.len() > 120 {
44 let truncated: String = trimmed.chars().take(119).collect();
45 format!("{truncated}…")
46 } else {
47 trimmed.to_string()
48 };
49 if *ln > 0 {
50 result.push_str(&format!("\n {ln}: {short_content}"));
51 } else {
52 result.push_str(&format!("\n {short_content}"));
53 }
54 }
55 if matches.len() > max_matches_per_file {
56 result.push_str(&format!(
57 "\n ... +{} more",
58 matches.len() - max_matches_per_file
59 ));
60 }
61 }
62
63 let out_n = normalize_shell_tokens(output);
64 let res_n = normalize_shell_tokens(&result);
65 let ct_r = count_tokens(&res_n);
66 let ct_o = count_tokens(&out_n);
67 if ct_r >= ct_o && !(ct_r == ct_o && res_n.len() < out_n.len()) {
68 return None;
69 }
70
71 Some(result)
72}
73
/// Splits a grep-style line into `(file, rest)` at the colon that ends the
/// file name.
///
/// When the line begins with an ASCII letter followed by `:` (as in a
/// `C:\...` drive prefix), that first colon is skipped and the search for the
/// separator starts after it. The part before the separator is accepted as a
/// file only if it contains `/`, `\` or `.`; otherwise, or when no separator
/// colon exists, `None` is returned.
fn parse_grep_line(line: &str) -> Option<(&str, &str)> {
    let has_drive_prefix =
        matches!(line.as_bytes(), [letter, b':', ..] if letter.is_ascii_alphabetic());
    // Both skipped bytes are ASCII, so offset 2 is always a char boundary.
    let search_from = if has_drive_prefix { 2 } else { 0 };

    let separator = search_from + line[search_from..].find(':')?;
    let candidate = &line[..separator];

    let looks_like_path = candidate.contains(|c: char| matches!(c, '/' | '\\' | '.'));
    if !looks_like_path {
        return None;
    }
    Some((candidate, &line[separator + 1..]))
}
94
/// Extracts the leading line number from the part of a grep line that follows
/// the file name (`"42: content"` yields `42`).
///
/// Returns `0` when there is no colon, when the text before the first colon is
/// empty or contains anything other than ASCII digits, or when the number does
/// not fit in `usize`.
fn extract_line_num(rest: &str) -> usize {
    rest.split_once(':')
        .map(|(prefix, _)| prefix)
        // `usize::from_str` also accepts a leading `+`; insist on plain digits so
        // this agrees with what `strip_line_num` treats as a line-number prefix.
        .filter(|prefix| !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()))
        .and_then(|prefix| prefix.parse::<usize>().ok())
        .unwrap_or(0)
}
102
/// Removes a leading `"<digits>:"` line-number prefix from the part of a grep
/// line that follows the file name.
///
/// The prefix is stripped only when the text before the first colon is a
/// non-empty run of ASCII digits; otherwise `rest` is returned unchanged.
fn strip_line_num(rest: &str) -> &str {
    match rest.split_once(':') {
        // The non-empty check matters: `all` is vacuously true for an empty
        // prefix, which would eat the first character of content such as `:root {`.
        Some((prefix, content))
            if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) =>
        {
            content
        }
        _ => rest,
    }
}
111
/// Drops a single leading `./` from `path`, returning the path unchanged when
/// it does not start with that prefix.
fn shorten_path(path: &str) -> &str {
    match path.strip_prefix("./") {
        Some(without_dot_slash) => without_dot_slash,
        None => path,
    }
}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Joins generated lines into a single newline-separated grep-style output.
    fn grep_output(lines: impl IntoIterator<Item = String>) -> String {
        lines.into_iter().collect::<Vec<_>>().join("\n")
    }

    // Two lines is below the minimum line count, so nothing is claimed.
    #[test]
    fn small_grep_output_is_not_claimed_without_matches() {
        let claimed = compress("hello\nworld");
        assert!(claimed.is_none());
    }

    // Twenty matches in one file are grouped under that file and shrink in tokens.
    #[test]
    fn small_grep_output_still_compresses() {
        let output = grep_output((0..20).map(|i| format!("src/main.rs:{i}: let x = {i};")));
        let compressed = compress(&output);
        assert!(compressed.is_some());
        let compressed = compressed.unwrap();
        assert!(
            compressed.contains("20 matches in 1F:"),
            "should group by file: {compressed}"
        );
        let (after, before) = (count_tokens(&compressed), count_tokens(&output));
        assert!(after < before, "should compress: {} vs {}", after, before);
    }

    // More than 200 matches switches to the 5-per-file limit: 250 - 5 = 245 hidden.
    #[test]
    fn large_output_reduces_per_file_lines() {
        let output = grep_output((0..250).map(|i| format!("src/a.rs:{i}: line content {i}")));
        let result = compress(&output).unwrap();
        assert!(
            result.contains("... +245 more"),
            "should show +more for large output: {result}"
        );
    }

    // Lines without a path-like prefix before a colon are not grep matches.
    #[test]
    fn non_grep_output_returns_none() {
        let output = "no file:line pattern here\njust regular text\nmore text\nand more";
        assert!(compress(output).is_none());
    }

    // If a tiny input is claimed at all, the result must be smaller in tokens.
    #[test]
    fn tiny_grep_output_returns_none_if_inflation() {
        let output = "a.rs:1:x\nb.rs:2:y\nc.rs:3:z\n";
        if let Some(compressed) = compress(output) {
            let (after, before) = (count_tokens(&compressed), count_tokens(output));
            assert!(
                after < before,
                "must never inflate: compressed={} vs original={}",
                after,
                before
            );
        }
    }

    // Two files with many matches each: both sections appear with their counts.
    #[test]
    fn multi_file_many_matches_compresses_well() {
        let user_hits = (0..50).map(|i| {
            format!(
                "src/models/user.rs:{}: pub fn method_{i}() {{}}",
                i + 1
            )
        });
        let auth_hits = (0..30).map(|i| {
            format!(
                "src/controllers/auth.rs:{}: let val = method_{i}();",
                i + 1
            )
        });
        let output = grep_output(user_hits.chain(auth_hits));

        let result = compress(&output).expect("80 matches should compress");
        let (after, before) = (count_tokens(&result), count_tokens(&output));
        assert!(after < before, "must compress: {} vs {}", after, before);
        assert!(result.contains("80 matches in 2F:"));
        assert!(result.contains("src/models/user.rs (50):"));
        assert!(result.contains("src/controllers/auth.rs (30):"));
    }

    // One match per file leaves little to group; a claimed result must still be smaller.
    #[test]
    fn many_single_match_files_falls_back_to_none() {
        let output =
            grep_output((1..=30).map(|i| format!("src/file{i}.rs:42: fn search_result()")));
        if let Some(compressed) = compress(&output) {
            let (after, before) = (count_tokens(&compressed), count_tokens(&output));
            assert!(
                after < before,
                "if claimed, must be shorter in tokens: {} vs {}",
                after,
                before
            );
        }
    }

    // Sweep several input sizes; any claimed result must be smaller in tokens.
    #[test]
    fn never_returns_inflated_output() {
        for count in [3, 5, 10, 15, 25, 50] {
            let output = grep_output((0..count).map(|i| format!("f{i}.rs:{i}:x")));
            if let Some(compressed) = compress(&output) {
                let (after, before) = (count_tokens(&compressed), count_tokens(&output));
                assert!(
                    after < before,
                    "count={count}: inflated {} vs {}",
                    after,
                    before
                );
            }
        }
    }
}