lean_ctx/core/patterns/
grep.rs1use std::collections::HashMap;
2
3use crate::core::tokens::count_tokens;
4
/// Collapses every run of whitespace in `text` into a single space and drops
/// leading/trailing whitespace, so two strings can be compared without regard
/// to their layout.
fn normalize_shell_tokens(text: &str) -> String {
    let mut normalized = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes us.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
8
9pub fn compress(output: &str) -> Option<String> {
10 let lines: Vec<&str> = output.lines().collect();
11 if lines.len() < 3 {
12 return None;
13 }
14
15 let mut by_file: HashMap<&str, Vec<(usize, &str)>> = HashMap::new();
16 let mut total_matches = 0usize;
17
18 for line in &lines {
19 if let Some((file, rest)) = parse_grep_line(line) {
20 total_matches += 1;
21 let line_num = extract_line_num(rest);
22 let content = strip_line_num(rest);
23 by_file.entry(file).or_default().push((line_num, content));
24 }
25 }
26
27 if total_matches == 0 {
28 return None;
29 }
30
31 let max_matches_per_file = if total_matches > 200 { 5 } else { 10 };
32
33 let mut result = format!("{total_matches} matches in {}F:\n", by_file.len());
34 let mut sorted_files: Vec<_> = by_file.iter().collect();
35 sorted_files.sort_by_key(|(_, matches)| std::cmp::Reverse(matches.len()));
36
37 for (file, matches) in &sorted_files {
38 let short = shorten_path(file);
39 result.push_str(&format!("\n{short} ({}):", matches.len()));
40 let show = matches.iter().take(max_matches_per_file);
41 for (ln, content) in show {
42 let trimmed = content.trim();
43 let short_content = if trimmed.len() > 120 {
44 let truncated: String = trimmed.chars().take(119).collect();
45 format!("{truncated}…")
46 } else {
47 trimmed.to_string()
48 };
49 if *ln > 0 {
50 result.push_str(&format!("\n {ln}: {short_content}"));
51 } else {
52 result.push_str(&format!("\n {short_content}"));
53 }
54 }
55 if matches.len() > max_matches_per_file {
56 result.push_str(&format!(
57 "\n ... +{} more",
58 matches.len() - max_matches_per_file
59 ));
60 }
61 }
62
63 let out_n = normalize_shell_tokens(output);
64 let res_n = normalize_shell_tokens(&result);
65 let ct_r = count_tokens(&res_n);
66 let ct_o = count_tokens(&out_n);
67 if ct_r >= ct_o && !(ct_r == ct_o && res_n.len() < out_n.len()) {
68 return None;
69 }
70
71 Some(result)
72}
73
/// Splits a grep output line at its first `:` into `(file, rest)`.
///
/// The part before the colon is only accepted as a file name when it contains
/// a `/` or a `.`; otherwise, or when the line has no colon at all, `None` is
/// returned.
fn parse_grep_line(line: &str) -> Option<(&str, &str)> {
    let (candidate, remainder) = line.split_once(':')?;
    let looks_like_path = candidate.contains(|c: char| c == '/' || c == '.');
    if looks_like_path {
        Some((candidate, remainder))
    } else {
        None
    }
}
84
/// Extracts the line number from the part of a grep line that follows the
/// file name (expected shape: `LINE:content`).
///
/// Returns `0` when `rest` has no `:`, when the text before the first `:` is
/// empty or contains anything other than ASCII digits, or when the digits do
/// not fit in a `usize`. `compress` treats `0` as "no line number".
fn extract_line_num(rest: &str) -> usize {
    match rest.split_once(':') {
        // Check the digits explicitly: integer parsing alone would also accept
        // a leading `+`, turning content such as `+3: added` into line 3.
        Some((prefix, _)) if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) => {
            prefix.parse().unwrap_or(0)
        }
        _ => 0,
    }
}
92
/// Removes a leading `LINE:` prefix from the part of a grep line that follows
/// the file name and returns the remaining content.
///
/// The prefix is only removed when the text before the first `:` is a
/// non-empty run of ASCII digits; otherwise `rest` is returned unchanged.
fn strip_line_num(rest: &str) -> &str {
    match rest.split_once(':') {
        // The non-empty check matters: `all` is vacuously true for an empty
        // prefix, which would eat the first colon of content like `::path`.
        Some((prefix, content))
            if !prefix.is_empty() && prefix.bytes().all(|b| b.is_ascii_digit()) =>
        {
            content
        }
        _ => rest,
    }
}
101
/// Drops a single leading `./` from `path`; any other path is returned as is.
fn shorten_path(path: &str) -> &str {
    match path.strip_prefix("./") {
        Some(relative) => relative,
        None => path,
    }
}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Joins generated grep lines with newlines (no trailing newline).
    fn grep_output(lines: impl IntoIterator<Item = String>) -> String {
        lines.into_iter().collect::<Vec<_>>().join("\n")
    }

    /// Input with fewer than three lines is never claimed.
    #[test]
    fn small_grep_output_is_not_claimed_without_matches() {
        assert_eq!(compress("hello\nworld"), None);
    }

    /// Twenty matches in one file are grouped and come out smaller in tokens.
    #[test]
    fn small_grep_output_still_compresses() {
        let output = grep_output((0..20).map(|i| format!("src/main.rs:{i}: let x = {i};")));

        let result = compress(&output);
        assert!(result.is_some());
        let compressed = result.unwrap();

        assert!(
            compressed.contains("20 matches in 1F:"),
            "should group by file: {compressed}"
        );

        let after = count_tokens(&compressed);
        let before = count_tokens(&output);
        assert!(after < before, "should compress: {} vs {}", after, before);
    }

    /// Above 200 matches only five lines per file are kept.
    #[test]
    fn large_output_reduces_per_file_lines() {
        let output = grep_output((0..250).map(|i| format!("src/a.rs:{i}: line content {i}")));

        let result = compress(&output).unwrap();

        assert!(
            result.contains("... +245 more"),
            "should show +more for large output: {result}"
        );
    }

    /// Text without any `file:` prefix is left alone.
    #[test]
    fn non_grep_output_returns_none() {
        let output = "no file:line pattern here\njust regular text\nmore text\nand more";
        assert_eq!(compress(output), None);
    }

    /// A tiny input may be declined; if it is claimed it must be smaller.
    #[test]
    fn tiny_grep_output_returns_none_if_inflation() {
        let output = "a.rs:1:x\nb.rs:2:y\nc.rs:3:z\n";

        if let Some(compressed) = compress(output) {
            let after = count_tokens(&compressed);
            let before = count_tokens(output);
            assert!(
                after < before,
                "must never inflate: compressed={} vs original={}",
                after,
                before
            );
        }
    }

    /// Two files with many matches each produce one section per file.
    #[test]
    fn multi_file_many_matches_compresses_well() {
        let user_hits = (0..50).map(|i| {
            format!(
                "src/models/user.rs:{}: pub fn method_{i}() {{}}",
                i + 1
            )
        });
        let auth_hits = (0..30).map(|i| {
            format!(
                "src/controllers/auth.rs:{}: let val = method_{i}();",
                i + 1
            )
        });
        let output = grep_output(user_hits.chain(auth_hits));

        let result = compress(&output).expect("80 matches should compress");

        let after = count_tokens(&result);
        let before = count_tokens(&output);
        assert!(after < before, "must compress: {} vs {}", after, before);

        for expected in [
            "80 matches in 2F:",
            "src/models/user.rs (50):",
            "src/controllers/auth.rs (30):",
        ] {
            assert!(result.contains(expected));
        }
    }

    /// Thirty files with one match each may be declined; a claim must be smaller.
    #[test]
    fn many_single_match_files_falls_back_to_none() {
        let output =
            grep_output((1..=30).map(|i| format!("src/file{i}.rs:42: fn search_result()")));

        if let Some(c) = compress(&output) {
            let after = count_tokens(&c);
            let before = count_tokens(&output);
            assert!(
                after < before,
                "if claimed, must be shorter in tokens: {} vs {}",
                after,
                before
            );
        }
    }

    /// Across a range of input sizes a claimed result is always smaller.
    #[test]
    fn never_returns_inflated_output() {
        for count in [3, 5, 10, 15, 25, 50] {
            let output = grep_output((0..count).map(|i| format!("f{i}.rs:{i}:x")));

            if let Some(c) = compress(&output) {
                let after = count_tokens(&c);
                let before = count_tokens(&output);
                assert!(
                    after < before,
                    "count={count}: inflated {} vs {}",
                    after,
                    before
                );
            }
        }
    }
}