1use crate::Error;
2use crate::format::extract_peek;
3use crate::store::StorageBackend;
4use globset::{GlobBuilder, GlobMatcher};
5use grep_matcher::Matcher;
6use grep_regex::{RegexMatcher, RegexMatcherBuilder};
7use serde::Serialize;
8use std::path::Path;
9use std::rc::Rc;
10
/// Upper bound on how many leading bytes of a file are inspected for a NUL
/// byte when grep decides whether to skip the file as binary.
const BINARY_DETECTION_BYTES: usize = 8 * 1024;
12
13pub trait SearchEngine {
14 fn search_default(&self, path: &str) -> Result<Vec<PeekResult>, Error>;
15 fn search_glob(&self, path: &str, glob: &str) -> Result<Vec<PeekResult>, Error>;
16 fn search_grep(
17 &self,
18 path: &str,
19 regex: &str,
20 case_insensitive: bool,
21 ) -> Result<Vec<GrepResult>, Error>;
22 fn search_grep_glob(
23 &self,
24 path: &str,
25 regex: &str,
26 glob: &str,
27 case_insensitive: bool,
28 ) -> Result<Vec<GrepResult>, Error>;
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
32pub struct PeekResult {
33 pub path: String,
34 pub peek: String,
35}
36
37#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
38pub struct GrepResult {
39 pub path: String,
40 pub matches: Vec<(usize, String)>,
41}
42
43pub struct TreeNavEngine {
44 store: Rc<dyn StorageBackend>,
45}
46
47impl TreeNavEngine {
48 pub fn new(store: Rc<dyn StorageBackend>) -> Self {
49 Self { store }
50 }
51
52 fn search_peeks(
53 &self,
54 path: &str,
55 glob_matcher: Option<&GlobMatcher>,
56 ) -> Result<Vec<PeekResult>, Error> {
57 let mut results = Vec::new();
58
59 for relative_path in self.store.as_ref().walk(path)? {
60 if !matches_glob(glob_matcher, &relative_path) {
61 continue;
62 }
63 let content = self.store.as_ref().read(&relative_path)?;
64 results.push(PeekResult {
65 path: relative_path,
66 peek: extract_peek(&String::from_utf8_lossy(&content)),
67 });
68 }
69
70 Ok(results)
71 }
72
73 fn search_matches(
74 &self,
75 path: &str,
76 matcher: &RegexMatcher,
77 glob_matcher: Option<&GlobMatcher>,
78 ) -> Result<Vec<GrepResult>, Error> {
79 let mut results = Vec::new();
80
81 for relative_path in self.store.as_ref().walk(path)? {
82 if !matches_glob(glob_matcher, &relative_path) {
83 continue;
84 }
85 let content = self.store.as_ref().read(&relative_path)?;
86 if contains_nul_byte(&content[..content.len().min(BINARY_DETECTION_BYTES)]) {
87 continue;
88 }
89
90 let matches = grep_lines(matcher, &content)?;
91 if matches.is_empty() {
92 continue;
93 }
94
95 results.push(GrepResult {
96 path: relative_path,
97 matches,
98 });
99 }
100
101 Ok(results)
102 }
103}
104
105impl SearchEngine for TreeNavEngine {
106 fn search_default(&self, path: &str) -> Result<Vec<PeekResult>, Error> {
107 self.search_peeks(path, None)
108 }
109
110 fn search_glob(&self, path: &str, glob: &str) -> Result<Vec<PeekResult>, Error> {
111 let matcher = compile_glob(glob)?;
112 self.search_peeks(path, Some(&matcher))
113 }
114
115 fn search_grep(
116 &self,
117 path: &str,
118 regex: &str,
119 case_insensitive: bool,
120 ) -> Result<Vec<GrepResult>, Error> {
121 let matcher = compile_regex(regex, case_insensitive)?;
122 self.search_matches(path, &matcher, None)
123 }
124
125 fn search_grep_glob(
126 &self,
127 path: &str,
128 regex: &str,
129 glob: &str,
130 case_insensitive: bool,
131 ) -> Result<Vec<GrepResult>, Error> {
132 let regex_matcher = compile_regex(regex, case_insensitive)?;
133 let glob_matcher = compile_glob(glob)?;
134 self.search_matches(path, ®ex_matcher, Some(&glob_matcher))
135 }
136}
137
138fn compile_glob(glob: &str) -> Result<GlobMatcher, Error> {
139 GlobBuilder::new(glob)
140 .literal_separator(true)
141 .build()
142 .map(|compiled| compiled.compile_matcher())
143 .map_err(|error| Error::Parse(format!("invalid glob pattern: {error}")))
144}
145
146fn compile_regex(pattern: &str, case_insensitive: bool) -> Result<RegexMatcher, Error> {
147 let mut builder = RegexMatcherBuilder::new();
148 builder.case_insensitive(case_insensitive);
149 builder.line_terminator(Some(b'\n'));
150 builder
151 .build(pattern)
152 .map_err(|error| Error::Parse(format!("invalid regex pattern: {error}")))
153}
154
155fn matches_glob(glob_matcher: Option<&GlobMatcher>, path: &str) -> bool {
156 glob_matcher.is_none_or(|matcher| matcher.is_match(Path::new(path)))
157}
158
159fn grep_lines(matcher: &RegexMatcher, content: &[u8]) -> Result<Vec<(usize, String)>, Error> {
160 let text = String::from_utf8_lossy(content);
161 let mut matches = Vec::new();
162
163 for (index, line) in text.lines().enumerate() {
164 if matcher
165 .find(line.as_bytes())
166 .map_err(|error| Error::Parse(format!("failed to run regex search: {error}")))?
167 .is_some()
168 {
169 matches.push((index + 1, line.to_string()));
170 }
171 }
172
173 Ok(matches)
174}
175
/// Reports whether any byte of `content` is NUL (`0`).
fn contains_nul_byte(content: &[u8]) -> bool {
    content.iter().any(|&byte| byte == 0)
}
179
#[cfg(test)]
mod tests {
    use super::{GrepResult, PeekResult, SearchEngine, TreeNavEngine};
    use crate::store::{LocalFsBackend, StorageBackend};
    use std::rc::Rc;

    /// Builds an engine over a `LocalFsBackend` rooted at `<temp dir>/store`.
    ///
    /// The `TempDir` guard is returned alongside so that each test can keep it
    /// bound for as long as it uses the backend.
    fn engine() -> (tempfile::TempDir, LocalFsBackend, TreeNavEngine) {
        let root = tempfile::TempDir::new().expect("temp dir");
        let backend = LocalFsBackend::with_root(root.path().join("store"));
        let shared: Rc<dyn StorageBackend> = Rc::new(backend.clone());
        (root, backend, TreeNavEngine::new(shared))
    }

    /// Expected-value helper: a `PeekResult` built from string slices.
    fn peek(path: &str, peek: &str) -> PeekResult {
        PeekResult {
            path: path.to_string(),
            peek: peek.to_string(),
        }
    }

    /// Expected-value helper: a `GrepResult` holding a single matching line.
    fn single_hit(path: &str, line_number: usize, line: &str) -> GrepResult {
        GrepResult {
            path: path.to_string(),
            matches: vec![(line_number, line.to_string())],
        }
    }

    #[test]
    fn search_default_returns_peeks_sorted_by_full_path() {
        let (_root, backend, engine) = engine();
        backend
            .create(
                "services/rate-limits.md",
                b"---\ndescription: API rate limits\n---\nBody\n",
            )
            .expect("create rate limits file");
        backend
            .create("notes/todo.md", b"First paragraph\n\nSecond paragraph\n")
            .expect("create todo file");

        let found = engine.search_default("").expect("default search");
        let expected = vec![
            peek("notes/todo.md", "First paragraph"),
            peek(
                "services/rate-limits.md",
                "---\ndescription: API rate limits\n---\nBody",
            ),
        ];

        assert_eq!(found, expected);
    }

    #[test]
    fn search_glob_filters_by_pattern() {
        let (_root, backend, engine) = engine();
        backend
            .create("services/rate-limits.md", b"Body\n")
            .expect("create service file");
        backend
            .create("notes/todo.md", b"Body\n")
            .expect("create notes file");

        let found = engine
            .search_glob("", "services/**/*.md")
            .expect("glob search");
        let expected = vec![peek("services/rate-limits.md", "Body")];

        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_returns_matching_lines_with_line_numbers() {
        let (_root, backend, engine) = engine();
        backend
            .create(
                "services/rate-limits.md",
                b"first\nRate limit is 1000/min\nthird\n",
            )
            .expect("create service file");

        let found = engine
            .search_grep("", "Rate limit", false)
            .expect("grep search");
        let expected = vec![single_hit(
            "services/rate-limits.md",
            2,
            "Rate limit is 1000/min",
        )];

        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_honors_case_insensitive_flag() {
        let (_root, backend, engine) = engine();
        backend
            .create("services/rate-limits.md", b"rate limit is 1000/min\n")
            .expect("create service file");

        let found = engine
            .search_grep("", "RATE LIMIT", true)
            .expect("case insensitive grep");
        let expected = vec![single_hit(
            "services/rate-limits.md",
            1,
            "rate limit is 1000/min",
        )];

        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_matches_frontmatter_lines() {
        let (_root, backend, engine) = engine();
        backend
            .create(
                "services/rate-limits.md",
                b"---\ndescription: API rate limits\n---\nBody\n",
            )
            .expect("create file");

        let found = engine
            .search_grep("", "API rate", false)
            .expect("frontmatter grep");
        let expected = vec![single_hit(
            "services/rate-limits.md",
            2,
            "description: API rate limits",
        )];

        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_skips_binary_files() {
        let (_root, backend, engine) = engine();
        // This fixture contains a NUL byte, so only the text file below is expected in the results.
        backend
            .create("services/binary.bin", b"text\0hidden\nRate limit\n")
            .expect("create binary file");
        backend
            .create("services/rate-limits.md", b"Rate limit is 1000/min\n")
            .expect("create text file");

        let found = engine
            .search_grep("", "Rate limit", false)
            .expect("grep search");
        let expected = vec![single_hit(
            "services/rate-limits.md",
            1,
            "Rate limit is 1000/min",
        )];

        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_glob_composes_filters() {
        let (_root, backend, engine) = engine();
        backend
            .create("services/rate-limits.md", b"Rate limit\n")
            .expect("create markdown file");
        backend
            .create("services/rate-limits.txt", b"Rate limit\n")
            .expect("create text file");

        let found = engine
            .search_grep_glob("", "Rate limit", "**/*.md", false)
            .expect("grep glob search");
        let expected = vec![single_hit("services/rate-limits.md", 1, "Rate limit")];

        assert_eq!(found, expected);
    }
}