Skip to main content

stakpak_ak/
search.rs

1use crate::Error;
2use crate::format::extract_peek;
3use crate::store::StorageBackend;
4use globset::{GlobBuilder, GlobMatcher};
5use grep_matcher::Matcher;
6use grep_regex::{RegexMatcher, RegexMatcherBuilder};
7use serde::Serialize;
8use std::path::Path;
9use std::rc::Rc;
10
/// Upper bound, in bytes, on the file prefix that is scanned for NUL bytes
/// when deciding whether a file should be treated as binary (8 KiB).
const BINARY_DETECTION_BYTES: usize = 8_192;
12
13pub trait SearchEngine {
14    fn search_default(&self, path: &str) -> Result<Vec<PeekResult>, Error>;
15    fn search_glob(&self, path: &str, glob: &str) -> Result<Vec<PeekResult>, Error>;
16    fn search_grep(
17        &self,
18        path: &str,
19        regex: &str,
20        case_insensitive: bool,
21    ) -> Result<Vec<GrepResult>, Error>;
22    fn search_grep_glob(
23        &self,
24        path: &str,
25        regex: &str,
26        glob: &str,
27        case_insensitive: bool,
28    ) -> Result<Vec<GrepResult>, Error>;
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
32pub struct PeekResult {
33    pub path: String,
34    pub peek: String,
35}
36
37#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
38pub struct GrepResult {
39    pub path: String,
40    pub matches: Vec<(usize, String)>,
41}
42
43pub struct TreeNavEngine {
44    store: Rc<dyn StorageBackend>,
45}
46
47impl TreeNavEngine {
48    pub fn new(store: Rc<dyn StorageBackend>) -> Self {
49        Self { store }
50    }
51
52    fn search_peeks(
53        &self,
54        path: &str,
55        glob_matcher: Option<&GlobMatcher>,
56    ) -> Result<Vec<PeekResult>, Error> {
57        let mut results = Vec::new();
58
59        for relative_path in self.store.as_ref().walk(path)? {
60            if !matches_glob(glob_matcher, &relative_path) {
61                continue;
62            }
63            let content = self.store.as_ref().read(&relative_path)?;
64            results.push(PeekResult {
65                path: relative_path,
66                peek: extract_peek(&String::from_utf8_lossy(&content)),
67            });
68        }
69
70        Ok(results)
71    }
72
73    fn search_matches(
74        &self,
75        path: &str,
76        matcher: &RegexMatcher,
77        glob_matcher: Option<&GlobMatcher>,
78    ) -> Result<Vec<GrepResult>, Error> {
79        let mut results = Vec::new();
80
81        for relative_path in self.store.as_ref().walk(path)? {
82            if !matches_glob(glob_matcher, &relative_path) {
83                continue;
84            }
85            let content = self.store.as_ref().read(&relative_path)?;
86            if contains_nul_byte(&content[..content.len().min(BINARY_DETECTION_BYTES)]) {
87                continue;
88            }
89
90            let matches = grep_lines(matcher, &content)?;
91            if matches.is_empty() {
92                continue;
93            }
94
95            results.push(GrepResult {
96                path: relative_path,
97                matches,
98            });
99        }
100
101        Ok(results)
102    }
103}
104
105impl SearchEngine for TreeNavEngine {
106    fn search_default(&self, path: &str) -> Result<Vec<PeekResult>, Error> {
107        self.search_peeks(path, None)
108    }
109
110    fn search_glob(&self, path: &str, glob: &str) -> Result<Vec<PeekResult>, Error> {
111        let matcher = compile_glob(glob)?;
112        self.search_peeks(path, Some(&matcher))
113    }
114
115    fn search_grep(
116        &self,
117        path: &str,
118        regex: &str,
119        case_insensitive: bool,
120    ) -> Result<Vec<GrepResult>, Error> {
121        let matcher = compile_regex(regex, case_insensitive)?;
122        self.search_matches(path, &matcher, None)
123    }
124
125    fn search_grep_glob(
126        &self,
127        path: &str,
128        regex: &str,
129        glob: &str,
130        case_insensitive: bool,
131    ) -> Result<Vec<GrepResult>, Error> {
132        let regex_matcher = compile_regex(regex, case_insensitive)?;
133        let glob_matcher = compile_glob(glob)?;
134        self.search_matches(path, &regex_matcher, Some(&glob_matcher))
135    }
136}
137
138fn compile_glob(glob: &str) -> Result<GlobMatcher, Error> {
139    GlobBuilder::new(glob)
140        .literal_separator(true)
141        .build()
142        .map(|compiled| compiled.compile_matcher())
143        .map_err(|error| Error::Parse(format!("invalid glob pattern: {error}")))
144}
145
146fn compile_regex(pattern: &str, case_insensitive: bool) -> Result<RegexMatcher, Error> {
147    let mut builder = RegexMatcherBuilder::new();
148    builder.case_insensitive(case_insensitive);
149    builder.line_terminator(Some(b'\n'));
150    builder
151        .build(pattern)
152        .map_err(|error| Error::Parse(format!("invalid regex pattern: {error}")))
153}
154
155fn matches_glob(glob_matcher: Option<&GlobMatcher>, path: &str) -> bool {
156    glob_matcher.is_none_or(|matcher| matcher.is_match(Path::new(path)))
157}
158
159fn grep_lines(matcher: &RegexMatcher, content: &[u8]) -> Result<Vec<(usize, String)>, Error> {
160    let text = String::from_utf8_lossy(content);
161    let mut matches = Vec::new();
162
163    for (index, line) in text.lines().enumerate() {
164        if matcher
165            .find(line.as_bytes())
166            .map_err(|error| Error::Parse(format!("failed to run regex search: {error}")))?
167            .is_some()
168        {
169            matches.push((index + 1, line.to_string()));
170        }
171    }
172
173    Ok(matches)
174}
175
/// Reports whether `content` holds at least one NUL (`0x00`) byte.
fn contains_nul_byte(content: &[u8]) -> bool {
    content.iter().any(|&byte| byte == 0)
}
179
#[cfg(test)]
mod tests {
    use super::{GrepResult, PeekResult, SearchEngine, TreeNavEngine};
    use crate::store::{LocalFsBackend, StorageBackend};
    use std::rc::Rc;

    /// Builds an engine over a fresh backend rooted inside a temporary directory.
    ///
    /// The `TempDir` guard is returned so the directory stays alive for the
    /// whole test; the backend is returned so tests can seed files.
    fn engine() -> (tempfile::TempDir, LocalFsBackend, TreeNavEngine) {
        let root = tempfile::TempDir::new().expect("temp dir");
        let backend = LocalFsBackend::with_root(root.path().join("store"));
        let shared: Rc<dyn StorageBackend> = Rc::new(backend.clone());
        (root, backend, TreeNavEngine::new(shared))
    }

    /// Shorthand for an expected [`PeekResult`].
    fn peek_of(path: &str, peek: &str) -> PeekResult {
        PeekResult {
            path: path.to_string(),
            peek: peek.to_string(),
        }
    }

    /// Shorthand for an expected [`GrepResult`] built from `(line number, text)` pairs.
    fn grep_hit(path: &str, lines: &[(usize, &str)]) -> GrepResult {
        GrepResult {
            path: path.to_string(),
            matches: lines
                .iter()
                .map(|&(number, text)| (number, text.to_string()))
                .collect(),
        }
    }

    #[test]
    fn search_default_returns_peeks_sorted_by_full_path() {
        let (_root, backend, engine) = engine();
        // Seeded out of order on purpose: `notes/...` must still come first.
        backend
            .create(
                "services/rate-limits.md",
                b"---\ndescription: API rate limits\n---\nBody\n",
            )
            .expect("create rate limits file");
        backend
            .create("notes/todo.md", b"First paragraph\n\nSecond paragraph\n")
            .expect("create todo file");

        let found = engine.search_default("").expect("default search");

        let expected = vec![
            peek_of("notes/todo.md", "First paragraph"),
            peek_of(
                "services/rate-limits.md",
                "---\ndescription: API rate limits\n---\nBody",
            ),
        ];
        assert_eq!(found, expected);
    }

    #[test]
    fn search_glob_filters_by_pattern() {
        let (_root, backend, engine) = engine();
        backend
            .create("services/rate-limits.md", b"Body\n")
            .expect("create service file");
        backend
            .create("notes/todo.md", b"Body\n")
            .expect("create notes file");

        let found = engine
            .search_glob("", "services/**/*.md")
            .expect("glob search");

        assert_eq!(found, vec![peek_of("services/rate-limits.md", "Body")]);
    }

    #[test]
    fn search_grep_returns_matching_lines_with_line_numbers() {
        let (_root, backend, engine) = engine();
        backend
            .create(
                "services/rate-limits.md",
                b"first\nRate limit is 1000/min\nthird\n",
            )
            .expect("create service file");

        let found = engine
            .search_grep("", "Rate limit", false)
            .expect("grep search");

        let expected = vec![grep_hit(
            "services/rate-limits.md",
            &[(2, "Rate limit is 1000/min")],
        )];
        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_honors_case_insensitive_flag() {
        let (_root, backend, engine) = engine();
        backend
            .create("services/rate-limits.md", b"rate limit is 1000/min\n")
            .expect("create service file");

        let found = engine
            .search_grep("", "RATE LIMIT", true)
            .expect("case insensitive grep");

        let expected = vec![grep_hit(
            "services/rate-limits.md",
            &[(1, "rate limit is 1000/min")],
        )];
        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_matches_frontmatter_lines() {
        let (_root, backend, engine) = engine();
        backend
            .create(
                "services/rate-limits.md",
                b"---\ndescription: API rate limits\n---\nBody\n",
            )
            .expect("create file");

        let found = engine
            .search_grep("", "API rate", false)
            .expect("frontmatter grep");

        let expected = vec![grep_hit(
            "services/rate-limits.md",
            &[(2, "description: API rate limits")],
        )];
        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_skips_binary_files() {
        let (_root, backend, engine) = engine();
        // The NUL byte marks this file as binary even though a later line matches.
        backend
            .create("services/binary.bin", b"text\0hidden\nRate limit\n")
            .expect("create binary file");
        backend
            .create("services/rate-limits.md", b"Rate limit is 1000/min\n")
            .expect("create text file");

        let found = engine
            .search_grep("", "Rate limit", false)
            .expect("grep search");

        let expected = vec![grep_hit(
            "services/rate-limits.md",
            &[(1, "Rate limit is 1000/min")],
        )];
        assert_eq!(found, expected);
    }

    #[test]
    fn search_grep_glob_composes_filters() {
        let (_root, backend, engine) = engine();
        backend
            .create("services/rate-limits.md", b"Rate limit\n")
            .expect("create markdown file");
        backend
            .create("services/rate-limits.txt", b"Rate limit\n")
            .expect("create text file");

        let found = engine
            .search_grep_glob("", "Rate limit", "**/*.md", false)
            .expect("grep glob search");

        let expected = vec![grep_hit("services/rate-limits.md", &[(1, "Rate limit")])];
        assert_eq!(found, expected);
    }
}
344}