1use anyhow::Result;
2use crate::storage::{SegmentReader, decode_line_table, byte_to_line};
3use crate::types::*;
4use globset::Glob;
5use crate::index::{PathIndex, HandlesMap};
6use regex::Regex;
7
8impl Snapshot {
9 pub fn find(&mut self, query: &str, path_glob: Option<&str>, limit: Option<usize>) -> Result<Vec<Hit>> {
11 let path_index = &self.path_index;
13 let handles_map = &self.handles_map;
14
15 let limit = limit.unwrap_or(1000);
16 let mut hits = Vec::new();
17
18 let path_filter = if let Some(glob) = path_glob {
20 Some(globset::Glob::new(glob)?.compile_matcher())
21 } else {
22 None
23 };
24
25 let candidate_files = if self.inverted_index.term_count() > 0 {
27 self.inverted_index.find_files_with_term(query)
29 } else {
30 let mut all_files = std::collections::HashSet::new();
32 for &handle in self.path_index.paths.values() {
33 all_files.insert(handle as u32);
34 }
35 all_files
36 };
37
38 for handle in candidate_files {
40 if hits.len() >= limit {
41 break;
42 }
43
44 let path = if let Some((path, _)) = self.path_index.paths.iter()
46 .find(|(_, &h)| h == handle as u64) {
47 path
48 } else {
49 continue;
50 };
51
52 if let Some(ref filter) = path_filter {
54 if !filter.is_match(path) {
55 continue;
56 }
57 }
58
59 if let Some(metadata) = self.handles_map.get_metadata(handle as u64) {
60 let store_path = self.collection_path.join("store");
61
62 let reader = if let Some(reader) = self.segment_cache.get_mut(&metadata.seg_id) {
64 reader
65 } else {
66 let new_reader = SegmentReader::new(&store_path, metadata.seg_id)?;
67 self.segment_cache.insert(metadata.seg_id, new_reader);
68 self.segment_cache.get_mut(&metadata.seg_id).unwrap()
69 };
70
71 let frame = reader.read_frame(metadata)?;
72
73 if let Ok(content_str) = String::from_utf8(frame.content.clone()) {
75 let newline_positions = decode_line_table(&frame.line_table)?;
77
78 for (byte_offset, line_content) in find_matches_in_content(&content_str, query) {
79 let line_num = byte_to_line(byte_offset, &newline_positions);
80
81 hits.push(Hit {
82 path: path.clone(),
83 line: line_num,
84 text: line_content,
85 });
86
87 if hits.len() >= limit {
88 return Ok(hits);
89 }
90 }
91 }
92 }
93 }
94
95 Ok(hits)
96 }
97
98 pub fn regex_find(&self, pattern: &str, path_glob: Option<&str>, limit: Option<usize>) -> Result<Vec<Hit>> {
100 let path_index = PathIndex::read_from_file(&self.collection_path.join("index/path.json"))?;
101 let handles_map = HandlesMap::read_from_file(&self.collection_path.join("index/handles.json"))?;
102
103 let trigram_candidates: Option<Vec<u32>> = None;
105
106 let regex = Regex::new(pattern)?;
107 let mut hits = Vec::new();
108 let limit = limit.unwrap_or(1000);
109
110 let file_handles: Vec<u32> = if let Some(candidates) = trigram_candidates {
112 candidates.into_iter().collect()
113 } else {
114 path_index.paths.values().map(|&h| h as u32).collect()
116 };
117
118 let path_filter = path_glob.map(|pattern| {
120 Glob::new(pattern).unwrap().compile_matcher()
121 });
122
123 for file_handle in file_handles {
124 if hits.len() >= limit {
125 break;
126 }
127
128 if let Some(metadata) = handles_map.handles.get(&(file_handle as u64)) {
129 if let Some(ref filter) = path_filter {
131 if let Some(path_entry) = path_index.paths.iter().find(|(_, &h)| h as u32 == file_handle) {
132 if !filter.is_match(path_entry.0) {
133 continue;
134 }
135 }
136 }
137
138 let mut reader = SegmentReader::new(&self.collection_path.join("segments"), metadata.seg_id)?;
140
141 if let Ok(frame) = reader.read_frame(metadata) {
142 let content_str = String::from_utf8_lossy(&frame.content);
143
144 for (line_idx, line) in content_str.lines().enumerate() {
146 if regex.is_match(line) {
147 let file_path = path_index.paths.iter()
148 .find(|(_, &h)| h as u32 == file_handle)
149 .map(|(path, _)| path.to_string())
150 .unwrap_or_else(|| "unknown".to_string());
151
152 hits.push(Hit {
153 path: file_path,
154 line: (line_idx + 1) as u32,
155 text: line.to_string(),
156 });
157
158 if hits.len() >= limit {
159 break;
160 }
161 }
162 }
163 }
164 }
165 }
166
167 Ok(hits)
168 }
169
170 pub fn grep(&mut self, pattern: &str, path_glob: Option<&str>, limit: Option<usize>) -> Result<Vec<Hit>> {
172 self.find(pattern, path_glob, limit)
175 }
176
177 pub fn open_span(&self, path: &str, start_line: u32, end_line: u32) -> Result<TextSpan> {
179 let path_index = PathIndex::read_from_file(&self.collection_path.join("index/path.json"))?;
180 let handles_map = HandlesMap::read_from_file(&self.collection_path.join("index/handles.json"))?;
181
182 let handle = path_index.get_handle(path)
183 .ok_or_else(|| anyhow::anyhow!("Path not found: {}", path))?;
184
185 let metadata = handles_map.get_metadata(handle)
186 .ok_or_else(|| anyhow::anyhow!("Handle metadata not found: {}", handle))?;
187
188 let store_path = self.collection_path.join("store");
189 let mut reader = SegmentReader::new(&store_path, metadata.seg_id)?;
190 let frame = reader.read_frame(metadata)?;
191
192 let content_str = String::from_utf8(frame.content)
193 .map_err(|_| anyhow::anyhow!("File contains non-UTF8 content"))?;
194
195 let newline_positions = decode_line_table(&frame.line_table)?;
196 let lines = extract_line_range(&content_str, &newline_positions, start_line, end_line)?;
197
198 Ok(TextSpan {
199 path: path.to_string(),
200 content: lines,
201 start_line,
202 end_line,
203 })
204 }
205}
206
/// Returns `(byte_offset, line_text)` for every line of `content` that contains `query`.
///
/// `byte_offset` is the position of the line's first byte within `content`. Lines are
/// split on `\n`; a `\r` directly before the `\n` is removed from the returned text, the
/// same way `str::lines` does it.
fn find_matches_in_content(content: &str, query: &str) -> Vec<(usize, String)> {
    let mut matches = Vec::new();
    let mut byte_offset = 0;

    // `split_inclusive` keeps each terminator attached to its line, so `raw_line.len()` is
    // the exact number of bytes consumed. Adding a fixed `+ 1` per line would fall one
    // byte behind for every `\r\n` ending.
    for raw_line in content.split_inclusive('\n') {
        let line = match raw_line.strip_suffix('\n') {
            Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
            None => raw_line,
        };

        if line.contains(query) {
            matches.push((byte_offset, line.to_string()));
        }
        byte_offset += raw_line.len();
    }

    matches
}
222
223fn extract_line_range(
225 content: &str,
226 _newline_positions: &[u32],
227 start_line: u32,
228 end_line: u32
229) -> Result<String> {
230 let lines: Vec<&str> = content.lines().collect();
231
232 if start_line == 0 || end_line == 0 {
233 anyhow::bail!("Line numbers must be 1-based");
234 }
235
236 let start_idx = (start_line - 1) as usize;
237 let end_idx = std::cmp::min(end_line as usize, lines.len());
238
239 if start_idx >= lines.len() {
240 anyhow::bail!("Start line {} exceeds file length {}", start_line, lines.len());
241 }
242
243 let selected_lines = &lines[start_idx..end_idx];
244 Ok(selected_lines.join("\n"))
245}
246
/// Minimal glob approximation that understands only a few fixed pattern shapes.
///
/// * `*` and `**/*` match everything.
/// * `**/*.ext` matches text ending in `.ext` (the dot is part of the comparison).
/// * `**/name` matches text that contains `name` anywhere.
/// * `dir/**` matches text that starts with `dir`.
/// * Any other pattern must equal `text` exactly.
// No caller is visible in this part of the file (the search methods use `globset`), so
// the dead-code lint is silenced rather than deleting the helper.
#[allow(dead_code)]
fn glob_match(pattern: &str, text: &str) -> bool {
    if pattern == "**/*" || pattern == "*" {
        return true;
    }

    // Strip only `**/*` so the extension keeps its leading dot; without the dot,
    // `**/*.rs` would also accept names such as `cars`.
    if let Some(dotted_ext) = pattern
        .strip_prefix("**/*")
        .filter(|rest| rest.starts_with('.'))
    {
        return text.ends_with(dotted_ext);
    }

    if let Some(suffix) = pattern.strip_prefix("**/") {
        return text.contains(suffix);
    }

    if let Some(prefix) = pattern.strip_suffix("/**") {
        return text.starts_with(prefix);
    }

    pattern == text
}