seekr_code/search/
text.rs1use std::path::Path;
7
8use regex::RegexBuilder;
9
10use crate::error::SearchError;
11use crate::index::store::SeekrIndex;
12use crate::parser::CodeChunk;
13
/// Options controlling a text (regex) search.
///
/// `PartialEq`/`Eq` are derived so option sets can be compared directly,
/// e.g. in assertions or when checking whether settings changed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextSearchOptions {
    /// When `true` the pattern is matched case-sensitively; when `false`
    /// the regex is compiled with case-insensitive matching enabled.
    pub case_sensitive: bool,

    /// Number of context lines to show around each match.
    ///
    /// NOTE(review): `search_text_regex` does not read this field; it is
    /// presumably forwarded by callers to `get_match_context` — confirm.
    pub context_lines: usize,

    /// Maximum number of matches returned by `search_text_regex`.
    pub top_k: usize,
}
26
27impl Default for TextSearchOptions {
28 fn default() -> Self {
29 Self {
30 case_sensitive: false,
31 context_lines: 2,
32 top_k: 20,
33 }
34 }
35}
36
/// A single chunk that matched a text (regex) search.
///
/// `PartialEq` is derived so results can be compared in assertions; `Eq` is
/// not available because `score` is a float.
#[derive(Debug, Clone, PartialEq)]
pub struct TextMatch {
    /// Identifier of the matching chunk, as keyed in the index.
    pub chunk_id: u64,

    /// Zero-based indices, relative to the start of the chunk body, of the
    /// lines on which the pattern matched.
    pub matched_lines: Vec<usize>,

    /// Relevance score; higher is better. `search_text_regex` computes it as
    /// the number of matching lines plus ten times the fraction of the
    /// chunk's lines that matched.
    pub score: f32,
}
49
50pub fn search_text_regex(
55 index: &SeekrIndex,
56 query: &str,
57 options: &TextSearchOptions,
58) -> Result<Vec<TextMatch>, SearchError> {
59 let regex = RegexBuilder::new(query)
60 .case_insensitive(!options.case_sensitive)
61 .build()
62 .map_err(|e| SearchError::InvalidRegex(e.to_string()))?;
63
64 let mut matches: Vec<TextMatch> = Vec::new();
65
66 for (chunk_id, chunk) in &index.chunks {
67 let mut matched_lines = Vec::new();
68
69 for (line_idx, line) in chunk.body.lines().enumerate() {
70 if regex.is_match(line) {
71 matched_lines.push(line_idx);
72 }
73 }
74
75 if !matched_lines.is_empty() {
76 let total_lines = chunk.body.lines().count().max(1) as f32;
77 let match_count = matched_lines.len() as f32;
78
79 let density = match_count / total_lines;
82 let score = match_count + density * 10.0;
83
84 matches.push(TextMatch {
85 chunk_id: *chunk_id,
86 matched_lines,
87 score,
88 });
89 }
90 }
91
92 matches.sort_by(|a, b| {
94 b.score
95 .partial_cmp(&a.score)
96 .unwrap_or(std::cmp::Ordering::Equal)
97 });
98
99 matches.truncate(options.top_k);
101
102 Ok(matches)
103}
104
105pub fn search_text_in_file(
110 file_path: &Path,
111 query: &str,
112 case_sensitive: bool,
113) -> Result<Vec<(usize, String)>, SearchError> {
114 let regex = RegexBuilder::new(query)
115 .case_insensitive(!case_sensitive)
116 .build()
117 .map_err(|e| SearchError::InvalidRegex(e.to_string()))?;
118
119 let content = std::fs::read_to_string(file_path)
120 .map_err(|e| SearchError::Index(crate::error::IndexError::Io(e)))?;
121
122 let mut results = Vec::new();
123 for (line_idx, line) in content.lines().enumerate() {
124 if regex.is_match(line) {
125 results.push((line_idx, line.to_string()));
126 }
127 }
128
129 Ok(results)
130}
131
132pub fn get_match_context(
136 chunk: &CodeChunk,
137 matched_lines: &[usize],
138 context_lines: usize,
139) -> Vec<(usize, String, bool)> {
140 let lines: Vec<&str> = chunk.body.lines().collect();
141 let total = lines.len();
142 let mut result: Vec<(usize, String, bool)> = Vec::new();
143 let mut included: std::collections::HashSet<usize> = std::collections::HashSet::new();
144
145 for &match_line in matched_lines {
146 let start = match_line.saturating_sub(context_lines);
147 let end = (match_line + context_lines + 1).min(total);
148
149 for (line_idx, line) in lines.iter().enumerate().take(end).skip(start) {
150 if included.insert(line_idx) {
151 let is_match = matched_lines.contains(&line_idx);
152 result.push((
153 line_idx + chunk.line_range.start, line.to_string(),
155 is_match,
156 ));
157 }
158 }
159 }
160
161 result.sort_by_key(|(line, _, _)| *line);
162 result
163}
164
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ChunkKind;
    use std::path::PathBuf;

    /// Builds a function chunk named `test_fn` in `test.rs` whose line range
    /// starts at 0, so reported line numbers equal chunk-relative indices.
    fn make_chunk(id: u64, body: &str) -> CodeChunk {
        CodeChunk {
            id,
            file_path: PathBuf::from("test.rs"),
            language: "rust".to_string(),
            kind: ChunkKind::Function,
            name: Some("test_fn".to_string()),
            signature: None,
            doc_comment: None,
            body: body.to_string(),
            byte_range: 0..body.len(),
            line_range: 0..body.lines().count(),
        }
    }

    /// Builds an index holding one chunk per `(id, body)` pair. The embedding
    /// and token values are placeholders; regex search only reads the bodies.
    fn make_index(chunks: &[(u64, &str)]) -> SeekrIndex {
        let mut index = SeekrIndex::new(4);
        for &(id, body) in chunks {
            let entry = crate::index::IndexEntry {
                chunk_id: id,
                embedding: vec![0.1; 4],
                text_tokens: vec!["authenticate".to_string()],
            };
            index.add_entry(entry, make_chunk(id, body));
        }
        index
    }

    #[test]
    fn test_text_search_regex() {
        let index = make_index(&[(1, "fn authenticate(user: &str) {\n validate(user);\n}\n")]);
        let options = TextSearchOptions {
            case_sensitive: false,
            context_lines: 0,
            top_k: 10,
        };

        let results = search_text_regex(&index, "authenticate", &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].chunk_id, 1);
        // Only the first line of the body contains the pattern.
        assert_eq!(results[0].matched_lines, vec![0]);
    }

    #[test]
    fn test_text_search_case_insensitive() {
        let index = make_index(&[(1, "fn Authenticate(user: &str) {}")]);
        let options = TextSearchOptions {
            case_sensitive: false,
            ..Default::default()
        };

        let results = search_text_regex(&index, "authenticate", &options).unwrap();
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_text_search_case_sensitive() {
        let index = make_index(&[(1, "fn Authenticate(user: &str) {}")]);
        let options = TextSearchOptions {
            case_sensitive: true,
            ..Default::default()
        };

        // The lowercase query must not match the capitalised identifier.
        let results = search_text_regex(&index, "authenticate", &options).unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_text_search_ranks_and_truncates() {
        // Scores: chunk 1 = 1 + 10 * 1/4 = 3.5, chunk 2 = 2 + 10 * 2/2 = 12,
        // chunk 3 = 1 + 10 * 1/1 = 11.
        let index = make_index(&[
            (1, "foo\nbar\nbaz\nqux\n"),
            (2, "foo\nfoo\n"),
            (3, "foo\n"),
        ]);
        let options = TextSearchOptions {
            case_sensitive: true,
            context_lines: 0,
            top_k: 2,
        };

        let results = search_text_regex(&index, "foo", &options).unwrap();
        let ids: Vec<u64> = results.iter().map(|m| m.chunk_id).collect();
        assert_eq!(ids, vec![2, 3]);
    }

    #[test]
    fn test_context_lines() {
        let chunk = make_chunk(1, "line 0\nline 1\nMATCH line 2\nline 3\nline 4\n");
        let context = get_match_context(&chunk, &[2], 1);

        // Exactly the match plus one line of context on either side.
        let expected = vec![
            (1, "line 1".to_string(), false),
            (2, "MATCH line 2".to_string(), true),
            (3, "line 3".to_string(), false),
        ];
        assert_eq!(context, expected);
    }

    #[test]
    fn test_invalid_regex() {
        let index = SeekrIndex::new(4);
        let options = TextSearchOptions::default();

        let result = search_text_regex(&index, "[invalid", &options);
        assert!(matches!(result, Err(SearchError::InvalidRegex(_))));
    }
}