seekr_code/search/
text.rs1use std::path::Path;
7
8use regex::RegexBuilder;
9
10use crate::error::SearchError;
11use crate::index::store::SeekrIndex;
12use crate::parser::CodeChunk;
13
/// Options controlling a text (regex) search.
///
/// `PartialEq`/`Eq` are derived so that option sets can be compared directly,
/// for example in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextSearchOptions {
    /// When `true` the pattern is matched case-sensitively; when `false`
    /// the regex is compiled in case-insensitive mode.
    pub case_sensitive: bool,

    /// Number of context lines wanted on each side of a matching line.
    ///
    /// NOTE(review): `search_text_regex` does not read this field; presumably
    /// callers forward it to `get_match_context` — confirm against call sites.
    pub context_lines: usize,

    /// Maximum number of matches a search returns.
    pub top_k: usize,
}
26
27impl Default for TextSearchOptions {
28 fn default() -> Self {
29 Self {
30 case_sensitive: false,
31 context_lines: 2,
32 top_k: 20,
33 }
34 }
35}
36
/// A single chunk that matched a text search.
///
/// `PartialEq` is derived so results can be compared in assertions; `Eq` is
/// not available because `score` is a float.
#[derive(Debug, Clone, PartialEq)]
pub struct TextMatch {
    /// Identifier of the chunk that matched.
    pub chunk_id: u64,

    /// Zero-based offsets, relative to the start of the chunk body, of every
    /// line that matched the pattern, in ascending order.
    pub matched_lines: Vec<usize>,

    /// Relevance score assigned by the search; higher is better.
    pub score: f32,
}
49
50pub fn search_text_regex(
55 index: &SeekrIndex,
56 query: &str,
57 options: &TextSearchOptions,
58) -> Result<Vec<TextMatch>, SearchError> {
59 let regex = RegexBuilder::new(query)
60 .case_insensitive(!options.case_sensitive)
61 .build()
62 .map_err(|e| SearchError::InvalidRegex(e.to_string()))?;
63
64 let mut matches: Vec<TextMatch> = Vec::new();
65
66 for (chunk_id, chunk) in &index.chunks {
67 let mut matched_lines = Vec::new();
68
69 for (line_idx, line) in chunk.body.lines().enumerate() {
70 if regex.is_match(line) {
71 matched_lines.push(line_idx);
72 }
73 }
74
75 if !matched_lines.is_empty() {
76 let total_lines = chunk.body.lines().count().max(1) as f32;
77 let match_count = matched_lines.len() as f32;
78
79 let density = match_count / total_lines;
82 let score = match_count + density * 10.0;
83
84 matches.push(TextMatch {
85 chunk_id: *chunk_id,
86 matched_lines,
87 score,
88 });
89 }
90 }
91
92 matches.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
94
95 matches.truncate(options.top_k);
97
98 Ok(matches)
99}
100
101pub fn search_text_in_file(
106 file_path: &Path,
107 query: &str,
108 case_sensitive: bool,
109) -> Result<Vec<(usize, String)>, SearchError> {
110 let regex = RegexBuilder::new(query)
111 .case_insensitive(!case_sensitive)
112 .build()
113 .map_err(|e| SearchError::InvalidRegex(e.to_string()))?;
114
115 let content = std::fs::read_to_string(file_path).map_err(|e| {
116 SearchError::Index(crate::error::IndexError::Io(e))
117 })?;
118
119 let mut results = Vec::new();
120 for (line_idx, line) in content.lines().enumerate() {
121 if regex.is_match(line) {
122 results.push((line_idx, line.to_string()));
123 }
124 }
125
126 Ok(results)
127}
128
129pub fn get_match_context(
133 chunk: &CodeChunk,
134 matched_lines: &[usize],
135 context_lines: usize,
136) -> Vec<(usize, String, bool)> {
137 let lines: Vec<&str> = chunk.body.lines().collect();
138 let total = lines.len();
139 let mut result: Vec<(usize, String, bool)> = Vec::new();
140 let mut included: std::collections::HashSet<usize> = std::collections::HashSet::new();
141
142 for &match_line in matched_lines {
143 let start = match_line.saturating_sub(context_lines);
144 let end = (match_line + context_lines + 1).min(total);
145
146 for line_idx in start..end {
147 if included.insert(line_idx) {
148 let is_match = matched_lines.contains(&line_idx);
149 result.push((
150 line_idx + chunk.line_range.start, lines[line_idx].to_string(),
152 is_match,
153 ));
154 }
155 }
156 }
157
158 result.sort_by_key(|(line, _, _)| *line);
159 result
160}
161
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ChunkKind;
    use std::path::PathBuf;

    /// Builds a function-kind chunk for `test.rs` whose byte and line ranges
    /// start at zero and span the whole of `body`.
    fn make_chunk(id: u64, body: &str) -> CodeChunk {
        let line_count = body.lines().count();
        CodeChunk {
            id,
            file_path: PathBuf::from("test.rs"),
            language: String::from("rust"),
            kind: ChunkKind::Function,
            name: Some(String::from("test_fn")),
            signature: None,
            doc_comment: None,
            body: body.to_owned(),
            byte_range: 0..body.len(),
            line_range: 0..line_count,
        }
    }

    /// Creates an index via `SeekrIndex::new(4)` containing a single chunk
    /// with id 1 and the given body.
    fn single_chunk_index(body: &str) -> SeekrIndex {
        let mut index = SeekrIndex::new(4);
        let chunk = make_chunk(1, body);
        let entry = crate::index::IndexEntry {
            chunk_id: 1,
            embedding: vec![0.1; 4],
            text_tokens: vec!["authenticate".to_string()],
        };
        index.add_entry(entry, chunk);
        index
    }

    /// A plain-word pattern finds the one chunk that contains it.
    #[test]
    fn test_text_search_regex() {
        let index = single_chunk_index("fn authenticate(user: &str) {\n validate(user);\n}\n");
        let options = TextSearchOptions {
            case_sensitive: false,
            context_lines: 0,
            top_k: 10,
        };

        let results = search_text_regex(&index, "authenticate", &options).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].chunk_id, 1);
        assert!(!results[0].matched_lines.is_empty());
    }

    /// A lower-case query matches mixed-case source when case sensitivity is off.
    #[test]
    fn test_text_search_case_insensitive() {
        let index = single_chunk_index("fn Authenticate(user: &str) {}");
        let options = TextSearchOptions {
            case_sensitive: false,
            ..Default::default()
        };

        let results = search_text_regex(&index, "authenticate", &options).unwrap();

        assert_eq!(results.len(), 1);
    }

    /// One line of context on each side of the match is reported.
    #[test]
    fn test_context_lines() {
        let chunk = make_chunk(1, "line 0\nline 1\nMATCH line 2\nline 3\nline 4\n");
        let context = get_match_context(&chunk, &[2], 1);

        assert!(context.len() >= 3);

        let line_nums: Vec<usize> = context.iter().map(|(l, _, _)| *l).collect();
        for expected in &[1usize, 2, 3] {
            assert!(line_nums.contains(expected));
        }
    }

    /// A malformed pattern is reported as an error rather than panicking.
    #[test]
    fn test_invalid_regex() {
        let index = SeekrIndex::new(4);
        let options = TextSearchOptions::default();

        let result = search_text_regex(&index, "[invalid", &options);

        assert!(result.is_err());
    }
}