Skip to main content

seekr_code/search/
text.rs

//! Text regex search.
//!
//! Full-text regex matching using the `regex` crate.
//! Supports configurable case sensitivity and context lines around matches.
5
6use std::path::Path;
7
8use regex::RegexBuilder;
9
10use crate::error::SearchError;
11use crate::index::store::SeekrIndex;
12use crate::parser::CodeChunk;
13
/// Tunable parameters for a text (regex) search.
#[derive(Debug, Clone)]
pub struct TextSearchOptions {
    /// When `true`, the pattern is matched case-sensitively; when `false`,
    /// letter case is ignored.
    pub case_sensitive: bool,

    /// How many lines of surrounding context to show on each side of a
    /// matching line.
    pub context_lines: usize,

    /// Upper bound on the number of results returned by a search.
    pub top_k: usize,
}
26
27impl Default for TextSearchOptions {
28    fn default() -> Self {
29        Self {
30            case_sensitive: false,
31            context_lines: 2,
32            top_k: 20,
33        }
34    }
35}
36
/// The outcome of matching a pattern against one code chunk.
#[derive(Debug, Clone)]
pub struct TextMatch {
    /// Identifier of the chunk in which the pattern was found.
    pub chunk_id: u64,

    /// Indices of the matching lines, 0-based and relative to the first
    /// line of the chunk body, in ascending order.
    pub matched_lines: Vec<usize>,

    /// Relevance score: the number of matching lines plus ten times the
    /// fraction of the chunk's lines that matched. Higher is better.
    pub score: f32,
}
49
50/// Perform text regex search across the index.
51///
52/// Searches through all indexed code chunks using regex pattern matching.
53/// Results are scored by the number and density of matches.
54pub fn search_text_regex(
55    index: &SeekrIndex,
56    query: &str,
57    options: &TextSearchOptions,
58) -> Result<Vec<TextMatch>, SearchError> {
59    let regex = RegexBuilder::new(query)
60        .case_insensitive(!options.case_sensitive)
61        .build()
62        .map_err(|e| SearchError::InvalidRegex(e.to_string()))?;
63
64    let mut matches: Vec<TextMatch> = Vec::new();
65
66    for (chunk_id, chunk) in &index.chunks {
67        let mut matched_lines = Vec::new();
68
69        for (line_idx, line) in chunk.body.lines().enumerate() {
70            if regex.is_match(line) {
71                matched_lines.push(line_idx);
72            }
73        }
74
75        if !matched_lines.is_empty() {
76            let total_lines = chunk.body.lines().count().max(1) as f32;
77            let match_count = matched_lines.len() as f32;
78
79            // Score combines match count with density
80            // More matches and higher density = higher score
81            let density = match_count / total_lines;
82            let score = match_count + density * 10.0;
83
84            matches.push(TextMatch {
85                chunk_id: *chunk_id,
86                matched_lines,
87                score,
88            });
89        }
90    }
91
92    // Sort by score descending
93    matches.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
94
95    // Truncate to top-k
96    matches.truncate(options.top_k);
97
98    Ok(matches)
99}
100
101/// Perform text regex search directly on files (without index).
102///
103/// Scans the file system for regex matches. Useful for ad-hoc searches
104/// before an index is built.
105pub fn search_text_in_file(
106    file_path: &Path,
107    query: &str,
108    case_sensitive: bool,
109) -> Result<Vec<(usize, String)>, SearchError> {
110    let regex = RegexBuilder::new(query)
111        .case_insensitive(!case_sensitive)
112        .build()
113        .map_err(|e| SearchError::InvalidRegex(e.to_string()))?;
114
115    let content = std::fs::read_to_string(file_path).map_err(|e| {
116        SearchError::Index(crate::error::IndexError::Io(e))
117    })?;
118
119    let mut results = Vec::new();
120    for (line_idx, line) in content.lines().enumerate() {
121        if regex.is_match(line) {
122            results.push((line_idx, line.to_string()));
123        }
124    }
125
126    Ok(results)
127}
128
129/// Get context lines around matched lines.
130///
131/// Returns a list of (line_number, line_content, is_match) tuples.
132pub fn get_match_context(
133    chunk: &CodeChunk,
134    matched_lines: &[usize],
135    context_lines: usize,
136) -> Vec<(usize, String, bool)> {
137    let lines: Vec<&str> = chunk.body.lines().collect();
138    let total = lines.len();
139    let mut result: Vec<(usize, String, bool)> = Vec::new();
140    let mut included: std::collections::HashSet<usize> = std::collections::HashSet::new();
141
142    for &match_line in matched_lines {
143        let start = match_line.saturating_sub(context_lines);
144        let end = (match_line + context_lines + 1).min(total);
145
146        for line_idx in start..end {
147            if included.insert(line_idx) {
148                let is_match = matched_lines.contains(&line_idx);
149                result.push((
150                    line_idx + chunk.line_range.start, // absolute line number
151                    lines[line_idx].to_string(),
152                    is_match,
153                ));
154            }
155        }
156    }
157
158    result.sort_by_key(|(line, _, _)| *line);
159    result
160}
161
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::ChunkKind;
    use std::path::PathBuf;

    /// Build a function chunk named `test_fn` in `test.rs` whose body is
    /// `body` and whose line range starts at 0.
    fn make_chunk(id: u64, body: &str) -> CodeChunk {
        CodeChunk {
            id,
            file_path: PathBuf::from("test.rs"),
            language: "rust".to_string(),
            kind: ChunkKind::Function,
            name: Some("test_fn".to_string()),
            signature: None,
            doc_comment: None,
            body: body.to_string(),
            byte_range: 0..body.len(),
            line_range: 0..body.lines().count(),
        }
    }

    /// Build a 4-dimensional index containing exactly one chunk.
    fn index_with_chunk(id: u64, body: &str) -> SeekrIndex {
        let mut index = SeekrIndex::new(4);
        let entry = crate::index::IndexEntry {
            chunk_id: id,
            embedding: vec![0.1; 4],
            text_tokens: vec!["authenticate".to_string()],
        };
        index.add_entry(entry, make_chunk(id, body));
        index
    }

    #[test]
    fn test_text_search_regex() {
        let index = index_with_chunk(
            1,
            "fn authenticate(user: &str) {\n    validate(user);\n}\n",
        );

        let options = TextSearchOptions {
            case_sensitive: false,
            context_lines: 0,
            top_k: 10,
        };

        let results = search_text_regex(&index, "authenticate", &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].chunk_id, 1);
        // Only the first line of the body contains the pattern.
        assert_eq!(results[0].matched_lines, vec![0]);
        assert!(results[0].score > 0.0);
    }

    #[test]
    fn test_text_search_case_insensitive() {
        let index = index_with_chunk(1, "fn Authenticate(user: &str) {}");

        let options = TextSearchOptions {
            case_sensitive: false,
            ..Default::default()
        };

        let results = search_text_regex(&index, "authenticate", &options).unwrap();
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_text_search_case_sensitive() {
        let index = index_with_chunk(1, "fn Authenticate(user: &str) {}");

        let options = TextSearchOptions {
            case_sensitive: true,
            ..Default::default()
        };

        // Lower-case pattern must not match the capitalised identifier.
        let results = search_text_regex(&index, "authenticate", &options).unwrap();
        assert!(results.is_empty());

        let results = search_text_regex(&index, "Authenticate", &options).unwrap();
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_context_lines() {
        let chunk = make_chunk(1, "line 0\nline 1\nMATCH line 2\nline 3\nline 4\n");
        let context = get_match_context(&chunk, &[2], 1);

        // Exactly the match plus one line before and one line after.
        assert_eq!(
            context,
            vec![
                (1, "line 1".to_string(), false),
                (2, "MATCH line 2".to_string(), true),
                (3, "line 3".to_string(), false),
            ]
        );
    }

    #[test]
    fn test_context_lines_overlap_and_bounds() {
        let chunk = make_chunk(1, "a\nb\nc\nd\ne");

        // Adjacent matches share context; every line appears once, in order.
        let context = get_match_context(&chunk, &[1, 2], 1);
        let line_nums: Vec<usize> = context.iter().map(|(l, _, _)| *l).collect();
        let flags: Vec<bool> = context.iter().map(|(_, _, m)| *m).collect();
        assert_eq!(line_nums, vec![0, 1, 2, 3]);
        assert_eq!(flags, vec![false, true, true, false]);

        // A match index past the end of the body yields no lines.
        assert!(get_match_context(&chunk, &[10], 0).is_empty());
    }

    #[test]
    fn test_invalid_regex() {
        let index = SeekrIndex::new(4);
        let options = TextSearchOptions::default();

        let result = search_text_regex(&index, "[invalid", &options);
        assert!(matches!(result, Err(SearchError::InvalidRegex(_))));
    }
}