// greppy/search/query.rs

//! Query parsing and execution
2
3use crate::core::error::{Error, Result};
4use crate::index::TantivyIndex;
5use crate::search::results::{SearchResponse, SearchResult};
6use std::path::PathBuf;
7use std::time::Instant;
8use tantivy::collector::TopDocs;
9use tantivy::query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery};
10use tantivy::schema::{IndexRecordOption, Value};
11use tantivy::Term;
12use tracing::debug;
13
/// A search request: the text to look for plus the options that shape the
/// result set.
///
/// All fields are public, so a value can be written as a struct literal or
/// adjusted field by field after construction.
#[derive(Debug, Clone)]
pub struct SearchQuery {
    /// The raw search text, exactly as supplied by the caller.
    pub text: String,
    /// Maximum number of results to return.
    pub limit: usize,
    /// Paths the search should be restricted to; empty means no path filters
    /// were given.
    pub path_filters: Vec<PathBuf>,
    /// Whether test files should be included.
    pub include_tests: bool,
}
26
27impl SearchQuery {
28    /// Create a new search query
29    pub fn new(text: impl Into<String>) -> Self {
30        Self {
31            text: text.into(),
32            limit: 20,
33            path_filters: Vec::new(),
34            include_tests: false,
35        }
36    }
37
38    /// Set the result limit
39    pub fn with_limit(mut self, limit: usize) -> Self {
40        self.limit = limit;
41        self
42    }
43
44    /// Add path filters
45    pub fn with_path_filters(mut self, paths: Vec<PathBuf>) -> Self {
46        self.path_filters = paths;
47        self
48    }
49
50    /// Include test files
51    pub fn with_tests(mut self, include: bool) -> Self {
52        self.include_tests = include;
53        self
54    }
55
56    /// Execute the search against an index
57    pub fn execute(&self, index: &TantivyIndex) -> Result<SearchResponse> {
58        let start = Instant::now();
59
60        let searcher = index.reader.searcher();
61        let schema = &index.schema;
62
63        // Build the query
64        let query = self.build_query(index)?;
65
66        // Execute search
67        let top_docs = searcher
68            .search(&query, &TopDocs::with_limit(self.limit))
69            .map_err(|e| Error::SearchError {
70                message: format!("Search failed: {}", e),
71            })?;
72
73        // Collect results
74        let mut results = Vec::new();
75        for (score, doc_address) in top_docs {
76            let doc: tantivy::TantivyDocument =
77                searcher.doc(doc_address).map_err(|e| Error::SearchError {
78                    message: format!("Failed to retrieve doc: {}", e),
79                })?;
80
81            let path = doc
82                .get_first(schema.path)
83                .and_then(|v| v.as_str())
84                .unwrap_or("")
85                .to_string();
86
87            let content = doc
88                .get_first(schema.content)
89                .and_then(|v| v.as_str())
90                .unwrap_or("")
91                .to_string();
92
93            let symbol_name = doc
94                .get_first(schema.symbol_name)
95                .and_then(|v| v.as_str())
96                .filter(|s| !s.is_empty())
97                .map(String::from);
98
99            let symbol_type = doc
100                .get_first(schema.symbol_type)
101                .and_then(|v| v.as_str())
102                .filter(|s| !s.is_empty())
103                .map(String::from);
104
105            let start_line = doc
106                .get_first(schema.start_line)
107                .and_then(|v| v.as_u64())
108                .unwrap_or(0) as usize;
109
110            let end_line = doc
111                .get_first(schema.end_line)
112                .and_then(|v| v.as_u64())
113                .unwrap_or(0) as usize;
114
115            let language = doc
116                .get_first(schema.language)
117                .and_then(|v| v.as_str())
118                .unwrap_or("unknown")
119                .to_string();
120
121            results.push(SearchResult {
122                path,
123                content,
124                symbol_name,
125                symbol_type,
126                start_line,
127                end_line,
128                language,
129                score,
130            });
131        }
132
133        let elapsed = start.elapsed();
134        let elapsed_ms = elapsed.as_secs_f64() * 1000.0;
135
136        // Build response and deduplicate overlapping chunks
137        let mut response = SearchResponse {
138            results,
139            query: self.text.clone(),
140            elapsed_ms,
141            project: "unknown".to_string(), // TODO: Pass project name
142        };
143        response.deduplicate();
144
145        debug!(
146            query = %self.text,
147            results = response.results.len(),
148            elapsed_ms = elapsed_ms,
149            "Search completed (after dedup)"
150        );
151
152        Ok(response)
153    }
154
155    /// Build a Tantivy query from the search text
156    fn build_query(&self, index: &TantivyIndex) -> Result<Box<dyn Query>> {
157        let schema = &index.schema;
158
159        // Tokenize the query
160        let mut tokenizer = index
161            .index
162            .tokenizer_for_field(schema.content)
163            .map_err(|e| Error::SearchError {
164                message: format!("Failed to get tokenizer: {}", e),
165            })?;
166
167        let mut tokens = Vec::new();
168        let mut token_stream = tokenizer.token_stream(&self.text);
169        while let Some(token) = token_stream.next() {
170            tokens.push(token.text.to_string());
171        }
172
173        if tokens.is_empty() {
174            return Err(Error::SearchError {
175                message: "Query produced no tokens".to_string(),
176            });
177        }
178
179        // Build query: search in content and symbol_name (boosted)
180        let mut subqueries: Vec<(Occur, Box<dyn Query>)> = Vec::new();
181
182        for token in &tokens {
183            // Content query
184            let content_term = Term::from_field_text(schema.content, token);
185            let content_query = TermQuery::new(content_term, IndexRecordOption::WithFreqs);
186
187            // Symbol name query (boosted 3x)
188            let symbol_term = Term::from_field_text(schema.symbol_name, token);
189            let symbol_query = TermQuery::new(symbol_term, IndexRecordOption::WithFreqs);
190            let boosted_symbol = BoostQuery::new(Box::new(symbol_query), 3.0);
191
192            // Combine with OR
193            subqueries.push((Occur::Should, Box::new(content_query)));
194            subqueries.push((Occur::Should, Box::new(boosted_symbol)));
195        }
196
197        Ok(Box::new(BooleanQuery::new(subqueries)))
198    }
199}