greppy/index/
reader.rs

1use crate::core::config::Config;
2use crate::core::error::{Error, Result};
3use crate::index::schema::IndexSchema;
4use crate::search::SearchResult;
5use std::path::Path;
6use tantivy::collector::TopDocs;
7use tantivy::query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery};
8use tantivy::schema::{IndexRecordOption, Value};
9use tantivy::{Index, IndexReader, ReloadPolicy, Term};
10
11#[derive(Clone)]
12pub struct IndexSearcher {
13    reader: IndexReader,
14    schema: IndexSchema,
15    index: Index,
16}
17
18impl IndexSearcher {
19    /// Open an existing index
20    pub fn open(project_path: &Path) -> Result<Self> {
21        let index_dir = Config::index_dir(project_path)?;
22
23        if !index_dir.join("meta.json").exists() {
24            return Err(Error::IndexNotFound {
25                path: project_path.to_path_buf(),
26            });
27        }
28
29        let schema = IndexSchema::new();
30        let index = Index::open_in_dir(&index_dir).map_err(|e| Error::IndexError {
31            message: e.to_string(),
32        })?;
33
34        let reader = index
35            .reader_builder()
36            .reload_policy(ReloadPolicy::Manual)
37            .try_into()
38            .map_err(|e| Error::IndexError {
39                message: format!("Failed to create reader: {}", e),
40            })?;
41
42        Ok(Self {
43            reader,
44            schema,
45            index,
46        })
47    }
48
49    /// Check if index exists
50    pub fn exists(project_path: &Path) -> Result<bool> {
51        let index_dir = Config::index_dir(project_path)?;
52        Ok(index_dir.join("meta.json").exists())
53    }
54
55    /// Search the index
56    pub fn search(&self, query_text: &str, limit: usize) -> Result<Vec<SearchResult>> {
57        let searcher = self.reader.searcher();
58
59        // Tokenize query
60        let mut tokenizer = self
61            .index
62            .tokenizer_for_field(self.schema.content)
63            .map_err(|e| Error::SearchError {
64                message: e.to_string(),
65            })?;
66
67        let mut tokens = Vec::new();
68        let mut stream = tokenizer.token_stream(query_text);
69        while let Some(token) = stream.next() {
70            tokens.push(token.text.to_string());
71        }
72
73        if tokens.is_empty() {
74            return Ok(Vec::new());
75        }
76
77        // Build query: content + boosted symbol_name
78        let mut subqueries: Vec<(Occur, Box<dyn Query>)> = Vec::new();
79
80        for token in &tokens {
81            // Content query
82            let content_term = Term::from_field_text(self.schema.content, token);
83            let content_query = TermQuery::new(content_term, IndexRecordOption::WithFreqs);
84            subqueries.push((Occur::Should, Box::new(content_query)));
85
86            // Symbol name query (boosted 3x)
87            let symbol_term = Term::from_field_text(self.schema.symbol_name, token);
88            let symbol_query = TermQuery::new(symbol_term, IndexRecordOption::WithFreqs);
89            let boosted = BoostQuery::new(Box::new(symbol_query), 3.0);
90            subqueries.push((Occur::Should, Box::new(boosted)));
91        }
92
93        let query = BooleanQuery::new(subqueries);
94
95        // Execute search
96        let top_docs = searcher
97            .search(&query, &TopDocs::with_limit(limit))
98            .map_err(|e| Error::SearchError {
99                message: e.to_string(),
100            })?;
101
102        // Collect results
103        let mut results = Vec::new();
104
105        for (score, doc_address) in top_docs {
106            let doc: tantivy::TantivyDocument =
107                searcher.doc(doc_address).map_err(|e| Error::SearchError {
108                    message: e.to_string(),
109                })?;
110
111            let path = doc
112                .get_first(self.schema.path)
113                .and_then(|v| v.as_str())
114                .unwrap_or("")
115                .to_string();
116
117            let content = doc
118                .get_first(self.schema.content)
119                .and_then(|v| v.as_str())
120                .unwrap_or("")
121                .to_string();
122
123            let symbol_name = doc
124                .get_first(self.schema.symbol_name)
125                .and_then(|v| v.as_str())
126                .filter(|s| !s.is_empty())
127                .map(String::from);
128
129            let symbol_type = doc
130                .get_first(self.schema.symbol_type)
131                .and_then(|v| v.as_str())
132                .filter(|s| !s.is_empty())
133                .map(String::from);
134
135            let start_line = doc
136                .get_first(self.schema.start_line)
137                .and_then(|v| v.as_u64())
138                .unwrap_or(0) as usize;
139
140            let end_line = doc
141                .get_first(self.schema.end_line)
142                .and_then(|v| v.as_u64())
143                .unwrap_or(0) as usize;
144
145            let language = doc
146                .get_first(self.schema.language)
147                .and_then(|v| v.as_str())
148                .unwrap_or("unknown")
149                .to_string();
150
151            results.push(SearchResult {
152                path,
153                content,
154                symbol_name,
155                symbol_type,
156                start_line,
157                end_line,
158                language,
159                score,
160            });
161        }
162
163        Ok(results)
164    }
165}