Skip to main content

codelens_engine/symbols/
reader.rs

1use super::parser::{flatten_symbol_infos, slice_source};
2use super::ranking::{prune_to_budget, rank_symbols, RankingContext};
3use super::types::{make_symbol_id, parse_symbol_id, RankedContextResult, SymbolInfo, SymbolKind};
4use super::SymbolIndex;
5use crate::db::IndexDb;
6use crate::project::ProjectRoot;
7use anyhow::Result;
8use std::fs;
9
10impl SymbolIndex {
11    /// Hybrid candidate collection: fan-out to multiple retrieval paths,
12    /// then merge and deduplicate. Returns a broad candidate pool for ranking.
13    ///
14    /// Retrieval paths:
15    /// 1. File path token matching — top files whose path contains query tokens
16    /// 2. Direct symbol name matching — exact/substring DB lookup
17    /// 3. Import graph proximity — files that import/are imported by matched files
18    pub(super) fn select_solve_symbols_cached(
19        &self,
20        query: &str,
21        depth: usize,
22    ) -> Result<Vec<SymbolInfo>> {
23        let query_lower = query.to_ascii_lowercase();
24        let query_tokens: Vec<&str> = query_lower
25            .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
26            .filter(|t| t.len() >= 3)
27            .collect();
28
29        // Compute file scores and import-graph proximity inside a block so the
30        // ReadDb guard is dropped before calling find_symbol_cached /
31        // get_symbols_overview_cached, which also acquire the reader lock.
32        // Holding both causes a deadlock when in_memory=true (same Mutex).
33        let (top_files, importer_files) = {
34            let db = self.reader()?;
35            let all_paths = db.all_file_paths()?;
36
37            let mut file_scores: Vec<(String, usize)> = all_paths
38                .iter()
39                .map(|path| {
40                    let path_lower = path.to_ascii_lowercase();
41                    let score = query_tokens
42                        .iter()
43                        .filter(|t| path_lower.contains(**t))
44                        .count();
45                    (path.clone(), score)
46                })
47                .collect();
48
49            file_scores.sort_by(|a, b| b.1.cmp(&a.1));
50            let top: Vec<String> = file_scores
51                .iter()
52                .filter(|(_, score)| *score > 0)
53                .take(10)
54                .map(|(path, _)| path.clone())
55                .collect();
56
57            // Path 3: import graph proximity
58            let mut importers = Vec::new();
59            if !top.is_empty() && top.len() <= 5 {
60                for file_path in top.iter().take(3) {
61                    if let Ok(imp) = db.get_importers(file_path) {
62                        for importer_path in imp.into_iter().take(3) {
63                            importers.push(importer_path);
64                        }
65                    }
66                }
67            }
68
69            (top, importers)
70            // db dropped here
71        };
72
73        let mut seen_ids = std::collections::HashSet::new();
74        let mut all_symbols = Vec::new();
75
76        // Path 1: collect symbols from path-matched files
77        for file_path in &top_files {
78            if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
79                for sym in symbols {
80                    if seen_ids.insert(sym.id.clone()) {
81                        all_symbols.push(sym);
82                    }
83                }
84            }
85        }
86
87        // Path 2: direct symbol name matching
88        if let Ok(direct) = self.find_symbol_cached(query, None, false, false, 50) {
89            for sym in direct {
90                if seen_ids.insert(sym.id.clone()) {
91                    all_symbols.push(sym);
92                }
93            }
94        }
95
96        // Path 3: import graph proximity — related code via structural connection
97        for importer_path in &importer_files {
98            if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
99                for sym in symbols {
100                    if seen_ids.insert(sym.id.clone()) {
101                        all_symbols.push(sym);
102                    }
103                }
104            }
105        }
106
107        // Path 4: for multi-word queries, search individual tokens as symbol names
108        if query_tokens.len() >= 2 {
109            for token in &query_tokens {
110                if let Ok(hits) = self.find_symbol_cached(token, None, false, false, 10) {
111                    for sym in hits {
112                        if seen_ids.insert(sym.id.clone()) {
113                            all_symbols.push(sym);
114                        }
115                    }
116                }
117            }
118        }
119
120        // Fallback: if no candidates found, do a broad symbol search
121        if all_symbols.is_empty() {
122            return self.find_symbol_cached(query, None, false, false, 500);
123        }
124
125        Ok(all_symbols)
126    }
127
128    /// Query symbols from DB without lazy indexing. Returns empty if file not yet indexed.
129    pub fn find_symbol_cached(
130        &self,
131        name: &str,
132        file_path: Option<&str>,
133        include_body: bool,
134        exact_match: bool,
135        max_matches: usize,
136    ) -> Result<Vec<SymbolInfo>> {
137        let db = self.reader()?;
138        // Stable ID fast path
139        if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
140            let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
141            let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
142            return Self::rows_to_symbol_infos(&self.project, &db, db_rows, include_body);
143        }
144
145        // Resolve file_path (handles symlinks → canonical relative path)
146        let resolved_fp = file_path.and_then(|fp| {
147            self.project
148                .resolve(fp)
149                .ok()
150                .map(|abs| self.project.to_relative(&abs))
151        });
152        let fp_ref = resolved_fp.as_deref().or(file_path);
153
154        let db_rows = db.find_symbols_by_name(name, fp_ref, exact_match, max_matches)?;
155        Self::rows_to_symbol_infos(&self.project, &db, db_rows, include_body)
156    }
157
158    /// Get symbols overview from DB without lazy indexing.
159    pub fn get_symbols_overview_cached(
160        &self,
161        path: &str,
162        _depth: usize,
163    ) -> Result<Vec<SymbolInfo>> {
164        let db = self.reader()?;
165        let resolved = self.project.resolve(path)?;
166        if resolved.is_dir() {
167            let prefix = self.project.to_relative(&resolved);
168            // Single JOIN query instead of N+1 (all_file_paths + get_file + get_file_symbols per file)
169            let file_groups = db.get_symbols_for_directory(&prefix)?;
170            let mut symbols = Vec::new();
171            for (rel, file_symbols) in file_groups {
172                if file_symbols.is_empty() {
173                    continue;
174                }
175                let id = make_symbol_id(&rel, &SymbolKind::File, &rel);
176                symbols.push(SymbolInfo {
177                    name: rel.clone(),
178                    kind: SymbolKind::File,
179                    file_path: rel.clone(),
180                    line: 0,
181                    column: 0,
182                    signature: format!(
183                        "{} ({} symbols)",
184                        std::path::Path::new(&rel)
185                            .file_name()
186                            .and_then(|n| n.to_str())
187                            .unwrap_or(&rel),
188                        file_symbols.len()
189                    ),
190                    name_path: rel,
191                    id,
192                    body: None,
193                    children: file_symbols
194                        .into_iter()
195                        .map(|row| {
196                            let kind = SymbolKind::from_str_label(&row.kind);
197                            let sid = make_symbol_id("", &kind, &row.name_path);
198                            SymbolInfo {
199                                name: row.name,
200                                kind,
201                                file_path: String::new(),
202                                line: row.line as usize,
203                                column: row.column_num as usize,
204                                signature: row.signature,
205                                name_path: row.name_path,
206                                id: sid,
207                                body: None,
208                                children: Vec::new(),
209                                start_byte: row.start_byte as u32,
210                                end_byte: row.end_byte as u32,
211                            }
212                        })
213                        .collect(),
214                    start_byte: 0,
215                    end_byte: 0,
216                });
217            }
218            return Ok(symbols);
219        }
220
221        // Single file
222        let relative = self.project.to_relative(&resolved);
223        let file_row = match db.get_file(&relative)? {
224            Some(row) => row,
225            None => return Ok(Vec::new()),
226        };
227        let db_symbols = db.get_file_symbols(file_row.id)?;
228        Ok(db_symbols
229            .into_iter()
230            .map(|row| {
231                let kind = SymbolKind::from_str_label(&row.kind);
232                let id = make_symbol_id(&relative, &kind, &row.name_path);
233                SymbolInfo {
234                    name: row.name,
235                    kind,
236                    file_path: relative.clone(),
237                    line: row.line as usize,
238                    column: row.column_num as usize,
239                    signature: row.signature,
240                    name_path: row.name_path,
241                    id,
242                    body: None,
243                    children: Vec::new(),
244                    start_byte: row.start_byte as u32,
245                    end_byte: row.end_byte as u32,
246                }
247            })
248            .collect())
249    }
250
251    /// Ranked context from DB without lazy indexing.
252    /// If `graph_cache` is provided, PageRank scores boost symbols in highly-imported files.
253    /// If `semantic_scores` is non-empty, vector similarity is blended into ranking.
254    #[allow(clippy::too_many_arguments)]
255    pub fn get_ranked_context_cached(
256        &self,
257        query: &str,
258        path: Option<&str>,
259        max_tokens: usize,
260        include_body: bool,
261        depth: usize,
262        graph_cache: Option<&crate::import_graph::GraphCache>,
263        semantic_scores: std::collections::HashMap<String, f64>,
264    ) -> Result<RankedContextResult> {
265        let all_symbols = if let Some(path) = path {
266            self.get_symbols_overview_cached(path, depth)?
267        } else {
268            self.select_solve_symbols_cached(query, depth)?
269        };
270
271        let ranking_ctx = match graph_cache {
272            Some(gc) => {
273                let pagerank = gc.file_pagerank_scores(&self.project);
274                if semantic_scores.is_empty() {
275                    RankingContext::with_pagerank(pagerank)
276                } else {
277                    RankingContext::with_pagerank_and_semantic(query, pagerank, semantic_scores)
278                }
279            }
280            None => {
281                if semantic_scores.is_empty() {
282                    RankingContext::text_only()
283                } else {
284                    RankingContext::with_pagerank_and_semantic(
285                        query,
286                        std::collections::HashMap::new(),
287                        semantic_scores,
288                    )
289                }
290            }
291        };
292
293        let flat_symbols: Vec<SymbolInfo> = all_symbols
294            .into_iter()
295            .flat_map(flatten_symbol_infos)
296            .collect();
297
298        let scored = rank_symbols(query, flat_symbols, &ranking_ctx);
299
300        let (selected, chars_used) =
301            prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
302
303        Ok(RankedContextResult {
304            query: query.to_owned(),
305            count: selected.len(),
306            symbols: selected,
307            token_budget: max_tokens,
308            chars_used,
309        })
310    }
311
312    /// Helper: convert DB rows to SymbolInfo with optional body.
313    /// Uses a file_id→path cache to avoid N+1 `get_file_path` queries.
314    pub(super) fn rows_to_symbol_infos(
315        project: &ProjectRoot,
316        db: &IndexDb,
317        rows: Vec<crate::db::SymbolRow>,
318        include_body: bool,
319    ) -> Result<Vec<SymbolInfo>> {
320        let mut results = Vec::new();
321        let mut path_cache: std::collections::HashMap<i64, String> =
322            std::collections::HashMap::new();
323        for row in rows {
324            let rel_path = match path_cache.get(&row.file_id) {
325                Some(p) => p.clone(),
326                None => {
327                    let p = db.get_file_path(row.file_id)?.unwrap_or_default();
328                    path_cache.insert(row.file_id, p.clone());
329                    p
330                }
331            };
332            let body = if include_body {
333                let abs = project.as_path().join(&rel_path);
334                fs::read_to_string(&abs)
335                    .ok()
336                    .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
337            } else {
338                None
339            };
340            let kind = SymbolKind::from_str_label(&row.kind);
341            let id = make_symbol_id(&rel_path, &kind, &row.name_path);
342            results.push(SymbolInfo {
343                name: row.name,
344                kind,
345                file_path: rel_path,
346                line: row.line as usize,
347                column: row.column_num as usize,
348                signature: row.signature,
349                name_path: row.name_path,
350                id,
351                body,
352                children: Vec::new(),
353                start_byte: row.start_byte as u32,
354                end_byte: row.end_byte as u32,
355            });
356        }
357        Ok(results)
358    }
359}