Skip to main content

codelens_engine/symbols/
mod.rs

1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14    sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18    make_symbol_id, parse_symbol_id, IndexStats, RankedContextEntry, RankedContextResult,
19    SymbolInfo, SymbolKind,
20};
21
22use crate::db::{self, content_hash, index_db_path, IndexDb};
23// Re-export language_for_path so downstream crate modules keep working.
24pub(crate) use crate::lang_config::{language_for_path, LanguageConfig};
25use crate::project::ProjectRoot;
26use anyhow::{bail, Context, Result};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded};
33
34// Types (SymbolKind, SymbolInfo, ParsedSymbol, IndexStats, RankedContextEntry,
35// RankedContextResult, ReadDb) are in types.rs, re-exported above.
36
37/// SQLite-backed symbol index for a project.
38///
39/// Architecture: writer `Mutex<IndexDb>` for mutations + per-query read-only
40/// connections for `_cached` methods. This makes `SymbolIndex: Send + Sync`,
41/// enabling `Arc<SymbolIndex>` without an external Mutex.
42pub struct SymbolIndex {
43    project: ProjectRoot,
44    db_path: PathBuf,
45    writer: std::sync::Mutex<IndexDb>,
46    /// In-memory mode flag (tests) — when true, _cached reads use the writer.
47    in_memory: bool,
48}
49
50impl SymbolIndex {
51    pub fn new(project: ProjectRoot) -> Self {
52        let db_path = index_db_path(project.as_path());
53        let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54            tracing::warn!(
55                path = %db_path.display(),
56                error = %e,
57                "failed to open DB, falling back to in-memory"
58            );
59            IndexDb::open_memory().unwrap()
60        });
61        let in_memory = !db_path.is_file();
62        let mut idx = Self {
63            project,
64            db_path,
65            writer: std::sync::Mutex::new(db),
66            in_memory,
67        };
68        // Auto-migrate from legacy JSON index if DB is empty
69        if idx.writer().file_count().unwrap_or(0) == 0 {
70            let _ = idx.migrate_from_json();
71        }
72        idx
73    }
74
75    /// Acquire the writer connection (poison-safe).
76    fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77        self.writer
78            .lock()
79            .unwrap_or_else(|poisoned| poisoned.into_inner())
80    }
81
82    /// Open a read-only DB connection for queries (or fall back to writer for in-memory).
83    fn reader(&self) -> Result<ReadDb<'_>> {
84        if self.in_memory {
85            return Ok(ReadDb::Writer(self.writer()));
86        }
87        match IndexDb::open_readonly(&self.db_path)? {
88            Some(db) => Ok(ReadDb::Owned(db)),
89            None => Ok(ReadDb::Writer(self.writer())),
90        }
91    }
92
93    /// Create an in-memory index (for tests and benchmarks — no disk persistence).
94    pub fn new_memory(project: ProjectRoot) -> Self {
95        let db = IndexDb::open_memory().unwrap();
96        Self {
97            db_path: PathBuf::new(),
98            project,
99            writer: std::sync::Mutex::new(db),
100            in_memory: true,
101        }
102    }
103
104    pub fn stats(&self) -> Result<IndexStats> {
105        let db = self.reader()?;
106        let supported_files = collect_candidate_files(self.project.as_path())?;
107        let indexed_files = db.file_count()?;
108        let indexed_paths = db.all_file_paths()?;
109
110        let mut stale = 0usize;
111        for rel in &indexed_paths {
112            let path = self.project.as_path().join(rel);
113            if !path.is_file() {
114                stale += 1;
115                continue;
116            }
117            let content = match fs::read(&path) {
118                Ok(c) => c,
119                Err(_) => {
120                    stale += 1;
121                    continue;
122                }
123            };
124            let hash = content_hash(&content);
125            let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
126            if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
127                stale += 1;
128            }
129        }
130
131        Ok(IndexStats {
132            indexed_files,
133            supported_files: supported_files.len(),
134            stale_files: stale,
135        })
136    }
137
138    /// SelectSolve file pre-filtering: score files by name relevance to query,
139    /// then extract symbols only from top-scoring files.
140    /// Path-first retrieval with FTS5 boost: file paths scored by query token
141    /// matching, then boosted by FTS5 symbol hits in the same file.
142    fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
143        // Collect file paths and compute top matches inside a block so the
144        // MutexGuard (ReadDb::Writer) is dropped before we call find_symbol /
145        // get_symbols_overview_cached, which also need the lock.  Holding the
146        // guard across those calls causes a deadlock with in-memory DBs.
147        //
148        // FTS5 boost: run a quick symbol name search, collect which files
149        // have matching symbols, then add +2 to those files' path scores.
150        // This surfaces files with matching content even when the file path
151        // itself doesn't match the query tokens.
152        let fts_file_boost: std::collections::HashSet<String> = self
153            .find_symbol(query, None, false, false, 30)
154            .map(|hits| hits.into_iter().map(|s| s.file_path).collect())
155            .unwrap_or_default();
156
157        let (top_files, importer_files): (Vec<String>, Vec<String>) = {
158            let db = self.reader()?;
159            let all_paths = db.all_file_paths()?;
160
161            let query_lower = query.to_ascii_lowercase();
162            let query_tokens: Vec<&str> = query_lower
163                .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
164                .filter(|t| t.len() >= 3)
165                .collect();
166
167            let mut file_scores: Vec<(String, usize)> = all_paths
168                .into_iter()
169                .map(|path| {
170                    let path_lower = path.to_ascii_lowercase();
171                    let mut score = query_tokens
172                        .iter()
173                        .filter(|token| path_lower.contains(**token))
174                        .count();
175                    // FTS5 boost: files containing matching symbols get +2
176                    if fts_file_boost.contains(&path) {
177                        score += 2;
178                    }
179                    (path, score)
180                })
181                .collect();
182
183            file_scores.sort_by(|a, b| b.1.cmp(&a.1));
184            let top: Vec<String> = file_scores
185                .into_iter()
186                .filter(|(_, score)| *score > 0)
187                .take(10)
188                .map(|(path, _)| path)
189                .collect();
190
191            // Import graph proximity: files that import top-matched files
192            // provide structural context (callers, consumers of matched code).
193            let mut importers = Vec::new();
194            if !top.is_empty() && top.len() <= 5 {
195                for file_path in top.iter().take(3) {
196                    if let Ok(imp) = db.get_importers(file_path) {
197                        for importer_path in imp.into_iter().take(3) {
198                            importers.push(importer_path);
199                        }
200                    }
201                }
202            }
203
204            (top, importers)
205            // db (MutexGuard) dropped here
206        };
207
208        // If no file matches (path + FTS5 both empty), fall back to broad symbol search
209        if top_files.is_empty() {
210            return self.find_symbol(query, None, false, false, 500);
211        }
212
213        // Collect symbols from top files
214        let mut all_symbols = Vec::new();
215        for file_path in &top_files {
216            if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
217                all_symbols.extend(symbols);
218            }
219        }
220
221        // Import graph proximity: include symbols from files that import top matches.
222        // These provide structural context (callers, consumers of matched code).
223        for importer_path in &importer_files {
224            if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
225                all_symbols.extend(symbols);
226            }
227        }
228
229        // Also include direct symbol name matches (for exact/substring hits)
230        let mut seen_ids: std::collections::HashSet<String> =
231            all_symbols.iter().map(|s| s.id.clone()).collect();
232
233        if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
234            for sym in direct {
235                if seen_ids.insert(sym.id.clone()) {
236                    all_symbols.push(sym);
237                }
238            }
239        }
240
241        // For multi-word queries, also search individual tokens as symbol names
242        // (e.g., "dispatch tool call" → search for "dispatch", "tool", "call")
243        let query_lower = query.to_ascii_lowercase();
244        let tokens: Vec<&str> = query_lower
245            .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
246            .filter(|t| t.len() >= 3)
247            .collect();
248        if tokens.len() >= 2 {
249            for token in &tokens {
250                match self.find_symbol(token, None, false, false, 10) {
251                    Ok(hits) => {
252                        for sym in hits {
253                            if seen_ids.insert(sym.id.clone()) {
254                                all_symbols.push(sym);
255                            }
256                        }
257                    }
258                    Err(e) => {
259                        tracing::debug!(token, error = %e, "token find_symbol failed");
260                    }
261                }
262            }
263        }
264
265        Ok(all_symbols)
266    }
267
268    /// Hierarchical project structure: per-directory file count + symbol count.
269    /// Used as Level 1 pruning — lets LLM decide which directories to drill into.
270    pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
271        let db = self.reader()?;
272        db.dir_stats()
273    }
274
275    pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
276        let resolved = self.project.resolve(path)?;
277        if resolved.is_dir() {
278            let mut symbols = Vec::new();
279            for file in WalkDir::new(&resolved)
280                .into_iter()
281                .filter_entry(|entry| !is_excluded(entry.path()))
282            {
283                let file = file?;
284                if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
285                    continue;
286                }
287                let relative = self.project.to_relative(file.path());
288                let parsed = self.ensure_indexed(file.path(), &relative)?;
289                if !parsed.is_empty() {
290                    let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
291                    symbols.push(SymbolInfo {
292                        name: relative.clone(),
293                        kind: SymbolKind::File,
294                        file_path: relative.clone(),
295                        line: 0,
296                        column: 0,
297                        signature: format!(
298                            "{} ({} symbols)",
299                            file.file_name().to_string_lossy(),
300                            parsed.len()
301                        ),
302                        name_path: relative,
303                        id,
304                        body: None,
305                        children: parsed
306                            .into_iter()
307                            .map(|symbol| to_symbol_info(symbol, depth))
308                            .collect(),
309                        start_byte: 0,
310                        end_byte: 0,
311                    });
312                }
313            }
314            return Ok(symbols);
315        }
316
317        let relative = self.project.to_relative(&resolved);
318        let parsed = self.ensure_indexed(&resolved, &relative)?;
319        Ok(parsed
320            .into_iter()
321            .map(|symbol| to_symbol_info(symbol, depth))
322            .collect())
323    }
324
325    pub fn find_symbol(
326        &self,
327        name: &str,
328        file_path: Option<&str>,
329        include_body: bool,
330        exact_match: bool,
331        max_matches: usize,
332    ) -> Result<Vec<SymbolInfo>> {
333        // Fast path: if name looks like a stable symbol ID, parse and do targeted lookup
334        if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
335            let resolved = self.project.resolve(id_file)?;
336            let relative = self.project.to_relative(&resolved);
337            self.ensure_indexed(&resolved, &relative)?;
338            // Extract the leaf name from name_path (after last '/')
339            let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
340            let db = self.writer();
341            let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
342            let mut results = Vec::new();
343            for row in db_rows {
344                if row.name_path != id_name_path {
345                    continue;
346                }
347                let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
348                let body = if include_body {
349                    let abs = self.project.as_path().join(&rel_path);
350                    fs::read_to_string(&abs).ok().map(|source| {
351                        slice_source(&source, row.start_byte as u32, row.end_byte as u32)
352                    })
353                } else {
354                    None
355                };
356                let kind = SymbolKind::from_str_label(&row.kind);
357                let id = make_symbol_id(&rel_path, &kind, &row.name_path);
358                results.push(SymbolInfo {
359                    name: row.name,
360                    kind,
361                    file_path: rel_path,
362                    line: row.line as usize,
363                    column: row.column_num as usize,
364                    signature: row.signature,
365                    name_path: row.name_path,
366                    id,
367                    body,
368                    children: Vec::new(),
369                    start_byte: row.start_byte as u32,
370                    end_byte: row.end_byte as u32,
371                });
372            }
373            return Ok(results);
374        }
375
376        // Ensure target files are indexed first
377        if let Some(fp) = file_path {
378            let resolved = self.project.resolve(fp)?;
379            let relative = self.project.to_relative(&resolved);
380            self.ensure_indexed(&resolved, &relative)?;
381        } else {
382            // Ensure all files are indexed for a global search
383            let files = collect_candidate_files(self.project.as_path())?;
384            for file in &files {
385                let relative = self.project.to_relative(file);
386                self.ensure_indexed(file, &relative)?;
387            }
388        }
389
390        let db = self.writer();
391        let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
392
393        let mut results = Vec::new();
394        for row in db_rows {
395            let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
396            let body = if include_body {
397                let abs = self.project.as_path().join(&rel_path);
398                fs::read_to_string(&abs)
399                    .ok()
400                    .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
401            } else {
402                None
403            };
404            let kind = SymbolKind::from_str_label(&row.kind);
405            let id = make_symbol_id(&rel_path, &kind, &row.name_path);
406            results.push(SymbolInfo {
407                name: row.name,
408                kind,
409                file_path: rel_path,
410                line: row.line as usize,
411                column: row.column_num as usize,
412                signature: row.signature,
413                name_path: row.name_path,
414                id,
415                body,
416                children: Vec::new(),
417                start_byte: row.start_byte as u32,
418                end_byte: row.end_byte as u32,
419            });
420        }
421        Ok(results)
422    }
423
424    pub fn get_ranked_context(
425        &self,
426        query: &str,
427        path: Option<&str>,
428        max_tokens: usize,
429        include_body: bool,
430        depth: usize,
431    ) -> Result<RankedContextResult> {
432        let all_symbols = if let Some(path) = path {
433            self.get_symbols_overview(path, depth)?
434        } else {
435            // SelectSolve: file pre-filtering → top files → symbol extraction
436            self.select_solve_symbols(query, depth)?
437        };
438
439        let mut scored = all_symbols
440            .into_iter()
441            .flat_map(flatten_symbol_infos)
442            .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
443            .collect::<Vec<_>>();
444        scored.sort_by(|left, right| right.1.cmp(&left.1));
445
446        let (selected, chars_used) =
447            prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
448
449        Ok(RankedContextResult {
450            query: query.to_owned(),
451            count: selected.len(),
452            symbols: selected,
453            token_budget: max_tokens,
454            chars_used,
455        })
456    }
457
458    /// Access the underlying database (e.g. for import graph queries).
459    pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
460        self.writer()
461    }
462}
463
464pub fn get_symbols_overview(
465    project: &ProjectRoot,
466    path: &str,
467    depth: usize,
468) -> Result<Vec<SymbolInfo>> {
469    let resolved = project.resolve(path)?;
470    if resolved.is_dir() {
471        return get_directory_symbols(project, &resolved, depth);
472    }
473    get_file_symbols(project, &resolved, depth)
474}
475
476/// Find the byte range (start_byte, end_byte) of a named symbol in a file.
477/// If name_path is provided (e.g. "ClassName/method"), matches by full name_path;
478/// otherwise matches by symbol name alone.
479pub fn find_symbol_range(
480    project: &ProjectRoot,
481    relative_path: &str,
482    symbol_name: &str,
483    name_path: Option<&str>,
484) -> Result<(usize, usize)> {
485    let file = project.resolve(relative_path)?;
486    let rel = project.to_relative(&file);
487    let Some(language_config) = language_for_path(&file) else {
488        bail!("unsupported file type: {}", file.display());
489    };
490    let source =
491        fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
492    let parsed = parse_symbols(&language_config, &rel, &source, false)?;
493    let flat = flatten_symbols(parsed);
494
495    let candidate = if let Some(np) = name_path {
496        flat.into_iter()
497            .find(|sym| sym.name_path == np || sym.name == symbol_name)
498    } else {
499        flat.into_iter().find(|sym| sym.name == symbol_name)
500    };
501
502    match candidate {
503        Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
504        None => bail!(
505            "symbol '{}' not found in {}",
506            name_path.unwrap_or(symbol_name),
507            relative_path
508        ),
509    }
510}
511
512pub fn find_symbol(
513    project: &ProjectRoot,
514    name: &str,
515    file_path: Option<&str>,
516    include_body: bool,
517    exact_match: bool,
518    max_matches: usize,
519) -> Result<Vec<SymbolInfo>> {
520    // Fast path: stable symbol ID
521    if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
522        let resolved = project.resolve(id_file)?;
523        let rel = project.to_relative(&resolved);
524        let Some(language_config) = language_for_path(&resolved) else {
525            return Ok(Vec::new());
526        };
527        let source = fs::read_to_string(&resolved)?;
528        let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
529        let mut results = Vec::new();
530        for symbol in flatten_symbols(parsed) {
531            if symbol.name_path == id_name_path {
532                results.push(to_symbol_info(symbol, usize::MAX));
533                if results.len() >= max_matches {
534                    return Ok(results);
535                }
536            }
537        }
538        return Ok(results);
539    }
540
541    let files = match file_path {
542        Some(path) => vec![project.resolve(path)?],
543        None => collect_candidate_files(project.as_path())?,
544    };
545
546    let query = name.to_lowercase();
547    let mut results = Vec::new();
548
549    for file in files {
550        let rel = project.to_relative(&file);
551        let Some(language_config) = language_for_path(&file) else {
552            continue;
553        };
554        let source = match fs::read_to_string(&file) {
555            Ok(source) => source,
556            Err(_) => continue,
557        };
558        let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
559        for symbol in flatten_symbols(parsed) {
560            let matched = if exact_match {
561                symbol.name == name
562            } else {
563                scoring::contains_ascii_ci(&symbol.name, &query)
564            };
565            if matched {
566                results.push(to_symbol_info(symbol, usize::MAX));
567                if results.len() >= max_matches {
568                    return Ok(results);
569                }
570            }
571        }
572    }
573
574    Ok(results)
575}
576
577fn get_directory_symbols(
578    project: &ProjectRoot,
579    dir: &Path,
580    depth: usize,
581) -> Result<Vec<SymbolInfo>> {
582    let mut symbols = Vec::new();
583    for entry in WalkDir::new(dir)
584        .into_iter()
585        .filter_entry(|entry| !is_excluded(entry.path()))
586    {
587        let entry = entry?;
588        if !entry.file_type().is_file() {
589            continue;
590        }
591        let path = entry.path();
592        if language_for_path(path).is_none() {
593            continue;
594        }
595        let file_symbols = get_file_symbols(project, path, depth)?;
596        if !file_symbols.is_empty() {
597            let relative = project.to_relative(path);
598            let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
599            symbols.push(SymbolInfo {
600                name: relative.clone(),
601                kind: SymbolKind::File,
602                file_path: relative.clone(),
603                line: 0,
604                column: 0,
605                signature: format!(
606                    "{} ({} symbols)",
607                    path.file_name()
608                        .and_then(|name| name.to_str())
609                        .unwrap_or_default(),
610                    file_symbols.len()
611                ),
612                name_path: relative,
613                id,
614                body: None,
615                children: file_symbols,
616                start_byte: 0,
617                end_byte: 0,
618            });
619        }
620    }
621    Ok(symbols)
622}
623
624fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
625    let relative = project.to_relative(file);
626    let Some(language_config) = language_for_path(file) else {
627        return Ok(Vec::new());
628    };
629    let source =
630        fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
631    let parsed = parse_symbols(&language_config, &relative, &source, false)?;
632    Ok(parsed
633        .into_iter()
634        .map(|symbol| to_symbol_info(symbol, depth))
635        .collect())
636}
637
638fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
639    collect_files(root, |path| language_for_path(path).is_some())
640}
641
642fn file_modified_ms(path: &Path) -> Result<u128> {
643    let modified = fs::metadata(path)
644        .with_context(|| format!("failed to stat {}", path.display()))?
645        .modified()
646        .with_context(|| format!("failed to read mtime for {}", path.display()))?;
647    Ok(modified
648        .duration_since(UNIX_EPOCH)
649        .unwrap_or_default()
650        .as_millis())
651}