Skip to main content

codelens_engine/symbols/
mod.rs

1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14    sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18    IndexStats, RankedContextEntry, RankedContextResult, SymbolInfo, SymbolKind, make_symbol_id,
19    parse_symbol_id,
20};
21
22use crate::db::{self, IndexDb, content_hash, index_db_path};
23// Re-export language_for_path so downstream crate modules keep working.
24pub(crate) use crate::lang_config::{LanguageConfig, language_for_path};
25use crate::project::ProjectRoot;
26use anyhow::{Context, Result, bail};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded};
33
34// Types (SymbolKind, SymbolInfo, ParsedSymbol, IndexStats, RankedContextEntry,
35// RankedContextResult, ReadDb) are in types.rs, re-exported above.
36
/// SQLite-backed symbol index for a project.
///
/// Architecture: writer `Mutex<IndexDb>` for mutations + per-query read-only
/// connections for `_cached` methods. This makes `SymbolIndex: Send + Sync`,
/// enabling `Arc<SymbolIndex>` without an external Mutex.
pub struct SymbolIndex {
    /// Project root used to resolve incoming paths and to produce the
    /// project-relative paths stored in the index.
    project: ProjectRoot,
    /// Location of the index database; `new_memory` leaves this as an empty
    /// `PathBuf`.
    db_path: PathBuf,
    /// The writer connection. All access goes through the poison-tolerant
    /// `writer()` accessor.
    writer: std::sync::Mutex<IndexDb>,
    /// In-memory mode flag (tests) — when true, _cached reads use the writer.
    /// `new` also sets this when no file exists at `db_path` after opening.
    in_memory: bool,
}
49
50impl SymbolIndex {
51    pub fn new(project: ProjectRoot) -> Self {
52        let db_path = index_db_path(project.as_path());
53        let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54            tracing::warn!(
55                path = %db_path.display(),
56                error = %e,
57                "failed to open DB, falling back to in-memory"
58            );
59            IndexDb::open_memory().unwrap()
60        });
61        let in_memory = !db_path.is_file();
62        let mut idx = Self {
63            project,
64            db_path,
65            writer: std::sync::Mutex::new(db),
66            in_memory,
67        };
68        // Auto-migrate from legacy JSON index if DB is empty
69        if idx.writer().file_count().unwrap_or(0) == 0 {
70            let _ = idx.migrate_from_json();
71        }
72        idx
73    }
74
75    /// Acquire the writer connection (poison-safe).
76    fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77        self.writer
78            .lock()
79            .unwrap_or_else(|poisoned| poisoned.into_inner())
80    }
81
82    /// Open a read-only DB connection for queries (or fall back to writer for in-memory).
83    fn reader(&self) -> Result<ReadDb<'_>> {
84        if self.in_memory {
85            return Ok(ReadDb::Writer(self.writer()));
86        }
87        match IndexDb::open_readonly(&self.db_path)? {
88            Some(db) => Ok(ReadDb::Owned(db)),
89            None => Ok(ReadDb::Writer(self.writer())),
90        }
91    }
92
93    /// Create an in-memory index (for tests and benchmarks — no disk persistence).
94    pub fn new_memory(project: ProjectRoot) -> Self {
95        let db = IndexDb::open_memory().unwrap();
96        Self {
97            db_path: PathBuf::new(),
98            project,
99            writer: std::sync::Mutex::new(db),
100            in_memory: true,
101        }
102    }
103
104    pub fn stats(&self) -> Result<IndexStats> {
105        let db = self.reader()?;
106        let supported_files = collect_candidate_files(self.project.as_path())?;
107        let indexed_files = db.file_count()?;
108        let indexed_paths = db.all_file_paths()?;
109
110        let mut stale = 0usize;
111        for rel in &indexed_paths {
112            let path = self.project.as_path().join(rel);
113            if !path.is_file() {
114                stale += 1;
115                continue;
116            }
117            let content = match fs::read(&path) {
118                Ok(c) => c,
119                Err(_) => {
120                    stale += 1;
121                    continue;
122                }
123            };
124            let hash = content_hash(&content);
125            let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
126            if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
127                stale += 1;
128            }
129        }
130
131        Ok(IndexStats {
132            indexed_files,
133            supported_files: supported_files.len(),
134            stale_files: stale,
135        })
136    }
137
138    /// SelectSolve file pre-filtering: score files by name relevance to query,
139    /// then extract symbols only from top-scoring files.
140    fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
141        // Collect file paths and compute top matches inside a block so the
142        // MutexGuard (ReadDb::Writer) is dropped before we call find_symbol /
143        // get_symbols_overview_cached, which also need the lock.  Holding the
144        // guard across those calls causes a deadlock with in-memory DBs.
145        let top_files: Vec<String> = {
146            let db = self.reader()?;
147            let all_paths = db.all_file_paths()?;
148
149            let query_lower = query.to_ascii_lowercase();
150            let query_tokens: Vec<&str> = query_lower
151                .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
152                .filter(|t| t.len() >= 3)
153                .collect();
154
155            let mut file_scores: Vec<(String, usize)> = all_paths
156                .into_iter()
157                .map(|path| {
158                    let path_lower = path.to_ascii_lowercase();
159                    let score = query_tokens
160                        .iter()
161                        .filter(|token| path_lower.contains(**token))
162                        .count();
163                    (path, score)
164                })
165                .collect();
166
167            file_scores.sort_by(|a, b| b.1.cmp(&a.1));
168            file_scores
169                .into_iter()
170                .filter(|(_, score)| *score > 0)
171                .take(10)
172                .map(|(path, _)| path)
173                .collect()
174            // db (MutexGuard) dropped here
175        };
176
177        // If no file matches, fall back to direct symbol name search
178        if top_files.is_empty() {
179            return self.find_symbol(query, None, false, false, 500);
180        }
181
182        // Collect symbols from top files
183        let mut all_symbols = Vec::new();
184        for file_path in &top_files {
185            if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
186                all_symbols.extend(symbols);
187            }
188        }
189
190        // Also include direct symbol name matches (for exact/substring hits)
191        let mut seen_ids: std::collections::HashSet<String> =
192            all_symbols.iter().map(|s| s.id.clone()).collect();
193
194        if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
195            for sym in direct {
196                if seen_ids.insert(sym.id.clone()) {
197                    all_symbols.push(sym);
198                }
199            }
200        }
201
202        // For multi-word queries, also search individual tokens as symbol names
203        // (e.g., "dispatch tool call" → search for "dispatch", "tool", "call")
204        let query_lower = query.to_ascii_lowercase();
205        let tokens: Vec<&str> = query_lower
206            .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
207            .filter(|t| t.len() >= 3)
208            .collect();
209        if tokens.len() >= 2 {
210            for token in &tokens {
211                match self.find_symbol(token, None, false, false, 10) {
212                    Ok(hits) => {
213                        for sym in hits {
214                            if seen_ids.insert(sym.id.clone()) {
215                                all_symbols.push(sym);
216                            }
217                        }
218                    }
219                    Err(e) => {
220                        tracing::debug!(token, error = %e, "token find_symbol failed");
221                    }
222                }
223            }
224        }
225
226        Ok(all_symbols)
227    }
228
229    /// Hierarchical project structure: per-directory file count + symbol count.
230    /// Used as Level 1 pruning — lets LLM decide which directories to drill into.
231    pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
232        let db = self.reader()?;
233        db.dir_stats()
234    }
235
236    pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
237        let resolved = self.project.resolve(path)?;
238        if resolved.is_dir() {
239            let mut symbols = Vec::new();
240            for file in WalkDir::new(&resolved)
241                .into_iter()
242                .filter_entry(|entry| !is_excluded(entry.path()))
243            {
244                let file = file?;
245                if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
246                    continue;
247                }
248                let relative = self.project.to_relative(file.path());
249                let parsed = self.ensure_indexed(file.path(), &relative)?;
250                if !parsed.is_empty() {
251                    let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
252                    symbols.push(SymbolInfo {
253                        name: relative.clone(),
254                        kind: SymbolKind::File,
255                        file_path: relative.clone(),
256                        line: 0,
257                        column: 0,
258                        signature: format!(
259                            "{} ({} symbols)",
260                            file.file_name().to_string_lossy(),
261                            parsed.len()
262                        ),
263                        name_path: relative,
264                        id,
265                        body: None,
266                        children: parsed
267                            .into_iter()
268                            .map(|symbol| to_symbol_info(symbol, depth))
269                            .collect(),
270                        start_byte: 0,
271                        end_byte: 0,
272                    });
273                }
274            }
275            return Ok(symbols);
276        }
277
278        let relative = self.project.to_relative(&resolved);
279        let parsed = self.ensure_indexed(&resolved, &relative)?;
280        Ok(parsed
281            .into_iter()
282            .map(|symbol| to_symbol_info(symbol, depth))
283            .collect())
284    }
285
286    pub fn find_symbol(
287        &self,
288        name: &str,
289        file_path: Option<&str>,
290        include_body: bool,
291        exact_match: bool,
292        max_matches: usize,
293    ) -> Result<Vec<SymbolInfo>> {
294        // Fast path: if name looks like a stable symbol ID, parse and do targeted lookup
295        if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
296            let resolved = self.project.resolve(id_file)?;
297            let relative = self.project.to_relative(&resolved);
298            self.ensure_indexed(&resolved, &relative)?;
299            // Extract the leaf name from name_path (after last '/')
300            let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
301            let db = self.writer();
302            let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
303            let mut results = Vec::new();
304            for row in db_rows {
305                if row.name_path != id_name_path {
306                    continue;
307                }
308                let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
309                let body = if include_body {
310                    let abs = self.project.as_path().join(&rel_path);
311                    fs::read_to_string(&abs).ok().map(|source| {
312                        slice_source(&source, row.start_byte as u32, row.end_byte as u32)
313                    })
314                } else {
315                    None
316                };
317                let kind = SymbolKind::from_str_label(&row.kind);
318                let id = make_symbol_id(&rel_path, &kind, &row.name_path);
319                results.push(SymbolInfo {
320                    name: row.name,
321                    kind,
322                    file_path: rel_path,
323                    line: row.line as usize,
324                    column: row.column_num as usize,
325                    signature: row.signature,
326                    name_path: row.name_path,
327                    id,
328                    body,
329                    children: Vec::new(),
330                    start_byte: row.start_byte as u32,
331                    end_byte: row.end_byte as u32,
332                });
333            }
334            return Ok(results);
335        }
336
337        // Ensure target files are indexed first
338        if let Some(fp) = file_path {
339            let resolved = self.project.resolve(fp)?;
340            let relative = self.project.to_relative(&resolved);
341            self.ensure_indexed(&resolved, &relative)?;
342        } else {
343            // Ensure all files are indexed for a global search
344            let files = collect_candidate_files(self.project.as_path())?;
345            for file in &files {
346                let relative = self.project.to_relative(file);
347                self.ensure_indexed(file, &relative)?;
348            }
349        }
350
351        let db = self.writer();
352        let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
353
354        let mut results = Vec::new();
355        for row in db_rows {
356            let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
357            let body = if include_body {
358                let abs = self.project.as_path().join(&rel_path);
359                fs::read_to_string(&abs)
360                    .ok()
361                    .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
362            } else {
363                None
364            };
365            let kind = SymbolKind::from_str_label(&row.kind);
366            let id = make_symbol_id(&rel_path, &kind, &row.name_path);
367            results.push(SymbolInfo {
368                name: row.name,
369                kind,
370                file_path: rel_path,
371                line: row.line as usize,
372                column: row.column_num as usize,
373                signature: row.signature,
374                name_path: row.name_path,
375                id,
376                body,
377                children: Vec::new(),
378                start_byte: row.start_byte as u32,
379                end_byte: row.end_byte as u32,
380            });
381        }
382        Ok(results)
383    }
384
385    pub fn get_ranked_context(
386        &self,
387        query: &str,
388        path: Option<&str>,
389        max_tokens: usize,
390        include_body: bool,
391        depth: usize,
392    ) -> Result<RankedContextResult> {
393        let all_symbols = if let Some(path) = path {
394            self.get_symbols_overview(path, depth)?
395        } else {
396            // SelectSolve: file pre-filtering → top files → symbol extraction
397            self.select_solve_symbols(query, depth)?
398        };
399
400        let mut scored = all_symbols
401            .into_iter()
402            .flat_map(flatten_symbol_infos)
403            .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
404            .collect::<Vec<_>>();
405        scored.sort_by(|left, right| right.1.cmp(&left.1));
406
407        let (selected, chars_used) =
408            prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
409
410        Ok(RankedContextResult {
411            query: query.to_owned(),
412            count: selected.len(),
413            symbols: selected,
414            token_budget: max_tokens,
415            chars_used,
416        })
417    }
418
    /// Access the underlying database (e.g. for import graph queries).
    ///
    /// This returns the guard of the writer mutex, so the lock stays held until
    /// the guard is dropped. Drop it before calling other `SymbolIndex` methods
    /// that lock the writer themselves (e.g. `find_symbol`);
    /// `std::sync::Mutex` is not reentrant.
    pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
        self.writer()
    }
423}
424
425pub fn get_symbols_overview(
426    project: &ProjectRoot,
427    path: &str,
428    depth: usize,
429) -> Result<Vec<SymbolInfo>> {
430    let resolved = project.resolve(path)?;
431    if resolved.is_dir() {
432        return get_directory_symbols(project, &resolved, depth);
433    }
434    get_file_symbols(project, &resolved, depth)
435}
436
437/// Find the byte range (start_byte, end_byte) of a named symbol in a file.
438/// If name_path is provided (e.g. "ClassName/method"), matches by full name_path;
439/// otherwise matches by symbol name alone.
440pub fn find_symbol_range(
441    project: &ProjectRoot,
442    relative_path: &str,
443    symbol_name: &str,
444    name_path: Option<&str>,
445) -> Result<(usize, usize)> {
446    let file = project.resolve(relative_path)?;
447    let rel = project.to_relative(&file);
448    let Some(language_config) = language_for_path(&file) else {
449        bail!("unsupported file type: {}", file.display());
450    };
451    let source =
452        fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
453    let parsed = parse_symbols(&language_config, &rel, &source, false)?;
454    let flat = flatten_symbols(parsed);
455
456    let candidate = if let Some(np) = name_path {
457        flat.into_iter()
458            .find(|sym| sym.name_path == np || sym.name == symbol_name)
459    } else {
460        flat.into_iter().find(|sym| sym.name == symbol_name)
461    };
462
463    match candidate {
464        Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
465        None => bail!(
466            "symbol '{}' not found in {}",
467            name_path.unwrap_or(symbol_name),
468            relative_path
469        ),
470    }
471}
472
473pub fn find_symbol(
474    project: &ProjectRoot,
475    name: &str,
476    file_path: Option<&str>,
477    include_body: bool,
478    exact_match: bool,
479    max_matches: usize,
480) -> Result<Vec<SymbolInfo>> {
481    // Fast path: stable symbol ID
482    if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
483        let resolved = project.resolve(id_file)?;
484        let rel = project.to_relative(&resolved);
485        let Some(language_config) = language_for_path(&resolved) else {
486            return Ok(Vec::new());
487        };
488        let source = fs::read_to_string(&resolved)?;
489        let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
490        let mut results = Vec::new();
491        for symbol in flatten_symbols(parsed) {
492            if symbol.name_path == id_name_path {
493                results.push(to_symbol_info(symbol, usize::MAX));
494                if results.len() >= max_matches {
495                    return Ok(results);
496                }
497            }
498        }
499        return Ok(results);
500    }
501
502    let files = match file_path {
503        Some(path) => vec![project.resolve(path)?],
504        None => collect_candidate_files(project.as_path())?,
505    };
506
507    let query = name.to_lowercase();
508    let mut results = Vec::new();
509
510    for file in files {
511        let rel = project.to_relative(&file);
512        let Some(language_config) = language_for_path(&file) else {
513            continue;
514        };
515        let source = match fs::read_to_string(&file) {
516            Ok(source) => source,
517            Err(_) => continue,
518        };
519        let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
520        for symbol in flatten_symbols(parsed) {
521            let matched = if exact_match {
522                symbol.name == name
523            } else {
524                scoring::contains_ascii_ci(&symbol.name, &query)
525            };
526            if matched {
527                results.push(to_symbol_info(symbol, usize::MAX));
528                if results.len() >= max_matches {
529                    return Ok(results);
530                }
531            }
532        }
533    }
534
535    Ok(results)
536}
537
538fn get_directory_symbols(
539    project: &ProjectRoot,
540    dir: &Path,
541    depth: usize,
542) -> Result<Vec<SymbolInfo>> {
543    let mut symbols = Vec::new();
544    for entry in WalkDir::new(dir)
545        .into_iter()
546        .filter_entry(|entry| !is_excluded(entry.path()))
547    {
548        let entry = entry?;
549        if !entry.file_type().is_file() {
550            continue;
551        }
552        let path = entry.path();
553        if language_for_path(path).is_none() {
554            continue;
555        }
556        let file_symbols = get_file_symbols(project, path, depth)?;
557        if !file_symbols.is_empty() {
558            let relative = project.to_relative(path);
559            let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
560            symbols.push(SymbolInfo {
561                name: relative.clone(),
562                kind: SymbolKind::File,
563                file_path: relative.clone(),
564                line: 0,
565                column: 0,
566                signature: format!(
567                    "{} ({} symbols)",
568                    path.file_name()
569                        .and_then(|name| name.to_str())
570                        .unwrap_or_default(),
571                    file_symbols.len()
572                ),
573                name_path: relative,
574                id,
575                body: None,
576                children: file_symbols,
577                start_byte: 0,
578                end_byte: 0,
579            });
580        }
581    }
582    Ok(symbols)
583}
584
585fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
586    let relative = project.to_relative(file);
587    let Some(language_config) = language_for_path(file) else {
588        return Ok(Vec::new());
589    };
590    let source =
591        fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
592    let parsed = parse_symbols(&language_config, &relative, &source, false)?;
593    Ok(parsed
594        .into_iter()
595        .map(|symbol| to_symbol_info(symbol, depth))
596        .collect())
597}
598
/// Collect the files under `root` for which `language_for_path` recognizes a
/// language, using `collect_files` with that predicate.
fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
    collect_files(root, |path| language_for_path(path).is_some())
}
602
603fn file_modified_ms(path: &Path) -> Result<u128> {
604    let modified = fs::metadata(path)
605        .with_context(|| format!("failed to stat {}", path.display()))?
606        .modified()
607        .with_context(|| format!("failed to read mtime for {}", path.display()))?;
608    Ok(modified
609        .duration_since(UNIX_EPOCH)
610        .unwrap_or_default()
611        .as_millis())
612}