1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{
11 extend_start_to_doc_comments, flatten_symbol_infos, flatten_symbols, parse_symbols,
12 slice_source, to_symbol_info,
13};
14use ranking::prune_to_budget;
15use scoring::score_symbol;
16pub use scoring::{
17 sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
18};
19pub(crate) use types::ReadDb;
20pub use types::{
21 make_symbol_id, parse_symbol_id, IndexStats, RankedContextEntry, RankedContextResult,
22 SymbolInfo, SymbolKind, SymbolProvenance,
23};
24
25use crate::db::{self, content_hash, index_db_path, IndexDb};
26pub(crate) use crate::lang_config::{language_for_path, LanguageConfig};
28use crate::project::ProjectRoot;
29use anyhow::{bail, Context, Result};
30use std::fs;
31use std::path::{Path, PathBuf};
32use std::time::UNIX_EPOCH;
33use walkdir::WalkDir;
34
35use crate::project::{collect_files, is_excluded};
36
/// Symbol index for a single project, backed by an [`IndexDb`].
pub struct SymbolIndex {
    /// Project root used to resolve incoming paths and to produce relative paths.
    project: ProjectRoot,
    /// Location of the on-disk database; left empty by `new_memory`.
    db_path: PathBuf,
    /// Database connection guarded by a mutex; handed out via `writer()` and `db()`.
    writer: std::sync::Mutex<IndexDb>,
    /// When set, `reader()` serves reads through the writer connection instead of
    /// opening a separate read-only connection to `db_path`.
    in_memory: bool,
}
52
53impl SymbolIndex {
54 pub fn new(project: ProjectRoot) -> Self {
55 let db_path = index_db_path(project.as_path());
56 let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
57 tracing::warn!(
58 path = %db_path.display(),
59 error = %e,
60 "failed to open DB, falling back to in-memory"
61 );
62 IndexDb::open_memory().unwrap()
63 });
64 let in_memory = !db_path.is_file();
65 let mut idx = Self {
66 project,
67 db_path,
68 writer: std::sync::Mutex::new(db),
69 in_memory,
70 };
71 if idx.writer().file_count().unwrap_or(0) == 0 {
73 let _ = idx.migrate_from_json();
74 }
75 idx
76 }
77
78 fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
80 self.writer
81 .lock()
82 .unwrap_or_else(|poisoned| poisoned.into_inner())
83 }
84
85 fn reader(&self) -> Result<ReadDb<'_>> {
87 if self.in_memory {
88 return Ok(ReadDb::Writer(self.writer()));
89 }
90 match IndexDb::open_readonly(&self.db_path)? {
91 Some(db) => Ok(ReadDb::Owned(db)),
92 None => Ok(ReadDb::Writer(self.writer())),
93 }
94 }
95
96 pub fn new_memory(project: ProjectRoot) -> Self {
98 let db = IndexDb::open_memory().unwrap();
99 Self {
100 db_path: PathBuf::new(),
101 project,
102 writer: std::sync::Mutex::new(db),
103 in_memory: true,
104 }
105 }
106
107 pub fn stats(&self) -> Result<IndexStats> {
108 let db = self.reader()?;
109 let supported_files = collect_candidate_files(self.project.as_path())?;
110 let indexed_files = db.file_count()?;
111 let indexed_paths = db.all_file_paths()?;
112
113 let mut stale = 0usize;
114 for rel in &indexed_paths {
115 let path = self.project.as_path().join(rel);
116 if !path.is_file() {
117 stale += 1;
118 continue;
119 }
120 let content = match fs::read(&path) {
121 Ok(c) => c,
122 Err(_) => {
123 stale += 1;
124 continue;
125 }
126 };
127 let hash = content_hash(&content);
128 let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
129 if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
130 stale += 1;
131 }
132 }
133
134 Ok(IndexStats {
135 indexed_files,
136 supported_files: supported_files.len(),
137 stale_files: stale,
138 })
139 }
140
141 fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
146 let fts_file_boost: std::collections::HashSet<String> = {
157 let query_lower = query.to_ascii_lowercase();
158 let tokens: Vec<&str> = query_lower
159 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
160 .filter(|t| t.len() >= 3)
161 .collect();
162 let mut boost_files = std::collections::HashSet::new();
163 if let Ok(hits) = self.find_symbol(query, None, false, false, 15) {
165 for sym in hits {
166 boost_files.insert(sym.file_path);
167 }
168 }
169 for token in &tokens {
171 if let Ok(hits) = self.find_symbol(token, None, false, false, 10) {
172 for sym in hits {
173 boost_files.insert(sym.file_path);
174 }
175 }
176 }
177 boost_files
178 };
179
180 let (top_files, importer_files): (Vec<String>, Vec<String>) = {
181 let db = self.reader()?;
182 let all_paths = db.all_file_paths()?;
183
184 let query_lower = query.to_ascii_lowercase();
185 let query_tokens: Vec<&str> = query_lower
186 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
187 .filter(|t| t.len() >= 3)
188 .collect();
189
190 let mut file_scores: Vec<(String, usize)> = all_paths
191 .into_iter()
192 .map(|path| {
193 let path_lower = path.to_ascii_lowercase();
194 let mut score = query_tokens
195 .iter()
196 .filter(|token| path_lower.contains(**token))
197 .count();
198 if fts_file_boost.contains(&path) {
200 score += 2;
201 }
202 (path, score)
203 })
204 .collect();
205
206 file_scores.sort_by(|a, b| b.1.cmp(&a.1));
207 let top: Vec<String> = file_scores
208 .into_iter()
209 .filter(|(_, score)| *score > 0)
210 .take(10)
211 .map(|(path, _)| path)
212 .collect();
213
214 let mut importers = Vec::new();
217 if !top.is_empty() && top.len() <= 5 {
218 for file_path in top.iter().take(3) {
219 if let Ok(imp) = db.get_importers(file_path) {
220 for importer_path in imp.into_iter().take(3) {
221 importers.push(importer_path);
222 }
223 }
224 }
225 }
226
227 (top, importers)
228 };
230
231 if top_files.is_empty() {
233 return self.find_symbol(query, None, false, false, 500);
234 }
235
236 let mut all_symbols = Vec::new();
238 for file_path in &top_files {
239 if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
240 all_symbols.extend(symbols);
241 }
242 }
243
244 for importer_path in &importer_files {
247 if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
248 all_symbols.extend(symbols);
249 }
250 }
251
252 let mut seen_ids: std::collections::HashSet<String> =
254 all_symbols.iter().map(|s| s.id.clone()).collect();
255
256 if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
257 for sym in direct {
258 if seen_ids.insert(sym.id.clone()) {
259 all_symbols.push(sym);
260 }
261 }
262 }
263
264 let query_lower = query.to_ascii_lowercase();
267 let tokens: Vec<&str> = query_lower
268 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
269 .filter(|t| t.len() >= 3)
270 .collect();
271 if tokens.len() >= 2 {
272 for token in &tokens {
273 match self.find_symbol(token, None, false, false, 10) {
274 Ok(hits) => {
275 for sym in hits {
276 if seen_ids.insert(sym.id.clone()) {
277 all_symbols.push(sym);
278 }
279 }
280 }
281 Err(e) => {
282 tracing::debug!(token, error = %e, "token find_symbol failed");
283 }
284 }
285 }
286 }
287
288 Ok(all_symbols)
289 }
290
291 pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
294 let db = self.reader()?;
295 db.dir_stats()
296 }
297
298 pub fn indexed_file_paths(&self) -> Result<Vec<String>> {
299 let db = self.reader()?;
300 db.all_file_paths()
301 }
302
303 pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
304 let resolved = self.project.resolve(path)?;
305 if resolved.is_dir() {
306 let mut symbols = Vec::new();
307 for file in WalkDir::new(&resolved)
308 .into_iter()
309 .filter_entry(|entry| !is_excluded(entry.path()))
310 {
311 let file = file?;
312 if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
313 continue;
314 }
315 let relative = self.project.to_relative(file.path());
316 let parsed = self.ensure_indexed(file.path(), &relative)?;
317 if !parsed.is_empty() {
318 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
319 symbols.push(SymbolInfo {
320 name: relative.clone(),
321 kind: SymbolKind::File,
322 file_path: relative.clone(),
323 provenance: SymbolProvenance::from_path(&relative),
324 line: 0,
325 column: 0,
326 signature: format!(
327 "{} ({} symbols)",
328 file.file_name().to_string_lossy(),
329 parsed.len()
330 ),
331 name_path: relative,
332 id,
333 body: None,
334 children: parsed
335 .into_iter()
336 .map(|symbol| to_symbol_info(symbol, depth))
337 .collect(),
338 start_byte: 0,
339 end_byte: 0,
340 end_line: 0,
341 });
342 }
343 }
344 return Ok(symbols);
345 }
346
347 let relative = self.project.to_relative(&resolved);
348 let parsed = self.ensure_indexed(&resolved, &relative)?;
349 Ok(parsed
350 .into_iter()
351 .map(|symbol| to_symbol_info(symbol, depth))
352 .collect())
353 }
354
355 pub fn find_symbol(
356 &self,
357 name: &str,
358 file_path: Option<&str>,
359 include_body: bool,
360 exact_match: bool,
361 max_matches: usize,
362 ) -> Result<Vec<SymbolInfo>> {
363 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
365 let resolved = self.project.resolve(id_file)?;
366 let relative = self.project.to_relative(&resolved);
367 self.ensure_indexed(&resolved, &relative)?;
368 let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
370 let db = self.writer();
371 let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
372 let mut results = Vec::new();
373 for row in db_rows {
374 if row.name_path != id_name_path {
375 continue;
376 }
377 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
378 let body = if include_body {
379 let abs = self.project.as_path().join(&rel_path);
380 fs::read_to_string(&abs).ok().map(|source| {
381 let extended_start =
382 extend_start_to_doc_comments(&source, row.start_byte as u32);
383 slice_source(&source, extended_start, row.end_byte as u32)
384 })
385 } else {
386 None
387 };
388 let kind = SymbolKind::from_str_label(&row.kind);
389 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
390 let prov = SymbolProvenance::from_path(&rel_path);
391 results.push(SymbolInfo {
392 name: row.name,
393 kind,
394 provenance: prov,
395 file_path: rel_path,
396 line: row.line as usize,
397 column: row.column_num as usize,
398 signature: row.signature,
399 name_path: row.name_path,
400 id,
401 body,
402 children: Vec::new(),
403 start_byte: row.start_byte as u32,
404 end_byte: row.end_byte as u32,
405 end_line: if row.end_line > 0 {
406 row.end_line as usize
407 } else {
408 row.line as usize
409 },
410 });
411 }
412 return Ok(results);
413 }
414
415 if let Some(fp) = file_path {
417 let resolved = self.project.resolve(fp)?;
418 let relative = self.project.to_relative(&resolved);
419 self.ensure_indexed(&resolved, &relative)?;
420 } else {
421 let files = collect_candidate_files(self.project.as_path())?;
423 for file in &files {
424 let relative = self.project.to_relative(file);
425 self.ensure_indexed(file, &relative)?;
426 }
427 }
428
429 let db = self.writer();
430 let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
431
432 let mut results = Vec::new();
433 for row in db_rows {
434 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
435 let body = if include_body {
436 let abs = self.project.as_path().join(&rel_path);
437 fs::read_to_string(&abs).ok().map(|source| {
438 let extended_start =
439 extend_start_to_doc_comments(&source, row.start_byte as u32);
440 slice_source(&source, extended_start, row.end_byte as u32)
441 })
442 } else {
443 None
444 };
445 let kind = SymbolKind::from_str_label(&row.kind);
446 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
447 let prov = SymbolProvenance::from_path(&rel_path);
448 results.push(SymbolInfo {
449 name: row.name,
450 kind,
451 provenance: prov,
452 file_path: rel_path,
453 line: row.line as usize,
454 column: row.column_num as usize,
455 signature: row.signature,
456 name_path: row.name_path,
457 id,
458 body,
459 children: Vec::new(),
460 start_byte: row.start_byte as u32,
461 end_byte: row.end_byte as u32,
462 end_line: if row.end_line > 0 {
463 row.end_line as usize
464 } else {
465 row.line as usize
466 },
467 });
468 }
469 Ok(results)
470 }
471
472 pub fn get_ranked_context(
473 &self,
474 query: &str,
475 path: Option<&str>,
476 max_tokens: usize,
477 include_body: bool,
478 depth: usize,
479 ) -> Result<RankedContextResult> {
480 let all_symbols = if let Some(path) = path {
481 self.get_symbols_overview(path, depth)?
482 } else {
483 self.select_solve_symbols(query, depth)?
485 };
486
487 let mut scored = all_symbols
488 .into_iter()
489 .flat_map(flatten_symbol_infos)
490 .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
491 .collect::<Vec<_>>();
492 scored.sort_by(|left, right| right.1.cmp(&left.1));
493
494 let (selected, chars_used, pruned_count, last_kept_score) =
495 prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
496
497 Ok(RankedContextResult {
498 query: query.to_owned(),
499 count: selected.len(),
500 symbols: selected,
501 token_budget: max_tokens,
502 chars_used,
503 pruned_count,
504 last_kept_score,
505 })
506 }
507
    /// Gives callers outside this type access to the locked database connection.
    ///
    /// Delegates to `writer()`, so a poisoned mutex is recovered instead of
    /// causing a panic. The connection stays locked for as long as the returned
    /// guard is alive.
    pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
        self.writer()
    }
512}
513
514pub fn get_symbols_overview(
515 project: &ProjectRoot,
516 path: &str,
517 depth: usize,
518) -> Result<Vec<SymbolInfo>> {
519 let resolved = project.resolve(path)?;
520 if resolved.is_dir() {
521 return get_directory_symbols(project, &resolved, depth);
522 }
523 get_file_symbols(project, &resolved, depth)
524}
525
526pub fn find_symbol_range(
530 project: &ProjectRoot,
531 relative_path: &str,
532 symbol_name: &str,
533 name_path: Option<&str>,
534) -> Result<(usize, usize)> {
535 let file = project.resolve(relative_path)?;
536 let rel = project.to_relative(&file);
537 let Some(language_config) = language_for_path(&file) else {
538 bail!("unsupported file type: {}", file.display());
539 };
540 let source =
541 fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
542 let parsed = parse_symbols(&language_config, &rel, &source, false)?;
543 let flat = flatten_symbols(parsed);
544
545 let candidate = if let Some(np) = name_path {
546 flat.into_iter()
547 .find(|sym| sym.name_path == np || sym.name == symbol_name)
548 } else {
549 flat.into_iter().find(|sym| sym.name == symbol_name)
550 };
551
552 match candidate {
553 Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
554 None => bail!(
555 "symbol '{}' not found in {}",
556 name_path.unwrap_or(symbol_name),
557 relative_path
558 ),
559 }
560}
561
562pub fn find_symbol(
563 project: &ProjectRoot,
564 name: &str,
565 file_path: Option<&str>,
566 include_body: bool,
567 exact_match: bool,
568 max_matches: usize,
569) -> Result<Vec<SymbolInfo>> {
570 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
572 let resolved = project.resolve(id_file)?;
573 let rel = project.to_relative(&resolved);
574 let Some(language_config) = language_for_path(&resolved) else {
575 return Ok(Vec::new());
576 };
577 let source = fs::read_to_string(&resolved)?;
578 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
579 let mut results = Vec::new();
580 for symbol in flatten_symbols(parsed) {
581 if symbol.name_path == id_name_path {
582 results.push(to_symbol_info(symbol, usize::MAX));
583 if results.len() >= max_matches {
584 return Ok(results);
585 }
586 }
587 }
588 return Ok(results);
589 }
590
591 let files = match file_path {
592 Some(path) => vec![project.resolve(path)?],
593 None => collect_candidate_files(project.as_path())?,
594 };
595
596 let query = name.to_lowercase();
597 let mut results = Vec::new();
598
599 for file in files {
600 let rel = project.to_relative(&file);
601 let Some(language_config) = language_for_path(&file) else {
602 continue;
603 };
604 let source = match fs::read_to_string(&file) {
605 Ok(source) => source,
606 Err(_) => continue,
607 };
608 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
609 for symbol in flatten_symbols(parsed) {
610 let matched = if exact_match {
611 symbol.name == name
612 } else {
613 scoring::contains_ascii_ci(&symbol.name, &query)
614 };
615 if matched {
616 results.push(to_symbol_info(symbol, usize::MAX));
617 if results.len() >= max_matches {
618 return Ok(results);
619 }
620 }
621 }
622 }
623
624 Ok(results)
625}
626
627fn get_directory_symbols(
628 project: &ProjectRoot,
629 dir: &Path,
630 depth: usize,
631) -> Result<Vec<SymbolInfo>> {
632 let mut symbols = Vec::new();
633 for entry in WalkDir::new(dir)
634 .into_iter()
635 .filter_entry(|entry| !is_excluded(entry.path()))
636 {
637 let entry = entry?;
638 if !entry.file_type().is_file() {
639 continue;
640 }
641 let path = entry.path();
642 if language_for_path(path).is_none() {
643 continue;
644 }
645 let file_symbols = get_file_symbols(project, path, depth)?;
646 if !file_symbols.is_empty() {
647 let relative = project.to_relative(path);
648 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
649 symbols.push(SymbolInfo {
650 name: relative.clone(),
651 kind: SymbolKind::File,
652 file_path: relative.clone(),
653 provenance: SymbolProvenance::from_path(&relative),
654 line: 0,
655 column: 0,
656 signature: format!(
657 "{} ({} symbols)",
658 path.file_name()
659 .and_then(|name| name.to_str())
660 .unwrap_or_default(),
661 file_symbols.len()
662 ),
663 name_path: relative,
664 id,
665 body: None,
666 children: file_symbols,
667 start_byte: 0,
668 end_byte: 0,
669 end_line: 0,
670 });
671 }
672 }
673 Ok(symbols)
674}
675
676fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
677 let relative = project.to_relative(file);
678 let Some(language_config) = language_for_path(file) else {
679 return Ok(Vec::new());
680 };
681 let source =
682 fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
683 let parsed = parse_symbols(&language_config, &relative, &source, false)?;
684 Ok(parsed
685 .into_iter()
686 .map(|symbol| to_symbol_info(symbol, depth))
687 .collect())
688}
689
/// Collects the files under `root` that can be indexed.
///
/// Delegates to `collect_files` with a filter that keeps only paths for which
/// `language_for_path` returns a language configuration.
fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
    collect_files(root, |path| language_for_path(path).is_some())
}
693
694fn file_modified_ms(path: &Path) -> Result<u128> {
695 let modified = fs::metadata(path)
696 .with_context(|| format!("failed to stat {}", path.display()))?
697 .modified()
698 .with_context(|| format!("failed to read mtime for {}", path.display()))?;
699 Ok(modified
700 .duration_since(UNIX_EPOCH)
701 .unwrap_or_default()
702 .as_millis())
703}