1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14 sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18 IndexStats, RankedContextEntry, RankedContextResult, SymbolInfo, SymbolKind, SymbolProvenance,
19 make_symbol_id, parse_symbol_id,
20};
21
22use crate::db::{self, IndexDb, content_hash, index_db_path};
23pub(crate) use crate::lang_config::{LanguageConfig, language_for_path};
25use crate::project::ProjectRoot;
26use anyhow::{Context, Result, bail};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded_within};
33
34pub struct SymbolIndex {
43 project: ProjectRoot,
44 db_path: PathBuf,
45 writer: std::sync::Mutex<IndexDb>,
46 in_memory: bool,
48}
49
50impl SymbolIndex {
51 pub fn new(project: ProjectRoot) -> Self {
52 let db_path = index_db_path(project.as_path());
53 let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54 tracing::warn!(
55 path = %db_path.display(),
56 error = %e,
57 "failed to open DB, falling back to in-memory"
58 );
59 IndexDb::open_memory().unwrap()
60 });
61 let in_memory = !db_path.is_file();
62 let mut idx = Self {
63 project,
64 db_path,
65 writer: std::sync::Mutex::new(db),
66 in_memory,
67 };
68 if idx.writer().file_count().unwrap_or(0) == 0 {
70 let _ = idx.migrate_from_json();
71 }
72 idx
73 }
74
75 fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77 self.writer
78 .lock()
79 .unwrap_or_else(|poisoned| poisoned.into_inner())
80 }
81
82 fn reader(&self) -> Result<ReadDb<'_>> {
84 if self.in_memory {
85 return Ok(ReadDb::Writer(self.writer()));
86 }
87 match IndexDb::open_readonly(&self.db_path)? {
88 Some(db) => Ok(ReadDb::Owned(db)),
89 None => Ok(ReadDb::Writer(self.writer())),
90 }
91 }
92
93 pub fn new_memory(project: ProjectRoot) -> Self {
95 let db = IndexDb::open_memory().unwrap();
96 Self {
97 db_path: PathBuf::new(),
98 project,
99 writer: std::sync::Mutex::new(db),
100 in_memory: true,
101 }
102 }
103
104 pub fn max_indexed_at(&self) -> Result<Option<i64>> {
109 let db = self.reader()?;
110 db.max_files_indexed_at()
111 }
112
113 pub fn min_indexed_at(&self) -> Result<Option<i64>> {
116 let db = self.reader()?;
117 db.min_files_indexed_at()
118 }
119
120 pub fn file_count(&self) -> Result<usize> {
126 let db = self.reader()?;
127 db.file_count()
128 }
129
130 pub fn language_counts(&self) -> Result<Vec<(String, usize)>> {
134 let db = self.reader()?;
135 db.language_file_counts()
136 }
137
138 fn checkpoint_wal_passive(&self) -> Result<(i64, i64, i64)> {
139 if self.in_memory {
140 return Ok((0, 0, 0));
141 }
142 self.writer().checkpoint_wal_passive()
143 }
144
145 pub fn stats(&self) -> Result<IndexStats> {
146 let db = self.reader()?;
147 let supported_files = collect_candidate_files(self.project.as_path())?;
148 let indexed_files = db.file_count()?;
149 let indexed_paths = db.all_file_paths()?;
150
151 let mut stale = 0usize;
152 for rel in &indexed_paths {
153 let path = self.project.as_path().join(rel);
154 if !path.is_file() {
155 stale += 1;
156 continue;
157 }
158 let content = match fs::read(&path) {
159 Ok(c) => c,
160 Err(_) => {
161 stale += 1;
162 continue;
163 }
164 };
165 let hash = content_hash(&content);
166 let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
167 if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
168 stale += 1;
169 }
170 }
171
172 Ok(IndexStats {
173 indexed_files,
174 supported_files: supported_files.len(),
175 stale_files: stale,
176 })
177 }
178
179 fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
184 let fts_file_boost: std::collections::HashSet<String> = {
195 let query_lower = query.to_ascii_lowercase();
196 let tokens: Vec<&str> = query_lower
197 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
198 .filter(|t| t.len() >= 3)
199 .collect();
200 let mut boost_files = std::collections::HashSet::new();
201 if let Ok(hits) = self.find_symbol(query, None, false, false, 15) {
203 for sym in hits {
204 boost_files.insert(sym.file_path);
205 }
206 }
207 for token in &tokens {
209 if let Ok(hits) = self.find_symbol(token, None, false, false, 10) {
210 for sym in hits {
211 boost_files.insert(sym.file_path);
212 }
213 }
214 }
215 boost_files
216 };
217
218 let (top_files, importer_files): (Vec<String>, Vec<String>) = {
219 let db = self.reader()?;
220 let all_paths = db.all_file_paths()?;
221
222 let query_lower = query.to_ascii_lowercase();
223 let query_tokens: Vec<&str> = query_lower
224 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
225 .filter(|t| t.len() >= 3)
226 .collect();
227
228 let mut file_scores: Vec<(String, usize)> = all_paths
229 .into_iter()
230 .map(|path| {
231 let path_lower = path.to_ascii_lowercase();
232 let mut score = query_tokens
233 .iter()
234 .filter(|token| path_lower.contains(**token))
235 .count();
236 if fts_file_boost.contains(&path) {
238 score += 2;
239 }
240 (path, score)
241 })
242 .collect();
243
244 file_scores.sort_by_key(|b| std::cmp::Reverse(b.1));
245 let top: Vec<String> = file_scores
246 .into_iter()
247 .filter(|(_, score)| *score > 0)
248 .take(10)
249 .map(|(path, _)| path)
250 .collect();
251
252 let mut importers = Vec::new();
255 if !top.is_empty() && top.len() <= 5 {
256 for file_path in top.iter().take(3) {
257 if let Ok(imp) = db.get_importers(file_path) {
258 for importer_path in imp.into_iter().take(3) {
259 importers.push(importer_path);
260 }
261 }
262 }
263 }
264
265 (top, importers)
266 };
268
269 if top_files.is_empty() {
271 return self.find_symbol(query, None, false, false, 500);
272 }
273
274 let mut all_symbols = Vec::new();
276 for file_path in &top_files {
277 if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
278 all_symbols.extend(symbols);
279 }
280 }
281
282 for importer_path in &importer_files {
285 if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
286 all_symbols.extend(symbols);
287 }
288 }
289
290 let mut seen_ids: std::collections::HashSet<String> =
292 all_symbols.iter().map(|s| s.id.clone()).collect();
293
294 if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
295 for sym in direct {
296 if seen_ids.insert(sym.id.clone()) {
297 all_symbols.push(sym);
298 }
299 }
300 }
301
302 let query_lower = query.to_ascii_lowercase();
305 let tokens: Vec<&str> = query_lower
306 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
307 .filter(|t| t.len() >= 3)
308 .collect();
309 if tokens.len() >= 2 {
310 for token in &tokens {
311 match self.find_symbol(token, None, false, false, 10) {
312 Ok(hits) => {
313 for sym in hits {
314 if seen_ids.insert(sym.id.clone()) {
315 all_symbols.push(sym);
316 }
317 }
318 }
319 Err(e) => {
320 tracing::debug!(token, error = %e, "token find_symbol failed");
321 }
322 }
323 }
324 }
325
326 Ok(all_symbols)
327 }
328
329 pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
332 let db = self.reader()?;
333 db.dir_stats()
334 }
335
336 pub fn indexed_file_paths(&self) -> Result<Vec<String>> {
337 let db = self.reader()?;
338 db.all_file_paths()
339 }
340
341 pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
342 let resolved = self.project.resolve(path)?;
343 if resolved.is_dir() {
344 let mut symbols = Vec::new();
345 for file in WalkDir::new(&resolved)
346 .into_iter()
347 .filter_entry(|entry| !is_excluded_within(&resolved, entry.path()))
348 {
349 let file = file?;
350 if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
351 continue;
352 }
353 let relative = self.project.to_relative(file.path());
354 let parsed = self.ensure_indexed(file.path(), &relative)?;
355 if !parsed.is_empty() {
356 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
357 symbols.push(SymbolInfo {
358 name: relative.clone(),
359 kind: SymbolKind::File,
360 file_path: relative.clone(),
361 provenance: SymbolProvenance::from_path(&relative),
362 line: 0,
363 column: 0,
364 signature: format!(
365 "{} ({} symbols)",
366 file.file_name().to_string_lossy(),
367 parsed.len()
368 ),
369 name_path: relative,
370 id,
371 body: None,
372 children: parsed
373 .into_iter()
374 .map(|symbol| to_symbol_info(symbol, depth))
375 .collect(),
376 start_byte: 0,
377 end_byte: 0,
378 });
379 }
380 }
381 return Ok(symbols);
382 }
383
384 let relative = self.project.to_relative(&resolved);
385 let parsed = self.ensure_indexed(&resolved, &relative)?;
386 Ok(parsed
387 .into_iter()
388 .map(|symbol| to_symbol_info(symbol, depth))
389 .collect())
390 }
391
392 pub fn find_symbol(
393 &self,
394 name: &str,
395 file_path: Option<&str>,
396 include_body: bool,
397 exact_match: bool,
398 max_matches: usize,
399 ) -> Result<Vec<SymbolInfo>> {
400 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
402 let resolved = self.project.resolve(id_file)?;
403 let relative = self.project.to_relative(&resolved);
404 self.ensure_indexed(&resolved, &relative)?;
405 let db = self.writer();
406 let db_rows = db.find_symbols_by_name_path(&relative, id_name_path, max_matches)?;
407 let mut results = Vec::new();
408 for row in db_rows {
409 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
410 let body = if include_body {
411 let abs = self.project.as_path().join(&rel_path);
412 fs::read_to_string(&abs).ok().map(|source| {
413 slice_source(&source, row.start_byte as u32, row.end_byte as u32)
414 })
415 } else {
416 None
417 };
418 let kind = SymbolKind::from_str_label(&row.kind);
419 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
420 let prov = SymbolProvenance::from_path(&rel_path);
421 results.push(SymbolInfo {
422 name: row.name,
423 kind,
424 provenance: prov,
425 file_path: rel_path,
426 line: row.line as usize,
427 column: row.column_num as usize,
428 signature: row.signature,
429 name_path: row.name_path,
430 id,
431 body,
432 children: Vec::new(),
433 start_byte: row.start_byte as u32,
434 end_byte: row.end_byte as u32,
435 });
436 }
437 return Ok(results);
438 }
439
440 if let Some(fp) = file_path {
442 let resolved = self.project.resolve(fp)?;
443 let relative = self.project.to_relative(&resolved);
444 self.ensure_indexed(&resolved, &relative)?;
445 } else {
446 let files = collect_candidate_files(self.project.as_path())?;
448 for file in &files {
449 let relative = self.project.to_relative(file);
450 self.ensure_indexed(file, &relative)?;
451 }
452 }
453
454 let db = self.writer();
455 let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
456
457 let mut results = Vec::new();
458 for row in db_rows {
459 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
460 let body = if include_body {
461 let abs = self.project.as_path().join(&rel_path);
462 fs::read_to_string(&abs)
463 .ok()
464 .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
465 } else {
466 None
467 };
468 let kind = SymbolKind::from_str_label(&row.kind);
469 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
470 let prov = SymbolProvenance::from_path(&rel_path);
471 results.push(SymbolInfo {
472 name: row.name,
473 kind,
474 provenance: prov,
475 file_path: rel_path,
476 line: row.line as usize,
477 column: row.column_num as usize,
478 signature: row.signature,
479 name_path: row.name_path,
480 id,
481 body,
482 children: Vec::new(),
483 start_byte: row.start_byte as u32,
484 end_byte: row.end_byte as u32,
485 });
486 }
487 Ok(results)
488 }
489
490 pub fn get_ranked_context(
491 &self,
492 query: &str,
493 path: Option<&str>,
494 max_tokens: usize,
495 include_body: bool,
496 depth: usize,
497 ) -> Result<RankedContextResult> {
498 let all_symbols = if let Some(path) = path {
499 self.get_symbols_overview(path, depth)?
500 } else {
501 self.select_solve_symbols(query, depth)?
503 };
504
505 let mut scored = all_symbols
506 .into_iter()
507 .flat_map(flatten_symbol_infos)
508 .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
509 .collect::<Vec<_>>();
510 scored.sort_by_key(|right| std::cmp::Reverse(right.1));
511
512 let (selected, chars_used) =
513 prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
514
515 Ok(RankedContextResult {
516 query: query.to_owned(),
517 count: selected.len(),
518 symbols: selected,
519 token_budget: max_tokens,
520 chars_used,
521 })
522 }
523
524 pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
526 self.writer()
527 }
528}
529
530pub fn get_symbols_overview(
531 project: &ProjectRoot,
532 path: &str,
533 depth: usize,
534) -> Result<Vec<SymbolInfo>> {
535 let resolved = project.resolve(path)?;
536 if resolved.is_dir() {
537 return get_directory_symbols(project, &resolved, depth);
538 }
539 get_file_symbols(project, &resolved, depth)
540}
541
542pub fn find_symbol_range(
546 project: &ProjectRoot,
547 relative_path: &str,
548 symbol_name: &str,
549 name_path: Option<&str>,
550) -> Result<(usize, usize)> {
551 let file = project.resolve(relative_path)?;
552 let rel = project.to_relative(&file);
553 let Some(language_config) = language_for_path(&file) else {
554 bail!("unsupported file type: {}", file.display());
555 };
556 let source =
557 fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
558 let parsed = parse_symbols(&language_config, &rel, &source, false)?;
559 let flat = flatten_symbols(parsed);
560
561 let candidate = if let Some(np) = name_path {
562 flat.into_iter().find(|sym| sym.name_path == np)
563 } else {
564 flat.into_iter().find(|sym| sym.name == symbol_name)
565 };
566
567 match candidate {
568 Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
569 None => bail!(
570 "symbol '{}' not found in {}",
571 name_path.unwrap_or(symbol_name),
572 relative_path
573 ),
574 }
575}
576
577pub fn find_symbol(
578 project: &ProjectRoot,
579 name: &str,
580 file_path: Option<&str>,
581 include_body: bool,
582 exact_match: bool,
583 max_matches: usize,
584) -> Result<Vec<SymbolInfo>> {
585 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
587 let resolved = project.resolve(id_file)?;
588 let rel = project.to_relative(&resolved);
589 let Some(language_config) = language_for_path(&resolved) else {
590 return Ok(Vec::new());
591 };
592 let source = fs::read_to_string(&resolved)?;
593 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
594 let mut results = Vec::new();
595 for symbol in flatten_symbols(parsed) {
596 if symbol.name_path == id_name_path {
597 results.push(to_symbol_info(symbol, usize::MAX));
598 if results.len() >= max_matches {
599 return Ok(results);
600 }
601 }
602 }
603 return Ok(results);
604 }
605
606 let files = match file_path {
607 Some(path) => vec![project.resolve(path)?],
608 None => collect_candidate_files(project.as_path())?,
609 };
610
611 let query = name.to_lowercase();
612 let mut results = Vec::new();
613
614 for file in files {
615 let rel = project.to_relative(&file);
616 let Some(language_config) = language_for_path(&file) else {
617 continue;
618 };
619 let source = match fs::read_to_string(&file) {
620 Ok(source) => source,
621 Err(_) => continue,
622 };
623 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
624 for symbol in flatten_symbols(parsed) {
625 let matched = if exact_match {
626 symbol.name == name
627 } else {
628 scoring::contains_ascii_ci(&symbol.name, &query)
629 };
630 if matched {
631 results.push(to_symbol_info(symbol, usize::MAX));
632 if results.len() >= max_matches {
633 return Ok(results);
634 }
635 }
636 }
637 }
638
639 Ok(results)
640}
641
642fn get_directory_symbols(
643 project: &ProjectRoot,
644 dir: &Path,
645 depth: usize,
646) -> Result<Vec<SymbolInfo>> {
647 let mut symbols = Vec::new();
648 for entry in WalkDir::new(dir)
649 .into_iter()
650 .filter_entry(|entry| !is_excluded_within(dir, entry.path()))
651 {
652 let entry = entry?;
653 if !entry.file_type().is_file() {
654 continue;
655 }
656 let path = entry.path();
657 if language_for_path(path).is_none() {
658 continue;
659 }
660 let file_symbols = get_file_symbols(project, path, depth)?;
661 if !file_symbols.is_empty() {
662 let relative = project.to_relative(path);
663 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
664 symbols.push(SymbolInfo {
665 name: relative.clone(),
666 kind: SymbolKind::File,
667 file_path: relative.clone(),
668 provenance: SymbolProvenance::from_path(&relative),
669 line: 0,
670 column: 0,
671 signature: format!(
672 "{} ({} symbols)",
673 path.file_name()
674 .and_then(|name| name.to_str())
675 .unwrap_or_default(),
676 file_symbols.len()
677 ),
678 name_path: relative,
679 id,
680 body: None,
681 children: file_symbols,
682 start_byte: 0,
683 end_byte: 0,
684 });
685 }
686 }
687 Ok(symbols)
688}
689
690fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
691 let relative = project.to_relative(file);
692 let Some(language_config) = language_for_path(file) else {
693 return Ok(Vec::new());
694 };
695 let source =
696 fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
697 let parsed = parse_symbols(&language_config, &relative, &source, false)?;
698 Ok(parsed
699 .into_iter()
700 .map(|symbol| to_symbol_info(symbol, depth))
701 .collect())
702}
703
704fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
705 collect_files(root, |path| language_for_path(path).is_some())
706}
707
708fn file_modified_ms(path: &Path) -> Result<u128> {
709 let modified = fs::metadata(path)
710 .with_context(|| format!("failed to stat {}", path.display()))?
711 .modified()
712 .with_context(|| format!("failed to read mtime for {}", path.display()))?;
713 Ok(modified
714 .duration_since(UNIX_EPOCH)
715 .unwrap_or_default()
716 .as_millis())
717}