1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14 sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18 IndexStats, RankedContextEntry, RankedContextResult, SymbolInfo, SymbolKind, SymbolProvenance,
19 make_symbol_id, parse_symbol_id,
20};
21
22use crate::db::{self, IndexDb, content_hash, index_db_path};
23pub(crate) use crate::lang_config::{LanguageConfig, language_for_path};
25use crate::project::ProjectRoot;
26use anyhow::{Context, Result, bail};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded};
33
34pub struct SymbolIndex {
43 project: ProjectRoot,
44 db_path: PathBuf,
45 writer: std::sync::Mutex<IndexDb>,
46 in_memory: bool,
48}
49
50impl SymbolIndex {
51 pub fn new(project: ProjectRoot) -> Self {
52 let db_path = index_db_path(project.as_path());
53 let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54 tracing::warn!(
55 path = %db_path.display(),
56 error = %e,
57 "failed to open DB, falling back to in-memory"
58 );
59 IndexDb::open_memory().unwrap()
60 });
61 let in_memory = !db_path.is_file();
62 let mut idx = Self {
63 project,
64 db_path,
65 writer: std::sync::Mutex::new(db),
66 in_memory,
67 };
68 if idx.writer().file_count().unwrap_or(0) == 0 {
70 let _ = idx.migrate_from_json();
71 }
72 idx
73 }
74
75 fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77 self.writer
78 .lock()
79 .unwrap_or_else(|poisoned| poisoned.into_inner())
80 }
81
82 fn reader(&self) -> Result<ReadDb<'_>> {
84 if self.in_memory {
85 return Ok(ReadDb::Writer(self.writer()));
86 }
87 match IndexDb::open_readonly(&self.db_path)? {
88 Some(db) => Ok(ReadDb::Owned(db)),
89 None => Ok(ReadDb::Writer(self.writer())),
90 }
91 }
92
93 pub fn new_memory(project: ProjectRoot) -> Self {
95 let db = IndexDb::open_memory().unwrap();
96 Self {
97 db_path: PathBuf::new(),
98 project,
99 writer: std::sync::Mutex::new(db),
100 in_memory: true,
101 }
102 }
103
104 pub fn stats(&self) -> Result<IndexStats> {
105 let db = self.reader()?;
106 let supported_files = collect_candidate_files(self.project.as_path())?;
107 let indexed_files = db.file_count()?;
108 let indexed_paths = db.all_file_paths()?;
109
110 let mut stale = 0usize;
111 for rel in &indexed_paths {
112 let path = self.project.as_path().join(rel);
113 if !path.is_file() {
114 stale += 1;
115 continue;
116 }
117 let content = match fs::read(&path) {
118 Ok(c) => c,
119 Err(_) => {
120 stale += 1;
121 continue;
122 }
123 };
124 let hash = content_hash(&content);
125 let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
126 if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
127 stale += 1;
128 }
129 }
130
131 Ok(IndexStats {
132 indexed_files,
133 supported_files: supported_files.len(),
134 stale_files: stale,
135 })
136 }
137
138 fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
143 let fts_file_boost: std::collections::HashSet<String> = {
154 let query_lower = query.to_ascii_lowercase();
155 let tokens: Vec<&str> = query_lower
156 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
157 .filter(|t| t.len() >= 3)
158 .collect();
159 let mut boost_files = std::collections::HashSet::new();
160 if let Ok(hits) = self.find_symbol(query, None, false, false, 15) {
162 for sym in hits {
163 boost_files.insert(sym.file_path);
164 }
165 }
166 for token in &tokens {
168 if let Ok(hits) = self.find_symbol(token, None, false, false, 10) {
169 for sym in hits {
170 boost_files.insert(sym.file_path);
171 }
172 }
173 }
174 boost_files
175 };
176
177 let (top_files, importer_files): (Vec<String>, Vec<String>) = {
178 let db = self.reader()?;
179 let all_paths = db.all_file_paths()?;
180
181 let query_lower = query.to_ascii_lowercase();
182 let query_tokens: Vec<&str> = query_lower
183 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
184 .filter(|t| t.len() >= 3)
185 .collect();
186
187 let mut file_scores: Vec<(String, usize)> = all_paths
188 .into_iter()
189 .map(|path| {
190 let path_lower = path.to_ascii_lowercase();
191 let mut score = query_tokens
192 .iter()
193 .filter(|token| path_lower.contains(**token))
194 .count();
195 if fts_file_boost.contains(&path) {
197 score += 2;
198 }
199 (path, score)
200 })
201 .collect();
202
203 file_scores.sort_by_key(|b| std::cmp::Reverse(b.1));
204 let top: Vec<String> = file_scores
205 .into_iter()
206 .filter(|(_, score)| *score > 0)
207 .take(10)
208 .map(|(path, _)| path)
209 .collect();
210
211 let mut importers = Vec::new();
214 if !top.is_empty() && top.len() <= 5 {
215 for file_path in top.iter().take(3) {
216 if let Ok(imp) = db.get_importers(file_path) {
217 for importer_path in imp.into_iter().take(3) {
218 importers.push(importer_path);
219 }
220 }
221 }
222 }
223
224 (top, importers)
225 };
227
228 if top_files.is_empty() {
230 return self.find_symbol(query, None, false, false, 500);
231 }
232
233 let mut all_symbols = Vec::new();
235 for file_path in &top_files {
236 if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
237 all_symbols.extend(symbols);
238 }
239 }
240
241 for importer_path in &importer_files {
244 if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
245 all_symbols.extend(symbols);
246 }
247 }
248
249 let mut seen_ids: std::collections::HashSet<String> =
251 all_symbols.iter().map(|s| s.id.clone()).collect();
252
253 if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
254 for sym in direct {
255 if seen_ids.insert(sym.id.clone()) {
256 all_symbols.push(sym);
257 }
258 }
259 }
260
261 let query_lower = query.to_ascii_lowercase();
264 let tokens: Vec<&str> = query_lower
265 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
266 .filter(|t| t.len() >= 3)
267 .collect();
268 if tokens.len() >= 2 {
269 for token in &tokens {
270 match self.find_symbol(token, None, false, false, 10) {
271 Ok(hits) => {
272 for sym in hits {
273 if seen_ids.insert(sym.id.clone()) {
274 all_symbols.push(sym);
275 }
276 }
277 }
278 Err(e) => {
279 tracing::debug!(token, error = %e, "token find_symbol failed");
280 }
281 }
282 }
283 }
284
285 Ok(all_symbols)
286 }
287
288 pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
291 let db = self.reader()?;
292 db.dir_stats()
293 }
294
295 pub fn indexed_file_paths(&self) -> Result<Vec<String>> {
296 let db = self.reader()?;
297 db.all_file_paths()
298 }
299
300 pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
301 let resolved = self.project.resolve(path)?;
302 if resolved.is_dir() {
303 let mut symbols = Vec::new();
304 for file in WalkDir::new(&resolved)
305 .into_iter()
306 .filter_entry(|entry| !is_excluded(entry.path()))
307 {
308 let file = file?;
309 if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
310 continue;
311 }
312 let relative = self.project.to_relative(file.path());
313 let parsed = self.ensure_indexed(file.path(), &relative)?;
314 if !parsed.is_empty() {
315 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
316 symbols.push(SymbolInfo {
317 name: relative.clone(),
318 kind: SymbolKind::File,
319 file_path: relative.clone(),
320 provenance: SymbolProvenance::from_path(&relative),
321 line: 0,
322 column: 0,
323 signature: format!(
324 "{} ({} symbols)",
325 file.file_name().to_string_lossy(),
326 parsed.len()
327 ),
328 name_path: relative,
329 id,
330 body: None,
331 children: parsed
332 .into_iter()
333 .map(|symbol| to_symbol_info(symbol, depth))
334 .collect(),
335 start_byte: 0,
336 end_byte: 0,
337 });
338 }
339 }
340 return Ok(symbols);
341 }
342
343 let relative = self.project.to_relative(&resolved);
344 let parsed = self.ensure_indexed(&resolved, &relative)?;
345 Ok(parsed
346 .into_iter()
347 .map(|symbol| to_symbol_info(symbol, depth))
348 .collect())
349 }
350
351 pub fn find_symbol(
352 &self,
353 name: &str,
354 file_path: Option<&str>,
355 include_body: bool,
356 exact_match: bool,
357 max_matches: usize,
358 ) -> Result<Vec<SymbolInfo>> {
359 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
361 let resolved = self.project.resolve(id_file)?;
362 let relative = self.project.to_relative(&resolved);
363 self.ensure_indexed(&resolved, &relative)?;
364 let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
366 let db = self.writer();
367 let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
368 let mut results = Vec::new();
369 for row in db_rows {
370 if row.name_path != id_name_path {
371 continue;
372 }
373 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
374 let body = if include_body {
375 let abs = self.project.as_path().join(&rel_path);
376 fs::read_to_string(&abs).ok().map(|source| {
377 slice_source(&source, row.start_byte as u32, row.end_byte as u32)
378 })
379 } else {
380 None
381 };
382 let kind = SymbolKind::from_str_label(&row.kind);
383 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
384 let prov = SymbolProvenance::from_path(&rel_path);
385 results.push(SymbolInfo {
386 name: row.name,
387 kind,
388 provenance: prov,
389 file_path: rel_path,
390 line: row.line as usize,
391 column: row.column_num as usize,
392 signature: row.signature,
393 name_path: row.name_path,
394 id,
395 body,
396 children: Vec::new(),
397 start_byte: row.start_byte as u32,
398 end_byte: row.end_byte as u32,
399 });
400 }
401 return Ok(results);
402 }
403
404 if let Some(fp) = file_path {
406 let resolved = self.project.resolve(fp)?;
407 let relative = self.project.to_relative(&resolved);
408 self.ensure_indexed(&resolved, &relative)?;
409 } else {
410 let files = collect_candidate_files(self.project.as_path())?;
412 for file in &files {
413 let relative = self.project.to_relative(file);
414 self.ensure_indexed(file, &relative)?;
415 }
416 }
417
418 let db = self.writer();
419 let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
420
421 let mut results = Vec::new();
422 for row in db_rows {
423 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
424 let body = if include_body {
425 let abs = self.project.as_path().join(&rel_path);
426 fs::read_to_string(&abs)
427 .ok()
428 .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
429 } else {
430 None
431 };
432 let kind = SymbolKind::from_str_label(&row.kind);
433 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
434 let prov = SymbolProvenance::from_path(&rel_path);
435 results.push(SymbolInfo {
436 name: row.name,
437 kind,
438 provenance: prov,
439 file_path: rel_path,
440 line: row.line as usize,
441 column: row.column_num as usize,
442 signature: row.signature,
443 name_path: row.name_path,
444 id,
445 body,
446 children: Vec::new(),
447 start_byte: row.start_byte as u32,
448 end_byte: row.end_byte as u32,
449 });
450 }
451 Ok(results)
452 }
453
454 pub fn get_ranked_context(
455 &self,
456 query: &str,
457 path: Option<&str>,
458 max_tokens: usize,
459 include_body: bool,
460 depth: usize,
461 ) -> Result<RankedContextResult> {
462 let all_symbols = if let Some(path) = path {
463 self.get_symbols_overview(path, depth)?
464 } else {
465 self.select_solve_symbols(query, depth)?
467 };
468
469 let mut scored = all_symbols
470 .into_iter()
471 .flat_map(flatten_symbol_infos)
472 .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
473 .collect::<Vec<_>>();
474 scored.sort_by_key(|right| std::cmp::Reverse(right.1));
475
476 let (selected, chars_used) =
477 prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
478
479 Ok(RankedContextResult {
480 query: query.to_owned(),
481 count: selected.len(),
482 symbols: selected,
483 token_budget: max_tokens,
484 chars_used,
485 })
486 }
487
488 pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
490 self.writer()
491 }
492}
493
494pub fn get_symbols_overview(
495 project: &ProjectRoot,
496 path: &str,
497 depth: usize,
498) -> Result<Vec<SymbolInfo>> {
499 let resolved = project.resolve(path)?;
500 if resolved.is_dir() {
501 return get_directory_symbols(project, &resolved, depth);
502 }
503 get_file_symbols(project, &resolved, depth)
504}
505
506pub fn find_symbol_range(
510 project: &ProjectRoot,
511 relative_path: &str,
512 symbol_name: &str,
513 name_path: Option<&str>,
514) -> Result<(usize, usize)> {
515 let file = project.resolve(relative_path)?;
516 let rel = project.to_relative(&file);
517 let Some(language_config) = language_for_path(&file) else {
518 bail!("unsupported file type: {}", file.display());
519 };
520 let source =
521 fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
522 let parsed = parse_symbols(&language_config, &rel, &source, false)?;
523 let flat = flatten_symbols(parsed);
524
525 let candidate = if let Some(np) = name_path {
526 flat.into_iter()
527 .find(|sym| sym.name_path == np || sym.name == symbol_name)
528 } else {
529 flat.into_iter().find(|sym| sym.name == symbol_name)
530 };
531
532 match candidate {
533 Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
534 None => bail!(
535 "symbol '{}' not found in {}",
536 name_path.unwrap_or(symbol_name),
537 relative_path
538 ),
539 }
540}
541
542pub fn find_symbol(
543 project: &ProjectRoot,
544 name: &str,
545 file_path: Option<&str>,
546 include_body: bool,
547 exact_match: bool,
548 max_matches: usize,
549) -> Result<Vec<SymbolInfo>> {
550 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
552 let resolved = project.resolve(id_file)?;
553 let rel = project.to_relative(&resolved);
554 let Some(language_config) = language_for_path(&resolved) else {
555 return Ok(Vec::new());
556 };
557 let source = fs::read_to_string(&resolved)?;
558 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
559 let mut results = Vec::new();
560 for symbol in flatten_symbols(parsed) {
561 if symbol.name_path == id_name_path {
562 results.push(to_symbol_info(symbol, usize::MAX));
563 if results.len() >= max_matches {
564 return Ok(results);
565 }
566 }
567 }
568 return Ok(results);
569 }
570
571 let files = match file_path {
572 Some(path) => vec![project.resolve(path)?],
573 None => collect_candidate_files(project.as_path())?,
574 };
575
576 let query = name.to_lowercase();
577 let mut results = Vec::new();
578
579 for file in files {
580 let rel = project.to_relative(&file);
581 let Some(language_config) = language_for_path(&file) else {
582 continue;
583 };
584 let source = match fs::read_to_string(&file) {
585 Ok(source) => source,
586 Err(_) => continue,
587 };
588 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
589 for symbol in flatten_symbols(parsed) {
590 let matched = if exact_match {
591 symbol.name == name
592 } else {
593 scoring::contains_ascii_ci(&symbol.name, &query)
594 };
595 if matched {
596 results.push(to_symbol_info(symbol, usize::MAX));
597 if results.len() >= max_matches {
598 return Ok(results);
599 }
600 }
601 }
602 }
603
604 Ok(results)
605}
606
607fn get_directory_symbols(
608 project: &ProjectRoot,
609 dir: &Path,
610 depth: usize,
611) -> Result<Vec<SymbolInfo>> {
612 let mut symbols = Vec::new();
613 for entry in WalkDir::new(dir)
614 .into_iter()
615 .filter_entry(|entry| !is_excluded(entry.path()))
616 {
617 let entry = entry?;
618 if !entry.file_type().is_file() {
619 continue;
620 }
621 let path = entry.path();
622 if language_for_path(path).is_none() {
623 continue;
624 }
625 let file_symbols = get_file_symbols(project, path, depth)?;
626 if !file_symbols.is_empty() {
627 let relative = project.to_relative(path);
628 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
629 symbols.push(SymbolInfo {
630 name: relative.clone(),
631 kind: SymbolKind::File,
632 file_path: relative.clone(),
633 provenance: SymbolProvenance::from_path(&relative),
634 line: 0,
635 column: 0,
636 signature: format!(
637 "{} ({} symbols)",
638 path.file_name()
639 .and_then(|name| name.to_str())
640 .unwrap_or_default(),
641 file_symbols.len()
642 ),
643 name_path: relative,
644 id,
645 body: None,
646 children: file_symbols,
647 start_byte: 0,
648 end_byte: 0,
649 });
650 }
651 }
652 Ok(symbols)
653}
654
655fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
656 let relative = project.to_relative(file);
657 let Some(language_config) = language_for_path(file) else {
658 return Ok(Vec::new());
659 };
660 let source =
661 fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
662 let parsed = parse_symbols(&language_config, &relative, &source, false)?;
663 Ok(parsed
664 .into_iter()
665 .map(|symbol| to_symbol_info(symbol, depth))
666 .collect())
667}
668
669fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
670 collect_files(root, |path| language_for_path(path).is_some())
671}
672
673fn file_modified_ms(path: &Path) -> Result<u128> {
674 let modified = fs::metadata(path)
675 .with_context(|| format!("failed to stat {}", path.display()))?
676 .modified()
677 .with_context(|| format!("failed to read mtime for {}", path.display()))?;
678 Ok(modified
679 .duration_since(UNIX_EPOCH)
680 .unwrap_or_default()
681 .as_millis())
682}