1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14 sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18 IndexStats, RankedContextEntry, RankedContextResult, SymbolInfo, SymbolKind, SymbolProvenance,
19 make_symbol_id, parse_symbol_id,
20};
21
22use crate::db::{self, IndexDb, content_hash, index_db_path};
23pub(crate) use crate::lang_config::{LanguageConfig, language_for_path};
25use crate::project::ProjectRoot;
26use anyhow::{Context, Result, bail};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded};
33
/// Symbol index for a single project, backed by an `IndexDb`.
pub struct SymbolIndex {
    /// Project root used to resolve and relativize paths.
    project: ProjectRoot,
    /// Path of the on-disk database; left empty by `new_memory`.
    db_path: PathBuf,
    /// Database handle guarded by a mutex; obtained through `writer()`.
    writer: std::sync::Mutex<IndexDb>,
    /// When `true`, `reader()` serves reads through the writer handle instead
    /// of opening `db_path` read-only.
    in_memory: bool,
}
49
50impl SymbolIndex {
51 pub fn new(project: ProjectRoot) -> Self {
52 let db_path = index_db_path(project.as_path());
53 let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54 tracing::warn!(
55 path = %db_path.display(),
56 error = %e,
57 "failed to open DB, falling back to in-memory"
58 );
59 IndexDb::open_memory().unwrap()
60 });
61 let in_memory = !db_path.is_file();
62 let mut idx = Self {
63 project,
64 db_path,
65 writer: std::sync::Mutex::new(db),
66 in_memory,
67 };
68 if idx.writer().file_count().unwrap_or(0) == 0 {
70 let _ = idx.migrate_from_json();
71 }
72 idx
73 }
74
75 fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77 self.writer
78 .lock()
79 .unwrap_or_else(|poisoned| poisoned.into_inner())
80 }
81
82 fn reader(&self) -> Result<ReadDb<'_>> {
84 if self.in_memory {
85 return Ok(ReadDb::Writer(self.writer()));
86 }
87 match IndexDb::open_readonly(&self.db_path)? {
88 Some(db) => Ok(ReadDb::Owned(db)),
89 None => Ok(ReadDb::Writer(self.writer())),
90 }
91 }
92
93 pub fn new_memory(project: ProjectRoot) -> Self {
95 let db = IndexDb::open_memory().unwrap();
96 Self {
97 db_path: PathBuf::new(),
98 project,
99 writer: std::sync::Mutex::new(db),
100 in_memory: true,
101 }
102 }
103
104 pub fn stats(&self) -> Result<IndexStats> {
105 let db = self.reader()?;
106 let supported_files = collect_candidate_files(self.project.as_path())?;
107 let indexed_files = db.file_count()?;
108 let indexed_paths = db.all_file_paths()?;
109
110 let mut stale = 0usize;
111 for rel in &indexed_paths {
112 let path = self.project.as_path().join(rel);
113 if !path.is_file() {
114 stale += 1;
115 continue;
116 }
117 let content = match fs::read(&path) {
118 Ok(c) => c,
119 Err(_) => {
120 stale += 1;
121 continue;
122 }
123 };
124 let hash = content_hash(&content);
125 let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
126 if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
127 stale += 1;
128 }
129 }
130
131 Ok(IndexStats {
132 indexed_files,
133 supported_files: supported_files.len(),
134 stale_files: stale,
135 })
136 }
137
138 fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
143 let fts_file_boost: std::collections::HashSet<String> = {
154 let query_lower = query.to_ascii_lowercase();
155 let tokens: Vec<&str> = query_lower
156 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
157 .filter(|t| t.len() >= 3)
158 .collect();
159 let mut boost_files = std::collections::HashSet::new();
160 if let Ok(hits) = self.find_symbol(query, None, false, false, 15) {
162 for sym in hits {
163 boost_files.insert(sym.file_path);
164 }
165 }
166 for token in &tokens {
168 if let Ok(hits) = self.find_symbol(token, None, false, false, 10) {
169 for sym in hits {
170 boost_files.insert(sym.file_path);
171 }
172 }
173 }
174 boost_files
175 };
176
177 let (top_files, importer_files): (Vec<String>, Vec<String>) = {
178 let db = self.reader()?;
179 let all_paths = db.all_file_paths()?;
180
181 let query_lower = query.to_ascii_lowercase();
182 let query_tokens: Vec<&str> = query_lower
183 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
184 .filter(|t| t.len() >= 3)
185 .collect();
186
187 let mut file_scores: Vec<(String, usize)> = all_paths
188 .into_iter()
189 .map(|path| {
190 let path_lower = path.to_ascii_lowercase();
191 let mut score = query_tokens
192 .iter()
193 .filter(|token| path_lower.contains(**token))
194 .count();
195 if fts_file_boost.contains(&path) {
197 score += 2;
198 }
199 (path, score)
200 })
201 .collect();
202
203 file_scores.sort_by(|a, b| b.1.cmp(&a.1));
204 let top: Vec<String> = file_scores
205 .into_iter()
206 .filter(|(_, score)| *score > 0)
207 .take(10)
208 .map(|(path, _)| path)
209 .collect();
210
211 let mut importers = Vec::new();
214 if !top.is_empty() && top.len() <= 5 {
215 for file_path in top.iter().take(3) {
216 if let Ok(imp) = db.get_importers(file_path) {
217 for importer_path in imp.into_iter().take(3) {
218 importers.push(importer_path);
219 }
220 }
221 }
222 }
223
224 (top, importers)
225 };
227
228 if top_files.is_empty() {
230 return self.find_symbol(query, None, false, false, 500);
231 }
232
233 let mut all_symbols = Vec::new();
235 for file_path in &top_files {
236 if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
237 all_symbols.extend(symbols);
238 }
239 }
240
241 for importer_path in &importer_files {
244 if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
245 all_symbols.extend(symbols);
246 }
247 }
248
249 let mut seen_ids: std::collections::HashSet<String> =
251 all_symbols.iter().map(|s| s.id.clone()).collect();
252
253 if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
254 for sym in direct {
255 if seen_ids.insert(sym.id.clone()) {
256 all_symbols.push(sym);
257 }
258 }
259 }
260
261 let query_lower = query.to_ascii_lowercase();
264 let tokens: Vec<&str> = query_lower
265 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
266 .filter(|t| t.len() >= 3)
267 .collect();
268 if tokens.len() >= 2 {
269 for token in &tokens {
270 match self.find_symbol(token, None, false, false, 10) {
271 Ok(hits) => {
272 for sym in hits {
273 if seen_ids.insert(sym.id.clone()) {
274 all_symbols.push(sym);
275 }
276 }
277 }
278 Err(e) => {
279 tracing::debug!(token, error = %e, "token find_symbol failed");
280 }
281 }
282 }
283 }
284
285 Ok(all_symbols)
286 }
287
288 pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
291 let db = self.reader()?;
292 db.dir_stats()
293 }
294
295 pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
296 let resolved = self.project.resolve(path)?;
297 if resolved.is_dir() {
298 let mut symbols = Vec::new();
299 for file in WalkDir::new(&resolved)
300 .into_iter()
301 .filter_entry(|entry| !is_excluded(entry.path()))
302 {
303 let file = file?;
304 if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
305 continue;
306 }
307 let relative = self.project.to_relative(file.path());
308 let parsed = self.ensure_indexed(file.path(), &relative)?;
309 if !parsed.is_empty() {
310 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
311 symbols.push(SymbolInfo {
312 name: relative.clone(),
313 kind: SymbolKind::File,
314 file_path: relative.clone(),
315 provenance: SymbolProvenance::from_path(&relative),
316 line: 0,
317 column: 0,
318 signature: format!(
319 "{} ({} symbols)",
320 file.file_name().to_string_lossy(),
321 parsed.len()
322 ),
323 name_path: relative,
324 id,
325 body: None,
326 children: parsed
327 .into_iter()
328 .map(|symbol| to_symbol_info(symbol, depth))
329 .collect(),
330 start_byte: 0,
331 end_byte: 0,
332 });
333 }
334 }
335 return Ok(symbols);
336 }
337
338 let relative = self.project.to_relative(&resolved);
339 let parsed = self.ensure_indexed(&resolved, &relative)?;
340 Ok(parsed
341 .into_iter()
342 .map(|symbol| to_symbol_info(symbol, depth))
343 .collect())
344 }
345
346 pub fn find_symbol(
347 &self,
348 name: &str,
349 file_path: Option<&str>,
350 include_body: bool,
351 exact_match: bool,
352 max_matches: usize,
353 ) -> Result<Vec<SymbolInfo>> {
354 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
356 let resolved = self.project.resolve(id_file)?;
357 let relative = self.project.to_relative(&resolved);
358 self.ensure_indexed(&resolved, &relative)?;
359 let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
361 let db = self.writer();
362 let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
363 let mut results = Vec::new();
364 for row in db_rows {
365 if row.name_path != id_name_path {
366 continue;
367 }
368 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
369 let body = if include_body {
370 let abs = self.project.as_path().join(&rel_path);
371 fs::read_to_string(&abs).ok().map(|source| {
372 slice_source(&source, row.start_byte as u32, row.end_byte as u32)
373 })
374 } else {
375 None
376 };
377 let kind = SymbolKind::from_str_label(&row.kind);
378 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
379 let prov = SymbolProvenance::from_path(&rel_path);
380 results.push(SymbolInfo {
381 name: row.name,
382 kind,
383 provenance: prov,
384 file_path: rel_path,
385 line: row.line as usize,
386 column: row.column_num as usize,
387 signature: row.signature,
388 name_path: row.name_path,
389 id,
390 body,
391 children: Vec::new(),
392 start_byte: row.start_byte as u32,
393 end_byte: row.end_byte as u32,
394 });
395 }
396 return Ok(results);
397 }
398
399 if let Some(fp) = file_path {
401 let resolved = self.project.resolve(fp)?;
402 let relative = self.project.to_relative(&resolved);
403 self.ensure_indexed(&resolved, &relative)?;
404 } else {
405 let files = collect_candidate_files(self.project.as_path())?;
407 for file in &files {
408 let relative = self.project.to_relative(file);
409 self.ensure_indexed(file, &relative)?;
410 }
411 }
412
413 let db = self.writer();
414 let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
415
416 let mut results = Vec::new();
417 for row in db_rows {
418 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
419 let body = if include_body {
420 let abs = self.project.as_path().join(&rel_path);
421 fs::read_to_string(&abs)
422 .ok()
423 .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
424 } else {
425 None
426 };
427 let kind = SymbolKind::from_str_label(&row.kind);
428 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
429 let prov = SymbolProvenance::from_path(&rel_path);
430 results.push(SymbolInfo {
431 name: row.name,
432 kind,
433 provenance: prov,
434 file_path: rel_path,
435 line: row.line as usize,
436 column: row.column_num as usize,
437 signature: row.signature,
438 name_path: row.name_path,
439 id,
440 body,
441 children: Vec::new(),
442 start_byte: row.start_byte as u32,
443 end_byte: row.end_byte as u32,
444 });
445 }
446 Ok(results)
447 }
448
449 pub fn get_ranked_context(
450 &self,
451 query: &str,
452 path: Option<&str>,
453 max_tokens: usize,
454 include_body: bool,
455 depth: usize,
456 ) -> Result<RankedContextResult> {
457 let all_symbols = if let Some(path) = path {
458 self.get_symbols_overview(path, depth)?
459 } else {
460 self.select_solve_symbols(query, depth)?
462 };
463
464 let mut scored = all_symbols
465 .into_iter()
466 .flat_map(flatten_symbol_infos)
467 .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
468 .collect::<Vec<_>>();
469 scored.sort_by(|left, right| right.1.cmp(&left.1));
470
471 let (selected, chars_used) =
472 prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
473
474 Ok(RankedContextResult {
475 query: query.to_owned(),
476 count: selected.len(),
477 symbols: selected,
478 token_budget: max_tokens,
479 chars_used,
480 })
481 }
482
483 pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
485 self.writer()
486 }
487}
488
489pub fn get_symbols_overview(
490 project: &ProjectRoot,
491 path: &str,
492 depth: usize,
493) -> Result<Vec<SymbolInfo>> {
494 let resolved = project.resolve(path)?;
495 if resolved.is_dir() {
496 return get_directory_symbols(project, &resolved, depth);
497 }
498 get_file_symbols(project, &resolved, depth)
499}
500
501pub fn find_symbol_range(
505 project: &ProjectRoot,
506 relative_path: &str,
507 symbol_name: &str,
508 name_path: Option<&str>,
509) -> Result<(usize, usize)> {
510 let file = project.resolve(relative_path)?;
511 let rel = project.to_relative(&file);
512 let Some(language_config) = language_for_path(&file) else {
513 bail!("unsupported file type: {}", file.display());
514 };
515 let source =
516 fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
517 let parsed = parse_symbols(&language_config, &rel, &source, false)?;
518 let flat = flatten_symbols(parsed);
519
520 let candidate = if let Some(np) = name_path {
521 flat.into_iter()
522 .find(|sym| sym.name_path == np || sym.name == symbol_name)
523 } else {
524 flat.into_iter().find(|sym| sym.name == symbol_name)
525 };
526
527 match candidate {
528 Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
529 None => bail!(
530 "symbol '{}' not found in {}",
531 name_path.unwrap_or(symbol_name),
532 relative_path
533 ),
534 }
535}
536
537pub fn find_symbol(
538 project: &ProjectRoot,
539 name: &str,
540 file_path: Option<&str>,
541 include_body: bool,
542 exact_match: bool,
543 max_matches: usize,
544) -> Result<Vec<SymbolInfo>> {
545 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
547 let resolved = project.resolve(id_file)?;
548 let rel = project.to_relative(&resolved);
549 let Some(language_config) = language_for_path(&resolved) else {
550 return Ok(Vec::new());
551 };
552 let source = fs::read_to_string(&resolved)?;
553 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
554 let mut results = Vec::new();
555 for symbol in flatten_symbols(parsed) {
556 if symbol.name_path == id_name_path {
557 results.push(to_symbol_info(symbol, usize::MAX));
558 if results.len() >= max_matches {
559 return Ok(results);
560 }
561 }
562 }
563 return Ok(results);
564 }
565
566 let files = match file_path {
567 Some(path) => vec![project.resolve(path)?],
568 None => collect_candidate_files(project.as_path())?,
569 };
570
571 let query = name.to_lowercase();
572 let mut results = Vec::new();
573
574 for file in files {
575 let rel = project.to_relative(&file);
576 let Some(language_config) = language_for_path(&file) else {
577 continue;
578 };
579 let source = match fs::read_to_string(&file) {
580 Ok(source) => source,
581 Err(_) => continue,
582 };
583 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
584 for symbol in flatten_symbols(parsed) {
585 let matched = if exact_match {
586 symbol.name == name
587 } else {
588 scoring::contains_ascii_ci(&symbol.name, &query)
589 };
590 if matched {
591 results.push(to_symbol_info(symbol, usize::MAX));
592 if results.len() >= max_matches {
593 return Ok(results);
594 }
595 }
596 }
597 }
598
599 Ok(results)
600}
601
602fn get_directory_symbols(
603 project: &ProjectRoot,
604 dir: &Path,
605 depth: usize,
606) -> Result<Vec<SymbolInfo>> {
607 let mut symbols = Vec::new();
608 for entry in WalkDir::new(dir)
609 .into_iter()
610 .filter_entry(|entry| !is_excluded(entry.path()))
611 {
612 let entry = entry?;
613 if !entry.file_type().is_file() {
614 continue;
615 }
616 let path = entry.path();
617 if language_for_path(path).is_none() {
618 continue;
619 }
620 let file_symbols = get_file_symbols(project, path, depth)?;
621 if !file_symbols.is_empty() {
622 let relative = project.to_relative(path);
623 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
624 symbols.push(SymbolInfo {
625 name: relative.clone(),
626 kind: SymbolKind::File,
627 file_path: relative.clone(),
628 provenance: SymbolProvenance::from_path(&relative),
629 line: 0,
630 column: 0,
631 signature: format!(
632 "{} ({} symbols)",
633 path.file_name()
634 .and_then(|name| name.to_str())
635 .unwrap_or_default(),
636 file_symbols.len()
637 ),
638 name_path: relative,
639 id,
640 body: None,
641 children: file_symbols,
642 start_byte: 0,
643 end_byte: 0,
644 });
645 }
646 }
647 Ok(symbols)
648}
649
650fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
651 let relative = project.to_relative(file);
652 let Some(language_config) = language_for_path(file) else {
653 return Ok(Vec::new());
654 };
655 let source =
656 fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
657 let parsed = parse_symbols(&language_config, &relative, &source, false)?;
658 Ok(parsed
659 .into_iter()
660 .map(|symbol| to_symbol_info(symbol, depth))
661 .collect())
662}
663
664fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
665 collect_files(root, |path| language_for_path(path).is_some())
666}
667
668fn file_modified_ms(path: &Path) -> Result<u128> {
669 let modified = fs::metadata(path)
670 .with_context(|| format!("failed to stat {}", path.display()))?
671 .modified()
672 .with_context(|| format!("failed to read mtime for {}", path.display()))?;
673 Ok(modified
674 .duration_since(UNIX_EPOCH)
675 .unwrap_or_default()
676 .as_millis())
677}