1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14 sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18 make_symbol_id, parse_symbol_id, IndexStats, RankedContextEntry, RankedContextResult,
19 SymbolInfo, SymbolKind,
20};
21
22use crate::db::{self, content_hash, index_db_path, IndexDb};
23pub(crate) use crate::lang_config::{language_for_path, LanguageConfig};
25use crate::project::ProjectRoot;
26use anyhow::{bail, Context, Result};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded};
33
34pub struct SymbolIndex {
43 project: ProjectRoot,
44 db_path: PathBuf,
45 writer: std::sync::Mutex<IndexDb>,
46 in_memory: bool,
48}
49
50impl SymbolIndex {
51 pub fn new(project: ProjectRoot) -> Self {
52 let db_path = index_db_path(project.as_path());
53 let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54 tracing::warn!(
55 path = %db_path.display(),
56 error = %e,
57 "failed to open DB, falling back to in-memory"
58 );
59 IndexDb::open_memory().unwrap()
60 });
61 let in_memory = !db_path.is_file();
62 let mut idx = Self {
63 project,
64 db_path,
65 writer: std::sync::Mutex::new(db),
66 in_memory,
67 };
68 if idx.writer().file_count().unwrap_or(0) == 0 {
70 let _ = idx.migrate_from_json();
71 }
72 idx
73 }
74
75 fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77 self.writer
78 .lock()
79 .unwrap_or_else(|poisoned| poisoned.into_inner())
80 }
81
82 fn reader(&self) -> Result<ReadDb<'_>> {
84 if self.in_memory {
85 return Ok(ReadDb::Writer(self.writer()));
86 }
87 match IndexDb::open_readonly(&self.db_path)? {
88 Some(db) => Ok(ReadDb::Owned(db)),
89 None => Ok(ReadDb::Writer(self.writer())),
90 }
91 }
92
93 pub fn new_memory(project: ProjectRoot) -> Self {
95 let db = IndexDb::open_memory().unwrap();
96 Self {
97 db_path: PathBuf::new(),
98 project,
99 writer: std::sync::Mutex::new(db),
100 in_memory: true,
101 }
102 }
103
104 pub fn stats(&self) -> Result<IndexStats> {
105 let db = self.reader()?;
106 let supported_files = collect_candidate_files(self.project.as_path())?;
107 let indexed_files = db.file_count()?;
108 let indexed_paths = db.all_file_paths()?;
109
110 let mut stale = 0usize;
111 for rel in &indexed_paths {
112 let path = self.project.as_path().join(rel);
113 if !path.is_file() {
114 stale += 1;
115 continue;
116 }
117 let content = match fs::read(&path) {
118 Ok(c) => c,
119 Err(_) => {
120 stale += 1;
121 continue;
122 }
123 };
124 let hash = content_hash(&content);
125 let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
126 if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
127 stale += 1;
128 }
129 }
130
131 Ok(IndexStats {
132 indexed_files,
133 supported_files: supported_files.len(),
134 stale_files: stale,
135 })
136 }
137
138 fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
143 let fts_file_boost: std::collections::HashSet<String> = {
154 let query_lower = query.to_ascii_lowercase();
155 let tokens: Vec<&str> = query_lower
156 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
157 .filter(|t| t.len() >= 3)
158 .collect();
159 let mut boost_files = std::collections::HashSet::new();
160 if let Ok(hits) = self.find_symbol(query, None, false, false, 15) {
162 for sym in hits {
163 boost_files.insert(sym.file_path);
164 }
165 }
166 for token in &tokens {
168 if let Ok(hits) = self.find_symbol(token, None, false, false, 10) {
169 for sym in hits {
170 boost_files.insert(sym.file_path);
171 }
172 }
173 }
174 boost_files
175 };
176
177 let (top_files, importer_files): (Vec<String>, Vec<String>) = {
178 let db = self.reader()?;
179 let all_paths = db.all_file_paths()?;
180
181 let query_lower = query.to_ascii_lowercase();
182 let query_tokens: Vec<&str> = query_lower
183 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
184 .filter(|t| t.len() >= 3)
185 .collect();
186
187 let mut file_scores: Vec<(String, usize)> = all_paths
188 .into_iter()
189 .map(|path| {
190 let path_lower = path.to_ascii_lowercase();
191 let mut score = query_tokens
192 .iter()
193 .filter(|token| path_lower.contains(**token))
194 .count();
195 if fts_file_boost.contains(&path) {
197 score += 2;
198 }
199 (path, score)
200 })
201 .collect();
202
203 file_scores.sort_by(|a, b| b.1.cmp(&a.1));
204 let top: Vec<String> = file_scores
205 .into_iter()
206 .filter(|(_, score)| *score > 0)
207 .take(10)
208 .map(|(path, _)| path)
209 .collect();
210
211 let mut importers = Vec::new();
214 if !top.is_empty() && top.len() <= 5 {
215 for file_path in top.iter().take(3) {
216 if let Ok(imp) = db.get_importers(file_path) {
217 for importer_path in imp.into_iter().take(3) {
218 importers.push(importer_path);
219 }
220 }
221 }
222 }
223
224 (top, importers)
225 };
227
228 if top_files.is_empty() {
230 return self.find_symbol(query, None, false, false, 500);
231 }
232
233 let mut all_symbols = Vec::new();
235 for file_path in &top_files {
236 if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
237 all_symbols.extend(symbols);
238 }
239 }
240
241 for importer_path in &importer_files {
244 if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
245 all_symbols.extend(symbols);
246 }
247 }
248
249 let mut seen_ids: std::collections::HashSet<String> =
251 all_symbols.iter().map(|s| s.id.clone()).collect();
252
253 if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
254 for sym in direct {
255 if seen_ids.insert(sym.id.clone()) {
256 all_symbols.push(sym);
257 }
258 }
259 }
260
261 let query_lower = query.to_ascii_lowercase();
264 let tokens: Vec<&str> = query_lower
265 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
266 .filter(|t| t.len() >= 3)
267 .collect();
268 if tokens.len() >= 2 {
269 for token in &tokens {
270 match self.find_symbol(token, None, false, false, 10) {
271 Ok(hits) => {
272 for sym in hits {
273 if seen_ids.insert(sym.id.clone()) {
274 all_symbols.push(sym);
275 }
276 }
277 }
278 Err(e) => {
279 tracing::debug!(token, error = %e, "token find_symbol failed");
280 }
281 }
282 }
283 }
284
285 Ok(all_symbols)
286 }
287
288 pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
291 let db = self.reader()?;
292 db.dir_stats()
293 }
294
295 pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
296 let resolved = self.project.resolve(path)?;
297 if resolved.is_dir() {
298 let mut symbols = Vec::new();
299 for file in WalkDir::new(&resolved)
300 .into_iter()
301 .filter_entry(|entry| !is_excluded(entry.path()))
302 {
303 let file = file?;
304 if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
305 continue;
306 }
307 let relative = self.project.to_relative(file.path());
308 let parsed = self.ensure_indexed(file.path(), &relative)?;
309 if !parsed.is_empty() {
310 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
311 symbols.push(SymbolInfo {
312 name: relative.clone(),
313 kind: SymbolKind::File,
314 file_path: relative.clone(),
315 line: 0,
316 column: 0,
317 signature: format!(
318 "{} ({} symbols)",
319 file.file_name().to_string_lossy(),
320 parsed.len()
321 ),
322 name_path: relative,
323 id,
324 body: None,
325 children: parsed
326 .into_iter()
327 .map(|symbol| to_symbol_info(symbol, depth))
328 .collect(),
329 start_byte: 0,
330 end_byte: 0,
331 });
332 }
333 }
334 return Ok(symbols);
335 }
336
337 let relative = self.project.to_relative(&resolved);
338 let parsed = self.ensure_indexed(&resolved, &relative)?;
339 Ok(parsed
340 .into_iter()
341 .map(|symbol| to_symbol_info(symbol, depth))
342 .collect())
343 }
344
345 pub fn find_symbol(
346 &self,
347 name: &str,
348 file_path: Option<&str>,
349 include_body: bool,
350 exact_match: bool,
351 max_matches: usize,
352 ) -> Result<Vec<SymbolInfo>> {
353 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
355 let resolved = self.project.resolve(id_file)?;
356 let relative = self.project.to_relative(&resolved);
357 self.ensure_indexed(&resolved, &relative)?;
358 let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
360 let db = self.writer();
361 let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
362 let mut results = Vec::new();
363 for row in db_rows {
364 if row.name_path != id_name_path {
365 continue;
366 }
367 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
368 let body = if include_body {
369 let abs = self.project.as_path().join(&rel_path);
370 fs::read_to_string(&abs).ok().map(|source| {
371 slice_source(&source, row.start_byte as u32, row.end_byte as u32)
372 })
373 } else {
374 None
375 };
376 let kind = SymbolKind::from_str_label(&row.kind);
377 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
378 results.push(SymbolInfo {
379 name: row.name,
380 kind,
381 file_path: rel_path,
382 line: row.line as usize,
383 column: row.column_num as usize,
384 signature: row.signature,
385 name_path: row.name_path,
386 id,
387 body,
388 children: Vec::new(),
389 start_byte: row.start_byte as u32,
390 end_byte: row.end_byte as u32,
391 });
392 }
393 return Ok(results);
394 }
395
396 if let Some(fp) = file_path {
398 let resolved = self.project.resolve(fp)?;
399 let relative = self.project.to_relative(&resolved);
400 self.ensure_indexed(&resolved, &relative)?;
401 } else {
402 let files = collect_candidate_files(self.project.as_path())?;
404 for file in &files {
405 let relative = self.project.to_relative(file);
406 self.ensure_indexed(file, &relative)?;
407 }
408 }
409
410 let db = self.writer();
411 let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
412
413 let mut results = Vec::new();
414 for row in db_rows {
415 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
416 let body = if include_body {
417 let abs = self.project.as_path().join(&rel_path);
418 fs::read_to_string(&abs)
419 .ok()
420 .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
421 } else {
422 None
423 };
424 let kind = SymbolKind::from_str_label(&row.kind);
425 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
426 results.push(SymbolInfo {
427 name: row.name,
428 kind,
429 file_path: rel_path,
430 line: row.line as usize,
431 column: row.column_num as usize,
432 signature: row.signature,
433 name_path: row.name_path,
434 id,
435 body,
436 children: Vec::new(),
437 start_byte: row.start_byte as u32,
438 end_byte: row.end_byte as u32,
439 });
440 }
441 Ok(results)
442 }
443
444 pub fn get_ranked_context(
445 &self,
446 query: &str,
447 path: Option<&str>,
448 max_tokens: usize,
449 include_body: bool,
450 depth: usize,
451 ) -> Result<RankedContextResult> {
452 let all_symbols = if let Some(path) = path {
453 self.get_symbols_overview(path, depth)?
454 } else {
455 self.select_solve_symbols(query, depth)?
457 };
458
459 let mut scored = all_symbols
460 .into_iter()
461 .flat_map(flatten_symbol_infos)
462 .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
463 .collect::<Vec<_>>();
464 scored.sort_by(|left, right| right.1.cmp(&left.1));
465
466 let (selected, chars_used) =
467 prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
468
469 Ok(RankedContextResult {
470 query: query.to_owned(),
471 count: selected.len(),
472 symbols: selected,
473 token_budget: max_tokens,
474 chars_used,
475 })
476 }
477
478 pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
480 self.writer()
481 }
482}
483
484pub fn get_symbols_overview(
485 project: &ProjectRoot,
486 path: &str,
487 depth: usize,
488) -> Result<Vec<SymbolInfo>> {
489 let resolved = project.resolve(path)?;
490 if resolved.is_dir() {
491 return get_directory_symbols(project, &resolved, depth);
492 }
493 get_file_symbols(project, &resolved, depth)
494}
495
496pub fn find_symbol_range(
500 project: &ProjectRoot,
501 relative_path: &str,
502 symbol_name: &str,
503 name_path: Option<&str>,
504) -> Result<(usize, usize)> {
505 let file = project.resolve(relative_path)?;
506 let rel = project.to_relative(&file);
507 let Some(language_config) = language_for_path(&file) else {
508 bail!("unsupported file type: {}", file.display());
509 };
510 let source =
511 fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
512 let parsed = parse_symbols(&language_config, &rel, &source, false)?;
513 let flat = flatten_symbols(parsed);
514
515 let candidate = if let Some(np) = name_path {
516 flat.into_iter()
517 .find(|sym| sym.name_path == np || sym.name == symbol_name)
518 } else {
519 flat.into_iter().find(|sym| sym.name == symbol_name)
520 };
521
522 match candidate {
523 Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
524 None => bail!(
525 "symbol '{}' not found in {}",
526 name_path.unwrap_or(symbol_name),
527 relative_path
528 ),
529 }
530}
531
532pub fn find_symbol(
533 project: &ProjectRoot,
534 name: &str,
535 file_path: Option<&str>,
536 include_body: bool,
537 exact_match: bool,
538 max_matches: usize,
539) -> Result<Vec<SymbolInfo>> {
540 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
542 let resolved = project.resolve(id_file)?;
543 let rel = project.to_relative(&resolved);
544 let Some(language_config) = language_for_path(&resolved) else {
545 return Ok(Vec::new());
546 };
547 let source = fs::read_to_string(&resolved)?;
548 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
549 let mut results = Vec::new();
550 for symbol in flatten_symbols(parsed) {
551 if symbol.name_path == id_name_path {
552 results.push(to_symbol_info(symbol, usize::MAX));
553 if results.len() >= max_matches {
554 return Ok(results);
555 }
556 }
557 }
558 return Ok(results);
559 }
560
561 let files = match file_path {
562 Some(path) => vec![project.resolve(path)?],
563 None => collect_candidate_files(project.as_path())?,
564 };
565
566 let query = name.to_lowercase();
567 let mut results = Vec::new();
568
569 for file in files {
570 let rel = project.to_relative(&file);
571 let Some(language_config) = language_for_path(&file) else {
572 continue;
573 };
574 let source = match fs::read_to_string(&file) {
575 Ok(source) => source,
576 Err(_) => continue,
577 };
578 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
579 for symbol in flatten_symbols(parsed) {
580 let matched = if exact_match {
581 symbol.name == name
582 } else {
583 scoring::contains_ascii_ci(&symbol.name, &query)
584 };
585 if matched {
586 results.push(to_symbol_info(symbol, usize::MAX));
587 if results.len() >= max_matches {
588 return Ok(results);
589 }
590 }
591 }
592 }
593
594 Ok(results)
595}
596
597fn get_directory_symbols(
598 project: &ProjectRoot,
599 dir: &Path,
600 depth: usize,
601) -> Result<Vec<SymbolInfo>> {
602 let mut symbols = Vec::new();
603 for entry in WalkDir::new(dir)
604 .into_iter()
605 .filter_entry(|entry| !is_excluded(entry.path()))
606 {
607 let entry = entry?;
608 if !entry.file_type().is_file() {
609 continue;
610 }
611 let path = entry.path();
612 if language_for_path(path).is_none() {
613 continue;
614 }
615 let file_symbols = get_file_symbols(project, path, depth)?;
616 if !file_symbols.is_empty() {
617 let relative = project.to_relative(path);
618 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
619 symbols.push(SymbolInfo {
620 name: relative.clone(),
621 kind: SymbolKind::File,
622 file_path: relative.clone(),
623 line: 0,
624 column: 0,
625 signature: format!(
626 "{} ({} symbols)",
627 path.file_name()
628 .and_then(|name| name.to_str())
629 .unwrap_or_default(),
630 file_symbols.len()
631 ),
632 name_path: relative,
633 id,
634 body: None,
635 children: file_symbols,
636 start_byte: 0,
637 end_byte: 0,
638 });
639 }
640 }
641 Ok(symbols)
642}
643
644fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
645 let relative = project.to_relative(file);
646 let Some(language_config) = language_for_path(file) else {
647 return Ok(Vec::new());
648 };
649 let source =
650 fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
651 let parsed = parse_symbols(&language_config, &relative, &source, false)?;
652 Ok(parsed
653 .into_iter()
654 .map(|symbol| to_symbol_info(symbol, depth))
655 .collect())
656}
657
658fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
659 collect_files(root, |path| language_for_path(path).is_some())
660}
661
662fn file_modified_ms(path: &Path) -> Result<u128> {
663 let modified = fs::metadata(path)
664 .with_context(|| format!("failed to stat {}", path.display()))?
665 .modified()
666 .with_context(|| format!("failed to read mtime for {}", path.display()))?;
667 Ok(modified
668 .duration_since(UNIX_EPOCH)
669 .unwrap_or_default()
670 .as_millis())
671}