1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14 sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18 make_symbol_id, parse_symbol_id, IndexStats, RankedContextEntry, RankedContextResult,
19 SymbolInfo, SymbolKind,
20};
21
22use crate::db::{self, content_hash, index_db_path, IndexDb};
23pub(crate) use crate::lang_config::{language_for_path, LanguageConfig};
25use crate::project::ProjectRoot;
26use anyhow::{bail, Context, Result};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded};
33
/// Symbol index for a single project, backed by an [`IndexDb`].
///
/// The index owns one mutex-guarded read-write database handle. Read paths go
/// through `reader`, which either reuses that handle or opens a separate
/// read-only handle on `db_path`, depending on `in_memory`.
pub struct SymbolIndex {
    /// Project root used to resolve and relativize paths.
    project: ProjectRoot,
    /// Path of the on-disk database (an empty path when built via `new_memory`).
    db_path: PathBuf,
    /// Read-write database handle, guarded by a mutex.
    writer: std::sync::Mutex<IndexDb>,
    /// When `true`, `reader` serves reads from `writer` instead of opening a
    /// read-only handle on `db_path`.
    in_memory: bool,
}
49
50impl SymbolIndex {
51 pub fn new(project: ProjectRoot) -> Self {
52 let db_path = index_db_path(project.as_path());
53 let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54 tracing::warn!(
55 path = %db_path.display(),
56 error = %e,
57 "failed to open DB, falling back to in-memory"
58 );
59 IndexDb::open_memory().unwrap()
60 });
61 let in_memory = !db_path.is_file();
62 let mut idx = Self {
63 project,
64 db_path,
65 writer: std::sync::Mutex::new(db),
66 in_memory,
67 };
68 if idx.writer().file_count().unwrap_or(0) == 0 {
70 let _ = idx.migrate_from_json();
71 }
72 idx
73 }
74
75 fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77 self.writer
78 .lock()
79 .unwrap_or_else(|poisoned| poisoned.into_inner())
80 }
81
82 fn reader(&self) -> Result<ReadDb<'_>> {
84 if self.in_memory {
85 return Ok(ReadDb::Writer(self.writer()));
86 }
87 match IndexDb::open_readonly(&self.db_path)? {
88 Some(db) => Ok(ReadDb::Owned(db)),
89 None => Ok(ReadDb::Writer(self.writer())),
90 }
91 }
92
93 pub fn new_memory(project: ProjectRoot) -> Self {
95 let db = IndexDb::open_memory().unwrap();
96 Self {
97 db_path: PathBuf::new(),
98 project,
99 writer: std::sync::Mutex::new(db),
100 in_memory: true,
101 }
102 }
103
104 pub fn stats(&self) -> Result<IndexStats> {
105 let db = self.reader()?;
106 let supported_files = collect_candidate_files(self.project.as_path())?;
107 let indexed_files = db.file_count()?;
108 let indexed_paths = db.all_file_paths()?;
109
110 let mut stale = 0usize;
111 for rel in &indexed_paths {
112 let path = self.project.as_path().join(rel);
113 if !path.is_file() {
114 stale += 1;
115 continue;
116 }
117 let content = match fs::read(&path) {
118 Ok(c) => c,
119 Err(_) => {
120 stale += 1;
121 continue;
122 }
123 };
124 let hash = content_hash(&content);
125 let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
126 if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
127 stale += 1;
128 }
129 }
130
131 Ok(IndexStats {
132 indexed_files,
133 supported_files: supported_files.len(),
134 stale_files: stale,
135 })
136 }
137
138 fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
143 let fts_file_boost: std::collections::HashSet<String> = self
153 .find_symbol(query, None, false, false, 30)
154 .map(|hits| hits.into_iter().map(|s| s.file_path).collect())
155 .unwrap_or_default();
156
157 let (top_files, importer_files): (Vec<String>, Vec<String>) = {
158 let db = self.reader()?;
159 let all_paths = db.all_file_paths()?;
160
161 let query_lower = query.to_ascii_lowercase();
162 let query_tokens: Vec<&str> = query_lower
163 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
164 .filter(|t| t.len() >= 3)
165 .collect();
166
167 let mut file_scores: Vec<(String, usize)> = all_paths
168 .into_iter()
169 .map(|path| {
170 let path_lower = path.to_ascii_lowercase();
171 let mut score = query_tokens
172 .iter()
173 .filter(|token| path_lower.contains(**token))
174 .count();
175 if fts_file_boost.contains(&path) {
177 score += 2;
178 }
179 (path, score)
180 })
181 .collect();
182
183 file_scores.sort_by(|a, b| b.1.cmp(&a.1));
184 let top: Vec<String> = file_scores
185 .into_iter()
186 .filter(|(_, score)| *score > 0)
187 .take(10)
188 .map(|(path, _)| path)
189 .collect();
190
191 let mut importers = Vec::new();
194 if !top.is_empty() && top.len() <= 5 {
195 for file_path in top.iter().take(3) {
196 if let Ok(imp) = db.get_importers(file_path) {
197 for importer_path in imp.into_iter().take(3) {
198 importers.push(importer_path);
199 }
200 }
201 }
202 }
203
204 (top, importers)
205 };
207
208 if top_files.is_empty() {
210 return self.find_symbol(query, None, false, false, 500);
211 }
212
213 let mut all_symbols = Vec::new();
215 for file_path in &top_files {
216 if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
217 all_symbols.extend(symbols);
218 }
219 }
220
221 for importer_path in &importer_files {
224 if let Ok(symbols) = self.get_symbols_overview_cached(importer_path, 1) {
225 all_symbols.extend(symbols);
226 }
227 }
228
229 let mut seen_ids: std::collections::HashSet<String> =
231 all_symbols.iter().map(|s| s.id.clone()).collect();
232
233 if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
234 for sym in direct {
235 if seen_ids.insert(sym.id.clone()) {
236 all_symbols.push(sym);
237 }
238 }
239 }
240
241 let query_lower = query.to_ascii_lowercase();
244 let tokens: Vec<&str> = query_lower
245 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
246 .filter(|t| t.len() >= 3)
247 .collect();
248 if tokens.len() >= 2 {
249 for token in &tokens {
250 match self.find_symbol(token, None, false, false, 10) {
251 Ok(hits) => {
252 for sym in hits {
253 if seen_ids.insert(sym.id.clone()) {
254 all_symbols.push(sym);
255 }
256 }
257 }
258 Err(e) => {
259 tracing::debug!(token, error = %e, "token find_symbol failed");
260 }
261 }
262 }
263 }
264
265 Ok(all_symbols)
266 }
267
268 pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
271 let db = self.reader()?;
272 db.dir_stats()
273 }
274
275 pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
276 let resolved = self.project.resolve(path)?;
277 if resolved.is_dir() {
278 let mut symbols = Vec::new();
279 for file in WalkDir::new(&resolved)
280 .into_iter()
281 .filter_entry(|entry| !is_excluded(entry.path()))
282 {
283 let file = file?;
284 if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
285 continue;
286 }
287 let relative = self.project.to_relative(file.path());
288 let parsed = self.ensure_indexed(file.path(), &relative)?;
289 if !parsed.is_empty() {
290 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
291 symbols.push(SymbolInfo {
292 name: relative.clone(),
293 kind: SymbolKind::File,
294 file_path: relative.clone(),
295 line: 0,
296 column: 0,
297 signature: format!(
298 "{} ({} symbols)",
299 file.file_name().to_string_lossy(),
300 parsed.len()
301 ),
302 name_path: relative,
303 id,
304 body: None,
305 children: parsed
306 .into_iter()
307 .map(|symbol| to_symbol_info(symbol, depth))
308 .collect(),
309 start_byte: 0,
310 end_byte: 0,
311 });
312 }
313 }
314 return Ok(symbols);
315 }
316
317 let relative = self.project.to_relative(&resolved);
318 let parsed = self.ensure_indexed(&resolved, &relative)?;
319 Ok(parsed
320 .into_iter()
321 .map(|symbol| to_symbol_info(symbol, depth))
322 .collect())
323 }
324
325 pub fn find_symbol(
326 &self,
327 name: &str,
328 file_path: Option<&str>,
329 include_body: bool,
330 exact_match: bool,
331 max_matches: usize,
332 ) -> Result<Vec<SymbolInfo>> {
333 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
335 let resolved = self.project.resolve(id_file)?;
336 let relative = self.project.to_relative(&resolved);
337 self.ensure_indexed(&resolved, &relative)?;
338 let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
340 let db = self.writer();
341 let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
342 let mut results = Vec::new();
343 for row in db_rows {
344 if row.name_path != id_name_path {
345 continue;
346 }
347 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
348 let body = if include_body {
349 let abs = self.project.as_path().join(&rel_path);
350 fs::read_to_string(&abs).ok().map(|source| {
351 slice_source(&source, row.start_byte as u32, row.end_byte as u32)
352 })
353 } else {
354 None
355 };
356 let kind = SymbolKind::from_str_label(&row.kind);
357 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
358 results.push(SymbolInfo {
359 name: row.name,
360 kind,
361 file_path: rel_path,
362 line: row.line as usize,
363 column: row.column_num as usize,
364 signature: row.signature,
365 name_path: row.name_path,
366 id,
367 body,
368 children: Vec::new(),
369 start_byte: row.start_byte as u32,
370 end_byte: row.end_byte as u32,
371 });
372 }
373 return Ok(results);
374 }
375
376 if let Some(fp) = file_path {
378 let resolved = self.project.resolve(fp)?;
379 let relative = self.project.to_relative(&resolved);
380 self.ensure_indexed(&resolved, &relative)?;
381 } else {
382 let files = collect_candidate_files(self.project.as_path())?;
384 for file in &files {
385 let relative = self.project.to_relative(file);
386 self.ensure_indexed(file, &relative)?;
387 }
388 }
389
390 let db = self.writer();
391 let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
392
393 let mut results = Vec::new();
394 for row in db_rows {
395 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
396 let body = if include_body {
397 let abs = self.project.as_path().join(&rel_path);
398 fs::read_to_string(&abs)
399 .ok()
400 .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
401 } else {
402 None
403 };
404 let kind = SymbolKind::from_str_label(&row.kind);
405 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
406 results.push(SymbolInfo {
407 name: row.name,
408 kind,
409 file_path: rel_path,
410 line: row.line as usize,
411 column: row.column_num as usize,
412 signature: row.signature,
413 name_path: row.name_path,
414 id,
415 body,
416 children: Vec::new(),
417 start_byte: row.start_byte as u32,
418 end_byte: row.end_byte as u32,
419 });
420 }
421 Ok(results)
422 }
423
424 pub fn get_ranked_context(
425 &self,
426 query: &str,
427 path: Option<&str>,
428 max_tokens: usize,
429 include_body: bool,
430 depth: usize,
431 ) -> Result<RankedContextResult> {
432 let all_symbols = if let Some(path) = path {
433 self.get_symbols_overview(path, depth)?
434 } else {
435 self.select_solve_symbols(query, depth)?
437 };
438
439 let mut scored = all_symbols
440 .into_iter()
441 .flat_map(flatten_symbol_infos)
442 .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
443 .collect::<Vec<_>>();
444 scored.sort_by(|left, right| right.1.cmp(&left.1));
445
446 let (selected, chars_used) =
447 prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
448
449 Ok(RankedContextResult {
450 query: query.to_owned(),
451 count: selected.len(),
452 symbols: selected,
453 token_budget: max_tokens,
454 chars_used,
455 })
456 }
457
/// Public accessor for the locked read-write database handle.
///
/// Delegates to `writer`, so the returned guard holds the index's mutex until
/// it is dropped.
pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
    self.writer()
}
462}
463
464pub fn get_symbols_overview(
465 project: &ProjectRoot,
466 path: &str,
467 depth: usize,
468) -> Result<Vec<SymbolInfo>> {
469 let resolved = project.resolve(path)?;
470 if resolved.is_dir() {
471 return get_directory_symbols(project, &resolved, depth);
472 }
473 get_file_symbols(project, &resolved, depth)
474}
475
476pub fn find_symbol_range(
480 project: &ProjectRoot,
481 relative_path: &str,
482 symbol_name: &str,
483 name_path: Option<&str>,
484) -> Result<(usize, usize)> {
485 let file = project.resolve(relative_path)?;
486 let rel = project.to_relative(&file);
487 let Some(language_config) = language_for_path(&file) else {
488 bail!("unsupported file type: {}", file.display());
489 };
490 let source =
491 fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
492 let parsed = parse_symbols(&language_config, &rel, &source, false)?;
493 let flat = flatten_symbols(parsed);
494
495 let candidate = if let Some(np) = name_path {
496 flat.into_iter()
497 .find(|sym| sym.name_path == np || sym.name == symbol_name)
498 } else {
499 flat.into_iter().find(|sym| sym.name == symbol_name)
500 };
501
502 match candidate {
503 Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
504 None => bail!(
505 "symbol '{}' not found in {}",
506 name_path.unwrap_or(symbol_name),
507 relative_path
508 ),
509 }
510}
511
512pub fn find_symbol(
513 project: &ProjectRoot,
514 name: &str,
515 file_path: Option<&str>,
516 include_body: bool,
517 exact_match: bool,
518 max_matches: usize,
519) -> Result<Vec<SymbolInfo>> {
520 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
522 let resolved = project.resolve(id_file)?;
523 let rel = project.to_relative(&resolved);
524 let Some(language_config) = language_for_path(&resolved) else {
525 return Ok(Vec::new());
526 };
527 let source = fs::read_to_string(&resolved)?;
528 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
529 let mut results = Vec::new();
530 for symbol in flatten_symbols(parsed) {
531 if symbol.name_path == id_name_path {
532 results.push(to_symbol_info(symbol, usize::MAX));
533 if results.len() >= max_matches {
534 return Ok(results);
535 }
536 }
537 }
538 return Ok(results);
539 }
540
541 let files = match file_path {
542 Some(path) => vec![project.resolve(path)?],
543 None => collect_candidate_files(project.as_path())?,
544 };
545
546 let query = name.to_lowercase();
547 let mut results = Vec::new();
548
549 for file in files {
550 let rel = project.to_relative(&file);
551 let Some(language_config) = language_for_path(&file) else {
552 continue;
553 };
554 let source = match fs::read_to_string(&file) {
555 Ok(source) => source,
556 Err(_) => continue,
557 };
558 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
559 for symbol in flatten_symbols(parsed) {
560 let matched = if exact_match {
561 symbol.name == name
562 } else {
563 scoring::contains_ascii_ci(&symbol.name, &query)
564 };
565 if matched {
566 results.push(to_symbol_info(symbol, usize::MAX));
567 if results.len() >= max_matches {
568 return Ok(results);
569 }
570 }
571 }
572 }
573
574 Ok(results)
575}
576
577fn get_directory_symbols(
578 project: &ProjectRoot,
579 dir: &Path,
580 depth: usize,
581) -> Result<Vec<SymbolInfo>> {
582 let mut symbols = Vec::new();
583 for entry in WalkDir::new(dir)
584 .into_iter()
585 .filter_entry(|entry| !is_excluded(entry.path()))
586 {
587 let entry = entry?;
588 if !entry.file_type().is_file() {
589 continue;
590 }
591 let path = entry.path();
592 if language_for_path(path).is_none() {
593 continue;
594 }
595 let file_symbols = get_file_symbols(project, path, depth)?;
596 if !file_symbols.is_empty() {
597 let relative = project.to_relative(path);
598 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
599 symbols.push(SymbolInfo {
600 name: relative.clone(),
601 kind: SymbolKind::File,
602 file_path: relative.clone(),
603 line: 0,
604 column: 0,
605 signature: format!(
606 "{} ({} symbols)",
607 path.file_name()
608 .and_then(|name| name.to_str())
609 .unwrap_or_default(),
610 file_symbols.len()
611 ),
612 name_path: relative,
613 id,
614 body: None,
615 children: file_symbols,
616 start_byte: 0,
617 end_byte: 0,
618 });
619 }
620 }
621 Ok(symbols)
622}
623
624fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
625 let relative = project.to_relative(file);
626 let Some(language_config) = language_for_path(file) else {
627 return Ok(Vec::new());
628 };
629 let source =
630 fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
631 let parsed = parse_symbols(&language_config, &relative, &source, false)?;
632 Ok(parsed
633 .into_iter()
634 .map(|symbol| to_symbol_info(symbol, depth))
635 .collect())
636}
637
/// Collects the files under `root` for which `language_for_path` returns a
/// language, by delegating to `collect_files` with that predicate.
fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
    collect_files(root, |path| language_for_path(path).is_some())
}
641
642fn file_modified_ms(path: &Path) -> Result<u128> {
643 let modified = fs::metadata(path)
644 .with_context(|| format!("failed to stat {}", path.display()))?
645 .modified()
646 .with_context(|| format!("failed to read mtime for {}", path.display()))?;
647 Ok(modified
648 .duration_since(UNIX_EPOCH)
649 .unwrap_or_default()
650 .as_millis())
651}