1mod parser;
2mod ranking;
3mod reader;
4pub mod scoring;
5#[cfg(test)]
6mod tests;
7mod types;
8mod writer;
9
10use parser::{flatten_symbol_infos, flatten_symbols, parse_symbols, slice_source, to_symbol_info};
11use ranking::prune_to_budget;
12use scoring::score_symbol;
13pub use scoring::{
14 sparse_coverage_bonus_from_fields, sparse_max_bonus, sparse_threshold, sparse_weighting_enabled,
15};
16pub(crate) use types::ReadDb;
17pub use types::{
18 IndexStats, RankedContextEntry, RankedContextResult, SymbolInfo, SymbolKind, make_symbol_id,
19 parse_symbol_id,
20};
21
22use crate::db::{self, IndexDb, content_hash, index_db_path};
23pub(crate) use crate::lang_config::{LanguageConfig, language_for_path};
25use crate::project::ProjectRoot;
26use anyhow::{Context, Result, bail};
27use std::fs;
28use std::path::{Path, PathBuf};
29use std::time::UNIX_EPOCH;
30use walkdir::WalkDir;
31
32use crate::project::{collect_files, is_excluded};
33
/// Symbol index for one project, backed by an [`IndexDb`].
pub struct SymbolIndex {
    /// Project root used to resolve paths and convert them to relative form.
    project: ProjectRoot,
    /// Path of the index database file; left empty by `new_memory`.
    db_path: PathBuf,
    /// Mutex-guarded database handle; obtained through `writer()`.
    writer: std::sync::Mutex<IndexDb>,
    /// When `true`, `reader()` serves reads through the `writer` handle instead
    /// of opening a read-only handle on `db_path`.
    in_memory: bool,
}
49
50impl SymbolIndex {
51 pub fn new(project: ProjectRoot) -> Self {
52 let db_path = index_db_path(project.as_path());
53 let db = IndexDb::open(&db_path).unwrap_or_else(|e| {
54 tracing::warn!(
55 path = %db_path.display(),
56 error = %e,
57 "failed to open DB, falling back to in-memory"
58 );
59 IndexDb::open_memory().unwrap()
60 });
61 let in_memory = !db_path.is_file();
62 let mut idx = Self {
63 project,
64 db_path,
65 writer: std::sync::Mutex::new(db),
66 in_memory,
67 };
68 if idx.writer().file_count().unwrap_or(0) == 0 {
70 let _ = idx.migrate_from_json();
71 }
72 idx
73 }
74
75 fn writer(&self) -> std::sync::MutexGuard<'_, IndexDb> {
77 self.writer
78 .lock()
79 .unwrap_or_else(|poisoned| poisoned.into_inner())
80 }
81
82 fn reader(&self) -> Result<ReadDb<'_>> {
84 if self.in_memory {
85 return Ok(ReadDb::Writer(self.writer()));
86 }
87 match IndexDb::open_readonly(&self.db_path)? {
88 Some(db) => Ok(ReadDb::Owned(db)),
89 None => Ok(ReadDb::Writer(self.writer())),
90 }
91 }
92
93 pub fn new_memory(project: ProjectRoot) -> Self {
95 let db = IndexDb::open_memory().unwrap();
96 Self {
97 db_path: PathBuf::new(),
98 project,
99 writer: std::sync::Mutex::new(db),
100 in_memory: true,
101 }
102 }
103
104 pub fn stats(&self) -> Result<IndexStats> {
105 let db = self.reader()?;
106 let supported_files = collect_candidate_files(self.project.as_path())?;
107 let indexed_files = db.file_count()?;
108 let indexed_paths = db.all_file_paths()?;
109
110 let mut stale = 0usize;
111 for rel in &indexed_paths {
112 let path = self.project.as_path().join(rel);
113 if !path.is_file() {
114 stale += 1;
115 continue;
116 }
117 let content = match fs::read(&path) {
118 Ok(c) => c,
119 Err(_) => {
120 stale += 1;
121 continue;
122 }
123 };
124 let hash = content_hash(&content);
125 let mtime = file_modified_ms(&path).unwrap_or(0) as i64;
126 if db.get_fresh_file(rel, mtime, &hash)?.is_none() {
127 stale += 1;
128 }
129 }
130
131 Ok(IndexStats {
132 indexed_files,
133 supported_files: supported_files.len(),
134 stale_files: stale,
135 })
136 }
137
138 fn select_solve_symbols(&self, query: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
141 let top_files: Vec<String> = {
146 let db = self.reader()?;
147 let all_paths = db.all_file_paths()?;
148
149 let query_lower = query.to_ascii_lowercase();
150 let query_tokens: Vec<&str> = query_lower
151 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
152 .filter(|t| t.len() >= 3)
153 .collect();
154
155 let mut file_scores: Vec<(String, usize)> = all_paths
156 .into_iter()
157 .map(|path| {
158 let path_lower = path.to_ascii_lowercase();
159 let score = query_tokens
160 .iter()
161 .filter(|token| path_lower.contains(**token))
162 .count();
163 (path, score)
164 })
165 .collect();
166
167 file_scores.sort_by(|a, b| b.1.cmp(&a.1));
168 file_scores
169 .into_iter()
170 .filter(|(_, score)| *score > 0)
171 .take(10)
172 .map(|(path, _)| path)
173 .collect()
174 };
176
177 if top_files.is_empty() {
179 return self.find_symbol(query, None, false, false, 500);
180 }
181
182 let mut all_symbols = Vec::new();
184 for file_path in &top_files {
185 if let Ok(symbols) = self.get_symbols_overview_cached(file_path, depth) {
186 all_symbols.extend(symbols);
187 }
188 }
189
190 let mut seen_ids: std::collections::HashSet<String> =
192 all_symbols.iter().map(|s| s.id.clone()).collect();
193
194 if let Ok(direct) = self.find_symbol(query, None, false, false, 50) {
195 for sym in direct {
196 if seen_ids.insert(sym.id.clone()) {
197 all_symbols.push(sym);
198 }
199 }
200 }
201
202 let query_lower = query.to_ascii_lowercase();
205 let tokens: Vec<&str> = query_lower
206 .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
207 .filter(|t| t.len() >= 3)
208 .collect();
209 if tokens.len() >= 2 {
210 for token in &tokens {
211 match self.find_symbol(token, None, false, false, 10) {
212 Ok(hits) => {
213 for sym in hits {
214 if seen_ids.insert(sym.id.clone()) {
215 all_symbols.push(sym);
216 }
217 }
218 }
219 Err(e) => {
220 tracing::debug!(token, error = %e, "token find_symbol failed");
221 }
222 }
223 }
224 }
225
226 Ok(all_symbols)
227 }
228
229 pub fn get_project_structure(&self) -> Result<Vec<db::DirStats>> {
232 let db = self.reader()?;
233 db.dir_stats()
234 }
235
236 pub fn get_symbols_overview(&self, path: &str, depth: usize) -> Result<Vec<SymbolInfo>> {
237 let resolved = self.project.resolve(path)?;
238 if resolved.is_dir() {
239 let mut symbols = Vec::new();
240 for file in WalkDir::new(&resolved)
241 .into_iter()
242 .filter_entry(|entry| !is_excluded(entry.path()))
243 {
244 let file = file?;
245 if !file.file_type().is_file() || language_for_path(file.path()).is_none() {
246 continue;
247 }
248 let relative = self.project.to_relative(file.path());
249 let parsed = self.ensure_indexed(file.path(), &relative)?;
250 if !parsed.is_empty() {
251 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
252 symbols.push(SymbolInfo {
253 name: relative.clone(),
254 kind: SymbolKind::File,
255 file_path: relative.clone(),
256 line: 0,
257 column: 0,
258 signature: format!(
259 "{} ({} symbols)",
260 file.file_name().to_string_lossy(),
261 parsed.len()
262 ),
263 name_path: relative,
264 id,
265 body: None,
266 children: parsed
267 .into_iter()
268 .map(|symbol| to_symbol_info(symbol, depth))
269 .collect(),
270 start_byte: 0,
271 end_byte: 0,
272 });
273 }
274 }
275 return Ok(symbols);
276 }
277
278 let relative = self.project.to_relative(&resolved);
279 let parsed = self.ensure_indexed(&resolved, &relative)?;
280 Ok(parsed
281 .into_iter()
282 .map(|symbol| to_symbol_info(symbol, depth))
283 .collect())
284 }
285
286 pub fn find_symbol(
287 &self,
288 name: &str,
289 file_path: Option<&str>,
290 include_body: bool,
291 exact_match: bool,
292 max_matches: usize,
293 ) -> Result<Vec<SymbolInfo>> {
294 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
296 let resolved = self.project.resolve(id_file)?;
297 let relative = self.project.to_relative(&resolved);
298 self.ensure_indexed(&resolved, &relative)?;
299 let leaf_name = id_name_path.rsplit('/').next().unwrap_or(id_name_path);
301 let db = self.writer();
302 let db_rows = db.find_symbols_by_name(leaf_name, Some(id_file), true, max_matches)?;
303 let mut results = Vec::new();
304 for row in db_rows {
305 if row.name_path != id_name_path {
306 continue;
307 }
308 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
309 let body = if include_body {
310 let abs = self.project.as_path().join(&rel_path);
311 fs::read_to_string(&abs).ok().map(|source| {
312 slice_source(&source, row.start_byte as u32, row.end_byte as u32)
313 })
314 } else {
315 None
316 };
317 let kind = SymbolKind::from_str_label(&row.kind);
318 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
319 results.push(SymbolInfo {
320 name: row.name,
321 kind,
322 file_path: rel_path,
323 line: row.line as usize,
324 column: row.column_num as usize,
325 signature: row.signature,
326 name_path: row.name_path,
327 id,
328 body,
329 children: Vec::new(),
330 start_byte: row.start_byte as u32,
331 end_byte: row.end_byte as u32,
332 });
333 }
334 return Ok(results);
335 }
336
337 if let Some(fp) = file_path {
339 let resolved = self.project.resolve(fp)?;
340 let relative = self.project.to_relative(&resolved);
341 self.ensure_indexed(&resolved, &relative)?;
342 } else {
343 let files = collect_candidate_files(self.project.as_path())?;
345 for file in &files {
346 let relative = self.project.to_relative(file);
347 self.ensure_indexed(file, &relative)?;
348 }
349 }
350
351 let db = self.writer();
352 let db_rows = db.find_symbols_by_name(name, file_path, exact_match, max_matches)?;
353
354 let mut results = Vec::new();
355 for row in db_rows {
356 let rel_path = db.get_file_path(row.file_id)?.unwrap_or_default();
357 let body = if include_body {
358 let abs = self.project.as_path().join(&rel_path);
359 fs::read_to_string(&abs)
360 .ok()
361 .map(|source| slice_source(&source, row.start_byte as u32, row.end_byte as u32))
362 } else {
363 None
364 };
365 let kind = SymbolKind::from_str_label(&row.kind);
366 let id = make_symbol_id(&rel_path, &kind, &row.name_path);
367 results.push(SymbolInfo {
368 name: row.name,
369 kind,
370 file_path: rel_path,
371 line: row.line as usize,
372 column: row.column_num as usize,
373 signature: row.signature,
374 name_path: row.name_path,
375 id,
376 body,
377 children: Vec::new(),
378 start_byte: row.start_byte as u32,
379 end_byte: row.end_byte as u32,
380 });
381 }
382 Ok(results)
383 }
384
385 pub fn get_ranked_context(
386 &self,
387 query: &str,
388 path: Option<&str>,
389 max_tokens: usize,
390 include_body: bool,
391 depth: usize,
392 ) -> Result<RankedContextResult> {
393 let all_symbols = if let Some(path) = path {
394 self.get_symbols_overview(path, depth)?
395 } else {
396 self.select_solve_symbols(query, depth)?
398 };
399
400 let mut scored = all_symbols
401 .into_iter()
402 .flat_map(flatten_symbol_infos)
403 .filter_map(|symbol| score_symbol(query, &symbol).map(|score| (symbol, score)))
404 .collect::<Vec<_>>();
405 scored.sort_by(|left, right| right.1.cmp(&left.1));
406
407 let (selected, chars_used) =
408 prune_to_budget(scored, max_tokens, include_body, self.project.as_path());
409
410 Ok(RankedContextResult {
411 query: query.to_owned(),
412 count: selected.len(),
413 symbols: selected,
414 token_budget: max_tokens,
415 chars_used,
416 })
417 }
418
419 pub fn db(&self) -> std::sync::MutexGuard<'_, IndexDb> {
421 self.writer()
422 }
423}
424
425pub fn get_symbols_overview(
426 project: &ProjectRoot,
427 path: &str,
428 depth: usize,
429) -> Result<Vec<SymbolInfo>> {
430 let resolved = project.resolve(path)?;
431 if resolved.is_dir() {
432 return get_directory_symbols(project, &resolved, depth);
433 }
434 get_file_symbols(project, &resolved, depth)
435}
436
437pub fn find_symbol_range(
441 project: &ProjectRoot,
442 relative_path: &str,
443 symbol_name: &str,
444 name_path: Option<&str>,
445) -> Result<(usize, usize)> {
446 let file = project.resolve(relative_path)?;
447 let rel = project.to_relative(&file);
448 let Some(language_config) = language_for_path(&file) else {
449 bail!("unsupported file type: {}", file.display());
450 };
451 let source =
452 fs::read_to_string(&file).with_context(|| format!("failed to read {}", file.display()))?;
453 let parsed = parse_symbols(&language_config, &rel, &source, false)?;
454 let flat = flatten_symbols(parsed);
455
456 let candidate = if let Some(np) = name_path {
457 flat.into_iter()
458 .find(|sym| sym.name_path == np || sym.name == symbol_name)
459 } else {
460 flat.into_iter().find(|sym| sym.name == symbol_name)
461 };
462
463 match candidate {
464 Some(sym) => Ok((sym.start_byte as usize, sym.end_byte as usize)),
465 None => bail!(
466 "symbol '{}' not found in {}",
467 name_path.unwrap_or(symbol_name),
468 relative_path
469 ),
470 }
471}
472
473pub fn find_symbol(
474 project: &ProjectRoot,
475 name: &str,
476 file_path: Option<&str>,
477 include_body: bool,
478 exact_match: bool,
479 max_matches: usize,
480) -> Result<Vec<SymbolInfo>> {
481 if let Some((id_file, _id_kind, id_name_path)) = parse_symbol_id(name) {
483 let resolved = project.resolve(id_file)?;
484 let rel = project.to_relative(&resolved);
485 let Some(language_config) = language_for_path(&resolved) else {
486 return Ok(Vec::new());
487 };
488 let source = fs::read_to_string(&resolved)?;
489 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
490 let mut results = Vec::new();
491 for symbol in flatten_symbols(parsed) {
492 if symbol.name_path == id_name_path {
493 results.push(to_symbol_info(symbol, usize::MAX));
494 if results.len() >= max_matches {
495 return Ok(results);
496 }
497 }
498 }
499 return Ok(results);
500 }
501
502 let files = match file_path {
503 Some(path) => vec![project.resolve(path)?],
504 None => collect_candidate_files(project.as_path())?,
505 };
506
507 let query = name.to_lowercase();
508 let mut results = Vec::new();
509
510 for file in files {
511 let rel = project.to_relative(&file);
512 let Some(language_config) = language_for_path(&file) else {
513 continue;
514 };
515 let source = match fs::read_to_string(&file) {
516 Ok(source) => source,
517 Err(_) => continue,
518 };
519 let parsed = parse_symbols(&language_config, &rel, &source, include_body)?;
520 for symbol in flatten_symbols(parsed) {
521 let matched = if exact_match {
522 symbol.name == name
523 } else {
524 scoring::contains_ascii_ci(&symbol.name, &query)
525 };
526 if matched {
527 results.push(to_symbol_info(symbol, usize::MAX));
528 if results.len() >= max_matches {
529 return Ok(results);
530 }
531 }
532 }
533 }
534
535 Ok(results)
536}
537
538fn get_directory_symbols(
539 project: &ProjectRoot,
540 dir: &Path,
541 depth: usize,
542) -> Result<Vec<SymbolInfo>> {
543 let mut symbols = Vec::new();
544 for entry in WalkDir::new(dir)
545 .into_iter()
546 .filter_entry(|entry| !is_excluded(entry.path()))
547 {
548 let entry = entry?;
549 if !entry.file_type().is_file() {
550 continue;
551 }
552 let path = entry.path();
553 if language_for_path(path).is_none() {
554 continue;
555 }
556 let file_symbols = get_file_symbols(project, path, depth)?;
557 if !file_symbols.is_empty() {
558 let relative = project.to_relative(path);
559 let id = make_symbol_id(&relative, &SymbolKind::File, &relative);
560 symbols.push(SymbolInfo {
561 name: relative.clone(),
562 kind: SymbolKind::File,
563 file_path: relative.clone(),
564 line: 0,
565 column: 0,
566 signature: format!(
567 "{} ({} symbols)",
568 path.file_name()
569 .and_then(|name| name.to_str())
570 .unwrap_or_default(),
571 file_symbols.len()
572 ),
573 name_path: relative,
574 id,
575 body: None,
576 children: file_symbols,
577 start_byte: 0,
578 end_byte: 0,
579 });
580 }
581 }
582 Ok(symbols)
583}
584
585fn get_file_symbols(project: &ProjectRoot, file: &Path, depth: usize) -> Result<Vec<SymbolInfo>> {
586 let relative = project.to_relative(file);
587 let Some(language_config) = language_for_path(file) else {
588 return Ok(Vec::new());
589 };
590 let source =
591 fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?;
592 let parsed = parse_symbols(&language_config, &relative, &source, false)?;
593 Ok(parsed
594 .into_iter()
595 .map(|symbol| to_symbol_info(symbol, depth))
596 .collect())
597}
598
599fn collect_candidate_files(root: &Path) -> Result<Vec<PathBuf>> {
600 collect_files(root, |path| language_for_path(path).is_some())
601}
602
603fn file_modified_ms(path: &Path) -> Result<u128> {
604 let modified = fs::metadata(path)
605 .with_context(|| format!("failed to stat {}", path.display()))?
606 .modified()
607 .with_context(|| format!("failed to read mtime for {}", path.display()))?;
608 Ok(modified
609 .duration_since(UNIX_EPOCH)
610 .unwrap_or_default()
611 .as_millis())
612}