1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3use std::sync::Arc;
4
5use tokio::sync::RwLock;
6use tokio_util::sync::CancellationToken;
7
8use ignore::WalkBuilder;
9use tree_sitter::{Parser, Query, QueryCursor, StreamingIterator};
10
11use crate::semantic::language::{Lang, LanguageRegistry};
12
13use super::resolve::resolve_callee;
14use super::{CodeGraph, Edge, EdgeKind, SymbolKind, SymbolNode, Visibility};
15
16struct FileParseResult {
18 symbols: Vec<SymbolNode>,
19 raw_calls: Vec<RawCall>,
20}
21
/// A call site recorded by name, before it is resolved against the graph.
///
/// Plain data, so the usual value traits are derived to make it easy to log,
/// copy and compare.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RawCall {
    /// Name of the function or method whose line range contains the call.
    caller_name: String,
    /// Source text of the callee capture at the call site.
    callee_name: String,
    /// 1-based line on which the call appears.
    line: usize,
}
28
/// File extensions (without the dot) that the directory walk keeps.
/// Files with any other extension, or with none, are skipped.
const INDEXED_EXTENSIONS: &[&str] = &[
    "rs",
    "py",
    "js",
    "ts",
    "tsx",
    "go",
    "java",
    "c",
    "cpp",
    "vue",
];
33
34pub struct GraphIndexer {
38 graph: Arc<RwLock<CodeGraph>>,
39 project_dir: PathBuf,
40 parser: Parser,
41}
42
43impl GraphIndexer {
44 pub fn new(graph: Arc<RwLock<CodeGraph>>, project_dir: PathBuf) -> Self {
46 Self {
47 graph,
48 project_dir,
49 parser: Parser::new(),
50 }
51 }
52
53 pub async fn index_all(&mut self, cancel: CancellationToken) {
70 if !should_index(&self.project_dir) {
78 return;
79 }
80
81 if cancel.is_cancelled() {
82 return;
83 }
84
85 let project_dir = self.project_dir.clone();
93 let files = tokio::task::spawn_blocking(move || collect_files_sync(&project_dir))
94 .await
95 .unwrap_or_default();
96 let current_paths: HashSet<PathBuf> = files.iter().map(|(p, _)| p.clone()).collect();
97
98 let (deleted, dirty_files) = {
100 let graph = self.graph.read().await;
101 let deleted: Vec<PathBuf> = graph
102 .file_mtimes
103 .keys()
104 .filter(|p| !current_paths.contains(*p))
105 .cloned()
106 .collect();
107 let dirty: Vec<(PathBuf, u64)> = files
108 .into_iter()
109 .filter(|(path, mtime)| graph.file_mtimes.get(path) != Some(mtime))
110 .collect();
111 (deleted, dirty)
112 };
113 const CPU_BREATHE_CHUNK: usize = 16;
132 const CPU_BREATHE_MS: u64 = 5;
133 let mut all_results: Vec<(PathBuf, u64, FileParseResult)> = Vec::new();
134 for (i, (path, mtime)) in dirty_files.into_iter().enumerate() {
135 if cancel.is_cancelled() {
136 return;
137 }
138 if let Some(result) = self.parse_file(&path) {
139 all_results.push((path, mtime, result));
140 }
141 tokio::task::yield_now().await;
142 if i > 0 && i % CPU_BREATHE_CHUNK == 0 {
143 tokio::time::sleep(std::time::Duration::from_millis(CPU_BREATHE_MS)).await;
144 }
145 }
146
147 if deleted.is_empty() && all_results.is_empty() {
148 return; }
150
151 if cancel.is_cancelled() {
155 return;
156 }
157
158 let mut graph = self.graph.write().await;
161
162 for path in &deleted {
164 graph.remove_file(path);
165 }
166
167 for (path, mtime, result) in &all_results {
169 graph.remove_file(path);
170 for sym in &result.symbols {
171 graph.add_symbol(sym.clone());
172 }
173 graph.file_mtimes.insert(path.clone(), *mtime);
174 }
175
176 for (_path, _mtime, result) in &all_results {
178 for raw_call in &result.raw_calls {
179 let caller_candidates = graph.find_by_name(&raw_call.caller_name);
180 let caller_id = caller_candidates.first().map(|s| s.id);
181 if let Some(caller_id) = caller_id {
182 let caller_file = graph.node(caller_id).unwrap().file.clone();
183 if let Some(callee_id) =
184 resolve_callee(&graph, &raw_call.callee_name, &caller_file, &[])
185 {
186 graph.add_edge(
187 caller_id,
188 Edge {
189 to: callee_id,
190 kind: EdgeKind::Calls,
191 line: raw_call.line,
192 },
193 );
194 }
195 }
196 }
197 }
198 }
200
201 pub async fn reindex_file(&mut self, path: &Path) {
203 let mtime = match std::fs::metadata(path) {
204 Ok(meta) => {
205 use std::time::UNIX_EPOCH;
206 meta.modified()
207 .ok()
208 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
209 .map(|d| d.as_secs())
210 .unwrap_or(0)
211 }
212 Err(_) => {
213 let mut graph = self.graph.write().await;
215 graph.remove_file(&path.to_path_buf());
216 return;
217 }
218 };
219
220 let result = match self.parse_file(path) {
221 Some(r) => r,
222 None => return,
223 };
224
225 let mut graph = self.graph.write().await;
226 let path_buf = path.to_path_buf();
227
228 graph.remove_file(&path_buf);
230
231 for sym in &result.symbols {
233 graph.add_symbol(sym.clone());
234 }
235 graph.file_mtimes.insert(path_buf.clone(), mtime);
236
237 for raw_call in &result.raw_calls {
239 let caller_candidates = graph.find_by_name(&raw_call.caller_name);
240 let caller_id = caller_candidates.first().map(|s| s.id);
241
242 if let Some(caller_id) = caller_id {
243 let caller_file = graph.node(caller_id).unwrap().file.clone();
244 if let Some(callee_id) =
245 resolve_callee(&graph, &raw_call.callee_name, &caller_file, &[])
246 {
247 graph.add_edge(
248 caller_id,
249 Edge {
250 to: callee_id,
251 kind: EdgeKind::Calls,
252 line: raw_call.line,
253 },
254 );
255 }
256 }
257 }
258 }
259
260 fn parse_file(&mut self, path: &Path) -> Option<FileParseResult> {
262 let source = std::fs::read_to_string(path).ok()?;
263 let lang = LanguageRegistry::detect(path)?;
264
265 self.parser.set_language(&lang.grammar()).ok()?;
266 let tree = self.parser.parse(source.as_bytes(), None)?;
267
268 let symbols = self.extract_symbols(path, &source, lang, &tree);
269 let raw_calls = self.extract_calls(path, &source, lang, &tree, &symbols);
270
271 Some(FileParseResult { symbols, raw_calls })
272 }
273
274 fn extract_symbols(
276 &self,
277 path: &Path,
278 source: &str,
279 lang: Lang,
280 tree: &tree_sitter::Tree,
281 ) -> Vec<SymbolNode> {
282 let query_src = lang.symbols_query();
283 let query = match Query::new(&lang.grammar(), query_src) {
284 Ok(q) => q,
285 Err(_) => return Vec::new(),
286 };
287
288 let def_idx = match query.capture_index_for_name("definition") {
289 Some(i) => i,
290 None => return Vec::new(),
291 };
292 let name_idx = match query.capture_index_for_name("name") {
293 Some(i) => i,
294 None => return Vec::new(),
295 };
296
297 let mut cursor = QueryCursor::new();
298 let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
299
300 let mut symbols = Vec::new();
301 let mut seen_ranges: HashSet<(usize, usize)> = HashSet::new();
302 let path_buf = path.to_path_buf();
303
304 loop {
305 matches.advance();
306 let m = match matches.get() {
307 Some(m) => m,
308 None => break,
309 };
310
311 let mut sym_name = None;
312 let mut def_start_line = 0usize;
313 let mut def_end_line = 0usize;
314 let mut def_start_byte = 0usize;
315 let mut def_end_byte = 0usize;
316 let mut ts_kind = "";
317 let mut has_def = false;
318
319 for capture in m.captures {
320 if capture.index == name_idx {
321 sym_name = Some(
322 source[capture.node.start_byte()..capture.node.end_byte()].to_string(),
323 );
324 }
325 if capture.index == def_idx {
326 def_start_byte = capture.node.start_byte();
327 def_end_byte = capture.node.end_byte();
328 def_start_line = capture.node.start_position().row + 1; def_end_line = capture.node.end_position().row + 1;
330 ts_kind = capture.node.kind();
331 has_def = true;
332 }
333 }
334
335 if let (Some(name), true) = (sym_name, has_def) {
336 let range = (def_start_byte, def_end_byte);
337 if seen_ranges.contains(&range) {
338 continue;
339 }
340 seen_ranges.insert(range);
341
342 let id = CodeGraph::make_id(&path_buf, &name, def_start_line);
343 let kind = classify_symbol_kind(ts_kind);
344
345 symbols.push(SymbolNode {
346 id,
347 name,
348 kind,
349 visibility: Visibility::Unknown,
350 file: path_buf.clone(),
351 start_line: def_start_line,
352 end_line: def_end_line,
353 signature: None,
354 });
355 }
356 }
357
358 symbols
359 }
360
361 fn extract_calls(
363 &self,
364 _path: &Path,
365 source: &str,
366 lang: Lang,
367 tree: &tree_sitter::Tree,
368 symbols: &[SymbolNode],
369 ) -> Vec<RawCall> {
370 let query_src = match lang.calls_query() {
371 Some(q) => q,
372 None => return Vec::new(),
373 };
374
375 let query = match Query::new(&lang.grammar(), query_src) {
376 Ok(q) => q,
377 Err(_) => return Vec::new(),
378 };
379
380 let callee_idx = match query.capture_index_for_name("callee") {
381 Some(i) => i,
382 None => return Vec::new(),
383 };
384
385 let mut cursor = QueryCursor::new();
386 let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
387
388 let mut raw_calls = Vec::new();
389
390 loop {
391 matches.advance();
392 let m = match matches.get() {
393 Some(m) => m,
394 None => break,
395 };
396
397 for capture in m.captures {
398 if capture.index == callee_idx {
399 let callee_name =
400 source[capture.node.start_byte()..capture.node.end_byte()].to_string();
401 let call_line = capture.node.start_position().row + 1; let caller_name = symbols
405 .iter()
406 .filter(|s| {
407 matches!(s.kind, SymbolKind::Function | SymbolKind::Method)
408 && s.start_line <= call_line
409 && call_line <= s.end_line
410 })
411 .last()
412 .map(|s| s.name.clone());
413
414 if let Some(caller_name) = caller_name {
415 if caller_name == callee_name {
417 continue;
418 }
419
420 raw_calls.push(RawCall {
421 caller_name,
422 callee_name,
423 line: call_line,
424 });
425 }
426 }
427 }
428 }
429
430 raw_calls
431 }
432}
433
434fn classify_symbol_kind(ts_kind: &str) -> SymbolKind {
436 match ts_kind {
437 "function_item" | "function_definition" | "function_declaration" | "func_literal" => {
438 SymbolKind::Function
439 }
440 "method_definition" | "method_declaration" => SymbolKind::Method,
441 "struct_item" | "struct_specifier" => SymbolKind::Struct,
442 "class_definition" | "class_declaration" | "class_specifier" => SymbolKind::Class,
443 "trait_item" => SymbolKind::Trait,
444 "interface_declaration" => SymbolKind::Interface,
445 "enum_item" | "enum_declaration" | "enum_specifier" => SymbolKind::Enum,
446 "const_item" | "const_declaration" => SymbolKind::Constant,
447 "let_declaration" | "variable_declaration" | "static_item" => SymbolKind::Variable,
448 "mod_item" | "module" => SymbolKind::Module,
449 "use_declaration" | "import_statement" | "import_declaration" => SymbolKind::Import,
450 "type_item" | "type_alias_declaration" => SymbolKind::TypeAlias,
451 "impl_item" => SymbolKind::Other("impl".to_string()),
452 other => SymbolKind::Other(other.to_string()),
453 }
454}
455
456pub fn should_index(project_dir: &Path) -> bool {
473 if looks_like_project(project_dir) {
474 return true;
475 }
476 if is_home_or_root(project_dir) {
477 return false;
478 }
479 if is_umbrella_dir(project_dir) {
480 return false;
481 }
482 true
483}
484
485fn is_home_or_root(path: &Path) -> bool {
486 if path == Path::new("/") {
487 return true;
488 }
489 if let Some(home) = crate::tool::real_home_dir() {
490 if path == home.as_path() {
491 return true;
492 }
493 }
494 false
495}
496
497fn is_umbrella_dir(dir: &Path) -> bool {
507 let Ok(entries) = std::fs::read_dir(dir) else {
508 return false;
509 };
510 let mut project_children = 0;
511 for entry in entries.flatten().take(200) {
512 let p = entry.path();
513 if p.is_dir() && looks_like_project(&p) {
514 project_children += 1;
515 if project_children >= 3 {
516 return true;
517 }
518 }
519 }
520 false
521}
522
/// Returns `true` when `dir` directly contains one of the well-known project
/// markers (a VCS directory or a build manifest).
///
/// Only existence is checked; whether the marker is a file or a directory
/// does not matter.
fn looks_like_project(dir: &Path) -> bool {
    const PROJECT_MARKERS: [&str; 8] = [
        ".git",
        "Cargo.toml",
        "package.json",
        "pyproject.toml",
        "go.mod",
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
    ];

    for marker in PROJECT_MARKERS.iter() {
        if dir.join(marker).exists() {
            return true;
        }
    }
    false
}
545
546fn collect_files_sync(project_dir: &Path) -> Vec<(PathBuf, u64)> {
549 let mut files = Vec::new();
550
551 let walker = WalkBuilder::new(project_dir)
552 .hidden(true)
553 .git_ignore(true)
554 .build();
555
556 for entry in walker {
557 let entry = match entry {
558 Ok(e) => e,
559 Err(_) => continue,
560 };
561
562 let path = entry.path();
563 if !path.is_file() {
564 continue;
565 }
566
567 let ext = match path.extension().and_then(|e| e.to_str()) {
568 Some(e) => e,
569 None => continue,
570 };
571
572 if !INDEXED_EXTENSIONS.contains(&ext) {
573 continue;
574 }
575
576 let mtime = match entry.metadata() {
577 Ok(meta) => {
578 use std::time::UNIX_EPOCH;
579 meta.modified()
580 .ok()
581 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
582 .map(|d| d.as_secs())
583 .unwrap_or(0)
584 }
585 Err(_) => 0,
586 };
587
588 files.push((path.to_path_buf(), mtime));
589 }
590
591 files
592}
593
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates `parent/name` and drops an empty file named after each entry
    /// of `markers` into it.
    fn mk(parent: &Path, name: &str, markers: &[&str]) {
        let child = parent.join(name);
        std::fs::create_dir_all(&child).unwrap();
        markers
            .iter()
            .for_each(|marker| std::fs::write(child.join(marker), "").unwrap());
    }

    #[test]
    fn should_index_accepts_marked_project() {
        let root = tempfile::TempDir::new().unwrap();
        let manifest = root.path().join("Cargo.toml");
        std::fs::write(manifest, "[package]").unwrap();

        assert!(should_index(root.path()));
    }

    #[test]
    fn should_index_refuses_umbrella_dir_with_many_child_projects() {
        let root = tempfile::TempDir::new().unwrap();
        for (name, marker) in [
            ("a", ".git"),
            ("b", ".git"),
            ("c", "package.json"),
            ("d", "Cargo.toml"),
        ] {
            mk(root.path(), name, &[marker]);
        }

        let indexed = should_index(root.path());
        assert!(
            !indexed,
            "umbrella of 4 projects without own marker must be skipped"
        );
    }

    #[test]
    fn should_index_accepts_umbrella_with_real_marker() {
        let root = tempfile::TempDir::new().unwrap();
        std::fs::write(root.path().join("Cargo.toml"), "[workspace]").unwrap();
        for name in ["a", "b", "c"] {
            mk(root.path(), name, &[".git"]);
        }

        let indexed = should_index(root.path());
        assert!(
            indexed,
            "user-placed marker must override umbrella detection"
        );
    }

    #[test]
    fn should_index_refuses_umbrella_with_only_atomcode_storage_dir() {
        let root = tempfile::TempDir::new().unwrap();
        let storage = root.path().join(".atomcode");
        std::fs::create_dir_all(&storage).unwrap();
        std::fs::write(storage.join("graph.bin"), b"x").unwrap();
        for name in ["a", "b", "c"] {
            mk(root.path(), name, &[".git"]);
        }

        let indexed = should_index(root.path());
        assert!(
            !indexed,
            ".atomcode dir must not rescue an umbrella from the guard"
        );
    }

    #[test]
    fn should_index_accepts_dir_with_fewer_than_3_child_projects() {
        let root = tempfile::TempDir::new().unwrap();
        mk(root.path(), "a", &[".git"]);
        mk(root.path(), "b", &[".git"]);
        // A plain directory without any marker does not count as a project.
        mk(root.path(), "other", &[]);

        let indexed = should_index(root.path());
        assert!(indexed, "2 child projects < umbrella threshold");
    }

    #[tokio::test]
    async fn index_all_bails_on_cancelled_token() {
        let root = tempfile::TempDir::new().unwrap();
        std::fs::write(root.path().join(".atomcode"), "").unwrap();
        let source = "pub fn foo() {}\npub fn bar() {}\n";
        std::fs::write(root.path().join("lib.rs"), source).unwrap();

        let graph = Arc::new(RwLock::new(CodeGraph::default()));
        let mut indexer = GraphIndexer::new(graph.clone(), root.path().to_path_buf());

        // The token is cancelled before indexing even starts.
        let cancel = CancellationToken::new();
        cancel.cancel();
        indexer.index_all(cancel).await;

        let snapshot = graph.read().await;
        assert!(
            snapshot.file_mtimes.is_empty(),
            "cancelled indexer must not mutate graph"
        );
    }
}