1use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14
15const INDEX_VERSION: u32 = 6;
16
17pub fn is_safe_scan_root_public(path: &str) -> bool {
18 is_safe_scan_root(path)
19}
20
/// Returns `true` when `path` has no parent component at all.
///
/// On Windows builds a path whose parent is the empty path is treated as a
/// root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        // Nothing above this path: it is a root (or the empty string).
        None => true,
        // Only Windows builds treat an empty parent as "root".
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
25
26fn is_safe_scan_root(path: &str) -> bool {
27 let normalized = normalize_project_root(path);
28 let p = Path::new(&normalized);
29
30 if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
31 tracing::warn!("[graph_index: refusing to scan filesystem root]");
32 return false;
33 }
34
35 if normalized == "." || normalized.is_empty() {
36 tracing::warn!("[graph_index: refusing to scan relative/empty root]");
37 return false;
38 }
39
40 if let Some(home) = dirs::home_dir() {
41 let home_norm = normalize_project_root(&home.to_string_lossy());
42 if normalized == home_norm {
43 use std::sync::Once;
44 static HOME_WARN: Once = Once::new();
45 HOME_WARN.call_once(|| {
46 tracing::warn!(
47 "[graph_index: skipping — cannot index home directory {normalized}.\n \
48 Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
49 );
50 });
51 return false;
52 }
53 let home_path = Path::new(&home_norm);
55 const BLOCKED_HOME_SUBDIRS: &[&str] = &[
56 "Desktop",
57 "Documents",
58 "Downloads",
59 "Pictures",
60 "Music",
61 "Videos",
62 "Movies",
63 "Library",
64 ".local",
65 ".cache",
66 ".config",
67 "snap",
68 "Applications",
69 ];
70 for blocked in BLOCKED_HOME_SUBDIRS {
71 let blocked_path = home_path.join(blocked);
72 let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
73 let has_marker = p.join(".git").exists()
74 || p.join("Cargo.toml").exists()
75 || p.join("package.json").exists();
76 if is_inside_blocked
77 && !has_marker
78 && !crate::core::pathutil::has_multi_repo_children(p)
79 {
80 tracing::warn!(
81 "[graph_index: refusing to scan {normalized} — \
82 inside home/{blocked} without project markers]"
83 );
84 return false;
85 }
86 }
87
88 if p.parent() == Some(home_path) {
91 let has_marker = p.join(".git").exists()
92 || p.join("Cargo.toml").exists()
93 || p.join("package.json").exists()
94 || p.join("go.mod").exists()
95 || p.join("pyproject.toml").exists();
96 if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
97 tracing::warn!(
98 "[graph_index: refusing to scan {normalized} — \
99 direct child of home without project markers]"
100 );
101 return false;
102 }
103 }
104 }
105
106 let breadth_markers = [
107 ".git",
108 "Cargo.toml",
109 "package.json",
110 "go.mod",
111 "pyproject.toml",
112 "setup.py",
113 "Makefile",
114 "CMakeLists.txt",
115 "pnpm-workspace.yaml",
116 ".projectile",
117 "BUILD.bazel",
118 "go.work",
119 ];
120
121 if !breadth_markers.iter().any(|m| p.join(m).exists()) {
122 if crate::core::pathutil::has_multi_repo_children(p) {
124 return true;
125 }
126
127 let child_count = std::fs::read_dir(p).map_or(0, |rd| {
128 rd.filter_map(Result::ok)
129 .filter(|e| e.path().is_dir())
130 .count()
131 });
132 if child_count > 50 {
133 tracing::warn!(
134 "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
135 skipping scan to avoid indexing broad directories]"
136 );
137 return false;
138 }
139 }
140
141 true
142}
143
144#[derive(Debug, Clone, Serialize, Deserialize)]
145pub struct ProjectIndex {
146 pub version: u32,
147 pub project_root: String,
148 pub last_scan: String,
149 pub files: HashMap<String, FileEntry>,
150 pub edges: Vec<IndexEdge>,
151 pub symbols: HashMap<String, SymbolEntry>,
152}
153
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct FileEntry {
156 pub path: String,
157 pub hash: String,
158 pub language: String,
159 pub line_count: usize,
160 pub token_count: usize,
161 pub exports: Vec<String>,
162 pub summary: String,
163}
164
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct SymbolEntry {
167 pub file: String,
168 pub name: String,
169 pub kind: String,
170 pub start_line: usize,
171 pub end_line: usize,
172 pub is_exported: bool,
173}
174
175#[derive(Debug, Clone, Serialize, Deserialize)]
176pub struct IndexEdge {
177 pub from: String,
178 pub to: String,
179 pub kind: String,
180 #[serde(default = "default_edge_weight")]
181 pub weight: f32,
182}
183
/// Serde fallback for [`IndexEdge::weight`] when the field is absent.
fn default_edge_weight() -> f32 {
    1.0_f32
}
187
188impl ProjectIndex {
189 pub fn new(project_root: &str) -> Self {
190 Self {
191 version: INDEX_VERSION,
192 project_root: normalize_project_root(project_root),
193 last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
194 files: HashMap::new(),
195 edges: Vec::new(),
196 symbols: HashMap::new(),
197 }
198 }
199
200 pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
201 let normalized = normalize_project_root(project_root);
202 let hash = crate::core::project_hash::hash_project_root(&normalized);
203 crate::core::data_dir::lean_ctx_data_dir()
204 .ok()
205 .map(|d| d.join("graphs").join(hash))
206 }
207
208 pub fn load(project_root: &str) -> Option<Self> {
209 let dir = Self::index_dir(project_root)?;
210
211 let zst_path = dir.join("index.json.zst");
212 if zst_path.exists() {
213 let compressed = std::fs::read(&zst_path).ok()?;
214 let data = zstd::decode_all(compressed.as_slice()).ok()?;
215 let content = String::from_utf8(data).ok()?;
216 let index: Self = serde_json::from_str(&content).ok()?;
217 if index.version != INDEX_VERSION {
218 return None;
219 }
220 return Some(index);
221 }
222
223 let json_path = dir.join("index.json");
224 let content = std::fs::read_to_string(&json_path)
225 .or_else(|_| -> std::io::Result<String> {
226 let legacy_hash = short_hash(&normalize_project_root(project_root));
227 let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
228 .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
229 .join("graphs")
230 .join(legacy_hash);
231 let legacy_path = legacy_dir.join("index.json");
232 let data = std::fs::read_to_string(&legacy_path)?;
233 if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
234 tracing::debug!("graph index migration: {e}");
235 }
236 Ok(data)
237 })
238 .ok()?;
239 let index: Self = serde_json::from_str(&content).ok()?;
240 if index.version != INDEX_VERSION {
241 return None;
242 }
243 if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
245 let zst_tmp = zst_path.with_extension("zst.tmp");
246 if std::fs::write(&zst_tmp, &compressed).is_ok()
247 && std::fs::rename(&zst_tmp, &zst_path).is_ok()
248 {
249 let _ = std::fs::remove_file(&json_path);
250 }
251 }
252 Some(index)
253 }
254
255 pub fn save(&self) -> Result<(), String> {
256 let dir = Self::index_dir(&self.project_root)
257 .ok_or_else(|| "Cannot determine data directory".to_string())?;
258 std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
259 let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
260 let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
261 let target = dir.join("index.json.zst");
262 let tmp = target.with_extension("zst.tmp");
263 std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
264 std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
265 let _ = std::fs::remove_file(dir.join("index.json"));
266 Ok(())
267 }
268
269 pub fn purge_stale_indices() {
272 let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
273 return;
274 };
275 let graphs_dir = data_dir.join("graphs");
276 let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
277 return;
278 };
279 let cfg = crate::core::config::Config::load();
280 let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
281
282 for entry in entries.filter_map(Result::ok) {
283 let path = entry.path();
284 if !path.is_dir() {
285 continue;
286 }
287 let zst = path.join("index.json.zst");
288 let json = path.join("index.json");
289 let index_file = if zst.exists() {
290 &zst
291 } else if json.exists() {
292 &json
293 } else {
294 continue;
295 };
296
297 let is_old = index_file
298 .metadata()
299 .and_then(|m| m.modified())
300 .is_ok_and(|mtime| {
301 mtime
302 .elapsed()
303 .is_ok_and(|age| age.as_secs() > max_age_secs)
304 });
305
306 if is_old {
307 tracing::info!("[graph_index: purging stale index at {}]", path.display());
308 let _ = std::fs::remove_dir_all(&path);
309 }
310 }
311 }
312
313 pub fn file_count(&self) -> usize {
314 self.files.len()
315 }
316
317 pub fn symbol_count(&self) -> usize {
318 self.symbols.len()
319 }
320
321 pub fn edge_count(&self) -> usize {
322 self.edges.len()
323 }
324
325 pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
326 self.symbols.get(key)
327 }
328
329 pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
330 let mut result = Vec::new();
331 let mut visited = std::collections::HashSet::new();
332 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
333
334 while let Some((current, d)) = queue.pop() {
335 if d > depth || visited.contains(¤t) {
336 continue;
337 }
338 visited.insert(current.clone());
339 if current != path {
340 result.push(current.clone());
341 }
342
343 for edge in &self.edges {
344 if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
345 queue.push((edge.from.clone(), d + 1));
346 }
347 }
348 }
349 result
350 }
351
352 pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
353 let mut result = Vec::new();
354 let mut visited = std::collections::HashSet::new();
355 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
356
357 while let Some((current, d)) = queue.pop() {
358 if d > depth || visited.contains(¤t) {
359 continue;
360 }
361 visited.insert(current.clone());
362 if current != path {
363 result.push(current.clone());
364 }
365
366 for edge in &self.edges {
367 if edge.from == current && !visited.contains(&edge.to) {
368 queue.push((edge.to.clone(), d + 1));
369 }
370 if edge.to == current && !visited.contains(&edge.from) {
371 queue.push((edge.from.clone(), d + 1));
372 }
373 }
374 }
375 result
376 }
377}
378
379pub fn load_or_build(project_root: &str) -> ProjectIndex {
383 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
384 return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
385 }
386
387 let root_abs = if project_root.trim().is_empty() || project_root == "." {
390 std::env::current_dir().ok().map_or_else(
391 || ".".to_string(),
392 |p| normalize_project_root(&p.to_string_lossy()),
393 )
394 } else {
395 normalize_project_root(project_root)
396 };
397
398 if !is_safe_scan_root(&root_abs) {
399 return ProjectIndex::new(&root_abs);
400 }
401
402 if let Some(idx) = ProjectIndex::load(&root_abs) {
404 if !idx.files.is_empty() {
405 if index_looks_stale(&idx, &root_abs) {
406 tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
407 return scan(&root_abs);
408 }
409 return idx;
410 }
411 }
412
413 if let Ok(cwd) = std::env::current_dir() {
415 let cwd_str = normalize_project_root(&cwd.to_string_lossy());
416 if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
417 if let Some(idx) = ProjectIndex::load(&cwd_str) {
418 if !idx.files.is_empty() {
419 if index_looks_stale(&idx, &cwd_str) {
420 return scan(&cwd_str);
421 }
422 return idx;
423 }
424 }
425 }
426 }
427
428 scan(&root_abs)
429}
430
431fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
432 if index.files.is_empty() {
433 return true;
434 }
435
436 if let Ok(scan_time) =
438 chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
439 {
440 let cfg = crate::core::config::Config::load();
441 let effective_hours = cfg.archive_max_age_hours_effective();
442 let max_age = chrono::Duration::hours(effective_hours as i64);
443 let now = chrono::Local::now().naive_local();
444 if now.signed_duration_since(scan_time) > max_age {
445 tracing::info!(
446 "[graph_index: index is older than {}h — marking stale]",
447 effective_hours
448 );
449 return true;
450 }
451 }
452
453 const CONTAMINATION_MARKERS: &[&str] = &[
456 "Desktop/",
457 "Documents/",
458 "Downloads/",
459 "Pictures/",
460 "Music/",
461 "Videos/",
462 "Movies/",
463 "Library/",
464 ".cache/",
465 "snap/",
466 ];
467 let contaminated = index.files.keys().take(200).any(|rel| {
468 CONTAMINATION_MARKERS
469 .iter()
470 .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
471 });
472 if contaminated {
473 tracing::warn!(
474 "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
475 marking stale to force clean rebuild]"
476 );
477 return true;
478 }
479
480 let root_path = Path::new(root_abs);
481 let sample_size = index.files.len().min(20);
483 for rel in index.files.keys().take(sample_size) {
484 let rel = rel.trim_start_matches(['/', '\\']);
485 if rel.is_empty() {
486 continue;
487 }
488 let abs = root_path.join(rel);
489 if !abs.exists() {
490 return true;
491 }
492 }
493
494 false
495}
496
497pub fn scan(project_root: &str) -> ProjectIndex {
498 scan_inner(project_root).0
499}
500
501pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
502 scan_inner(project_root)
503}
504
505fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
506 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
507 tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
508 return (ProjectIndex::new(project_root), HashMap::new());
509 }
510
511 let project_root = normalize_project_root(project_root);
512
513 if !is_safe_scan_root(&project_root) {
514 tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
515 return (ProjectIndex::new(&project_root), HashMap::new());
516 }
517
518 let lock_name = format!(
519 "graph-idx-{}",
520 &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
521 );
522 let _lock = crate::core::startup_guard::try_acquire_lock(
523 &lock_name,
524 std::time::Duration::from_millis(800),
525 std::time::Duration::from_mins(3),
526 );
527 if _lock.is_none() {
528 tracing::info!(
529 "[graph_index: another process is scanning {project_root} — returning cached or empty]"
530 );
531 return (
532 ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
533 HashMap::new(),
534 );
535 }
536
537 let existing = ProjectIndex::load(&project_root);
538 let mut index = ProjectIndex::new(&project_root);
539
540 let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
541 if let Some(ref prev) = existing {
542 prev.files
543 .iter()
544 .map(|(path, entry)| {
545 let syms: Vec<(String, SymbolEntry)> = prev
546 .symbols
547 .iter()
548 .filter(|(_, s)| s.file == *path)
549 .map(|(k, v)| (k.clone(), v.clone()))
550 .collect();
551 (path.clone(), (entry.hash.clone(), syms))
552 })
553 .collect()
554 } else {
555 HashMap::new()
556 };
557
558 let walker = ignore::WalkBuilder::new(&project_root)
559 .hidden(true)
560 .git_ignore(true)
561 .git_global(true)
562 .git_exclude(true)
563 .max_depth(Some(20))
564 .build();
565
566 let cfg = crate::core::config::Config::load();
567 let extra_ignores: Vec<glob::Pattern> = cfg
568 .extra_ignore_patterns
569 .iter()
570 .filter_map(|p| glob::Pattern::new(p).ok())
571 .collect();
572
573 let mut scanned = 0usize;
574 let mut reused = 0usize;
575 let mut entries_visited = 0usize;
576 let mut content_cache: HashMap<String, String> = HashMap::new();
577 let max_files = if cfg.graph_index_max_files == 0 {
578 usize::MAX } else {
580 cfg.graph_index_max_files as usize
581 };
582 const MAX_ENTRIES_VISITED: usize = 500_000;
583 const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
585
586 for entry in walker.filter_map(std::result::Result::ok) {
587 entries_visited += 1;
588 if entries_visited > MAX_ENTRIES_VISITED {
589 tracing::warn!(
590 "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
591 runaway traversal. Indexed {} files so far.]",
592 index.files.len()
593 );
594 break;
595 }
596 if entries_visited.is_multiple_of(5000) {
597 if std::time::Instant::now() > scan_deadline {
598 tracing::warn!(
599 "[graph_index: scan timeout (120s) after {entries_visited} entries — \
600 saving partial index with {} files]",
601 index.files.len()
602 );
603 break;
604 }
605 if crate::core::memory_guard::abort_requested() {
606 tracing::warn!(
607 "[graph_index: memory pressure abort after {entries_visited} entries — \
608 saving partial index with {} files]",
609 index.files.len()
610 );
611 break;
612 }
613 if crate::core::memory_guard::is_under_pressure() {
614 tracing::warn!(
615 "[graph_index: memory pressure detected at {entries_visited} entries — \
616 stopping scan with {} files]",
617 index.files.len()
618 );
619 break;
620 }
621 if let Some(ref g) = _lock {
622 g.touch();
623 }
624 }
625
626 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
627 continue;
628 }
629
630 if entry.path_is_symlink() {
631 continue;
632 }
633 let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
634
635 if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
636 continue;
637 }
638
639 if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
640 if meta.file_type().is_symlink() || !meta.is_file() {
641 continue;
642 }
643 if meta.len() > MAX_FILE_SIZE_BYTES {
644 tracing::debug!(
645 "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
646 meta.len() as f64 / 1_048_576.0,
647 MAX_FILE_SIZE_BYTES / (1024 * 1024),
648 );
649 continue;
650 }
651 }
652
653 let ext = Path::new(&file_path)
654 .extension()
655 .and_then(|e| e.to_str())
656 .unwrap_or("");
657
658 if !is_indexable_ext(ext) {
659 continue;
660 }
661
662 let rel = make_relative(&file_path, &project_root);
663 if extra_ignores.iter().any(|p| p.matches(&rel)) {
664 continue;
665 }
666
667 if max_files != usize::MAX && index.files.len() >= max_files {
668 tracing::info!(
669 "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
670 max_files
671 );
672 break;
673 }
674
675 let Ok(content) = std::fs::read_to_string(&file_path) else {
676 continue;
677 };
678
679 let hash = compute_hash(&content);
680 let rel_path = make_relative(&file_path, &project_root);
681
682 if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
683 if *old_hash == hash {
684 if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
685 index.files.insert(rel_path.clone(), old_entry.clone());
686 for (key, sym) in old_syms {
687 index.symbols.insert(key.clone(), sym.clone());
688 }
689 content_cache.insert(rel_path, content);
690 reused += 1;
691 continue;
692 }
693 }
694 }
695
696 let sigs = signatures::extract_signatures(&content, ext);
697 let line_count = content.lines().count();
698 let token_count = crate::core::tokens::count_tokens(&content);
699 let summary = extract_summary(&content);
700
701 let exports: Vec<String> = sigs
702 .iter()
703 .filter(|s| s.is_exported)
704 .map(|s| s.name.clone())
705 .collect();
706
707 index.files.insert(
708 rel_path.clone(),
709 FileEntry {
710 path: rel_path.clone(),
711 hash,
712 language: ext.to_string(),
713 line_count,
714 token_count,
715 exports,
716 summary,
717 },
718 );
719
720 for sig in &sigs {
721 let (start, end) = sig
722 .start_line
723 .zip(sig.end_line)
724 .unwrap_or_else(|| find_symbol_range(&content, sig));
725 let key = format!("{}::{}", rel_path, sig.name);
726 index.symbols.insert(
727 key,
728 SymbolEntry {
729 file: rel_path.clone(),
730 name: sig.name.clone(),
731 kind: sig.kind.to_string(),
732 start_line: start,
733 end_line: end,
734 is_exported: sig.is_exported,
735 },
736 );
737 }
738
739 content_cache.insert(rel_path, content);
740 scanned += 1;
741 }
742
743 build_edges_cached(&mut index, &content_cache);
744
745 if let Err(e) = index.save() {
746 tracing::warn!("could not save graph index: {e}");
747 }
748
749 tracing::warn!(
750 "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
751 index.file_count(),
752 scanned,
753 reused,
754 index.symbol_count(),
755 index.edge_count()
756 );
757
758 (index, content_cache)
759}
760
761fn build_edges_cached(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
762 build_edges_with_cache(index, content_cache);
763 build_implicit_edges_with_cache(index, content_cache);
764 build_cochange_edges(index);
765 build_sibling_edges(index);
766}
767
768fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
769 index.edges.clear();
770
771 if crate::core::memory_guard::abort_requested() {
772 tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
773 return;
774 }
775
776 let root = normalize_project_root(&index.project_root);
777 let root_path = Path::new(&root);
778
779 let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
780 file_paths.sort();
781
782 let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
783
784 const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
785
786 for (i, rel_path) in file_paths.iter().enumerate() {
787 if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
788 tracing::warn!(
789 "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
790 file_paths.len()
791 );
792 break;
793 }
794
795 let content = if let Some(cached) = content_cache.get(rel_path) {
796 std::borrow::Cow::Borrowed(cached.as_str())
797 } else {
798 let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
799 if let Ok(meta) = abs_path.metadata() {
800 if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
801 continue;
802 }
803 }
804 match std::fs::read_to_string(&abs_path) {
805 Ok(c) => std::borrow::Cow::Owned(c),
806 Err(_) => continue,
807 }
808 };
809
810 let ext = Path::new(rel_path)
811 .extension()
812 .and_then(|e| e.to_str())
813 .unwrap_or("");
814
815 let resolve_ext = match ext {
816 "vue" | "svelte" => "ts",
817 _ => ext,
818 };
819
820 let analysis_content = if ext == "vue" || ext == "svelte" {
821 if let Some(script) = crate::core::signatures_ts::sfc::extract_script_block(&content) {
822 std::borrow::Cow::Owned(script)
823 } else {
824 content
825 }
826 } else {
827 content
828 };
829
830 let imports = crate::core::deep_queries::analyze(&analysis_content, resolve_ext).imports;
831 if imports.is_empty() {
832 continue;
833 }
834
835 let resolved =
836 import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
837 for r in resolved {
838 if r.is_external {
839 continue;
840 }
841 if let Some(to) = r.resolved_path {
842 index.edges.push(IndexEdge {
843 from: rel_path.clone(),
844 to,
845 kind: "import".to_string(),
846 weight: 1.0,
847 });
848 }
849 }
850 }
851
852 index.edges.sort_by(|a, b| {
853 a.from
854 .cmp(&b.from)
855 .then_with(|| a.to.cmp(&b.to))
856 .then_with(|| a.kind.cmp(&b.kind))
857 });
858 index
859 .edges
860 .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
861}
862
863fn build_implicit_edges_with_cache(
868 index: &mut ProjectIndex,
869 content_cache: &HashMap<String, String>,
870) {
871 let file_paths: Vec<String> = index.files.keys().cloned().collect();
872 let file_set: std::collections::HashSet<&str> = file_paths.iter().map(String::as_str).collect();
873
874 let mut new_edges: Vec<IndexEdge> = Vec::new();
875
876 for file in &file_paths {
877 let ext = Path::new(file.as_str())
878 .extension()
879 .and_then(|e| e.to_str())
880 .unwrap_or("");
881
882 match ext {
883 "rs" => {
884 collect_rust_mod_edges_cached(
885 file,
886 &file_set,
887 index,
888 &mut new_edges,
889 content_cache,
890 );
891 }
892 "go" => collect_go_package_edges(file, &file_paths, &mut new_edges),
893 "py" => collect_python_init_edges(file, &file_paths, &mut new_edges),
894 "ts" | "js" | "tsx" | "jsx" => {
895 collect_barrel_edges_cached(file, &file_set, index, &mut new_edges, content_cache);
896 }
897 _ => {}
898 }
899 }
900
901 index.edges.extend(new_edges);
902}
903
904fn collect_rust_mod_edges_cached(
905 file: &str,
906 file_set: &std::collections::HashSet<&str>,
907 index: &ProjectIndex,
908 edges: &mut Vec<IndexEdge>,
909 content_cache: &HashMap<String, String>,
910) {
911 if !index.files.contains_key(file) {
912 return;
913 }
914
915 let content = if let Some(cached) = content_cache.get(file) {
916 std::borrow::Cow::Borrowed(cached.as_str())
917 } else {
918 let full_path = Path::new(&index.project_root).join(file);
919 match std::fs::read_to_string(&full_path) {
920 Ok(c) => std::borrow::Cow::Owned(c),
921 Err(_) => return,
922 }
923 };
924
925 let dir = Path::new(file)
926 .parent()
927 .map(|p| p.to_string_lossy().to_string());
928
929 for line in content.lines() {
930 let trimmed = line.trim();
931 if !trimmed.starts_with("mod ") || trimmed.contains('{') {
932 continue;
933 }
934 let mod_name = trimmed
935 .trim_start_matches("mod ")
936 .trim_start_matches("pub mod ")
937 .trim_start_matches("pub(crate) mod ")
938 .trim_end_matches(';')
939 .trim();
940
941 if mod_name.is_empty() || mod_name.contains(' ') {
942 continue;
943 }
944
945 let candidates = if let Some(ref d) = dir {
946 vec![
947 format!("{d}/{mod_name}.rs"),
948 format!("{d}/{mod_name}/mod.rs"),
949 ]
950 } else {
951 vec![format!("{mod_name}.rs"), format!("{mod_name}/mod.rs")]
952 };
953
954 for candidate in candidates {
955 if file_set.contains(candidate.as_str()) {
956 edges.push(IndexEdge {
957 from: file.to_string(),
958 to: candidate,
959 kind: "module".to_string(),
960 weight: 0.8,
961 });
962 break;
963 }
964 }
965 }
966}
967
968fn collect_go_package_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
969 let p = Path::new(file);
970 if p.extension().and_then(|e| e.to_str()) != Some("go") {
971 return;
972 }
973 if file.ends_with("_test.go") {
974 return;
975 }
976
977 let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
978 return;
979 };
980
981 for other in file_paths {
982 if other == file {
983 continue;
984 }
985 let op = Path::new(other.as_str());
986 if op.extension().and_then(|e| e.to_str()) != Some("go") {
987 continue;
988 }
989 if other.ends_with("_test.go") {
990 continue;
991 }
992 let other_dir = op
993 .parent()
994 .map(|d| d.to_string_lossy().to_string())
995 .unwrap_or_default();
996 if other_dir == dir {
997 edges.push(IndexEdge {
998 from: file.to_string(),
999 to: other.clone(),
1000 kind: "package".to_string(),
1001 weight: 0.5,
1002 });
1003 break;
1004 }
1005 }
1006}
1007
1008fn collect_python_init_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
1009 let p = Path::new(file);
1010 if p.file_name().and_then(|n| n.to_str()) != Some("__init__.py") {
1011 return;
1012 }
1013
1014 let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
1015 return;
1016 };
1017
1018 for other in file_paths {
1019 if other == file {
1020 continue;
1021 }
1022 let op = Path::new(other.as_str());
1023 if op.extension().and_then(|e| e.to_str()) != Some("py") {
1024 continue;
1025 }
1026 let other_dir = op
1027 .parent()
1028 .map(|d| d.to_string_lossy().to_string())
1029 .unwrap_or_default();
1030 if other_dir == dir {
1031 edges.push(IndexEdge {
1032 from: file.to_string(),
1033 to: other.clone(),
1034 kind: "module".to_string(),
1035 weight: 0.8,
1036 });
1037 }
1038 }
1039}
1040
1041fn collect_barrel_edges_cached(
1042 file: &str,
1043 file_set: &std::collections::HashSet<&str>,
1044 index: &ProjectIndex,
1045 edges: &mut Vec<IndexEdge>,
1046 content_cache: &HashMap<String, String>,
1047) {
1048 let basename = Path::new(file)
1049 .file_stem()
1050 .and_then(|s| s.to_str())
1051 .unwrap_or("");
1052 if basename != "index" {
1053 return;
1054 }
1055
1056 let content = if let Some(cached) = content_cache.get(file) {
1057 std::borrow::Cow::Borrowed(cached.as_str())
1058 } else {
1059 let full_path = Path::new(&index.project_root).join(file);
1060 match std::fs::read_to_string(&full_path) {
1061 Ok(c) => std::borrow::Cow::Owned(c),
1062 Err(_) => return,
1063 }
1064 };
1065
1066 let dir = Path::new(file)
1067 .parent()
1068 .map(|p| p.to_string_lossy().to_string())
1069 .unwrap_or_default();
1070
1071 let ext = Path::new(file)
1072 .extension()
1073 .and_then(|e| e.to_str())
1074 .unwrap_or("ts");
1075
1076 for line in content.lines() {
1077 let trimmed = line.trim();
1078 if !trimmed.starts_with("export") || !trimmed.contains("from") {
1079 continue;
1080 }
1081 if let Some(from_pos) = trimmed.find("from") {
1082 let after = &trimmed[from_pos + 4..];
1083 let source = after
1084 .trim()
1085 .trim_start_matches(['\'', '"'])
1086 .trim_end_matches([';', '\'', '"'])
1087 .trim_end_matches(['\'', '"']);
1088
1089 if source.starts_with("./") || source.starts_with("../") {
1090 let resolved = if dir.is_empty() {
1091 source.trim_start_matches("./").to_string()
1092 } else {
1093 format!("{dir}/{}", source.trim_start_matches("./"))
1094 };
1095
1096 let candidates = vec![
1097 format!("{resolved}.{ext}"),
1098 format!("{resolved}/index.{ext}"),
1099 resolved.clone(),
1100 ];
1101
1102 for candidate in candidates {
1103 if file_set.contains(candidate.as_str()) {
1104 edges.push(IndexEdge {
1105 from: file.to_string(),
1106 to: candidate,
1107 kind: "reexport".to_string(),
1108 weight: 0.8,
1109 });
1110 break;
1111 }
1112 }
1113 }
1114 }
1115 }
1116}
1117
1118fn build_cochange_edges(index: &mut ProjectIndex) {
1123 let project_root = &index.project_root;
1124
1125 let output = match std::process::Command::new("git")
1126 .args([
1127 "log",
1128 "--name-only",
1129 "--pretty=format:---",
1130 "--since=6 months",
1131 "--",
1132 ".",
1133 ])
1134 .current_dir(project_root)
1135 .output()
1136 {
1137 Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
1138 _ => return,
1139 };
1140
1141 let file_set: std::collections::HashSet<&str> =
1142 index.files.keys().map(String::as_str).collect();
1143
1144 let connected: std::collections::HashSet<&str> = index
1145 .edges
1146 .iter()
1147 .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1148 .collect();
1149
1150 let mut cooccurrence: HashMap<(String, String), u32> = HashMap::new();
1152 let mut current_commit: Vec<&str> = Vec::new();
1153
1154 for line in output.lines() {
1155 if line == "---" {
1156 if current_commit.len() >= 2 && current_commit.len() <= 20 {
1157 for i in 0..current_commit.len() {
1158 for j in (i + 1)..current_commit.len() {
1159 let a = current_commit[i];
1160 let b = current_commit[j];
1161 if !file_set.contains(a) || !file_set.contains(b) {
1162 continue;
1163 }
1164 if connected.contains(a) && connected.contains(b) {
1166 continue;
1167 }
1168 let key = if a < b {
1169 (a.to_string(), b.to_string())
1170 } else {
1171 (b.to_string(), a.to_string())
1172 };
1173 *cooccurrence.entry(key).or_insert(0) += 1;
1174 }
1175 }
1176 }
1177 current_commit.clear();
1178 } else if !line.is_empty() {
1179 current_commit.push(line.trim());
1180 }
1181 }
1182
1183 let mut cochange_edges: Vec<IndexEdge> = cooccurrence
1185 .into_iter()
1186 .filter(|(_, count)| *count >= 5)
1187 .map(|((from, to), _)| IndexEdge {
1188 from,
1189 to,
1190 kind: "cochange".to_string(),
1191 weight: 0.5,
1192 })
1193 .collect();
1194
1195 cochange_edges.sort_by(|a, b| a.from.cmp(&b.from).then_with(|| a.to.cmp(&b.to)));
1197 cochange_edges.truncate(500);
1198
1199 index.edges.extend(cochange_edges);
1200}
1201
1202fn build_sibling_edges(index: &mut ProjectIndex) {
1207 let connected: std::collections::HashSet<&str> = index
1208 .edges
1209 .iter()
1210 .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1211 .collect();
1212
1213 let file_paths: Vec<String> = index.files.keys().cloned().collect();
1214 let mut new_edges: Vec<IndexEdge> = Vec::new();
1215
1216 for file in &file_paths {
1217 if connected.contains(file.as_str()) {
1218 continue;
1219 }
1220
1221 let ext = Path::new(file.as_str())
1222 .extension()
1223 .and_then(|e| e.to_str())
1224 .unwrap_or("");
1225 let dir = Path::new(file.as_str())
1226 .parent()
1227 .map(|p| p.to_string_lossy().to_string())
1228 .unwrap_or_default();
1229
1230 for other in &file_paths {
1232 if other == file {
1233 continue;
1234 }
1235 let other_ext = Path::new(other.as_str())
1236 .extension()
1237 .and_then(|e| e.to_str())
1238 .unwrap_or("");
1239 let other_dir = Path::new(other.as_str())
1240 .parent()
1241 .map(|p| p.to_string_lossy().to_string())
1242 .unwrap_or_default();
1243
1244 if other_ext == ext && other_dir == dir {
1245 new_edges.push(IndexEdge {
1246 from: file.clone(),
1247 to: other.clone(),
1248 kind: "sibling".to_string(),
1249 weight: 0.2,
1250 });
1251 break; }
1253 }
1254 }
1255
1256 index.edges.extend(new_edges);
1257}
1258
1259fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
1260 let lines: Vec<&str> = content.lines().collect();
1261 let mut start = 0;
1262
1263 for (i, line) in lines.iter().enumerate() {
1264 if line.contains(&sig.name) {
1265 let trimmed = line.trim();
1266 let is_def = trimmed.starts_with("fn ")
1267 || trimmed.starts_with("pub fn ")
1268 || trimmed.starts_with("pub(crate) fn ")
1269 || trimmed.starts_with("async fn ")
1270 || trimmed.starts_with("pub async fn ")
1271 || trimmed.starts_with("struct ")
1272 || trimmed.starts_with("pub struct ")
1273 || trimmed.starts_with("enum ")
1274 || trimmed.starts_with("pub enum ")
1275 || trimmed.starts_with("trait ")
1276 || trimmed.starts_with("pub trait ")
1277 || trimmed.starts_with("impl ")
1278 || trimmed.starts_with("class ")
1279 || trimmed.starts_with("export class ")
1280 || trimmed.starts_with("export function ")
1281 || trimmed.starts_with("export async function ")
1282 || trimmed.starts_with("function ")
1283 || trimmed.starts_with("async function ")
1284 || trimmed.starts_with("def ")
1285 || trimmed.starts_with("async def ")
1286 || trimmed.starts_with("func ")
1287 || trimmed.starts_with("interface ")
1288 || trimmed.starts_with("export interface ")
1289 || trimmed.starts_with("type ")
1290 || trimmed.starts_with("export type ")
1291 || trimmed.starts_with("const ")
1292 || trimmed.starts_with("export const ")
1293 || trimmed.starts_with("fun ")
1294 || trimmed.starts_with("private fun ")
1295 || trimmed.starts_with("public fun ")
1296 || trimmed.starts_with("internal fun ")
1297 || trimmed.starts_with("class ")
1298 || trimmed.starts_with("data class ")
1299 || trimmed.starts_with("sealed class ")
1300 || trimmed.starts_with("sealed interface ")
1301 || trimmed.starts_with("enum class ")
1302 || trimmed.starts_with("object ")
1303 || trimmed.starts_with("private object ")
1304 || trimmed.starts_with("interface ")
1305 || trimmed.starts_with("typealias ")
1306 || trimmed.starts_with("private typealias ");
1307 if is_def {
1308 start = i + 1;
1309 break;
1310 }
1311 }
1312 }
1313
1314 if start == 0 {
1315 return (1, lines.len().min(20));
1316 }
1317
1318 let base_indent = lines
1319 .get(start - 1)
1320 .map_or(0, |l| l.len() - l.trim_start().len());
1321
1322 let mut end = start;
1323 let mut brace_depth: i32 = 0;
1324 let mut found_open = false;
1325
1326 for (i, line) in lines.iter().enumerate().skip(start - 1) {
1327 for ch in line.chars() {
1328 if ch == '{' {
1329 brace_depth += 1;
1330 found_open = true;
1331 } else if ch == '}' {
1332 brace_depth -= 1;
1333 }
1334 }
1335
1336 end = i + 1;
1337
1338 if found_open && brace_depth <= 0 {
1339 break;
1340 }
1341
1342 if !found_open && i > start {
1343 let indent = line.len() - line.trim_start().len();
1344 if indent <= base_indent && !line.trim().is_empty() && i > start {
1345 end = i;
1346 break;
1347 }
1348 }
1349
1350 if end - start > 200 {
1351 break;
1352 }
1353 }
1354
1355 (start, end)
1356}
1357
/// Returns a one-line summary of `content`: the first "interesting" line among its
/// first 20 lines, trimmed and cut to at most 120 characters.
///
/// Blank lines and lines that (after trimming) start with a comment marker or an
/// import/package-style keyword are skipped. An empty string is returned when none of
/// the first 20 lines qualifies.
fn extract_summary(content: &str) -> String {
    const SCAN_LINES: usize = 20;
    const MAX_SUMMARY_CHARS: usize = 120;
    /// Prefixes of lines that never make a useful summary.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//",
        "#",
        "/*",
        "*",
        "use ",
        "import ",
        "from ",
        "require(",
        "package ",
    ];

    content
        .lines()
        .take(SCAN_LINES)
        .map(str::trim)
        .find(|candidate| {
            !candidate.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| candidate.starts_with(*prefix))
        })
        .map(|candidate| candidate.chars().take(MAX_SUMMARY_CHARS).collect::<String>())
        .unwrap_or_default()
}
1378
/// Hashes `content` with the standard library's `DefaultHasher` and renders the 64-bit
/// result as 16 zero-padded lowercase hex digits.
///
/// NOTE(review): the std docs do not guarantee `DefaultHasher`'s algorithm across Rust
/// releases, so stored hashes may change after a toolchain upgrade — confirm callers
/// tolerate that.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = {
        let mut state = DefaultHasher::new();
        content.hash(&mut state);
        state.finish()
    };
    format!("{digest:016x}")
}
1387
/// Hashes `input` with the standard library's `DefaultHasher`, keeps the low 32 bits
/// and renders them as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let truncated = state.finish() & LOW_32_BITS;
    format!("{truncated:08x}")
}
1396
/// Recursively copies the directory tree rooted at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Entries for which
/// `Path::is_dir` is true are copied recursively; everything else is copied with
/// `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered: creating `dst`, opening `src`, reading an
/// individual directory entry, or copying a file. Per-entry read errors are propagated
/// rather than skipped, so an `Ok(())` result means every entry was visited.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // `ReadDir` yields `io::Result<DirEntry>`; surface failures instead of
        // silently producing an incomplete copy.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
1410
1411fn normalize_absolute_path(path: &str) -> String {
1412 if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
1413 return canon.to_string_lossy().to_string();
1414 }
1415
1416 let mut normalized = path.to_string();
1417 while normalized.ends_with("\\.") || normalized.ends_with("/.") {
1418 normalized.truncate(normalized.len() - 2);
1419 }
1420 while normalized.len() > 1
1421 && (normalized.ends_with('\\') || normalized.ends_with('/'))
1422 && !normalized.ends_with(":\\")
1423 && !normalized.ends_with(":/")
1424 && normalized != "\\"
1425 && normalized != "/"
1426 {
1427 normalized.pop();
1428 }
1429 normalized
1430}
1431
/// Normalizes a project-root path string.
///
/// Delegates directly to `normalize_absolute_path`; the returned string is whatever
/// that function produces for `path`.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
1435
1436pub fn graph_match_key(path: &str) -> String {
1437 let stripped =
1438 crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1439 stripped.trim_start_matches('/').to_string()
1440}
1441
1442pub fn graph_relative_key(path: &str, root: &str) -> String {
1443 let root_norm = normalize_project_root(root);
1444 let path_norm = normalize_absolute_path(path);
1445 let root_path = Path::new(&root_norm);
1446 let path_path = Path::new(&path_norm);
1447
1448 if let Ok(rel) = path_path.strip_prefix(root_path) {
1449 let rel = rel.to_string_lossy().to_string();
1450 return rel.trim_start_matches(['/', '\\']).to_string();
1451 }
1452
1453 path.trim_start_matches(['/', '\\'])
1454 .replace('/', std::path::MAIN_SEPARATOR_STR)
1455}
1456
/// Module-private alias for `graph_relative_key`: returns `path` relative to `root`
/// exactly as that function computes it.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
1460
/// Reports whether files with extension `ext` should be indexed.
///
/// The decision is made entirely by
/// `crate::core::language_capabilities::is_indexable_ext`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
1464
/// Test-only helper: extracts the package name from the first line of `content` that,
/// once trimmed, starts with `package `.
///
/// The remainder of that line is trimmed and any trailing `;` characters are removed.
/// Returns `None` when no line declares a package.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            return Some(declared.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
1472
// Unit tests for the hashing, path-normalization, summary, staleness and scan-root
// safety helpers of this module.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // `short_hash` must be repeatable and always eight characters long.
    #[test]
    fn test_short_hash_deterministic() {
        let h1 = short_hash("/Users/test/project");
        let h2 = short_hash("/Users/test/project");
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 8);
    }

    // `make_relative` must agree with `graph_relative_key` for Unix, relative,
    // Windows and verbatim-style inputs.
    #[test]
    fn test_make_relative() {
        assert_eq!(
            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
            graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("src/main.rs", "/foo/bar"),
            graph_relative_key("src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
        );
        assert_eq!(
            make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
            graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
        );
    }

    // Trailing separators and trailing `.` segments are stripped from project roots.
    #[test]
    fn test_normalize_project_root() {
        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
    }

    // Backslash, verbatim-prefixed and leading-separator forms all map to the same
    // forward-slash key shape.
    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        assert_eq!(
            graph_match_key(r"C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(
            graph_match_key(r"\\?\C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
    }

    // Comments, `use` lines and blank lines are skipped when picking the summary line.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        let summary = extract_summary(content);
        assert_eq!(summary, "pub fn main() {");
    }

    // Equal inputs hash equally; a one-character change alters the hash.
    #[test]
    fn test_compute_hash_deterministic() {
        let h1 = compute_hash("hello world");
        let h2 = compute_hash("hello world");
        assert_eq!(h1, h2);
        assert_ne!(h1, compute_hash("hello world!"));
    }

    // A fresh index carries the current version, the given root and no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    // Test helper: builds a `FileEntry` for `path`, deriving hash, line count, token
    // count and summary from `content`; exports are left empty.
    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
        FileEntry {
            path: path.to_string(),
            hash: compute_hash(content),
            language: language.to_string(),
            line_count: content.lines().count(),
            token_count: crate::core::tokens::count_tokens(content),
            exports: Vec::new(),
            summary: extract_summary(content),
        }
    }

    // An index that lists a file absent from disk must be reported as stale.
    #[test]
    fn test_index_looks_stale_when_any_file_missing() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
        idx.files.insert(
            "missing.rs".to_string(),
            fe("missing.rs", "pub fn m() {}\n", "rs"),
        );

        assert!(index_looks_stale(&idx, &root_s));
    }

    // An index whose only listed file exists on disk must not be reported as stale.
    #[test]
    fn test_index_looks_fresh_when_all_files_exist() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));

        assert!(!index_looks_stale(&idx, &root_s));
    }

    // Both importers of `b.rs` must show up as its reverse dependencies.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges.push(IndexEdge {
            from: "a.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
            weight: 1.0,
        });
        idx.edges.push(IndexEdge {
            from: "c.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
            weight: 1.0,
        });

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"a.rs".to_string()));
        assert!(deps.contains(&"c.rs".to_string()));
    }

    // The raw string starts with a newline, so `fun greet` sits on line 5.
    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
 fun greet(name: String): String {
 return "hi $name"
 }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };
        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    // When a signature carries explicit start/end lines, they win over the
    // heuristic `find_symbol_range` fallback.
    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        let (start, end) = sig
            .start_line
            .zip(sig.end_line)
            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
        assert_eq!((start, end), (42, 43));
    }

    // An index serialized with the previous version number still deserializes, and
    // its version differs from the current one.
    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    // The package declaration is extracted without the `package ` keyword.
    #[test]
    fn test_kotlin_package_name() {
        let content = "package com.example.feature\n\nclass UserService";
        assert_eq!(
            kotlin_package_name(content).as_deref(),
            Some("com.example.feature")
        );
    }

    // Filesystem roots are never acceptable scan roots (drive roots on Windows too).
    #[test]
    fn safe_scan_root_rejects_fs_root() {
        assert!(!is_safe_scan_root("/"));
        assert!(!is_safe_scan_root("\\"));
        #[cfg(windows)]
        {
            assert!(!is_safe_scan_root("C:\\"));
            assert!(!is_safe_scan_root("D:\\"));
        }
    }

    // The user's home directory itself must be rejected; skipped when no home
    // directory can be determined.
    #[test]
    fn safe_scan_root_rejects_home() {
        if let Some(home) = dirs::home_dir() {
            let home_str = home.to_string_lossy().to_string();
            assert!(
                !is_safe_scan_root(&home_str),
                "home dir should be rejected: {home_str}"
            );
        }
    }

    // A temp directory containing a `Cargo.toml` is accepted as a scan root.
    #[test]
    fn safe_scan_root_accepts_project_dir() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[package]\nname = \"test\"\n",
        )
        .unwrap();
        let root = tmp.path().to_string_lossy().to_string();
        assert!(is_safe_scan_root(&root));
    }

    // A directory with 55 subdirectories and no project marker is rejected.
    #[test]
    fn safe_scan_root_rejects_broad_dir() {
        let tmp = tempdir().unwrap();
        for i in 0..55 {
            std::fs::create_dir(tmp.path().join(format!("dir{i}"))).unwrap();
        }
        let root = tmp.path().to_string_lossy().to_string();
        assert!(!is_safe_scan_root(&root));
    }

    // With `LEAN_CTX_NO_INDEX` set, `scan` must return an index without files.
    // The env lock serializes tests that mutate process environment variables.
    #[test]
    fn no_index_env_skips_scan() {
        let _env = crate::core::data_dir::test_env_lock();
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "").unwrap();
        std::fs::write(tmp.path().join("main.rs"), "fn main() {}").unwrap();

        std::env::set_var("LEAN_CTX_NO_INDEX", "1");
        let idx = scan(&tmp.path().to_string_lossy());
        // Clear the variable before asserting so a failure cannot leak it.
        std::env::remove_var("LEAN_CTX_NO_INDEX");
        assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
    }

    // An index containing a `Desktop/` entry is treated as stale.
    #[test]
    fn stale_index_detected_by_contamination() {
        let root_s = "/home/testuser/myproject";
        let mut idx = ProjectIndex::new(root_s);
        idx.files.insert(
            "Desktop/random.py".to_string(),
            fe("Desktop/random.py", "x = 1\n", "py"),
        );
        idx.files.insert(
            "src/main.rs".to_string(),
            fe("src/main.rs", "fn main() {}\n", "rs"),
        );
        assert!(
            index_looks_stale(&idx, root_s),
            "Index with Desktop/ files should be considered stale"
        );
    }

    // An index whose `last_scan` is 100 hours in the past is treated as stale even
    // though its only file exists.
    #[test]
    fn stale_index_detected_by_age() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));
        let old_time = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
        idx.last_scan = old_time.format("%Y-%m-%d %H:%M:%S").to_string();

        assert!(
            index_looks_stale(&idx, &root_s),
            "Index older than max_age_hours should be stale"
        );
    }

    // `~/Downloads` is rejected unless it happens to contain a `.git` directory, in
    // which case the check is skipped.
    #[test]
    fn safe_scan_root_rejects_home_downloads() {
        if let Some(home) = dirs::home_dir() {
            let downloads = home.join("Downloads");
            if !downloads.join(".git").exists() {
                let downloads_str = downloads.to_string_lossy().to_string();
                assert!(
                    !is_safe_scan_root(&downloads_str),
                    "~/Downloads should be rejected without project markers"
                );
            }
        }
    }

    // A parent directory holding two git repositories is accepted even with 55
    // additional subdirectories.
    #[test]
    fn safe_scan_root_accepts_multi_repo_parent() {
        let tmp = tempdir().unwrap();
        let parent = tmp.path().join("code");
        std::fs::create_dir_all(&parent).unwrap();

        std::fs::create_dir_all(parent.join("repo-a").join(".git")).unwrap();
        std::fs::create_dir_all(parent.join("repo-b").join(".git")).unwrap();

        for i in 0..55 {
            std::fs::create_dir(parent.join(format!("dir-{i}"))).unwrap();
        }

        let parent_str = parent.to_string_lossy().to_string();
        assert!(
            is_safe_scan_root(&parent_str),
            "Multi-repo parent with >50 subdirs should be accepted"
        );
    }

    // The same broad layout without any repositories or markers is rejected.
    #[test]
    fn safe_scan_root_rejects_broad_dir_without_repos() {
        let tmp = tempdir().unwrap();
        let broad = tmp.path().join("broad");
        std::fs::create_dir_all(&broad).unwrap();

        for i in 0..55 {
            std::fs::create_dir(broad.join(format!("dir-{i}"))).unwrap();
        }

        let broad_str = broad.to_string_lossy().to_string();
        assert!(
            !is_safe_scan_root(&broad_str),
            "Broad dir without project markers should be rejected"
        );
    }
}