1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
9const INDEX_VERSION: u32 = 6;
10
/// Public wrapper around the private `is_safe_scan_root` check.
///
/// Returns exactly what `is_safe_scan_root` returns for `path`: `true` when
/// the directory is considered safe to scan, `false` when it is refused.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
14
/// Reports whether `path` has no meaningful parent directory.
///
/// A path counts as a root when `Path::parent` yields `None`. On Windows
/// builds a path whose parent is the empty path is treated as a root too.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
19
20fn is_safe_scan_root(path: &str) -> bool {
21 let normalized = normalize_project_root(path);
22 let p = Path::new(&normalized);
23
24 if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
25 tracing::warn!("[graph_index: refusing to scan filesystem root]");
26 return false;
27 }
28
29 if normalized == "." || normalized.is_empty() {
30 tracing::warn!("[graph_index: refusing to scan relative/empty root]");
31 return false;
32 }
33
34 if let Some(home) = dirs::home_dir() {
35 let home_norm = normalize_project_root(&home.to_string_lossy());
36 if normalized == home_norm {
37 use std::sync::Once;
38 static HOME_WARN: Once = Once::new();
39 HOME_WARN.call_once(|| {
40 tracing::warn!(
41 "[graph_index: skipping — cannot index home directory {normalized}.\n \
42 Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
43 );
44 });
45 return false;
46 }
47 let home_path = Path::new(&home_norm);
49 const BLOCKED_HOME_SUBDIRS: &[&str] = &[
50 "Desktop",
51 "Documents",
52 "Downloads",
53 "Pictures",
54 "Music",
55 "Videos",
56 "Movies",
57 "Library",
58 ".local",
59 ".cache",
60 ".config",
61 "snap",
62 "Applications",
63 ];
64 for blocked in BLOCKED_HOME_SUBDIRS {
65 let blocked_path = home_path.join(blocked);
66 let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
67 let has_project_marker = p.join(".git").exists()
68 || p.join("Cargo.toml").exists()
69 || p.join("package.json").exists();
70 if is_inside_blocked && !has_project_marker {
71 tracing::warn!(
72 "[graph_index: refusing to scan {normalized} — \
73 inside home/{blocked} without project markers]"
74 );
75 return false;
76 }
77 }
78
79 if p.parent() == Some(home_path) {
81 let has_marker = p.join(".git").exists()
82 || p.join("Cargo.toml").exists()
83 || p.join("package.json").exists()
84 || p.join("go.mod").exists()
85 || p.join("pyproject.toml").exists();
86 if !has_marker {
87 tracing::warn!(
88 "[graph_index: refusing to scan {normalized} — \
89 direct child of home without project markers]"
90 );
91 return false;
92 }
93 }
94 }
95
96 let breadth_markers = [
97 ".git",
98 "Cargo.toml",
99 "package.json",
100 "go.mod",
101 "pyproject.toml",
102 "setup.py",
103 "Makefile",
104 "CMakeLists.txt",
105 "pnpm-workspace.yaml",
106 ".projectile",
107 "BUILD.bazel",
108 "go.work",
109 ];
110
111 if !breadth_markers.iter().any(|m| p.join(m).exists()) {
112 let child_count = std::fs::read_dir(p).map_or(0, |rd| {
113 rd.filter_map(Result::ok)
114 .filter(|e| e.path().is_dir())
115 .count()
116 });
117 if child_count > 50 {
118 tracing::warn!(
119 "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
120 skipping scan to avoid indexing broad directories]"
121 );
122 return false;
123 }
124 }
125
126 true
127}
128
/// Serializable graph index of one project: its files, the symbols found in
/// them and the edges between files.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Format version; `ProjectIndex::load` returns `None` when the stored
    /// value differs from `INDEX_VERSION`.
    pub version: u32,
    /// Project root as produced by `normalize_project_root`.
    pub project_root: String,
    /// Local timestamp of index creation, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by their path relative to `project_root`.
    pub files: HashMap<String, FileEntry>,
    /// Edges between files (rebuilt from scratch by `build_edges_with_cache`).
    pub edges: Vec<IndexEdge>,
    /// Symbols, keyed by `"{relative_path}::{symbol_name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
138
/// Per-file record stored in `ProjectIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same value as the map key).
    pub path: String,
    /// Content hash from `compute_hash`; `scan` reuses the old entry when it is unchanged.
    pub hash: String,
    /// The file extension as found on disk (e.g. `rs`), not a language name.
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are flagged as exported.
    pub exports: Vec<String>,
    /// Short summary line produced by `extract_summary`.
    pub summary: String,
}
149
/// Per-symbol record stored in `ProjectIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Signature kind rendered as a string (e.g. `method`).
    pub kind: String,
    /// First line of the symbol; 1-based when it comes from `find_symbol_range`.
    // NOTE(review): spans supplied directly by the signature extractor are
    // presumably 1-based as well — confirm against `signatures`.
    pub start_line: usize,
    /// Last line of the symbol (same numbering as `start_line`).
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
159
/// Directed relation between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Relative path of the file the edge starts at (the importing file).
    pub from: String,
    /// Path the import resolved to.
    // NOTE(review): presumably relative to the project root like `from` —
    // confirm against `import_resolver`.
    pub to: String,
    /// Edge type; `build_edges_with_cache` only ever emits `"import"`.
    pub kind: String,
}
166
167impl ProjectIndex {
168 pub fn new(project_root: &str) -> Self {
169 Self {
170 version: INDEX_VERSION,
171 project_root: normalize_project_root(project_root),
172 last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
173 files: HashMap::new(),
174 edges: Vec::new(),
175 symbols: HashMap::new(),
176 }
177 }
178
179 pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
180 let normalized = normalize_project_root(project_root);
181 let hash = crate::core::project_hash::hash_project_root(&normalized);
182 crate::core::data_dir::lean_ctx_data_dir()
183 .ok()
184 .map(|d| d.join("graphs").join(hash))
185 }
186
187 pub fn load(project_root: &str) -> Option<Self> {
188 let dir = Self::index_dir(project_root)?;
189
190 let zst_path = dir.join("index.json.zst");
191 if zst_path.exists() {
192 let compressed = std::fs::read(&zst_path).ok()?;
193 let data = zstd::decode_all(compressed.as_slice()).ok()?;
194 let content = String::from_utf8(data).ok()?;
195 let index: Self = serde_json::from_str(&content).ok()?;
196 if index.version != INDEX_VERSION {
197 return None;
198 }
199 return Some(index);
200 }
201
202 let json_path = dir.join("index.json");
203 let content = std::fs::read_to_string(&json_path)
204 .or_else(|_| -> std::io::Result<String> {
205 let legacy_hash = short_hash(&normalize_project_root(project_root));
206 let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
207 .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
208 .join("graphs")
209 .join(legacy_hash);
210 let legacy_path = legacy_dir.join("index.json");
211 let data = std::fs::read_to_string(&legacy_path)?;
212 if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
213 tracing::debug!("graph index migration: {e}");
214 }
215 Ok(data)
216 })
217 .ok()?;
218 let index: Self = serde_json::from_str(&content).ok()?;
219 if index.version != INDEX_VERSION {
220 return None;
221 }
222 if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
224 let zst_tmp = zst_path.with_extension("zst.tmp");
225 if std::fs::write(&zst_tmp, &compressed).is_ok()
226 && std::fs::rename(&zst_tmp, &zst_path).is_ok()
227 {
228 let _ = std::fs::remove_file(&json_path);
229 }
230 }
231 Some(index)
232 }
233
234 pub fn save(&self) -> Result<(), String> {
235 let dir = Self::index_dir(&self.project_root)
236 .ok_or_else(|| "Cannot determine data directory".to_string())?;
237 std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
238 let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
239 let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
240 let target = dir.join("index.json.zst");
241 let tmp = target.with_extension("zst.tmp");
242 std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
243 std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
244 let _ = std::fs::remove_file(dir.join("index.json"));
245 Ok(())
246 }
247
248 pub fn purge_stale_indices() {
251 let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
252 return;
253 };
254 let graphs_dir = data_dir.join("graphs");
255 let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
256 return;
257 };
258 let cfg = crate::core::config::Config::load();
259 let max_age_secs = cfg.archive.max_age_hours * 3600;
260
261 for entry in entries.filter_map(Result::ok) {
262 let path = entry.path();
263 if !path.is_dir() {
264 continue;
265 }
266 let zst = path.join("index.json.zst");
267 let json = path.join("index.json");
268 let index_file = if zst.exists() {
269 &zst
270 } else if json.exists() {
271 &json
272 } else {
273 continue;
274 };
275
276 let is_old = index_file
277 .metadata()
278 .and_then(|m| m.modified())
279 .is_ok_and(|mtime| {
280 mtime
281 .elapsed()
282 .is_ok_and(|age| age.as_secs() > max_age_secs)
283 });
284
285 if is_old {
286 tracing::info!("[graph_index: purging stale index at {}]", path.display());
287 let _ = std::fs::remove_dir_all(&path);
288 }
289 }
290 }
291
292 pub fn file_count(&self) -> usize {
293 self.files.len()
294 }
295
296 pub fn symbol_count(&self) -> usize {
297 self.symbols.len()
298 }
299
300 pub fn edge_count(&self) -> usize {
301 self.edges.len()
302 }
303
304 pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
305 self.symbols.get(key)
306 }
307
308 pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
309 let mut result = Vec::new();
310 let mut visited = std::collections::HashSet::new();
311 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
312
313 while let Some((current, d)) = queue.pop() {
314 if d > depth || visited.contains(¤t) {
315 continue;
316 }
317 visited.insert(current.clone());
318 if current != path {
319 result.push(current.clone());
320 }
321
322 for edge in &self.edges {
323 if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
324 queue.push((edge.from.clone(), d + 1));
325 }
326 }
327 }
328 result
329 }
330
331 pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
332 let mut result = Vec::new();
333 let mut visited = std::collections::HashSet::new();
334 let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
335
336 while let Some((current, d)) = queue.pop() {
337 if d > depth || visited.contains(¤t) {
338 continue;
339 }
340 visited.insert(current.clone());
341 if current != path {
342 result.push(current.clone());
343 }
344
345 for edge in &self.edges {
346 if edge.from == current && !visited.contains(&edge.to) {
347 queue.push((edge.to.clone(), d + 1));
348 }
349 if edge.to == current && !visited.contains(&edge.from) {
350 queue.push((edge.from.clone(), d + 1));
351 }
352 }
353 }
354 result
355 }
356}
357
358pub fn load_or_build(project_root: &str) -> ProjectIndex {
362 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
363 return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
364 }
365
366 let root_abs = if project_root.trim().is_empty() || project_root == "." {
369 std::env::current_dir().ok().map_or_else(
370 || ".".to_string(),
371 |p| normalize_project_root(&p.to_string_lossy()),
372 )
373 } else {
374 normalize_project_root(project_root)
375 };
376
377 if !is_safe_scan_root(&root_abs) {
378 return ProjectIndex::new(&root_abs);
379 }
380
381 if let Some(idx) = ProjectIndex::load(&root_abs) {
383 if !idx.files.is_empty() {
384 if index_looks_stale(&idx, &root_abs) {
385 tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
386 return scan(&root_abs);
387 }
388 return idx;
389 }
390 }
391
392 if let Some(idx) = ProjectIndex::load(".") {
395 if !idx.files.is_empty() {
396 let mut migrated = idx;
397 migrated.project_root.clone_from(&root_abs);
398 let _ = migrated.save();
399 if index_looks_stale(&migrated, &root_abs) {
400 tracing::warn!(
401 "[graph_index: stale legacy index detected for {root_abs}; rebuilding]"
402 );
403 return scan(&root_abs);
404 }
405 return migrated;
406 }
407 }
408
409 if let Ok(cwd) = std::env::current_dir() {
411 let cwd_str = normalize_project_root(&cwd.to_string_lossy());
412 if cwd_str != root_abs {
413 if let Some(idx) = ProjectIndex::load(&cwd_str) {
414 if !idx.files.is_empty() {
415 if index_looks_stale(&idx, &cwd_str) {
416 tracing::warn!(
417 "[graph_index: stale index detected for {cwd_str}; rebuilding]"
418 );
419 return scan(&cwd_str);
420 }
421 return idx;
422 }
423 }
424 }
425 }
426
427 scan(&root_abs)
429}
430
431fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
432 if index.files.is_empty() {
433 return true;
434 }
435
436 if let Ok(scan_time) =
438 chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
439 {
440 let cfg = crate::core::config::Config::load();
441 let max_age = chrono::Duration::hours(cfg.archive.max_age_hours as i64);
442 let now = chrono::Local::now().naive_local();
443 if now.signed_duration_since(scan_time) > max_age {
444 tracing::info!(
445 "[graph_index: index is older than {}h — marking stale]",
446 cfg.archive.max_age_hours
447 );
448 return true;
449 }
450 }
451
452 const CONTAMINATION_MARKERS: &[&str] = &[
455 "Desktop/",
456 "Documents/",
457 "Downloads/",
458 "Pictures/",
459 "Music/",
460 "Videos/",
461 "Movies/",
462 "Library/",
463 ".cache/",
464 "snap/",
465 ];
466 let contaminated = index.files.keys().take(200).any(|rel| {
467 CONTAMINATION_MARKERS
468 .iter()
469 .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
470 });
471 if contaminated {
472 tracing::warn!(
473 "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
474 marking stale to force clean rebuild]"
475 );
476 return true;
477 }
478
479 let root_path = Path::new(root_abs);
480 let sample_size = index.files.len().min(20);
482 for rel in index.files.keys().take(sample_size) {
483 let rel = rel.trim_start_matches(['/', '\\']);
484 if rel.is_empty() {
485 continue;
486 }
487 let abs = root_path.join(rel);
488 if !abs.exists() {
489 return true;
490 }
491 }
492
493 false
494}
495
496pub fn scan(project_root: &str) -> ProjectIndex {
497 if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
498 tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
499 return ProjectIndex::new(project_root);
500 }
501
502 let project_root = normalize_project_root(project_root);
503
504 if !is_safe_scan_root(&project_root) {
505 tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
506 return ProjectIndex::new(&project_root);
507 }
508
509 let lock_name = format!(
510 "graph-idx-{}",
511 &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
512 );
513 let _lock = crate::core::startup_guard::try_acquire_lock(
514 &lock_name,
515 std::time::Duration::from_millis(800),
516 std::time::Duration::from_mins(3),
517 );
518 if _lock.is_none() {
519 tracing::info!(
520 "[graph_index: another process is scanning {project_root} — returning cached or empty]"
521 );
522 return ProjectIndex::load(&project_root)
523 .unwrap_or_else(|| ProjectIndex::new(&project_root));
524 }
525
526 let existing = ProjectIndex::load(&project_root);
527 let mut index = ProjectIndex::new(&project_root);
528
529 let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
530 if let Some(ref prev) = existing {
531 prev.files
532 .iter()
533 .map(|(path, entry)| {
534 let syms: Vec<(String, SymbolEntry)> = prev
535 .symbols
536 .iter()
537 .filter(|(_, s)| s.file == *path)
538 .map(|(k, v)| (k.clone(), v.clone()))
539 .collect();
540 (path.clone(), (entry.hash.clone(), syms))
541 })
542 .collect()
543 } else {
544 HashMap::new()
545 };
546
547 let walker = ignore::WalkBuilder::new(&project_root)
548 .hidden(true)
549 .git_ignore(true)
550 .git_global(true)
551 .git_exclude(true)
552 .max_depth(Some(20))
553 .build();
554
555 let cfg = crate::core::config::Config::load();
556 let extra_ignores: Vec<glob::Pattern> = cfg
557 .extra_ignore_patterns
558 .iter()
559 .filter_map(|p| glob::Pattern::new(p).ok())
560 .collect();
561
562 let mut scanned = 0usize;
563 let mut reused = 0usize;
564 let mut entries_visited = 0usize;
565 let max_files = if cfg.graph_index_max_files == 0 {
566 usize::MAX } else {
568 cfg.graph_index_max_files as usize
569 };
570 const MAX_ENTRIES_VISITED: usize = 500_000;
571 const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
573
574 for entry in walker.filter_map(std::result::Result::ok) {
575 entries_visited += 1;
576 if entries_visited > MAX_ENTRIES_VISITED {
577 tracing::warn!(
578 "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
579 runaway traversal. Indexed {} files so far.]",
580 index.files.len()
581 );
582 break;
583 }
584 if entries_visited.is_multiple_of(5000) {
585 if std::time::Instant::now() > scan_deadline {
586 tracing::warn!(
587 "[graph_index: scan timeout (120s) after {entries_visited} entries — \
588 saving partial index with {} files]",
589 index.files.len()
590 );
591 break;
592 }
593 if crate::core::memory_guard::abort_requested() {
594 tracing::warn!(
595 "[graph_index: memory pressure abort after {entries_visited} entries — \
596 saving partial index with {} files]",
597 index.files.len()
598 );
599 break;
600 }
601 if crate::core::memory_guard::is_under_pressure() {
602 tracing::warn!(
603 "[graph_index: memory pressure detected at {entries_visited} entries — \
604 stopping scan with {} files]",
605 index.files.len()
606 );
607 break;
608 }
609 if let Some(ref g) = _lock {
610 g.touch();
611 }
612 }
613
614 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
615 continue;
616 }
617 let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
618
619 if !file_path.starts_with(&project_root) {
621 continue;
622 }
623
624 if let Ok(meta) = std::fs::metadata(&file_path) {
626 if !meta.is_file() {
627 continue;
628 }
629 if meta.len() > MAX_FILE_SIZE_BYTES {
630 tracing::debug!(
631 "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
632 meta.len() as f64 / 1_048_576.0,
633 MAX_FILE_SIZE_BYTES / (1024 * 1024),
634 );
635 continue;
636 }
637 }
638
639 let ext = Path::new(&file_path)
640 .extension()
641 .and_then(|e| e.to_str())
642 .unwrap_or("");
643
644 if !is_indexable_ext(ext) {
645 continue;
646 }
647
648 let rel = make_relative(&file_path, &project_root);
649 if extra_ignores.iter().any(|p| p.matches(&rel)) {
650 continue;
651 }
652
653 if max_files != usize::MAX && index.files.len() >= max_files {
654 tracing::info!(
655 "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
656 max_files
657 );
658 break;
659 }
660
661 let Ok(content) = std::fs::read_to_string(&file_path) else {
662 continue;
663 };
664
665 let hash = compute_hash(&content);
666 let rel_path = make_relative(&file_path, &project_root);
667
668 if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
669 if *old_hash == hash {
670 if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
671 index.files.insert(rel_path.clone(), old_entry.clone());
672 for (key, sym) in old_syms {
673 index.symbols.insert(key.clone(), sym.clone());
674 }
675 reused += 1;
676 continue;
677 }
678 }
679 }
680
681 let sigs = signatures::extract_signatures(&content, ext);
682 let line_count = content.lines().count();
683 let token_count = crate::core::tokens::count_tokens(&content);
684 let summary = extract_summary(&content);
685
686 let exports: Vec<String> = sigs
687 .iter()
688 .filter(|s| s.is_exported)
689 .map(|s| s.name.clone())
690 .collect();
691
692 index.files.insert(
693 rel_path.clone(),
694 FileEntry {
695 path: rel_path.clone(),
696 hash,
697 language: ext.to_string(),
698 line_count,
699 token_count,
700 exports,
701 summary,
702 },
703 );
704
705 for sig in &sigs {
706 let (start, end) = sig
707 .start_line
708 .zip(sig.end_line)
709 .unwrap_or_else(|| find_symbol_range(&content, sig));
710 let key = format!("{}::{}", rel_path, sig.name);
711 index.symbols.insert(
712 key,
713 SymbolEntry {
714 file: rel_path.clone(),
715 name: sig.name.clone(),
716 kind: sig.kind.to_string(),
717 start_line: start,
718 end_line: end,
719 is_exported: sig.is_exported,
720 },
721 );
722 }
723
724 scanned += 1;
725 }
726
727 build_edges(&mut index);
728
729 if let Err(e) = index.save() {
730 tracing::warn!("could not save graph index: {e}");
731 }
732
733 tracing::warn!(
734 "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
735 index.file_count(),
736 scanned,
737 reused,
738 index.symbol_count(),
739 index.edge_count()
740 );
741
742 index
743}
744
/// Rebuilds `index.edges` by reading every indexed file from disk.
///
/// Delegates to `build_edges_with_cache` with an empty content cache.
fn build_edges(index: &mut ProjectIndex) {
    build_edges_with_cache(index, &HashMap::new());
}
748
749fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
750 index.edges.clear();
751
752 if crate::core::memory_guard::abort_requested() {
753 tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
754 return;
755 }
756
757 let root = normalize_project_root(&index.project_root);
758 let root_path = Path::new(&root);
759
760 let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
761 file_paths.sort();
762
763 let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
764
765 const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
766
767 for (i, rel_path) in file_paths.iter().enumerate() {
768 if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
769 tracing::warn!(
770 "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
771 file_paths.len()
772 );
773 break;
774 }
775
776 let content = if let Some(cached) = content_cache.get(rel_path) {
777 std::borrow::Cow::Borrowed(cached.as_str())
778 } else {
779 let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
780 if let Ok(meta) = abs_path.metadata() {
781 if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
782 continue;
783 }
784 }
785 match std::fs::read_to_string(&abs_path) {
786 Ok(c) => std::borrow::Cow::Owned(c),
787 Err(_) => continue,
788 }
789 };
790
791 let ext = Path::new(rel_path)
792 .extension()
793 .and_then(|e| e.to_str())
794 .unwrap_or("");
795
796 let resolve_ext = match ext {
797 "vue" | "svelte" => "ts",
798 _ => ext,
799 };
800
801 let imports = crate::core::deep_queries::analyze(&content, resolve_ext).imports;
802 if imports.is_empty() {
803 continue;
804 }
805
806 let resolved =
807 import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
808 for r in resolved {
809 if r.is_external {
810 continue;
811 }
812 if let Some(to) = r.resolved_path {
813 index.edges.push(IndexEdge {
814 from: rel_path.clone(),
815 to,
816 kind: "import".to_string(),
817 });
818 }
819 }
820 }
821
822 index.edges.sort_by(|a, b| {
823 a.from
824 .cmp(&b.from)
825 .then_with(|| a.to.cmp(&b.to))
826 .then_with(|| a.kind.cmp(&b.kind))
827 });
828 index
829 .edges
830 .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
831}
832
833fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
834 let lines: Vec<&str> = content.lines().collect();
835 let mut start = 0;
836
837 for (i, line) in lines.iter().enumerate() {
838 if line.contains(&sig.name) {
839 let trimmed = line.trim();
840 let is_def = trimmed.starts_with("fn ")
841 || trimmed.starts_with("pub fn ")
842 || trimmed.starts_with("pub(crate) fn ")
843 || trimmed.starts_with("async fn ")
844 || trimmed.starts_with("pub async fn ")
845 || trimmed.starts_with("struct ")
846 || trimmed.starts_with("pub struct ")
847 || trimmed.starts_with("enum ")
848 || trimmed.starts_with("pub enum ")
849 || trimmed.starts_with("trait ")
850 || trimmed.starts_with("pub trait ")
851 || trimmed.starts_with("impl ")
852 || trimmed.starts_with("class ")
853 || trimmed.starts_with("export class ")
854 || trimmed.starts_with("export function ")
855 || trimmed.starts_with("export async function ")
856 || trimmed.starts_with("function ")
857 || trimmed.starts_with("async function ")
858 || trimmed.starts_with("def ")
859 || trimmed.starts_with("async def ")
860 || trimmed.starts_with("func ")
861 || trimmed.starts_with("interface ")
862 || trimmed.starts_with("export interface ")
863 || trimmed.starts_with("type ")
864 || trimmed.starts_with("export type ")
865 || trimmed.starts_with("const ")
866 || trimmed.starts_with("export const ")
867 || trimmed.starts_with("fun ")
868 || trimmed.starts_with("private fun ")
869 || trimmed.starts_with("public fun ")
870 || trimmed.starts_with("internal fun ")
871 || trimmed.starts_with("class ")
872 || trimmed.starts_with("data class ")
873 || trimmed.starts_with("sealed class ")
874 || trimmed.starts_with("sealed interface ")
875 || trimmed.starts_with("enum class ")
876 || trimmed.starts_with("object ")
877 || trimmed.starts_with("private object ")
878 || trimmed.starts_with("interface ")
879 || trimmed.starts_with("typealias ")
880 || trimmed.starts_with("private typealias ");
881 if is_def {
882 start = i + 1;
883 break;
884 }
885 }
886 }
887
888 if start == 0 {
889 return (1, lines.len().min(20));
890 }
891
892 let base_indent = lines
893 .get(start - 1)
894 .map_or(0, |l| l.len() - l.trim_start().len());
895
896 let mut end = start;
897 let mut brace_depth: i32 = 0;
898 let mut found_open = false;
899
900 for (i, line) in lines.iter().enumerate().skip(start - 1) {
901 for ch in line.chars() {
902 if ch == '{' {
903 brace_depth += 1;
904 found_open = true;
905 } else if ch == '}' {
906 brace_depth -= 1;
907 }
908 }
909
910 end = i + 1;
911
912 if found_open && brace_depth <= 0 {
913 break;
914 }
915
916 if !found_open && i > start {
917 let indent = line.len() - line.trim_start().len();
918 if indent <= base_indent && !line.trim().is_empty() && i > start {
919 end = i;
920 break;
921 }
922 }
923
924 if end - start > 200 {
925 break;
926 }
927 }
928
929 (start, end)
930}
931
/// Picks a one-line summary for a file.
///
/// Returns the first of the leading 20 lines that, once trimmed, is neither
/// empty nor a comment / import-like line, truncated to 120 characters.
/// Returns an empty string when no such line exists.
fn extract_summary(content: &str) -> String {
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty() && !SKIPPED_PREFIXES.iter().any(|prefix| line.starts_with(prefix))
        })
        .map(|line| line.chars().take(120).collect::<String>())
        .unwrap_or_default()
}
952
/// Hashes file content with the standard library's `DefaultHasher` and
/// renders the 64-bit result as 16 lowercase hex digits.
///
/// Used for change detection between scans, not for anything security related.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
961
/// Hashes `input` with `DefaultHasher` and renders the low 32 bits as
/// 8 lowercase hex digits.
///
/// `ProjectIndex::load` uses it to locate the legacy index directory.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let low_bits = state.finish() & 0xFFFF_FFFF;
    format!("{low_bits:08x}")
}
970
/// Recursively copies the contents of `src` into `dst`, creating `dst` (and
/// any missing parents) first.
///
/// # Errors
///
/// Stops at, and returns, the first I/O error: creating a directory, listing
/// `src`, reading a directory entry, or copying a file. Entries copied before
/// the failure stay in place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Surface per-entry errors instead of silently skipping the entry.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
984
985fn normalize_absolute_path(path: &str) -> String {
986 if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
987 return canon.to_string_lossy().to_string();
988 }
989
990 let mut normalized = path.to_string();
991 while normalized.ends_with("\\.") || normalized.ends_with("/.") {
992 normalized.truncate(normalized.len() - 2);
993 }
994 while normalized.len() > 1
995 && (normalized.ends_with('\\') || normalized.ends_with('/'))
996 && !normalized.ends_with(":\\")
997 && !normalized.ends_with(":/")
998 && normalized != "\\"
999 && normalized != "/"
1000 {
1001 normalized.pop();
1002 }
1003 normalized
1004}
1005
/// Normalizes a project root path.
///
/// Delegates to `normalize_absolute_path`, so roots and file paths share the
/// same canonical form.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
1009
1010pub fn graph_match_key(path: &str) -> String {
1011 let stripped =
1012 crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1013 stripped.trim_start_matches('/').to_string()
1014}
1015
1016pub fn graph_relative_key(path: &str, root: &str) -> String {
1017 let root_norm = normalize_project_root(root);
1018 let path_norm = normalize_absolute_path(path);
1019 let root_path = Path::new(&root_norm);
1020 let path_path = Path::new(&path_norm);
1021
1022 if let Ok(rel) = path_path.strip_prefix(root_path) {
1023 let rel = rel.to_string_lossy().to_string();
1024 return rel.trim_start_matches(['/', '\\']).to_string();
1025 }
1026
1027 path.trim_start_matches(['/', '\\'])
1028 .replace('/', std::path::MAIN_SEPARATOR_STR)
1029}
1030
/// Private shorthand for `graph_relative_key`: `path` expressed relative to
/// `root`.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
1034
/// Reports whether files with extension `ext` are indexed.
///
/// Delegates to `crate::core::language_capabilities::is_indexable_ext`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
1038
/// Test helper: extracts the package name from the first `package …` line.
///
/// Surrounding whitespace and trailing `;` characters are removed. Returns
/// `None` when no line starts with `package ` after trimming.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(rest) = raw_line.trim().strip_prefix("package ") {
            let name = rest.trim().trim_end_matches(';');
            return Some(name.to_string());
        }
    }
    None
}
1046
1047#[cfg(test)]
1048mod tests {
1049 use super::*;
1050 use tempfile::tempdir;
1051
1052 #[test]
1053 fn test_short_hash_deterministic() {
1054 let h1 = short_hash("/Users/test/project");
1055 let h2 = short_hash("/Users/test/project");
1056 assert_eq!(h1, h2);
1057 assert_eq!(h1.len(), 8);
1058 }
1059
1060 #[test]
1061 fn test_make_relative() {
1062 assert_eq!(
1063 make_relative("/foo/bar/src/main.rs", "/foo/bar"),
1064 graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
1065 );
1066 assert_eq!(
1067 make_relative("src/main.rs", "/foo/bar"),
1068 graph_relative_key("src/main.rs", "/foo/bar")
1069 );
1070 assert_eq!(
1071 make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
1072 graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
1073 );
1074 assert_eq!(
1075 make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
1076 graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
1077 );
1078 }
1079
1080 #[test]
1081 fn test_normalize_project_root() {
1082 assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
1083 assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
1084 assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
1085 }
1086
1087 #[test]
1088 fn test_graph_match_key_normalizes_windows_forms() {
1089 assert_eq!(
1090 graph_match_key(r"C:\repo\src\main.rs"),
1091 "C:/repo/src/main.rs"
1092 );
1093 assert_eq!(
1094 graph_match_key(r"\\?\C:\repo\src\main.rs"),
1095 "C:/repo/src/main.rs"
1096 );
1097 assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
1098 }
1099
1100 #[test]
1101 fn test_extract_summary() {
1102 let content = "// comment\nuse std::io;\n\npub fn main() {\n println!(\"hello\");\n}";
1103 let summary = extract_summary(content);
1104 assert_eq!(summary, "pub fn main() {");
1105 }
1106
1107 #[test]
1108 fn test_compute_hash_deterministic() {
1109 let h1 = compute_hash("hello world");
1110 let h2 = compute_hash("hello world");
1111 assert_eq!(h1, h2);
1112 assert_ne!(h1, compute_hash("hello world!"));
1113 }
1114
1115 #[test]
1116 fn test_project_index_new() {
1117 let idx = ProjectIndex::new("/test");
1118 assert_eq!(idx.version, INDEX_VERSION);
1119 assert_eq!(idx.project_root, "/test");
1120 assert!(idx.files.is_empty());
1121 }
1122
1123 fn fe(path: &str, content: &str, language: &str) -> FileEntry {
1124 FileEntry {
1125 path: path.to_string(),
1126 hash: compute_hash(content),
1127 language: language.to_string(),
1128 line_count: content.lines().count(),
1129 token_count: crate::core::tokens::count_tokens(content),
1130 exports: Vec::new(),
1131 summary: extract_summary(content),
1132 }
1133 }
1134
1135 #[test]
1136 fn test_index_looks_stale_when_any_file_missing() {
1137 let td = tempdir().expect("tempdir");
1138 let root = td.path();
1139 std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");
1140
1141 let root_s = normalize_project_root(&root.to_string_lossy());
1142 let mut idx = ProjectIndex::new(&root_s);
1143 idx.files
1144 .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
1145 idx.files.insert(
1146 "missing.rs".to_string(),
1147 fe("missing.rs", "pub fn m() {}\n", "rs"),
1148 );
1149
1150 assert!(index_looks_stale(&idx, &root_s));
1151 }
1152
1153 #[test]
1154 fn test_index_looks_fresh_when_all_files_exist() {
1155 let td = tempdir().expect("tempdir");
1156 let root = td.path();
1157 std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");
1158
1159 let root_s = normalize_project_root(&root.to_string_lossy());
1160 let mut idx = ProjectIndex::new(&root_s);
1161 idx.files
1162 .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
1163
1164 assert!(!index_looks_stale(&idx, &root_s));
1165 }
1166
1167 #[test]
1168 fn test_reverse_deps() {
1169 let mut idx = ProjectIndex::new("/test");
1170 idx.edges.push(IndexEdge {
1171 from: "a.rs".to_string(),
1172 to: "b.rs".to_string(),
1173 kind: "import".to_string(),
1174 });
1175 idx.edges.push(IndexEdge {
1176 from: "c.rs".to_string(),
1177 to: "b.rs".to_string(),
1178 kind: "import".to_string(),
1179 });
1180
1181 let deps = idx.get_reverse_deps("b.rs", 1);
1182 assert_eq!(deps.len(), 2);
1183 assert!(deps.contains(&"a.rs".to_string()));
1184 assert!(deps.contains(&"c.rs".to_string()));
1185 }
1186
1187 #[test]
1188 fn test_find_symbol_range_kotlin_function() {
1189 let content = r#"
1190package com.example
1191
1192class UserService {
1193 fun greet(name: String): String {
1194 return "hi $name"
1195 }
1196}
1197"#;
1198 let sig = signatures::Signature {
1199 kind: "method",
1200 name: "greet".to_string(),
1201 params: "name:String".to_string(),
1202 return_type: "String".to_string(),
1203 is_async: false,
1204 is_exported: true,
1205 indent: 2,
1206 ..signatures::Signature::no_span()
1207 };
1208 let (start, end) = find_symbol_range(content, &sig);
1209 assert_eq!(start, 5);
1210 assert!(end >= start);
1211 }
1212
1213 #[test]
1214 fn test_signature_spans_override_fallback_range() {
1215 let sig = signatures::Signature {
1216 kind: "method",
1217 name: "release".to_string(),
1218 params: "id:String".to_string(),
1219 return_type: "Boolean".to_string(),
1220 is_async: true,
1221 is_exported: true,
1222 indent: 2,
1223 start_line: Some(42),
1224 end_line: Some(43),
1225 };
1226
1227 let (start, end) = sig
1228 .start_line
1229 .zip(sig.end_line)
1230 .unwrap_or_else(|| find_symbol_range("ignored", &sig));
1231 assert_eq!((start, end), (42, 43));
1232 }
1233
1234 #[test]
1235 fn test_parse_stale_index_version() {
1236 let json = format!(
1237 r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
1238 INDEX_VERSION - 1
1239 );
1240 let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
1241 assert_ne!(parsed.version, INDEX_VERSION);
1242 }
1243
1244 #[test]
1245 fn test_kotlin_package_name() {
1246 let content = "package com.example.feature\n\nclass UserService";
1247 assert_eq!(
1248 kotlin_package_name(content).as_deref(),
1249 Some("com.example.feature")
1250 );
1251 }
1252
1253 #[test]
1254 fn safe_scan_root_rejects_fs_root() {
1255 assert!(!is_safe_scan_root("/"));
1256 assert!(!is_safe_scan_root("\\"));
1257 #[cfg(windows)]
1258 {
1259 assert!(!is_safe_scan_root("C:\\"));
1260 assert!(!is_safe_scan_root("D:\\"));
1261 }
1262 }
1263
1264 #[test]
1265 fn safe_scan_root_rejects_home() {
1266 if let Some(home) = dirs::home_dir() {
1267 let home_str = home.to_string_lossy().to_string();
1268 assert!(
1269 !is_safe_scan_root(&home_str),
1270 "home dir should be rejected: {home_str}"
1271 );
1272 }
1273 }
1274
1275 #[test]
1276 fn safe_scan_root_accepts_project_dir() {
1277 let tmp = tempdir().unwrap();
1278 std::fs::write(
1279 tmp.path().join("Cargo.toml"),
1280 "[package]\nname = \"test\"\n",
1281 )
1282 .unwrap();
1283 let root = tmp.path().to_string_lossy().to_string();
1284 assert!(is_safe_scan_root(&root));
1285 }
1286
1287 #[test]
1288 fn safe_scan_root_rejects_broad_dir() {
1289 let tmp = tempdir().unwrap();
1290 for i in 0..55 {
1291 std::fs::create_dir(tmp.path().join(format!("dir{i}"))).unwrap();
1292 }
1293 let root = tmp.path().to_string_lossy().to_string();
1294 assert!(!is_safe_scan_root(&root));
1295 }
1296
1297 #[test]
1298 fn no_index_env_skips_scan() {
1299 let _env = crate::core::data_dir::test_env_lock();
1300 let tmp = tempdir().unwrap();
1301 std::fs::write(tmp.path().join("Cargo.toml"), "").unwrap();
1302 std::fs::write(tmp.path().join("main.rs"), "fn main() {}").unwrap();
1303
1304 std::env::set_var("LEAN_CTX_NO_INDEX", "1");
1305 let idx = scan(&tmp.path().to_string_lossy());
1306 std::env::remove_var("LEAN_CTX_NO_INDEX");
1307 assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
1308 }
1309
1310 #[test]
1311 fn stale_index_detected_by_contamination() {
1312 let root_s = "/home/testuser/myproject";
1313 let mut idx = ProjectIndex::new(root_s);
1314 idx.files.insert(
1316 "Desktop/random.py".to_string(),
1317 fe("Desktop/random.py", "x = 1\n", "py"),
1318 );
1319 idx.files.insert(
1320 "src/main.rs".to_string(),
1321 fe("src/main.rs", "fn main() {}\n", "rs"),
1322 );
1323 assert!(
1324 index_looks_stale(&idx, root_s),
1325 "Index with Desktop/ files should be considered stale"
1326 );
1327 }
1328
1329 #[test]
1330 fn stale_index_detected_by_age() {
1331 let td = tempdir().expect("tempdir");
1332 let root = td.path();
1333 std::fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();
1334
1335 let root_s = normalize_project_root(&root.to_string_lossy());
1336 let mut idx = ProjectIndex::new(&root_s);
1337 idx.files
1338 .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));
1339 let old_time = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
1341 idx.last_scan = old_time.format("%Y-%m-%d %H:%M:%S").to_string();
1342
1343 assert!(
1344 index_looks_stale(&idx, &root_s),
1345 "Index older than max_age_hours should be stale"
1346 );
1347 }
1348
1349 #[test]
1350 fn safe_scan_root_rejects_home_downloads() {
1351 if let Some(home) = dirs::home_dir() {
1352 let downloads = home.join("Downloads");
1353 if !downloads.join(".git").exists() {
1355 let downloads_str = downloads.to_string_lossy().to_string();
1356 assert!(
1357 !is_safe_scan_root(&downloads_str),
1358 "~/Downloads should be rejected without project markers"
1359 );
1360 }
1361 }
1362 }
1363}