Skip to main content

lean_ctx/core/
graph_index.rs

1// DEPRECATED: This module is being replaced by PropertyGraph (core/property_graph/).
2// New code should use GraphProvider (core/graph_provider.rs) instead of accessing
3// ProjectIndex directly. Remaining direct consumers: call_graph, graph_enricher,
4// ctx_callgraph, ctx_graph_diagram, ctx_routes, autonomy, dashboard/callgraph.
5// See OPT-14/15 plan for the full migration path.
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14
15const INDEX_VERSION: u32 = 6;
16
17pub fn is_safe_scan_root_public(path: &str) -> bool {
18    is_safe_scan_root(path)
19}
20
/// Reports whether `path` denotes a filesystem root.
///
/// A path with no parent component (such as `/`, or an empty path) is a root.
/// On Windows builds a path whose parent is the empty path is treated as a
/// root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
25
26fn is_safe_scan_root(path: &str) -> bool {
27    let normalized = normalize_project_root(path);
28    let p = Path::new(&normalized);
29
30    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
31        tracing::warn!("[graph_index: refusing to scan filesystem root]");
32        return false;
33    }
34
35    if normalized == "." || normalized.is_empty() {
36        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
37        return false;
38    }
39
40    if let Some(home) = dirs::home_dir() {
41        let home_norm = normalize_project_root(&home.to_string_lossy());
42        if normalized == home_norm {
43            use std::sync::Once;
44            static HOME_WARN: Once = Once::new();
45            HOME_WARN.call_once(|| {
46                tracing::warn!(
47                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
48                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
49                );
50            });
51            return false;
52        }
53        // Block common broad home subdirectories that are never valid project roots
54        let home_path = Path::new(&home_norm);
55        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
56            "Desktop",
57            "Documents",
58            "Downloads",
59            "Pictures",
60            "Music",
61            "Videos",
62            "Movies",
63            "Library",
64            ".local",
65            ".cache",
66            ".config",
67            "snap",
68            "Applications",
69        ];
70        for blocked in BLOCKED_HOME_SUBDIRS {
71            let blocked_path = home_path.join(blocked);
72            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
73            let has_marker = p.join(".git").exists()
74                || p.join("Cargo.toml").exists()
75                || p.join("package.json").exists();
76            if is_inside_blocked
77                && !has_marker
78                && !crate::core::pathutil::has_multi_repo_children(p)
79            {
80                tracing::warn!(
81                    "[graph_index: refusing to scan {normalized} — \
82                     inside home/{blocked} without project markers]"
83                );
84                return false;
85            }
86        }
87
88        // Block directories that are direct children of home without project markers
89        // (but allow multi-repo workspace parents like ~/code/)
90        if p.parent() == Some(home_path) {
91            let has_marker = p.join(".git").exists()
92                || p.join("Cargo.toml").exists()
93                || p.join("package.json").exists()
94                || p.join("go.mod").exists()
95                || p.join("pyproject.toml").exists();
96            if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
97                tracing::warn!(
98                    "[graph_index: refusing to scan {normalized} — \
99                     direct child of home without project markers]"
100                );
101                return false;
102            }
103        }
104    }
105
106    let breadth_markers = [
107        ".git",
108        "Cargo.toml",
109        "package.json",
110        "go.mod",
111        "pyproject.toml",
112        "setup.py",
113        "Makefile",
114        "CMakeLists.txt",
115        "pnpm-workspace.yaml",
116        ".projectile",
117        "BUILD.bazel",
118        "go.work",
119    ];
120
121    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
122        // Multi-repo workspace parent: >=2 children with project markers is always safe
123        if crate::core::pathutil::has_multi_repo_children(p) {
124            return true;
125        }
126
127        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
128            rd.filter_map(Result::ok)
129                .filter(|e| e.path().is_dir())
130                .count()
131        });
132        if child_count > 50 {
133            tracing::warn!(
134                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
135                 skipping scan to avoid indexing broad directories]"
136            );
137            return false;
138        }
139    }
140
141    true
142}
143
144#[derive(Debug, Clone, Serialize, Deserialize)]
145pub struct ProjectIndex {
146    pub version: u32,
147    pub project_root: String,
148    pub last_scan: String,
149    pub files: HashMap<String, FileEntry>,
150    pub edges: Vec<IndexEdge>,
151    pub symbols: HashMap<String, SymbolEntry>,
152}
153
154#[derive(Debug, Clone, Serialize, Deserialize)]
155pub struct FileEntry {
156    pub path: String,
157    pub hash: String,
158    pub language: String,
159    pub line_count: usize,
160    pub token_count: usize,
161    pub exports: Vec<String>,
162    pub summary: String,
163}
164
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct SymbolEntry {
167    pub file: String,
168    pub name: String,
169    pub kind: String,
170    pub start_line: usize,
171    pub end_line: usize,
172    pub is_exported: bool,
173}
174
175#[derive(Debug, Clone, Serialize, Deserialize)]
176pub struct IndexEdge {
177    pub from: String,
178    pub to: String,
179    pub kind: String,
180    #[serde(default = "default_edge_weight")]
181    pub weight: f32,
182}
183
/// Serde default for [`IndexEdge::weight`]: edges without a stored weight get `1.0`.
fn default_edge_weight() -> f32 {
    1.0_f32
}
187
188impl ProjectIndex {
189    pub fn new(project_root: &str) -> Self {
190        Self {
191            version: INDEX_VERSION,
192            project_root: normalize_project_root(project_root),
193            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
194            files: HashMap::new(),
195            edges: Vec::new(),
196            symbols: HashMap::new(),
197        }
198    }
199
200    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
201        let normalized = normalize_project_root(project_root);
202        let hash = crate::core::project_hash::hash_project_root(&normalized);
203        crate::core::data_dir::lean_ctx_data_dir()
204            .ok()
205            .map(|d| d.join("graphs").join(hash))
206    }
207
208    pub fn load(project_root: &str) -> Option<Self> {
209        let dir = Self::index_dir(project_root)?;
210
211        let zst_path = dir.join("index.json.zst");
212        if zst_path.exists() {
213            let compressed = std::fs::read(&zst_path).ok()?;
214            let data = zstd::decode_all(compressed.as_slice()).ok()?;
215            let content = String::from_utf8(data).ok()?;
216            let index: Self = serde_json::from_str(&content).ok()?;
217            if index.version != INDEX_VERSION {
218                return None;
219            }
220            return Some(index);
221        }
222
223        let json_path = dir.join("index.json");
224        let content = std::fs::read_to_string(&json_path)
225            .or_else(|_| -> std::io::Result<String> {
226                let legacy_hash = short_hash(&normalize_project_root(project_root));
227                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
228                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
229                    .join("graphs")
230                    .join(legacy_hash);
231                let legacy_path = legacy_dir.join("index.json");
232                let data = std::fs::read_to_string(&legacy_path)?;
233                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
234                    tracing::debug!("graph index migration: {e}");
235                }
236                Ok(data)
237            })
238            .ok()?;
239        let index: Self = serde_json::from_str(&content).ok()?;
240        if index.version != INDEX_VERSION {
241            return None;
242        }
243        // Auto-migrate: compress legacy JSON to zstd
244        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
245            let zst_tmp = zst_path.with_extension("zst.tmp");
246            if std::fs::write(&zst_tmp, &compressed).is_ok()
247                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
248            {
249                let _ = std::fs::remove_file(&json_path);
250            }
251        }
252        Some(index)
253    }
254
255    pub fn save(&self) -> Result<(), String> {
256        let dir = Self::index_dir(&self.project_root)
257            .ok_or_else(|| "Cannot determine data directory".to_string())?;
258        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
259        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
260        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
261        let target = dir.join("index.json.zst");
262        let tmp = target.with_extension("zst.tmp");
263        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
264        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
265        let _ = std::fs::remove_file(dir.join("index.json"));
266        Ok(())
267    }
268
269    /// Remove all cached graph indices that are older than max_age_hours.
270    /// Called on startup/update to prevent stale data from persisting.
271    pub fn purge_stale_indices() {
272        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
273            return;
274        };
275        let graphs_dir = data_dir.join("graphs");
276        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
277            return;
278        };
279        let cfg = crate::core::config::Config::load();
280        let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
281
282        for entry in entries.filter_map(Result::ok) {
283            let path = entry.path();
284            if !path.is_dir() {
285                continue;
286            }
287            let zst = path.join("index.json.zst");
288            let json = path.join("index.json");
289            let index_file = if zst.exists() {
290                &zst
291            } else if json.exists() {
292                &json
293            } else {
294                continue;
295            };
296
297            let is_old = index_file
298                .metadata()
299                .and_then(|m| m.modified())
300                .is_ok_and(|mtime| {
301                    mtime
302                        .elapsed()
303                        .is_ok_and(|age| age.as_secs() > max_age_secs)
304                });
305
306            if is_old {
307                tracing::info!("[graph_index: purging stale index at {}]", path.display());
308                let _ = std::fs::remove_dir_all(&path);
309            }
310        }
311    }
312
313    pub fn file_count(&self) -> usize {
314        self.files.len()
315    }
316
317    pub fn symbol_count(&self) -> usize {
318        self.symbols.len()
319    }
320
321    pub fn edge_count(&self) -> usize {
322        self.edges.len()
323    }
324
325    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
326        self.symbols.get(key)
327    }
328
329    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
330        let mut result = Vec::new();
331        let mut visited = std::collections::HashSet::new();
332        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
333
334        while let Some((current, d)) = queue.pop() {
335            if d > depth || visited.contains(&current) {
336                continue;
337            }
338            visited.insert(current.clone());
339            if current != path {
340                result.push(current.clone());
341            }
342
343            for edge in &self.edges {
344                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
345                    queue.push((edge.from.clone(), d + 1));
346                }
347            }
348        }
349        result
350    }
351
352    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
353        let mut result = Vec::new();
354        let mut visited = std::collections::HashSet::new();
355        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
356
357        while let Some((current, d)) = queue.pop() {
358            if d > depth || visited.contains(&current) {
359                continue;
360            }
361            visited.insert(current.clone());
362            if current != path {
363                result.push(current.clone());
364            }
365
366            for edge in &self.edges {
367                if edge.from == current && !visited.contains(&edge.to) {
368                    queue.push((edge.to.clone(), d + 1));
369                }
370                if edge.to == current && !visited.contains(&edge.from) {
371                    queue.push((edge.from.clone(), d + 1));
372                }
373            }
374        }
375        result
376    }
377}
378
379/// Load the best available graph index, trying multiple root path variants.
380/// If no valid index exists, automatically scans the project to build one.
381/// This is the primary entry point — ensures zero-config usage.
382pub fn load_or_build(project_root: &str) -> ProjectIndex {
383    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
384        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
385    }
386
387    // Prefer stable absolute roots. Using "." as a cache key is fragile because
388    // it depends on the process cwd and can accidentally load the wrong project.
389    let root_abs = if project_root.trim().is_empty() || project_root == "." {
390        std::env::current_dir().ok().map_or_else(
391            || ".".to_string(),
392            |p| normalize_project_root(&p.to_string_lossy()),
393        )
394    } else {
395        normalize_project_root(project_root)
396    };
397
398    if !is_safe_scan_root(&root_abs) {
399        return ProjectIndex::new(&root_abs);
400    }
401
402    // Try the absolute/root-normalized path first.
403    if let Some(idx) = ProjectIndex::load(&root_abs) {
404        if !idx.files.is_empty() {
405            if index_looks_stale(&idx, &root_abs) {
406                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
407                return scan(&root_abs);
408            }
409            return idx;
410        }
411    }
412
413    // CWD fallback: only use if CWD is a subdirectory of root_abs (same project)
414    if let Ok(cwd) = std::env::current_dir() {
415        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
416        if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
417            if let Some(idx) = ProjectIndex::load(&cwd_str) {
418                if !idx.files.is_empty() {
419                    if index_looks_stale(&idx, &cwd_str) {
420                        return scan(&cwd_str);
421                    }
422                    return idx;
423                }
424            }
425        }
426    }
427
428    scan(&root_abs)
429}
430
431fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
432    if index.files.is_empty() {
433        return true;
434    }
435
436    // TTL check: rebuild if index is older than configured max_age_hours
437    if let Ok(scan_time) =
438        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
439    {
440        let cfg = crate::core::config::Config::load();
441        let effective_hours = cfg.archive_max_age_hours_effective();
442        let max_age = chrono::Duration::hours(effective_hours as i64);
443        let now = chrono::Local::now().naive_local();
444        if now.signed_duration_since(scan_time) > max_age {
445            tracing::info!(
446                "[graph_index: index is older than {}h — marking stale]",
447                effective_hours
448            );
449            return true;
450        }
451    }
452
453    // Contamination check: if index contains paths from common user directories,
454    // it was built from a too-broad root and must be rebuilt
455    const CONTAMINATION_MARKERS: &[&str] = &[
456        "Desktop/",
457        "Documents/",
458        "Downloads/",
459        "Pictures/",
460        "Music/",
461        "Videos/",
462        "Movies/",
463        "Library/",
464        ".cache/",
465        "snap/",
466    ];
467    let contaminated = index.files.keys().take(200).any(|rel| {
468        CONTAMINATION_MARKERS
469            .iter()
470            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
471    });
472    if contaminated {
473        tracing::warn!(
474            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
475             marking stale to force clean rebuild]"
476        );
477        return true;
478    }
479
480    let root_path = Path::new(root_abs);
481    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
482    let sample_size = index.files.len().min(20);
483    for rel in index.files.keys().take(sample_size) {
484        let rel = rel.trim_start_matches(['/', '\\']);
485        if rel.is_empty() {
486            continue;
487        }
488        let abs = root_path.join(rel);
489        if !abs.exists() {
490            return true;
491        }
492    }
493
494    false
495}
496
497pub fn scan(project_root: &str) -> ProjectIndex {
498    scan_inner(project_root).0
499}
500
501pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
502    scan_inner(project_root)
503}
504
505fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
506    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
507        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
508        return (ProjectIndex::new(project_root), HashMap::new());
509    }
510
511    let project_root = normalize_project_root(project_root);
512
513    if !is_safe_scan_root(&project_root) {
514        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
515        return (ProjectIndex::new(&project_root), HashMap::new());
516    }
517
518    let lock_name = format!(
519        "graph-idx-{}",
520        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
521    );
522    let _lock = crate::core::startup_guard::try_acquire_lock(
523        &lock_name,
524        std::time::Duration::from_millis(800),
525        std::time::Duration::from_mins(3),
526    );
527    if _lock.is_none() {
528        tracing::info!(
529            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
530        );
531        return (
532            ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
533            HashMap::new(),
534        );
535    }
536
537    let existing = ProjectIndex::load(&project_root);
538    let mut index = ProjectIndex::new(&project_root);
539
540    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
541        if let Some(ref prev) = existing {
542            prev.files
543                .iter()
544                .map(|(path, entry)| {
545                    let syms: Vec<(String, SymbolEntry)> = prev
546                        .symbols
547                        .iter()
548                        .filter(|(_, s)| s.file == *path)
549                        .map(|(k, v)| (k.clone(), v.clone()))
550                        .collect();
551                    (path.clone(), (entry.hash.clone(), syms))
552                })
553                .collect()
554        } else {
555            HashMap::new()
556        };
557
558    let walker = ignore::WalkBuilder::new(&project_root)
559        .hidden(true)
560        .git_ignore(true)
561        .git_global(true)
562        .git_exclude(true)
563        .max_depth(Some(20))
564        .build();
565
566    let cfg = crate::core::config::Config::load();
567    let extra_ignores: Vec<glob::Pattern> = cfg
568        .extra_ignore_patterns
569        .iter()
570        .filter_map(|p| glob::Pattern::new(p).ok())
571        .collect();
572
573    let mut scanned = 0usize;
574    let mut reused = 0usize;
575    let mut entries_visited = 0usize;
576    let mut content_cache: HashMap<String, String> = HashMap::new();
577    let max_files = if cfg.graph_index_max_files == 0 {
578        usize::MAX // unlimited
579    } else {
580        cfg.graph_index_max_files as usize
581    };
582    const MAX_ENTRIES_VISITED: usize = 500_000;
583    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
584    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
585
586    for entry in walker.filter_map(std::result::Result::ok) {
587        entries_visited += 1;
588        if entries_visited > MAX_ENTRIES_VISITED {
589            tracing::warn!(
590                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
591                 runaway traversal. Indexed {} files so far.]",
592                index.files.len()
593            );
594            break;
595        }
596        if entries_visited.is_multiple_of(5000) {
597            if std::time::Instant::now() > scan_deadline {
598                tracing::warn!(
599                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
600                     saving partial index with {} files]",
601                    index.files.len()
602                );
603                break;
604            }
605            if crate::core::memory_guard::abort_requested() {
606                tracing::warn!(
607                    "[graph_index: memory pressure abort after {entries_visited} entries — \
608                     saving partial index with {} files]",
609                    index.files.len()
610                );
611                break;
612            }
613            if crate::core::memory_guard::is_under_pressure() {
614                tracing::warn!(
615                    "[graph_index: memory pressure detected at {entries_visited} entries — \
616                     stopping scan with {} files]",
617                    index.files.len()
618                );
619                break;
620            }
621            if let Some(ref g) = _lock {
622                g.touch();
623            }
624        }
625
626        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
627            continue;
628        }
629
630        if entry.path_is_symlink() {
631            continue;
632        }
633        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
634
635        if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
636            continue;
637        }
638
639        if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
640            if meta.file_type().is_symlink() || !meta.is_file() {
641                continue;
642            }
643            if meta.len() > MAX_FILE_SIZE_BYTES {
644                tracing::debug!(
645                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
646                    meta.len() as f64 / 1_048_576.0,
647                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
648                );
649                continue;
650            }
651        }
652
653        let ext = Path::new(&file_path)
654            .extension()
655            .and_then(|e| e.to_str())
656            .unwrap_or("");
657
658        if !is_indexable_ext(ext) {
659            continue;
660        }
661
662        let rel = make_relative(&file_path, &project_root);
663        if extra_ignores.iter().any(|p| p.matches(&rel)) {
664            continue;
665        }
666
667        if max_files != usize::MAX && index.files.len() >= max_files {
668            tracing::info!(
669                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
670                max_files
671            );
672            break;
673        }
674
675        let Ok(content) = std::fs::read_to_string(&file_path) else {
676            continue;
677        };
678
679        let hash = compute_hash(&content);
680        let rel_path = make_relative(&file_path, &project_root);
681
682        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
683            if *old_hash == hash {
684                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
685                    index.files.insert(rel_path.clone(), old_entry.clone());
686                    for (key, sym) in old_syms {
687                        index.symbols.insert(key.clone(), sym.clone());
688                    }
689                    content_cache.insert(rel_path, content);
690                    reused += 1;
691                    continue;
692                }
693            }
694        }
695
696        let sigs = signatures::extract_signatures(&content, ext);
697        let line_count = content.lines().count();
698        let token_count = crate::core::tokens::count_tokens(&content);
699        let summary = extract_summary(&content);
700
701        let exports: Vec<String> = sigs
702            .iter()
703            .filter(|s| s.is_exported)
704            .map(|s| s.name.clone())
705            .collect();
706
707        index.files.insert(
708            rel_path.clone(),
709            FileEntry {
710                path: rel_path.clone(),
711                hash,
712                language: ext.to_string(),
713                line_count,
714                token_count,
715                exports,
716                summary,
717            },
718        );
719
720        for sig in &sigs {
721            let (start, end) = sig
722                .start_line
723                .zip(sig.end_line)
724                .unwrap_or_else(|| find_symbol_range(&content, sig));
725            let key = format!("{}::{}", rel_path, sig.name);
726            index.symbols.insert(
727                key,
728                SymbolEntry {
729                    file: rel_path.clone(),
730                    name: sig.name.clone(),
731                    kind: sig.kind.to_string(),
732                    start_line: start,
733                    end_line: end,
734                    is_exported: sig.is_exported,
735                },
736            );
737        }
738
739        content_cache.insert(rel_path, content);
740        scanned += 1;
741    }
742
743    build_edges_cached(&mut index, &content_cache);
744
745    if let Err(e) = index.save() {
746        tracing::warn!("could not save graph index: {e}");
747    }
748
749    tracing::warn!(
750        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
751        index.file_count(),
752        scanned,
753        reused,
754        index.symbol_count(),
755        index.edge_count()
756    );
757
758    (index, content_cache)
759}
760
761fn build_edges_cached(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
762    build_edges_with_cache(index, content_cache);
763    build_implicit_edges_with_cache(index, content_cache);
764    build_cochange_edges(index);
765    build_sibling_edges(index);
766}
767
768fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
769    index.edges.clear();
770
771    if crate::core::memory_guard::abort_requested() {
772        tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
773        return;
774    }
775
776    let root = normalize_project_root(&index.project_root);
777    let root_path = Path::new(&root);
778
779    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
780    file_paths.sort();
781
782    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
783
784    const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
785
786    for (i, rel_path) in file_paths.iter().enumerate() {
787        if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
788            tracing::warn!(
789                "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
790                file_paths.len()
791            );
792            break;
793        }
794
795        let content = if let Some(cached) = content_cache.get(rel_path) {
796            std::borrow::Cow::Borrowed(cached.as_str())
797        } else {
798            let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
799            if let Ok(meta) = abs_path.metadata() {
800                if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
801                    continue;
802                }
803            }
804            match std::fs::read_to_string(&abs_path) {
805                Ok(c) => std::borrow::Cow::Owned(c),
806                Err(_) => continue,
807            }
808        };
809
810        let ext = Path::new(rel_path)
811            .extension()
812            .and_then(|e| e.to_str())
813            .unwrap_or("");
814
815        let resolve_ext = match ext {
816            "vue" | "svelte" => "ts",
817            _ => ext,
818        };
819
820        let analysis_content = if ext == "vue" || ext == "svelte" {
821            if let Some(script) = crate::core::signatures_ts::sfc::extract_script_block(&content) {
822                std::borrow::Cow::Owned(script)
823            } else {
824                content
825            }
826        } else {
827            content
828        };
829
830        let imports = crate::core::deep_queries::analyze(&analysis_content, resolve_ext).imports;
831        if imports.is_empty() {
832            continue;
833        }
834
835        let resolved =
836            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
837        for r in resolved {
838            if r.is_external {
839                continue;
840            }
841            if let Some(to) = r.resolved_path {
842                index.edges.push(IndexEdge {
843                    from: rel_path.clone(),
844                    to,
845                    kind: "import".to_string(),
846                    weight: 1.0,
847                });
848            }
849        }
850    }
851
852    index.edges.sort_by(|a, b| {
853        a.from
854            .cmp(&b.from)
855            .then_with(|| a.to.cmp(&b.to))
856            .then_with(|| a.kind.cmp(&b.kind))
857    });
858    index
859        .edges
860        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
861}
862
863// ---------------------------------------------------------------------------
864// Layer 2: Implicit Language Edges (weight 0.8)
865// ---------------------------------------------------------------------------
866
867fn build_implicit_edges_with_cache(
868    index: &mut ProjectIndex,
869    content_cache: &HashMap<String, String>,
870) {
871    let file_paths: Vec<String> = index.files.keys().cloned().collect();
872    let file_set: std::collections::HashSet<&str> = file_paths.iter().map(String::as_str).collect();
873
874    let mut new_edges: Vec<IndexEdge> = Vec::new();
875
876    for file in &file_paths {
877        let ext = Path::new(file.as_str())
878            .extension()
879            .and_then(|e| e.to_str())
880            .unwrap_or("");
881
882        match ext {
883            "rs" => {
884                collect_rust_mod_edges_cached(
885                    file,
886                    &file_set,
887                    index,
888                    &mut new_edges,
889                    content_cache,
890                );
891            }
892            "go" => collect_go_package_edges(file, &file_paths, &mut new_edges),
893            "py" => collect_python_init_edges(file, &file_paths, &mut new_edges),
894            "ts" | "js" | "tsx" | "jsx" => {
895                collect_barrel_edges_cached(file, &file_set, index, &mut new_edges, content_cache);
896            }
897            _ => {}
898        }
899    }
900
901    index.edges.extend(new_edges);
902}
903
904fn collect_rust_mod_edges_cached(
905    file: &str,
906    file_set: &std::collections::HashSet<&str>,
907    index: &ProjectIndex,
908    edges: &mut Vec<IndexEdge>,
909    content_cache: &HashMap<String, String>,
910) {
911    if !index.files.contains_key(file) {
912        return;
913    }
914
915    let content = if let Some(cached) = content_cache.get(file) {
916        std::borrow::Cow::Borrowed(cached.as_str())
917    } else {
918        let full_path = Path::new(&index.project_root).join(file);
919        match std::fs::read_to_string(&full_path) {
920            Ok(c) => std::borrow::Cow::Owned(c),
921            Err(_) => return,
922        }
923    };
924
925    let dir = Path::new(file)
926        .parent()
927        .map(|p| p.to_string_lossy().to_string());
928
929    for line in content.lines() {
930        let trimmed = line.trim();
931        if !trimmed.starts_with("mod ") || trimmed.contains('{') {
932            continue;
933        }
934        let mod_name = trimmed
935            .trim_start_matches("mod ")
936            .trim_start_matches("pub mod ")
937            .trim_start_matches("pub(crate) mod ")
938            .trim_end_matches(';')
939            .trim();
940
941        if mod_name.is_empty() || mod_name.contains(' ') {
942            continue;
943        }
944
945        let candidates = if let Some(ref d) = dir {
946            vec![
947                format!("{d}/{mod_name}.rs"),
948                format!("{d}/{mod_name}/mod.rs"),
949            ]
950        } else {
951            vec![format!("{mod_name}.rs"), format!("{mod_name}/mod.rs")]
952        };
953
954        for candidate in candidates {
955            if file_set.contains(candidate.as_str()) {
956                edges.push(IndexEdge {
957                    from: file.to_string(),
958                    to: candidate,
959                    kind: "module".to_string(),
960                    weight: 0.8,
961                });
962                break;
963            }
964        }
965    }
966}
967
968fn collect_go_package_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
969    let p = Path::new(file);
970    if p.extension().and_then(|e| e.to_str()) != Some("go") {
971        return;
972    }
973    if file.ends_with("_test.go") {
974        return;
975    }
976
977    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
978        return;
979    };
980
981    for other in file_paths {
982        if other == file {
983            continue;
984        }
985        let op = Path::new(other.as_str());
986        if op.extension().and_then(|e| e.to_str()) != Some("go") {
987            continue;
988        }
989        if other.ends_with("_test.go") {
990            continue;
991        }
992        let other_dir = op
993            .parent()
994            .map(|d| d.to_string_lossy().to_string())
995            .unwrap_or_default();
996        if other_dir == dir {
997            edges.push(IndexEdge {
998                from: file.to_string(),
999                to: other.clone(),
1000                kind: "package".to_string(),
1001                weight: 0.5,
1002            });
1003            break;
1004        }
1005    }
1006}
1007
1008fn collect_python_init_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
1009    let p = Path::new(file);
1010    if p.file_name().and_then(|n| n.to_str()) != Some("__init__.py") {
1011        return;
1012    }
1013
1014    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
1015        return;
1016    };
1017
1018    for other in file_paths {
1019        if other == file {
1020            continue;
1021        }
1022        let op = Path::new(other.as_str());
1023        if op.extension().and_then(|e| e.to_str()) != Some("py") {
1024            continue;
1025        }
1026        let other_dir = op
1027            .parent()
1028            .map(|d| d.to_string_lossy().to_string())
1029            .unwrap_or_default();
1030        if other_dir == dir {
1031            edges.push(IndexEdge {
1032                from: file.to_string(),
1033                to: other.clone(),
1034                kind: "module".to_string(),
1035                weight: 0.8,
1036            });
1037        }
1038    }
1039}
1040
1041fn collect_barrel_edges_cached(
1042    file: &str,
1043    file_set: &std::collections::HashSet<&str>,
1044    index: &ProjectIndex,
1045    edges: &mut Vec<IndexEdge>,
1046    content_cache: &HashMap<String, String>,
1047) {
1048    let basename = Path::new(file)
1049        .file_stem()
1050        .and_then(|s| s.to_str())
1051        .unwrap_or("");
1052    if basename != "index" {
1053        return;
1054    }
1055
1056    let content = if let Some(cached) = content_cache.get(file) {
1057        std::borrow::Cow::Borrowed(cached.as_str())
1058    } else {
1059        let full_path = Path::new(&index.project_root).join(file);
1060        match std::fs::read_to_string(&full_path) {
1061            Ok(c) => std::borrow::Cow::Owned(c),
1062            Err(_) => return,
1063        }
1064    };
1065
1066    let dir = Path::new(file)
1067        .parent()
1068        .map(|p| p.to_string_lossy().to_string())
1069        .unwrap_or_default();
1070
1071    let ext = Path::new(file)
1072        .extension()
1073        .and_then(|e| e.to_str())
1074        .unwrap_or("ts");
1075
1076    for line in content.lines() {
1077        let trimmed = line.trim();
1078        if !trimmed.starts_with("export") || !trimmed.contains("from") {
1079            continue;
1080        }
1081        if let Some(from_pos) = trimmed.find("from") {
1082            let after = &trimmed[from_pos + 4..];
1083            let source = after
1084                .trim()
1085                .trim_start_matches(['\'', '"'])
1086                .trim_end_matches([';', '\'', '"'])
1087                .trim_end_matches(['\'', '"']);
1088
1089            if source.starts_with("./") || source.starts_with("../") {
1090                let resolved = if dir.is_empty() {
1091                    source.trim_start_matches("./").to_string()
1092                } else {
1093                    format!("{dir}/{}", source.trim_start_matches("./"))
1094                };
1095
1096                let candidates = vec![
1097                    format!("{resolved}.{ext}"),
1098                    format!("{resolved}/index.{ext}"),
1099                    resolved.clone(),
1100                ];
1101
1102                for candidate in candidates {
1103                    if file_set.contains(candidate.as_str()) {
1104                        edges.push(IndexEdge {
1105                            from: file.to_string(),
1106                            to: candidate,
1107                            kind: "reexport".to_string(),
1108                            weight: 0.8,
1109                        });
1110                        break;
1111                    }
1112                }
1113            }
1114        }
1115    }
1116}
1117
1118// ---------------------------------------------------------------------------
1119// Layer 3: Co-Change Edges (weight 0.5)
1120// ---------------------------------------------------------------------------
1121
1122fn build_cochange_edges(index: &mut ProjectIndex) {
1123    let project_root = &index.project_root;
1124
1125    let output = match std::process::Command::new("git")
1126        .args([
1127            "log",
1128            "--name-only",
1129            "--pretty=format:---",
1130            "--since=6 months",
1131            "--",
1132            ".",
1133        ])
1134        .current_dir(project_root)
1135        .output()
1136    {
1137        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
1138        _ => return,
1139    };
1140
1141    let file_set: std::collections::HashSet<&str> =
1142        index.files.keys().map(String::as_str).collect();
1143
1144    let connected: std::collections::HashSet<&str> = index
1145        .edges
1146        .iter()
1147        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1148        .collect();
1149
1150    // Parse commits into groups of files
1151    let mut cooccurrence: HashMap<(String, String), u32> = HashMap::new();
1152    let mut current_commit: Vec<&str> = Vec::new();
1153
1154    for line in output.lines() {
1155        if line == "---" {
1156            if current_commit.len() >= 2 && current_commit.len() <= 20 {
1157                for i in 0..current_commit.len() {
1158                    for j in (i + 1)..current_commit.len() {
1159                        let a = current_commit[i];
1160                        let b = current_commit[j];
1161                        if !file_set.contains(a) || !file_set.contains(b) {
1162                            continue;
1163                        }
1164                        // Only add if at least one is currently isolated
1165                        if connected.contains(a) && connected.contains(b) {
1166                            continue;
1167                        }
1168                        let key = if a < b {
1169                            (a.to_string(), b.to_string())
1170                        } else {
1171                            (b.to_string(), a.to_string())
1172                        };
1173                        *cooccurrence.entry(key).or_insert(0) += 1;
1174                    }
1175                }
1176            }
1177            current_commit.clear();
1178        } else if !line.is_empty() {
1179            current_commit.push(line.trim());
1180        }
1181    }
1182
1183    // Filter: min 5 shared commits
1184    let mut cochange_edges: Vec<IndexEdge> = cooccurrence
1185        .into_iter()
1186        .filter(|(_, count)| *count >= 5)
1187        .map(|((from, to), _)| IndexEdge {
1188            from,
1189            to,
1190            kind: "cochange".to_string(),
1191            weight: 0.5,
1192        })
1193        .collect();
1194
1195    // Cap at 500 to prevent noise
1196    cochange_edges.sort_by(|a, b| a.from.cmp(&b.from).then_with(|| a.to.cmp(&b.to)));
1197    cochange_edges.truncate(500);
1198
1199    index.edges.extend(cochange_edges);
1200}
1201
1202// ---------------------------------------------------------------------------
1203// Layer 4: Sibling Edges (weight 0.2)
1204// ---------------------------------------------------------------------------
1205
1206fn build_sibling_edges(index: &mut ProjectIndex) {
1207    let connected: std::collections::HashSet<&str> = index
1208        .edges
1209        .iter()
1210        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1211        .collect();
1212
1213    let file_paths: Vec<String> = index.files.keys().cloned().collect();
1214    let mut new_edges: Vec<IndexEdge> = Vec::new();
1215
1216    for file in &file_paths {
1217        if connected.contains(file.as_str()) {
1218            continue;
1219        }
1220
1221        let ext = Path::new(file.as_str())
1222            .extension()
1223            .and_then(|e| e.to_str())
1224            .unwrap_or("");
1225        let dir = Path::new(file.as_str())
1226            .parent()
1227            .map(|p| p.to_string_lossy().to_string())
1228            .unwrap_or_default();
1229
1230        // Find one sibling with same extension
1231        for other in &file_paths {
1232            if other == file {
1233                continue;
1234            }
1235            let other_ext = Path::new(other.as_str())
1236                .extension()
1237                .and_then(|e| e.to_str())
1238                .unwrap_or("");
1239            let other_dir = Path::new(other.as_str())
1240                .parent()
1241                .map(|p| p.to_string_lossy().to_string())
1242                .unwrap_or_default();
1243
1244            if other_ext == ext && other_dir == dir {
1245                new_edges.push(IndexEdge {
1246                    from: file.clone(),
1247                    to: other.clone(),
1248                    kind: "sibling".to_string(),
1249                    weight: 0.2,
1250                });
1251                break; // Max 1 sibling edge per isolate
1252            }
1253        }
1254    }
1255
1256    index.edges.extend(new_edges);
1257}
1258
1259fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
1260    let lines: Vec<&str> = content.lines().collect();
1261    let mut start = 0;
1262
1263    for (i, line) in lines.iter().enumerate() {
1264        if line.contains(&sig.name) {
1265            let trimmed = line.trim();
1266            let is_def = trimmed.starts_with("fn ")
1267                || trimmed.starts_with("pub fn ")
1268                || trimmed.starts_with("pub(crate) fn ")
1269                || trimmed.starts_with("async fn ")
1270                || trimmed.starts_with("pub async fn ")
1271                || trimmed.starts_with("struct ")
1272                || trimmed.starts_with("pub struct ")
1273                || trimmed.starts_with("enum ")
1274                || trimmed.starts_with("pub enum ")
1275                || trimmed.starts_with("trait ")
1276                || trimmed.starts_with("pub trait ")
1277                || trimmed.starts_with("impl ")
1278                || trimmed.starts_with("class ")
1279                || trimmed.starts_with("export class ")
1280                || trimmed.starts_with("export function ")
1281                || trimmed.starts_with("export async function ")
1282                || trimmed.starts_with("function ")
1283                || trimmed.starts_with("async function ")
1284                || trimmed.starts_with("def ")
1285                || trimmed.starts_with("async def ")
1286                || trimmed.starts_with("func ")
1287                || trimmed.starts_with("interface ")
1288                || trimmed.starts_with("export interface ")
1289                || trimmed.starts_with("type ")
1290                || trimmed.starts_with("export type ")
1291                || trimmed.starts_with("const ")
1292                || trimmed.starts_with("export const ")
1293                || trimmed.starts_with("fun ")
1294                || trimmed.starts_with("private fun ")
1295                || trimmed.starts_with("public fun ")
1296                || trimmed.starts_with("internal fun ")
1297                || trimmed.starts_with("class ")
1298                || trimmed.starts_with("data class ")
1299                || trimmed.starts_with("sealed class ")
1300                || trimmed.starts_with("sealed interface ")
1301                || trimmed.starts_with("enum class ")
1302                || trimmed.starts_with("object ")
1303                || trimmed.starts_with("private object ")
1304                || trimmed.starts_with("interface ")
1305                || trimmed.starts_with("typealias ")
1306                || trimmed.starts_with("private typealias ");
1307            if is_def {
1308                start = i + 1;
1309                break;
1310            }
1311        }
1312    }
1313
1314    if start == 0 {
1315        return (1, lines.len().min(20));
1316    }
1317
1318    let base_indent = lines
1319        .get(start - 1)
1320        .map_or(0, |l| l.len() - l.trim_start().len());
1321
1322    let mut end = start;
1323    let mut brace_depth: i32 = 0;
1324    let mut found_open = false;
1325
1326    for (i, line) in lines.iter().enumerate().skip(start - 1) {
1327        for ch in line.chars() {
1328            if ch == '{' {
1329                brace_depth += 1;
1330                found_open = true;
1331            } else if ch == '}' {
1332                brace_depth -= 1;
1333            }
1334        }
1335
1336        end = i + 1;
1337
1338        if found_open && brace_depth <= 0 {
1339            break;
1340        }
1341
1342        if !found_open && i > start {
1343            let indent = line.len() - line.trim_start().len();
1344            if indent <= base_indent && !line.trim().is_empty() && i > start {
1345                end = i;
1346                break;
1347            }
1348        }
1349
1350        if end - start > 200 {
1351            break;
1352        }
1353    }
1354
1355    (start, end)
1356}
1357
/// Returns a one-line summary of a file: the first meaningful line among its first
/// 20 lines, trimmed and limited to 120 characters.
///
/// Blank lines and lines whose trimmed text starts with a comment marker or an
/// import-like keyword are skipped. An empty string is returned when none of the
/// first 20 lines qualifies.
fn extract_summary(content: &str) -> String {
    /// Prefixes of trimmed lines that never serve as a summary.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| line.starts_with(*prefix))
        })
        .map(|line| line.chars().take(120).collect::<String>())
        .unwrap_or_default()
}
1378
/// Hashes `content` with the standard library's `DefaultHasher` and renders the
/// 64-bit result as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(content, &mut state);
    let digest = std::hash::Hasher::finish(&state);
    format!("{digest:016x}")
}
1387
/// Hashes `input` with the standard library's `DefaultHasher` and renders the low
/// 32 bits of the result as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(input, &mut state);
    let low_bits = std::hash::Hasher::finish(&state) & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
1396
/// Recursively copies the directory `src` into `dst`, creating `dst` (and any
/// missing parents) first.
///
/// Entries for which `Path::is_dir` is true are copied recursively; everything else
/// is copied with `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered: creating a directory, listing `src`,
/// reading an individual directory entry, or copying a file. Entries copied before
/// the failure are left in place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Surface per-entry read errors instead of silently skipping the entry,
        // which would report success for an incomplete copy.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
1410
1411fn normalize_absolute_path(path: &str) -> String {
1412    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
1413        return canon.to_string_lossy().to_string();
1414    }
1415
1416    let mut normalized = path.to_string();
1417    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
1418        normalized.truncate(normalized.len() - 2);
1419    }
1420    while normalized.len() > 1
1421        && (normalized.ends_with('\\') || normalized.ends_with('/'))
1422        && !normalized.ends_with(":\\")
1423        && !normalized.ends_with(":/")
1424        && normalized != "\\"
1425        && normalized != "/"
1426    {
1427        normalized.pop();
1428    }
1429    normalized
1430}
1431
1432pub fn normalize_project_root(path: &str) -> String {
1433    normalize_absolute_path(path)
1434}
1435
1436pub fn graph_match_key(path: &str) -> String {
1437    let stripped =
1438        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1439    stripped.trim_start_matches('/').to_string()
1440}
1441
1442pub fn graph_relative_key(path: &str, root: &str) -> String {
1443    let root_norm = normalize_project_root(root);
1444    let path_norm = normalize_absolute_path(path);
1445    let root_path = Path::new(&root_norm);
1446    let path_path = Path::new(&path_norm);
1447
1448    if let Ok(rel) = path_path.strip_prefix(root_path) {
1449        let rel = rel.to_string_lossy().to_string();
1450        return rel.trim_start_matches(['/', '\\']).to_string();
1451    }
1452
1453    path.trim_start_matches(['/', '\\'])
1454        .replace('/', std::path::MAIN_SEPARATOR_STR)
1455}
1456
1457fn make_relative(path: &str, root: &str) -> String {
1458    graph_relative_key(path, root)
1459}
1460
1461fn is_indexable_ext(ext: &str) -> bool {
1462    crate::core::language_capabilities::is_indexable_ext(ext)
1463}
1464
/// Test helper: returns the name from the first line that, once trimmed, starts
/// with `package `, with surrounding whitespace and trailing `;` removed.
/// Returns `None` when no such line exists.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(rest) = raw_line.trim().strip_prefix("package ") {
            return Some(rest.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
1472
1473#[cfg(test)]
1474mod tests {
1475    use super::*;
1476    use tempfile::tempdir;
1477
1478    #[test]
1479    fn test_short_hash_deterministic() {
1480        let h1 = short_hash("/Users/test/project");
1481        let h2 = short_hash("/Users/test/project");
1482        assert_eq!(h1, h2);
1483        assert_eq!(h1.len(), 8);
1484    }
1485
1486    #[test]
1487    fn test_make_relative() {
1488        assert_eq!(
1489            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
1490            graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
1491        );
1492        assert_eq!(
1493            make_relative("src/main.rs", "/foo/bar"),
1494            graph_relative_key("src/main.rs", "/foo/bar")
1495        );
1496        assert_eq!(
1497            make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
1498            graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
1499        );
1500        assert_eq!(
1501            make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
1502            graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
1503        );
1504    }
1505
1506    #[test]
1507    fn test_normalize_project_root() {
1508        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
1509        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
1510        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
1511    }
1512
1513    #[test]
1514    fn test_graph_match_key_normalizes_windows_forms() {
1515        assert_eq!(
1516            graph_match_key(r"C:\repo\src\main.rs"),
1517            "C:/repo/src/main.rs"
1518        );
1519        assert_eq!(
1520            graph_match_key(r"\\?\C:\repo\src\main.rs"),
1521            "C:/repo/src/main.rs"
1522        );
1523        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
1524    }
1525
1526    #[test]
1527    fn test_extract_summary() {
1528        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
1529        let summary = extract_summary(content);
1530        assert_eq!(summary, "pub fn main() {");
1531    }
1532
1533    #[test]
1534    fn test_compute_hash_deterministic() {
1535        let h1 = compute_hash("hello world");
1536        let h2 = compute_hash("hello world");
1537        assert_eq!(h1, h2);
1538        assert_ne!(h1, compute_hash("hello world!"));
1539    }
1540
1541    #[test]
1542    fn test_project_index_new() {
1543        let idx = ProjectIndex::new("/test");
1544        assert_eq!(idx.version, INDEX_VERSION);
1545        assert_eq!(idx.project_root, "/test");
1546        assert!(idx.files.is_empty());
1547    }
1548
1549    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
1550        FileEntry {
1551            path: path.to_string(),
1552            hash: compute_hash(content),
1553            language: language.to_string(),
1554            line_count: content.lines().count(),
1555            token_count: crate::core::tokens::count_tokens(content),
1556            exports: Vec::new(),
1557            summary: extract_summary(content),
1558        }
1559    }
1560
1561    #[test]
1562    fn test_index_looks_stale_when_any_file_missing() {
1563        let td = tempdir().expect("tempdir");
1564        let root = td.path();
1565        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");
1566
1567        let root_s = normalize_project_root(&root.to_string_lossy());
1568        let mut idx = ProjectIndex::new(&root_s);
1569        idx.files
1570            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
1571        idx.files.insert(
1572            "missing.rs".to_string(),
1573            fe("missing.rs", "pub fn m() {}\n", "rs"),
1574        );
1575
1576        assert!(index_looks_stale(&idx, &root_s));
1577    }
1578
1579    #[test]
1580    fn test_index_looks_fresh_when_all_files_exist() {
1581        let td = tempdir().expect("tempdir");
1582        let root = td.path();
1583        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");
1584
1585        let root_s = normalize_project_root(&root.to_string_lossy());
1586        let mut idx = ProjectIndex::new(&root_s);
1587        idx.files
1588            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
1589
1590        assert!(!index_looks_stale(&idx, &root_s));
1591    }
1592
1593    #[test]
1594    fn test_reverse_deps() {
1595        let mut idx = ProjectIndex::new("/test");
1596        idx.edges.push(IndexEdge {
1597            from: "a.rs".to_string(),
1598            to: "b.rs".to_string(),
1599            kind: "import".to_string(),
1600            weight: 1.0,
1601        });
1602        idx.edges.push(IndexEdge {
1603            from: "c.rs".to_string(),
1604            to: "b.rs".to_string(),
1605            kind: "import".to_string(),
1606            weight: 1.0,
1607        });
1608
1609        let deps = idx.get_reverse_deps("b.rs", 1);
1610        assert_eq!(deps.len(), 2);
1611        assert!(deps.contains(&"a.rs".to_string()));
1612        assert!(deps.contains(&"c.rs".to_string()));
1613    }
1614
1615    #[test]
1616    fn test_find_symbol_range_kotlin_function() {
1617        let content = r#"
1618package com.example
1619
1620class UserService {
1621    fun greet(name: String): String {
1622        return "hi $name"
1623    }
1624}
1625"#;
1626        let sig = signatures::Signature {
1627            kind: "method",
1628            name: "greet".to_string(),
1629            params: "name:String".to_string(),
1630            return_type: "String".to_string(),
1631            is_async: false,
1632            is_exported: true,
1633            indent: 2,
1634            ..signatures::Signature::no_span()
1635        };
1636        let (start, end) = find_symbol_range(content, &sig);
1637        assert_eq!(start, 5);
1638        assert!(end >= start);
1639    }
1640
1641    #[test]
1642    fn test_signature_spans_override_fallback_range() {
1643        let sig = signatures::Signature {
1644            kind: "method",
1645            name: "release".to_string(),
1646            params: "id:String".to_string(),
1647            return_type: "Boolean".to_string(),
1648            is_async: true,
1649            is_exported: true,
1650            indent: 2,
1651            start_line: Some(42),
1652            end_line: Some(43),
1653        };
1654
1655        let (start, end) = sig
1656            .start_line
1657            .zip(sig.end_line)
1658            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
1659        assert_eq!((start, end), (42, 43));
1660    }
1661
1662    #[test]
1663    fn test_parse_stale_index_version() {
1664        let json = format!(
1665            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
1666            INDEX_VERSION - 1
1667        );
1668        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
1669        assert_ne!(parsed.version, INDEX_VERSION);
1670    }
1671
1672    #[test]
1673    fn test_kotlin_package_name() {
1674        let content = "package com.example.feature\n\nclass UserService";
1675        assert_eq!(
1676            kotlin_package_name(content).as_deref(),
1677            Some("com.example.feature")
1678        );
1679    }
1680
1681    #[test]
1682    fn safe_scan_root_rejects_fs_root() {
1683        assert!(!is_safe_scan_root("/"));
1684        assert!(!is_safe_scan_root("\\"));
1685        #[cfg(windows)]
1686        {
1687            assert!(!is_safe_scan_root("C:\\"));
1688            assert!(!is_safe_scan_root("D:\\"));
1689        }
1690    }
1691
1692    #[test]
1693    fn safe_scan_root_rejects_home() {
1694        if let Some(home) = dirs::home_dir() {
1695            let home_str = home.to_string_lossy().to_string();
1696            assert!(
1697                !is_safe_scan_root(&home_str),
1698                "home dir should be rejected: {home_str}"
1699            );
1700        }
1701    }
1702
1703    #[test]
1704    fn safe_scan_root_accepts_project_dir() {
1705        let tmp = tempdir().unwrap();
1706        std::fs::write(
1707            tmp.path().join("Cargo.toml"),
1708            "[package]\nname = \"test\"\n",
1709        )
1710        .unwrap();
1711        let root = tmp.path().to_string_lossy().to_string();
1712        assert!(is_safe_scan_root(&root));
1713    }
1714
1715    #[test]
1716    fn safe_scan_root_rejects_broad_dir() {
1717        let tmp = tempdir().unwrap();
1718        for i in 0..55 {
1719            std::fs::create_dir(tmp.path().join(format!("dir{i}"))).unwrap();
1720        }
1721        let root = tmp.path().to_string_lossy().to_string();
1722        assert!(!is_safe_scan_root(&root));
1723    }
1724
1725    #[test]
1726    fn no_index_env_skips_scan() {
1727        let _env = crate::core::data_dir::test_env_lock();
1728        let tmp = tempdir().unwrap();
1729        std::fs::write(tmp.path().join("Cargo.toml"), "").unwrap();
1730        std::fs::write(tmp.path().join("main.rs"), "fn main() {}").unwrap();
1731
1732        std::env::set_var("LEAN_CTX_NO_INDEX", "1");
1733        let idx = scan(&tmp.path().to_string_lossy());
1734        std::env::remove_var("LEAN_CTX_NO_INDEX");
1735        assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
1736    }
1737
1738    #[test]
1739    fn stale_index_detected_by_contamination() {
1740        let root_s = "/home/testuser/myproject";
1741        let mut idx = ProjectIndex::new(root_s);
1742        // Simulate a contaminated index with Desktop files
1743        idx.files.insert(
1744            "Desktop/random.py".to_string(),
1745            fe("Desktop/random.py", "x = 1\n", "py"),
1746        );
1747        idx.files.insert(
1748            "src/main.rs".to_string(),
1749            fe("src/main.rs", "fn main() {}\n", "rs"),
1750        );
1751        assert!(
1752            index_looks_stale(&idx, root_s),
1753            "Index with Desktop/ files should be considered stale"
1754        );
1755    }
1756
1757    #[test]
1758    fn stale_index_detected_by_age() {
1759        let td = tempdir().expect("tempdir");
1760        let root = td.path();
1761        std::fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();
1762
1763        let root_s = normalize_project_root(&root.to_string_lossy());
1764        let mut idx = ProjectIndex::new(&root_s);
1765        idx.files
1766            .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));
1767        // Set last_scan to 100 hours ago (default max_age_hours is 48)
1768        let old_time = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
1769        idx.last_scan = old_time.format("%Y-%m-%d %H:%M:%S").to_string();
1770
1771        assert!(
1772            index_looks_stale(&idx, &root_s),
1773            "Index older than max_age_hours should be stale"
1774        );
1775    }
1776
1777    #[test]
1778    fn safe_scan_root_rejects_home_downloads() {
1779        if let Some(home) = dirs::home_dir() {
1780            let downloads = home.join("Downloads");
1781            // Only test if Downloads doesn't contain a .git (unlikely but possible)
1782            if !downloads.join(".git").exists() {
1783                let downloads_str = downloads.to_string_lossy().to_string();
1784                assert!(
1785                    !is_safe_scan_root(&downloads_str),
1786                    "~/Downloads should be rejected without project markers"
1787                );
1788            }
1789        }
1790    }
1791
1792    #[test]
1793    fn safe_scan_root_accepts_multi_repo_parent() {
1794        let tmp = tempdir().unwrap();
1795        let parent = tmp.path().join("code");
1796        std::fs::create_dir_all(&parent).unwrap();
1797
1798        // Create 2 child repos
1799        std::fs::create_dir_all(parent.join("repo-a").join(".git")).unwrap();
1800        std::fs::create_dir_all(parent.join("repo-b").join(".git")).unwrap();
1801
1802        // Add >50 empty subdirs to trigger the breadth guard
1803        for i in 0..55 {
1804            std::fs::create_dir(parent.join(format!("dir-{i}"))).unwrap();
1805        }
1806
1807        let parent_str = parent.to_string_lossy().to_string();
1808        assert!(
1809            is_safe_scan_root(&parent_str),
1810            "Multi-repo parent with >50 subdirs should be accepted"
1811        );
1812    }
1813
1814    #[test]
1815    fn safe_scan_root_rejects_broad_dir_without_repos() {
1816        let tmp = tempdir().unwrap();
1817        let broad = tmp.path().join("broad");
1818        std::fs::create_dir_all(&broad).unwrap();
1819
1820        // Create >50 subdirs but no project markers
1821        for i in 0..55 {
1822            std::fs::create_dir(broad.join(format!("dir-{i}"))).unwrap();
1823        }
1824
1825        let broad_str = broad.to_string_lossy().to_string();
1826        assert!(
1827            !is_safe_scan_root(&broad_str),
1828            "Broad dir without project markers should be rejected"
1829        );
1830    }
1831}