Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
/// Schema version stamped into every [`ProjectIndex`]. `ProjectIndex::load`
/// rejects any cached index whose `version` field differs from this value.
const INDEX_VERSION: u32 = 6;
10
11pub fn is_safe_scan_root_public(path: &str) -> bool {
12    is_safe_scan_root(path)
13}
14
/// Reports whether `path` has no parent directory.
///
/// A path without a parent (for example `/`, or the empty string) counts as a
/// root. On Windows builds a path whose parent is the empty path is treated as
/// a root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
19
20fn is_safe_scan_root(path: &str) -> bool {
21    let normalized = normalize_project_root(path);
22    let p = Path::new(&normalized);
23
24    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
25        tracing::warn!("[graph_index: refusing to scan filesystem root]");
26        return false;
27    }
28
29    if normalized == "." || normalized.is_empty() {
30        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
31        return false;
32    }
33
34    if let Some(home) = dirs::home_dir() {
35        let home_norm = normalize_project_root(&home.to_string_lossy());
36        if normalized == home_norm {
37            use std::sync::Once;
38            static HOME_WARN: Once = Once::new();
39            HOME_WARN.call_once(|| {
40                tracing::warn!(
41                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
42                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
43                );
44            });
45            return false;
46        }
47        // Block common broad home subdirectories that are never valid project roots
48        let home_path = Path::new(&home_norm);
49        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
50            "Desktop",
51            "Documents",
52            "Downloads",
53            "Pictures",
54            "Music",
55            "Videos",
56            "Movies",
57            "Library",
58            ".local",
59            ".cache",
60            ".config",
61            "snap",
62            "Applications",
63        ];
64        for blocked in BLOCKED_HOME_SUBDIRS {
65            let blocked_path = home_path.join(blocked);
66            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
67            let has_project_marker = p.join(".git").exists()
68                || p.join("Cargo.toml").exists()
69                || p.join("package.json").exists();
70            if is_inside_blocked && !has_project_marker {
71                tracing::warn!(
72                    "[graph_index: refusing to scan {normalized} — \
73                     inside home/{blocked} without project markers]"
74                );
75                return false;
76            }
77        }
78
79        // Block directories that are direct children of home without project markers
80        if p.parent() == Some(home_path) {
81            let has_marker = p.join(".git").exists()
82                || p.join("Cargo.toml").exists()
83                || p.join("package.json").exists()
84                || p.join("go.mod").exists()
85                || p.join("pyproject.toml").exists();
86            if !has_marker {
87                tracing::warn!(
88                    "[graph_index: refusing to scan {normalized} — \
89                     direct child of home without project markers]"
90                );
91                return false;
92            }
93        }
94    }
95
96    let breadth_markers = [
97        ".git",
98        "Cargo.toml",
99        "package.json",
100        "go.mod",
101        "pyproject.toml",
102        "setup.py",
103        "Makefile",
104        "CMakeLists.txt",
105        "pnpm-workspace.yaml",
106        ".projectile",
107        "BUILD.bazel",
108        "go.work",
109    ];
110
111    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
112        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
113            rd.filter_map(Result::ok)
114                .filter(|e| e.path().is_dir())
115                .count()
116        });
117        if child_count > 50 {
118            tracing::warn!(
119                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
120                 skipping scan to avoid indexing broad directories]"
121            );
122            return false;
123        }
124    }
125
126    true
127}
128
129#[derive(Debug, Serialize, Deserialize)]
130pub struct ProjectIndex {
131    pub version: u32,
132    pub project_root: String,
133    pub last_scan: String,
134    pub files: HashMap<String, FileEntry>,
135    pub edges: Vec<IndexEdge>,
136    pub symbols: HashMap<String, SymbolEntry>,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct FileEntry {
141    pub path: String,
142    pub hash: String,
143    pub language: String,
144    pub line_count: usize,
145    pub token_count: usize,
146    pub exports: Vec<String>,
147    pub summary: String,
148}
149
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub struct SymbolEntry {
152    pub file: String,
153    pub name: String,
154    pub kind: String,
155    pub start_line: usize,
156    pub end_line: usize,
157    pub is_exported: bool,
158}
159
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct IndexEdge {
162    pub from: String,
163    pub to: String,
164    pub kind: String,
165    #[serde(default = "default_edge_weight")]
166    pub weight: f32,
167}
168
/// Weight assigned to an edge whose serialized form carries no `weight` field.
fn default_edge_weight() -> f32 {
    const DEFAULT_WEIGHT: f32 = 1.0;
    DEFAULT_WEIGHT
}
172
173impl ProjectIndex {
174    pub fn new(project_root: &str) -> Self {
175        Self {
176            version: INDEX_VERSION,
177            project_root: normalize_project_root(project_root),
178            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
179            files: HashMap::new(),
180            edges: Vec::new(),
181            symbols: HashMap::new(),
182        }
183    }
184
185    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
186        let normalized = normalize_project_root(project_root);
187        let hash = crate::core::project_hash::hash_project_root(&normalized);
188        crate::core::data_dir::lean_ctx_data_dir()
189            .ok()
190            .map(|d| d.join("graphs").join(hash))
191    }
192
193    pub fn load(project_root: &str) -> Option<Self> {
194        let dir = Self::index_dir(project_root)?;
195
196        let zst_path = dir.join("index.json.zst");
197        if zst_path.exists() {
198            let compressed = std::fs::read(&zst_path).ok()?;
199            let data = zstd::decode_all(compressed.as_slice()).ok()?;
200            let content = String::from_utf8(data).ok()?;
201            let index: Self = serde_json::from_str(&content).ok()?;
202            if index.version != INDEX_VERSION {
203                return None;
204            }
205            return Some(index);
206        }
207
208        let json_path = dir.join("index.json");
209        let content = std::fs::read_to_string(&json_path)
210            .or_else(|_| -> std::io::Result<String> {
211                let legacy_hash = short_hash(&normalize_project_root(project_root));
212                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
213                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
214                    .join("graphs")
215                    .join(legacy_hash);
216                let legacy_path = legacy_dir.join("index.json");
217                let data = std::fs::read_to_string(&legacy_path)?;
218                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
219                    tracing::debug!("graph index migration: {e}");
220                }
221                Ok(data)
222            })
223            .ok()?;
224        let index: Self = serde_json::from_str(&content).ok()?;
225        if index.version != INDEX_VERSION {
226            return None;
227        }
228        // Auto-migrate: compress legacy JSON to zstd
229        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
230            let zst_tmp = zst_path.with_extension("zst.tmp");
231            if std::fs::write(&zst_tmp, &compressed).is_ok()
232                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
233            {
234                let _ = std::fs::remove_file(&json_path);
235            }
236        }
237        Some(index)
238    }
239
240    pub fn save(&self) -> Result<(), String> {
241        let dir = Self::index_dir(&self.project_root)
242            .ok_or_else(|| "Cannot determine data directory".to_string())?;
243        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
244        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
245        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
246        let target = dir.join("index.json.zst");
247        let tmp = target.with_extension("zst.tmp");
248        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
249        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
250        let _ = std::fs::remove_file(dir.join("index.json"));
251        Ok(())
252    }
253
254    /// Remove all cached graph indices that are older than max_age_hours.
255    /// Called on startup/update to prevent stale data from persisting.
256    pub fn purge_stale_indices() {
257        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
258            return;
259        };
260        let graphs_dir = data_dir.join("graphs");
261        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
262            return;
263        };
264        let cfg = crate::core::config::Config::load();
265        let max_age_secs = cfg.archive.max_age_hours * 3600;
266
267        for entry in entries.filter_map(Result::ok) {
268            let path = entry.path();
269            if !path.is_dir() {
270                continue;
271            }
272            let zst = path.join("index.json.zst");
273            let json = path.join("index.json");
274            let index_file = if zst.exists() {
275                &zst
276            } else if json.exists() {
277                &json
278            } else {
279                continue;
280            };
281
282            let is_old = index_file
283                .metadata()
284                .and_then(|m| m.modified())
285                .is_ok_and(|mtime| {
286                    mtime
287                        .elapsed()
288                        .is_ok_and(|age| age.as_secs() > max_age_secs)
289                });
290
291            if is_old {
292                tracing::info!("[graph_index: purging stale index at {}]", path.display());
293                let _ = std::fs::remove_dir_all(&path);
294            }
295        }
296    }
297
298    pub fn file_count(&self) -> usize {
299        self.files.len()
300    }
301
302    pub fn symbol_count(&self) -> usize {
303        self.symbols.len()
304    }
305
306    pub fn edge_count(&self) -> usize {
307        self.edges.len()
308    }
309
310    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
311        self.symbols.get(key)
312    }
313
314    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
315        let mut result = Vec::new();
316        let mut visited = std::collections::HashSet::new();
317        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
318
319        while let Some((current, d)) = queue.pop() {
320            if d > depth || visited.contains(&current) {
321                continue;
322            }
323            visited.insert(current.clone());
324            if current != path {
325                result.push(current.clone());
326            }
327
328            for edge in &self.edges {
329                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
330                    queue.push((edge.from.clone(), d + 1));
331                }
332            }
333        }
334        result
335    }
336
337    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
338        let mut result = Vec::new();
339        let mut visited = std::collections::HashSet::new();
340        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
341
342        while let Some((current, d)) = queue.pop() {
343            if d > depth || visited.contains(&current) {
344                continue;
345            }
346            visited.insert(current.clone());
347            if current != path {
348                result.push(current.clone());
349            }
350
351            for edge in &self.edges {
352                if edge.from == current && !visited.contains(&edge.to) {
353                    queue.push((edge.to.clone(), d + 1));
354                }
355                if edge.to == current && !visited.contains(&edge.from) {
356                    queue.push((edge.from.clone(), d + 1));
357                }
358            }
359        }
360        result
361    }
362}
363
364/// Load the best available graph index, trying multiple root path variants.
365/// If no valid index exists, automatically scans the project to build one.
366/// This is the primary entry point — ensures zero-config usage.
367pub fn load_or_build(project_root: &str) -> ProjectIndex {
368    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
369        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
370    }
371
372    // Prefer stable absolute roots. Using "." as a cache key is fragile because
373    // it depends on the process cwd and can accidentally load the wrong project.
374    let root_abs = if project_root.trim().is_empty() || project_root == "." {
375        std::env::current_dir().ok().map_or_else(
376            || ".".to_string(),
377            |p| normalize_project_root(&p.to_string_lossy()),
378        )
379    } else {
380        normalize_project_root(project_root)
381    };
382
383    if !is_safe_scan_root(&root_abs) {
384        return ProjectIndex::new(&root_abs);
385    }
386
387    // Try the absolute/root-normalized path first.
388    if let Some(idx) = ProjectIndex::load(&root_abs) {
389        if !idx.files.is_empty() {
390            if index_looks_stale(&idx, &root_abs) {
391                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
392                return scan(&root_abs);
393            }
394            return idx;
395        }
396    }
397
398    // Legacy: older builds may have cached the index under ".". Only accept it if it
399    // actually refers to the current cwd project, then migrate it to `root_abs`.
400    if let Some(idx) = ProjectIndex::load(".") {
401        if !idx.files.is_empty() {
402            let mut migrated = idx;
403            migrated.project_root.clone_from(&root_abs);
404            let _ = migrated.save();
405            if index_looks_stale(&migrated, &root_abs) {
406                tracing::warn!(
407                    "[graph_index: stale legacy index detected for {root_abs}; rebuilding]"
408                );
409                return scan(&root_abs);
410            }
411            return migrated;
412        }
413    }
414
415    // Try absolute cwd
416    if let Ok(cwd) = std::env::current_dir() {
417        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
418        if cwd_str != root_abs {
419            if let Some(idx) = ProjectIndex::load(&cwd_str) {
420                if !idx.files.is_empty() {
421                    if index_looks_stale(&idx, &cwd_str) {
422                        tracing::warn!(
423                            "[graph_index: stale index detected for {cwd_str}; rebuilding]"
424                        );
425                        return scan(&cwd_str);
426                    }
427                    return idx;
428                }
429            }
430        }
431    }
432
433    // No existing index found anywhere — auto-build
434    scan(&root_abs)
435}
436
437fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
438    if index.files.is_empty() {
439        return true;
440    }
441
442    // TTL check: rebuild if index is older than configured max_age_hours
443    if let Ok(scan_time) =
444        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
445    {
446        let cfg = crate::core::config::Config::load();
447        let max_age = chrono::Duration::hours(cfg.archive.max_age_hours as i64);
448        let now = chrono::Local::now().naive_local();
449        if now.signed_duration_since(scan_time) > max_age {
450            tracing::info!(
451                "[graph_index: index is older than {}h — marking stale]",
452                cfg.archive.max_age_hours
453            );
454            return true;
455        }
456    }
457
458    // Contamination check: if index contains paths from common user directories,
459    // it was built from a too-broad root and must be rebuilt
460    const CONTAMINATION_MARKERS: &[&str] = &[
461        "Desktop/",
462        "Documents/",
463        "Downloads/",
464        "Pictures/",
465        "Music/",
466        "Videos/",
467        "Movies/",
468        "Library/",
469        ".cache/",
470        "snap/",
471    ];
472    let contaminated = index.files.keys().take(200).any(|rel| {
473        CONTAMINATION_MARKERS
474            .iter()
475            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
476    });
477    if contaminated {
478        tracing::warn!(
479            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
480             marking stale to force clean rebuild]"
481        );
482        return true;
483    }
484
485    let root_path = Path::new(root_abs);
486    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
487    let sample_size = index.files.len().min(20);
488    for rel in index.files.keys().take(sample_size) {
489        let rel = rel.trim_start_matches(['/', '\\']);
490        if rel.is_empty() {
491            continue;
492        }
493        let abs = root_path.join(rel);
494        if !abs.exists() {
495            return true;
496        }
497    }
498
499    false
500}
501
502pub fn scan(project_root: &str) -> ProjectIndex {
503    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
504        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
505        return ProjectIndex::new(project_root);
506    }
507
508    let project_root = normalize_project_root(project_root);
509
510    if !is_safe_scan_root(&project_root) {
511        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
512        return ProjectIndex::new(&project_root);
513    }
514
515    let lock_name = format!(
516        "graph-idx-{}",
517        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
518    );
519    let _lock = crate::core::startup_guard::try_acquire_lock(
520        &lock_name,
521        std::time::Duration::from_millis(800),
522        std::time::Duration::from_mins(3),
523    );
524    if _lock.is_none() {
525        tracing::info!(
526            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
527        );
528        return ProjectIndex::load(&project_root)
529            .unwrap_or_else(|| ProjectIndex::new(&project_root));
530    }
531
532    let existing = ProjectIndex::load(&project_root);
533    let mut index = ProjectIndex::new(&project_root);
534
535    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
536        if let Some(ref prev) = existing {
537            prev.files
538                .iter()
539                .map(|(path, entry)| {
540                    let syms: Vec<(String, SymbolEntry)> = prev
541                        .symbols
542                        .iter()
543                        .filter(|(_, s)| s.file == *path)
544                        .map(|(k, v)| (k.clone(), v.clone()))
545                        .collect();
546                    (path.clone(), (entry.hash.clone(), syms))
547                })
548                .collect()
549        } else {
550            HashMap::new()
551        };
552
553    let walker = ignore::WalkBuilder::new(&project_root)
554        .hidden(true)
555        .git_ignore(true)
556        .git_global(true)
557        .git_exclude(true)
558        .max_depth(Some(20))
559        .build();
560
561    let cfg = crate::core::config::Config::load();
562    let extra_ignores: Vec<glob::Pattern> = cfg
563        .extra_ignore_patterns
564        .iter()
565        .filter_map(|p| glob::Pattern::new(p).ok())
566        .collect();
567
568    let mut scanned = 0usize;
569    let mut reused = 0usize;
570    let mut entries_visited = 0usize;
571    let max_files = if cfg.graph_index_max_files == 0 {
572        usize::MAX // unlimited
573    } else {
574        cfg.graph_index_max_files as usize
575    };
576    const MAX_ENTRIES_VISITED: usize = 500_000;
577    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
578    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
579
580    for entry in walker.filter_map(std::result::Result::ok) {
581        entries_visited += 1;
582        if entries_visited > MAX_ENTRIES_VISITED {
583            tracing::warn!(
584                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
585                 runaway traversal. Indexed {} files so far.]",
586                index.files.len()
587            );
588            break;
589        }
590        if entries_visited.is_multiple_of(5000) {
591            if std::time::Instant::now() > scan_deadline {
592                tracing::warn!(
593                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
594                     saving partial index with {} files]",
595                    index.files.len()
596                );
597                break;
598            }
599            if crate::core::memory_guard::abort_requested() {
600                tracing::warn!(
601                    "[graph_index: memory pressure abort after {entries_visited} entries — \
602                     saving partial index with {} files]",
603                    index.files.len()
604                );
605                break;
606            }
607            if crate::core::memory_guard::is_under_pressure() {
608                tracing::warn!(
609                    "[graph_index: memory pressure detected at {entries_visited} entries — \
610                     stopping scan with {} files]",
611                    index.files.len()
612                );
613                break;
614            }
615            if let Some(ref g) = _lock {
616                g.touch();
617            }
618        }
619
620        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
621            continue;
622        }
623        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
624
625        // Prevent indexing files that escaped the project root (symlinks, mount points)
626        if !file_path.starts_with(&project_root) {
627            continue;
628        }
629
630        // Skip special files (devices, FIFOs, sockets) that can stream infinite data
631        if let Ok(meta) = std::fs::metadata(&file_path) {
632            if !meta.is_file() {
633                continue;
634            }
635            if meta.len() > MAX_FILE_SIZE_BYTES {
636                tracing::debug!(
637                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
638                    meta.len() as f64 / 1_048_576.0,
639                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
640                );
641                continue;
642            }
643        }
644
645        let ext = Path::new(&file_path)
646            .extension()
647            .and_then(|e| e.to_str())
648            .unwrap_or("");
649
650        if !is_indexable_ext(ext) {
651            continue;
652        }
653
654        let rel = make_relative(&file_path, &project_root);
655        if extra_ignores.iter().any(|p| p.matches(&rel)) {
656            continue;
657        }
658
659        if max_files != usize::MAX && index.files.len() >= max_files {
660            tracing::info!(
661                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
662                max_files
663            );
664            break;
665        }
666
667        let Ok(content) = std::fs::read_to_string(&file_path) else {
668            continue;
669        };
670
671        let hash = compute_hash(&content);
672        let rel_path = make_relative(&file_path, &project_root);
673
674        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
675            if *old_hash == hash {
676                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
677                    index.files.insert(rel_path.clone(), old_entry.clone());
678                    for (key, sym) in old_syms {
679                        index.symbols.insert(key.clone(), sym.clone());
680                    }
681                    reused += 1;
682                    continue;
683                }
684            }
685        }
686
687        let sigs = signatures::extract_signatures(&content, ext);
688        let line_count = content.lines().count();
689        let token_count = crate::core::tokens::count_tokens(&content);
690        let summary = extract_summary(&content);
691
692        let exports: Vec<String> = sigs
693            .iter()
694            .filter(|s| s.is_exported)
695            .map(|s| s.name.clone())
696            .collect();
697
698        index.files.insert(
699            rel_path.clone(),
700            FileEntry {
701                path: rel_path.clone(),
702                hash,
703                language: ext.to_string(),
704                line_count,
705                token_count,
706                exports,
707                summary,
708            },
709        );
710
711        for sig in &sigs {
712            let (start, end) = sig
713                .start_line
714                .zip(sig.end_line)
715                .unwrap_or_else(|| find_symbol_range(&content, sig));
716            let key = format!("{}::{}", rel_path, sig.name);
717            index.symbols.insert(
718                key,
719                SymbolEntry {
720                    file: rel_path.clone(),
721                    name: sig.name.clone(),
722                    kind: sig.kind.to_string(),
723                    start_line: start,
724                    end_line: end,
725                    is_exported: sig.is_exported,
726                },
727            );
728        }
729
730        scanned += 1;
731    }
732
733    build_edges(&mut index);
734
735    if let Err(e) = index.save() {
736        tracing::warn!("could not save graph index: {e}");
737    }
738
739    tracing::warn!(
740        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
741        index.file_count(),
742        scanned,
743        reused,
744        index.symbol_count(),
745        index.edge_count()
746    );
747
748    index
749}
750
751fn build_edges(index: &mut ProjectIndex) {
752    build_edges_with_cache(index, &HashMap::new());
753    build_implicit_edges(index);
754    build_cochange_edges(index);
755    build_sibling_edges(index);
756}
757
758fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
759    index.edges.clear();
760
761    if crate::core::memory_guard::abort_requested() {
762        tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
763        return;
764    }
765
766    let root = normalize_project_root(&index.project_root);
767    let root_path = Path::new(&root);
768
769    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
770    file_paths.sort();
771
772    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
773
774    const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
775
776    for (i, rel_path) in file_paths.iter().enumerate() {
777        if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
778            tracing::warn!(
779                "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
780                file_paths.len()
781            );
782            break;
783        }
784
785        let content = if let Some(cached) = content_cache.get(rel_path) {
786            std::borrow::Cow::Borrowed(cached.as_str())
787        } else {
788            let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
789            if let Ok(meta) = abs_path.metadata() {
790                if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
791                    continue;
792                }
793            }
794            match std::fs::read_to_string(&abs_path) {
795                Ok(c) => std::borrow::Cow::Owned(c),
796                Err(_) => continue,
797            }
798        };
799
800        let ext = Path::new(rel_path)
801            .extension()
802            .and_then(|e| e.to_str())
803            .unwrap_or("");
804
805        let resolve_ext = match ext {
806            "vue" | "svelte" => "ts",
807            _ => ext,
808        };
809
810        let analysis_content = if ext == "vue" || ext == "svelte" {
811            if let Some(script) = crate::core::signatures_ts::sfc::extract_script_block(&content) {
812                std::borrow::Cow::Owned(script)
813            } else {
814                content
815            }
816        } else {
817            content
818        };
819
820        let imports = crate::core::deep_queries::analyze(&analysis_content, resolve_ext).imports;
821        if imports.is_empty() {
822            continue;
823        }
824
825        let resolved =
826            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
827        for r in resolved {
828            if r.is_external {
829                continue;
830            }
831            if let Some(to) = r.resolved_path {
832                index.edges.push(IndexEdge {
833                    from: rel_path.clone(),
834                    to,
835                    kind: "import".to_string(),
836                    weight: 1.0,
837                });
838            }
839        }
840    }
841
842    index.edges.sort_by(|a, b| {
843        a.from
844            .cmp(&b.from)
845            .then_with(|| a.to.cmp(&b.to))
846            .then_with(|| a.kind.cmp(&b.kind))
847    });
848    index
849        .edges
850        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
851}
852
853// ---------------------------------------------------------------------------
854// Layer 2: Implicit Language Edges (weight 0.8)
855// ---------------------------------------------------------------------------
856
857fn build_implicit_edges(index: &mut ProjectIndex) {
858    let file_paths: Vec<String> = index.files.keys().cloned().collect();
859    let file_set: std::collections::HashSet<&str> = file_paths.iter().map(String::as_str).collect();
860
861    let mut new_edges: Vec<IndexEdge> = Vec::new();
862
863    for file in &file_paths {
864        let ext = Path::new(file.as_str())
865            .extension()
866            .and_then(|e| e.to_str())
867            .unwrap_or("");
868
869        match ext {
870            "rs" => collect_rust_mod_edges(file, &file_set, index, &mut new_edges),
871            "go" => collect_go_package_edges(file, &file_paths, &mut new_edges),
872            "py" => collect_python_init_edges(file, &file_paths, &mut new_edges),
873            "ts" | "js" | "tsx" | "jsx" => {
874                collect_barrel_edges(file, &file_set, index, &mut new_edges);
875            }
876            _ => {}
877        }
878    }
879
880    index.edges.extend(new_edges);
881}
882
883fn collect_rust_mod_edges(
884    file: &str,
885    file_set: &std::collections::HashSet<&str>,
886    index: &ProjectIndex,
887    edges: &mut Vec<IndexEdge>,
888) {
889    if !index.files.contains_key(file) {
890        return;
891    }
892
893    let full_path = Path::new(&index.project_root).join(file);
894    let Ok(content) = std::fs::read_to_string(&full_path) else {
895        return;
896    };
897
898    let dir = Path::new(file)
899        .parent()
900        .map(|p| p.to_string_lossy().to_string());
901
902    for line in content.lines() {
903        let trimmed = line.trim();
904        if !trimmed.starts_with("mod ") || trimmed.contains('{') {
905            continue;
906        }
907        let mod_name = trimmed
908            .trim_start_matches("mod ")
909            .trim_start_matches("pub mod ")
910            .trim_start_matches("pub(crate) mod ")
911            .trim_end_matches(';')
912            .trim();
913
914        if mod_name.is_empty() || mod_name.contains(' ') {
915            continue;
916        }
917
918        let candidates = if let Some(ref d) = dir {
919            vec![
920                format!("{d}/{mod_name}.rs"),
921                format!("{d}/{mod_name}/mod.rs"),
922            ]
923        } else {
924            vec![format!("{mod_name}.rs"), format!("{mod_name}/mod.rs")]
925        };
926
927        for candidate in candidates {
928            if file_set.contains(candidate.as_str()) {
929                edges.push(IndexEdge {
930                    from: file.to_string(),
931                    to: candidate,
932                    kind: "module".to_string(),
933                    weight: 0.8,
934                });
935                break;
936            }
937        }
938    }
939}
940
941fn collect_go_package_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
942    let p = Path::new(file);
943    if p.extension().and_then(|e| e.to_str()) != Some("go") {
944        return;
945    }
946    if file.ends_with("_test.go") {
947        return;
948    }
949
950    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
951        return;
952    };
953
954    for other in file_paths {
955        if other == file {
956            continue;
957        }
958        let op = Path::new(other.as_str());
959        if op.extension().and_then(|e| e.to_str()) != Some("go") {
960            continue;
961        }
962        if other.ends_with("_test.go") {
963            continue;
964        }
965        let other_dir = op
966            .parent()
967            .map(|d| d.to_string_lossy().to_string())
968            .unwrap_or_default();
969        if other_dir == dir {
970            edges.push(IndexEdge {
971                from: file.to_string(),
972                to: other.clone(),
973                kind: "package".to_string(),
974                weight: 0.5,
975            });
976            break;
977        }
978    }
979}
980
981fn collect_python_init_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
982    let p = Path::new(file);
983    if p.file_name().and_then(|n| n.to_str()) != Some("__init__.py") {
984        return;
985    }
986
987    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
988        return;
989    };
990
991    for other in file_paths {
992        if other == file {
993            continue;
994        }
995        let op = Path::new(other.as_str());
996        if op.extension().and_then(|e| e.to_str()) != Some("py") {
997            continue;
998        }
999        let other_dir = op
1000            .parent()
1001            .map(|d| d.to_string_lossy().to_string())
1002            .unwrap_or_default();
1003        if other_dir == dir {
1004            edges.push(IndexEdge {
1005                from: file.to_string(),
1006                to: other.clone(),
1007                kind: "module".to_string(),
1008                weight: 0.8,
1009            });
1010        }
1011    }
1012}
1013
1014fn collect_barrel_edges(
1015    file: &str,
1016    file_set: &std::collections::HashSet<&str>,
1017    index: &ProjectIndex,
1018    edges: &mut Vec<IndexEdge>,
1019) {
1020    let basename = Path::new(file)
1021        .file_stem()
1022        .and_then(|s| s.to_str())
1023        .unwrap_or("");
1024    if basename != "index" {
1025        return;
1026    }
1027
1028    let full_path = Path::new(&index.project_root).join(file);
1029    let Ok(content) = std::fs::read_to_string(&full_path) else {
1030        return;
1031    };
1032
1033    let dir = Path::new(file)
1034        .parent()
1035        .map(|p| p.to_string_lossy().to_string())
1036        .unwrap_or_default();
1037
1038    let ext = Path::new(file)
1039        .extension()
1040        .and_then(|e| e.to_str())
1041        .unwrap_or("ts");
1042
1043    for line in content.lines() {
1044        let trimmed = line.trim();
1045        if !trimmed.starts_with("export") || !trimmed.contains("from") {
1046            continue;
1047        }
1048        if let Some(from_pos) = trimmed.find("from") {
1049            let after = &trimmed[from_pos + 4..];
1050            let source = after
1051                .trim()
1052                .trim_start_matches(['\'', '"'])
1053                .trim_end_matches([';', '\'', '"'])
1054                .trim_end_matches(['\'', '"']);
1055
1056            if source.starts_with("./") || source.starts_with("../") {
1057                let resolved = if dir.is_empty() {
1058                    source.trim_start_matches("./").to_string()
1059                } else {
1060                    format!("{dir}/{}", source.trim_start_matches("./"))
1061                };
1062
1063                let candidates = vec![
1064                    format!("{resolved}.{ext}"),
1065                    format!("{resolved}/index.{ext}"),
1066                    resolved.clone(),
1067                ];
1068
1069                for candidate in candidates {
1070                    if file_set.contains(candidate.as_str()) {
1071                        edges.push(IndexEdge {
1072                            from: file.to_string(),
1073                            to: candidate,
1074                            kind: "reexport".to_string(),
1075                            weight: 0.8,
1076                        });
1077                        break;
1078                    }
1079                }
1080            }
1081        }
1082    }
1083}
1084
1085// ---------------------------------------------------------------------------
1086// Layer 3: Co-Change Edges (weight 0.5)
1087// ---------------------------------------------------------------------------
1088
1089fn build_cochange_edges(index: &mut ProjectIndex) {
1090    let project_root = &index.project_root;
1091
1092    let output = match std::process::Command::new("git")
1093        .args([
1094            "log",
1095            "--name-only",
1096            "--pretty=format:---",
1097            "--since=6 months",
1098            "--",
1099            ".",
1100        ])
1101        .current_dir(project_root)
1102        .output()
1103    {
1104        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
1105        _ => return,
1106    };
1107
1108    let file_set: std::collections::HashSet<&str> =
1109        index.files.keys().map(String::as_str).collect();
1110
1111    let connected: std::collections::HashSet<&str> = index
1112        .edges
1113        .iter()
1114        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1115        .collect();
1116
1117    // Parse commits into groups of files
1118    let mut cooccurrence: HashMap<(String, String), u32> = HashMap::new();
1119    let mut current_commit: Vec<&str> = Vec::new();
1120
1121    for line in output.lines() {
1122        if line == "---" {
1123            if current_commit.len() >= 2 && current_commit.len() <= 20 {
1124                for i in 0..current_commit.len() {
1125                    for j in (i + 1)..current_commit.len() {
1126                        let a = current_commit[i];
1127                        let b = current_commit[j];
1128                        if !file_set.contains(a) || !file_set.contains(b) {
1129                            continue;
1130                        }
1131                        // Only add if at least one is currently isolated
1132                        if connected.contains(a) && connected.contains(b) {
1133                            continue;
1134                        }
1135                        let key = if a < b {
1136                            (a.to_string(), b.to_string())
1137                        } else {
1138                            (b.to_string(), a.to_string())
1139                        };
1140                        *cooccurrence.entry(key).or_insert(0) += 1;
1141                    }
1142                }
1143            }
1144            current_commit.clear();
1145        } else if !line.is_empty() {
1146            current_commit.push(line.trim());
1147        }
1148    }
1149
1150    // Filter: min 5 shared commits
1151    let mut cochange_edges: Vec<IndexEdge> = cooccurrence
1152        .into_iter()
1153        .filter(|(_, count)| *count >= 5)
1154        .map(|((from, to), _)| IndexEdge {
1155            from,
1156            to,
1157            kind: "cochange".to_string(),
1158            weight: 0.5,
1159        })
1160        .collect();
1161
1162    // Cap at 500 to prevent noise
1163    cochange_edges.sort_by(|a, b| a.from.cmp(&b.from).then_with(|| a.to.cmp(&b.to)));
1164    cochange_edges.truncate(500);
1165
1166    index.edges.extend(cochange_edges);
1167}
1168
1169// ---------------------------------------------------------------------------
1170// Layer 4: Sibling Edges (weight 0.2)
1171// ---------------------------------------------------------------------------
1172
1173fn build_sibling_edges(index: &mut ProjectIndex) {
1174    let connected: std::collections::HashSet<&str> = index
1175        .edges
1176        .iter()
1177        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1178        .collect();
1179
1180    let file_paths: Vec<String> = index.files.keys().cloned().collect();
1181    let mut new_edges: Vec<IndexEdge> = Vec::new();
1182
1183    for file in &file_paths {
1184        if connected.contains(file.as_str()) {
1185            continue;
1186        }
1187
1188        let ext = Path::new(file.as_str())
1189            .extension()
1190            .and_then(|e| e.to_str())
1191            .unwrap_or("");
1192        let dir = Path::new(file.as_str())
1193            .parent()
1194            .map(|p| p.to_string_lossy().to_string())
1195            .unwrap_or_default();
1196
1197        // Find one sibling with same extension
1198        for other in &file_paths {
1199            if other == file {
1200                continue;
1201            }
1202            let other_ext = Path::new(other.as_str())
1203                .extension()
1204                .and_then(|e| e.to_str())
1205                .unwrap_or("");
1206            let other_dir = Path::new(other.as_str())
1207                .parent()
1208                .map(|p| p.to_string_lossy().to_string())
1209                .unwrap_or_default();
1210
1211            if other_ext == ext && other_dir == dir {
1212                new_edges.push(IndexEdge {
1213                    from: file.clone(),
1214                    to: other.clone(),
1215                    kind: "sibling".to_string(),
1216                    weight: 0.2,
1217                });
1218                break; // Max 1 sibling edge per isolate
1219            }
1220        }
1221    }
1222
1223    index.edges.extend(new_edges);
1224}
1225
1226fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
1227    let lines: Vec<&str> = content.lines().collect();
1228    let mut start = 0;
1229
1230    for (i, line) in lines.iter().enumerate() {
1231        if line.contains(&sig.name) {
1232            let trimmed = line.trim();
1233            let is_def = trimmed.starts_with("fn ")
1234                || trimmed.starts_with("pub fn ")
1235                || trimmed.starts_with("pub(crate) fn ")
1236                || trimmed.starts_with("async fn ")
1237                || trimmed.starts_with("pub async fn ")
1238                || trimmed.starts_with("struct ")
1239                || trimmed.starts_with("pub struct ")
1240                || trimmed.starts_with("enum ")
1241                || trimmed.starts_with("pub enum ")
1242                || trimmed.starts_with("trait ")
1243                || trimmed.starts_with("pub trait ")
1244                || trimmed.starts_with("impl ")
1245                || trimmed.starts_with("class ")
1246                || trimmed.starts_with("export class ")
1247                || trimmed.starts_with("export function ")
1248                || trimmed.starts_with("export async function ")
1249                || trimmed.starts_with("function ")
1250                || trimmed.starts_with("async function ")
1251                || trimmed.starts_with("def ")
1252                || trimmed.starts_with("async def ")
1253                || trimmed.starts_with("func ")
1254                || trimmed.starts_with("interface ")
1255                || trimmed.starts_with("export interface ")
1256                || trimmed.starts_with("type ")
1257                || trimmed.starts_with("export type ")
1258                || trimmed.starts_with("const ")
1259                || trimmed.starts_with("export const ")
1260                || trimmed.starts_with("fun ")
1261                || trimmed.starts_with("private fun ")
1262                || trimmed.starts_with("public fun ")
1263                || trimmed.starts_with("internal fun ")
1264                || trimmed.starts_with("class ")
1265                || trimmed.starts_with("data class ")
1266                || trimmed.starts_with("sealed class ")
1267                || trimmed.starts_with("sealed interface ")
1268                || trimmed.starts_with("enum class ")
1269                || trimmed.starts_with("object ")
1270                || trimmed.starts_with("private object ")
1271                || trimmed.starts_with("interface ")
1272                || trimmed.starts_with("typealias ")
1273                || trimmed.starts_with("private typealias ");
1274            if is_def {
1275                start = i + 1;
1276                break;
1277            }
1278        }
1279    }
1280
1281    if start == 0 {
1282        return (1, lines.len().min(20));
1283    }
1284
1285    let base_indent = lines
1286        .get(start - 1)
1287        .map_or(0, |l| l.len() - l.trim_start().len());
1288
1289    let mut end = start;
1290    let mut brace_depth: i32 = 0;
1291    let mut found_open = false;
1292
1293    for (i, line) in lines.iter().enumerate().skip(start - 1) {
1294        for ch in line.chars() {
1295            if ch == '{' {
1296                brace_depth += 1;
1297                found_open = true;
1298            } else if ch == '}' {
1299                brace_depth -= 1;
1300            }
1301        }
1302
1303        end = i + 1;
1304
1305        if found_open && brace_depth <= 0 {
1306            break;
1307        }
1308
1309        if !found_open && i > start {
1310            let indent = line.len() - line.trim_start().len();
1311            if indent <= base_indent && !line.trim().is_empty() && i > start {
1312                end = i;
1313                break;
1314            }
1315        }
1316
1317        if end - start > 200 {
1318            break;
1319        }
1320    }
1321
1322    (start, end)
1323}
1324
/// Returns a one-line summary of a source file: the first "interesting" line
/// among the first 20 lines, trimmed and truncated to 120 characters.
///
/// Blank lines and lines that (after trimming) start with a comment marker
/// (`//`, `#`, `/*`, `*`) or an import-like keyword (`use `, `import `,
/// `from `, `require(`, `package `) are skipped. An empty string is returned
/// when none of the first 20 lines qualifies.
fn extract_summary(content: &str) -> String {
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|candidate| {
            !candidate.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| candidate.starts_with(*prefix))
        })
        .map(|candidate| candidate.chars().take(120).collect())
        .unwrap_or_default()
}
1345
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit digest as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(content, &mut state);
    let digest = std::hash::Hasher::finish(&state);
    format!("{digest:016x}")
}
1354
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// low 32 bits of the digest as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(input, &mut state);
    let low_bits = std::hash::Hasher::finish(&state) & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
1363
/// Recursively copies the directory `src` into `dst`, creating `dst` (and any
/// missing parents) first.
///
/// Entries for which `Path::is_dir` is true are copied recursively; everything
/// else is copied with `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered: creating `dst`, opening `src`,
/// reading an individual directory entry, or copying a file. Entries copied
/// before the failure are left in place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Propagate per-entry failures instead of silently skipping the entry,
        // which would report an incomplete copy as success.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
1377
1378fn normalize_absolute_path(path: &str) -> String {
1379    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
1380        return canon.to_string_lossy().to_string();
1381    }
1382
1383    let mut normalized = path.to_string();
1384    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
1385        normalized.truncate(normalized.len() - 2);
1386    }
1387    while normalized.len() > 1
1388        && (normalized.ends_with('\\') || normalized.ends_with('/'))
1389        && !normalized.ends_with(":\\")
1390        && !normalized.ends_with(":/")
1391        && normalized != "\\"
1392        && normalized != "/"
1393    {
1394        normalized.pop();
1395    }
1396    normalized
1397}
1398
1399pub fn normalize_project_root(path: &str) -> String {
1400    normalize_absolute_path(path)
1401}
1402
1403pub fn graph_match_key(path: &str) -> String {
1404    let stripped =
1405        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1406    stripped.trim_start_matches('/').to_string()
1407}
1408
1409pub fn graph_relative_key(path: &str, root: &str) -> String {
1410    let root_norm = normalize_project_root(root);
1411    let path_norm = normalize_absolute_path(path);
1412    let root_path = Path::new(&root_norm);
1413    let path_path = Path::new(&path_norm);
1414
1415    if let Ok(rel) = path_path.strip_prefix(root_path) {
1416        let rel = rel.to_string_lossy().to_string();
1417        return rel.trim_start_matches(['/', '\\']).to_string();
1418    }
1419
1420    path.trim_start_matches(['/', '\\'])
1421        .replace('/', std::path::MAIN_SEPARATOR_STR)
1422}
1423
1424fn make_relative(path: &str, root: &str) -> String {
1425    graph_relative_key(path, root)
1426}
1427
1428fn is_indexable_ext(ext: &str) -> bool {
1429    crate::core::language_capabilities::is_indexable_ext(ext)
1430}
1431
/// Returns the package name from the first line of `content` that, once
/// trimmed, starts with `package `; trailing semicolons are removed.
///
/// Yields `None` when no such line exists.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            let name = declared.trim().trim_end_matches(';');
            return Some(name.to_string());
        }
    }
    None
}
1439
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a [`FileEntry`] for `path` whose hash, line count, token count
    /// and summary are derived from `content`.
    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
        FileEntry {
            path: path.to_string(),
            hash: compute_hash(content),
            language: language.to_string(),
            line_count: content.lines().count(),
            token_count: crate::core::tokens::count_tokens(content),
            exports: Vec::new(),
            summary: extract_summary(content),
        }
    }

    // ----- hashing ---------------------------------------------------------

    #[test]
    fn test_short_hash_deterministic() {
        let first = short_hash("/Users/test/project");
        let second = short_hash("/Users/test/project");
        assert_eq!(first, second);
        assert_eq!(first.len(), 8);
    }

    #[test]
    fn test_compute_hash_deterministic() {
        let first = compute_hash("hello world");
        let second = compute_hash("hello world");
        assert_eq!(first, second);
        assert_ne!(first, compute_hash("hello world!"));
    }

    // ----- path helpers ----------------------------------------------------

    #[test]
    fn test_make_relative() {
        // `make_relative` must agree with `graph_relative_key` for every
        // path flavour: POSIX absolute, already relative, Windows, verbatim.
        let cases = [
            ("/foo/bar/src/main.rs", "/foo/bar"),
            ("src/main.rs", "/foo/bar"),
            ("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            ("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
        ];
        for (path, root) in cases.iter() {
            assert_eq!(make_relative(path, root), graph_relative_key(path, root));
        }
    }

    #[test]
    fn test_normalize_project_root() {
        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
    }

    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        assert_eq!(
            graph_match_key(r"C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(
            graph_match_key(r"\\?\C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
    }

    // ----- content helpers -------------------------------------------------

    #[test]
    fn test_extract_summary() {
        let source = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        assert_eq!(extract_summary(source), "pub fn main() {");
    }

    #[test]
    fn test_kotlin_package_name() {
        let source = "package com.example.feature\n\nclass UserService";
        assert_eq!(
            kotlin_package_name(source).as_deref(),
            Some("com.example.feature")
        );
    }

    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
    fun greet(name: String): String {
        return "hi $name"
    }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };
        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        // An explicit span on the signature wins over the text-based fallback.
        let range = sig
            .start_line
            .zip(sig.end_line)
            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
        assert_eq!(range, (42, 43));
    }

    // ----- index structure -------------------------------------------------

    #[test]
    fn test_project_index_new() {
        let fresh = ProjectIndex::new("/test");
        assert_eq!(fresh.version, INDEX_VERSION);
        assert_eq!(fresh.project_root, "/test");
        assert!(fresh.files.is_empty());
    }

    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        // Both `a.rs` and `c.rs` import `b.rs`.
        for importer in ["a.rs", "c.rs"].iter() {
            idx.edges.push(IndexEdge {
                from: importer.to_string(),
                to: "b.rs".to_string(),
                kind: "import".to_string(),
                weight: 1.0,
            });
        }

        let dependents = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(dependents.len(), 2);
        assert!(dependents.contains(&"a.rs".to_string()));
        assert!(dependents.contains(&"c.rs".to_string()));
    }

    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    // ----- staleness detection ---------------------------------------------

    #[test]
    fn test_index_looks_stale_when_any_file_missing() {
        let workspace = tempdir().expect("tempdir");
        let root = workspace.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
        // Listed in the index but never written to disk.
        idx.files.insert(
            "missing.rs".to_string(),
            fe("missing.rs", "pub fn m() {}\n", "rs"),
        );

        assert!(index_looks_stale(&idx, &root_s));
    }

    #[test]
    fn test_index_looks_fresh_when_all_files_exist() {
        let workspace = tempdir().expect("tempdir");
        let root = workspace.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));

        assert!(!index_looks_stale(&idx, &root_s));
    }

    #[test]
    fn stale_index_detected_by_contamination() {
        let root_s = "/home/testuser/myproject";
        let mut idx = ProjectIndex::new(root_s);
        // Simulate a contaminated index with Desktop files
        idx.files.insert(
            "Desktop/random.py".to_string(),
            fe("Desktop/random.py", "x = 1\n", "py"),
        );
        idx.files.insert(
            "src/main.rs".to_string(),
            fe("src/main.rs", "fn main() {}\n", "rs"),
        );
        assert!(
            index_looks_stale(&idx, root_s),
            "Index with Desktop/ files should be considered stale"
        );
    }

    #[test]
    fn stale_index_detected_by_age() {
        let workspace = tempdir().expect("tempdir");
        let root = workspace.path();
        std::fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));
        // Set last_scan to 100 hours ago (default max_age_hours is 48)
        let scanned_at = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
        idx.last_scan = scanned_at.format("%Y-%m-%d %H:%M:%S").to_string();

        assert!(
            index_looks_stale(&idx, &root_s),
            "Index older than max_age_hours should be stale"
        );
    }

    // ----- scan-root safety ------------------------------------------------

    #[test]
    fn safe_scan_root_rejects_fs_root() {
        assert!(!is_safe_scan_root("/"));
        assert!(!is_safe_scan_root("\\"));
        #[cfg(windows)]
        {
            assert!(!is_safe_scan_root("C:\\"));
            assert!(!is_safe_scan_root("D:\\"));
        }
    }

    #[test]
    fn safe_scan_root_rejects_home() {
        // Skipped when the platform reports no home directory.
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let home_str = home.to_string_lossy().to_string();
        assert!(
            !is_safe_scan_root(&home_str),
            "home dir should be rejected: {home_str}"
        );
    }

    #[test]
    fn safe_scan_root_rejects_home_downloads() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let downloads = home.join("Downloads");
        // Only test if Downloads doesn't contain a .git (unlikely but possible)
        if downloads.join(".git").exists() {
            return;
        }
        let downloads_str = downloads.to_string_lossy().to_string();
        assert!(
            !is_safe_scan_root(&downloads_str),
            "~/Downloads should be rejected without project markers"
        );
    }

    #[test]
    fn safe_scan_root_accepts_project_dir() {
        let project = tempdir().unwrap();
        std::fs::write(
            project.path().join("Cargo.toml"),
            "[package]\nname = \"test\"\n",
        )
        .unwrap();
        let root = project.path().to_string_lossy().to_string();
        assert!(is_safe_scan_root(&root));
    }

    #[test]
    fn safe_scan_root_rejects_broad_dir() {
        let broad = tempdir().unwrap();
        for n in 0..55 {
            std::fs::create_dir(broad.path().join(format!("dir{n}"))).unwrap();
        }
        let root = broad.path().to_string_lossy().to_string();
        assert!(!is_safe_scan_root(&root));
    }

    #[test]
    fn no_index_env_skips_scan() {
        // Serialise against other tests that touch process-wide env vars.
        let _env = crate::core::data_dir::test_env_lock();
        let project = tempdir().unwrap();
        std::fs::write(project.path().join("Cargo.toml"), "").unwrap();
        std::fs::write(project.path().join("main.rs"), "fn main() {}").unwrap();

        std::env::set_var("LEAN_CTX_NO_INDEX", "1");
        let idx = scan(&project.path().to_string_lossy());
        // Restore the environment before asserting so a failure cannot leak it.
        std::env::remove_var("LEAN_CTX_NO_INDEX");
        assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
    }
}