Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
9const INDEX_VERSION: u32 = 6;
10
/// Public entry point for the scan-root safety check.
///
/// Forwards `path` unchanged to the private `is_safe_scan_root` and returns
/// its result.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
14
/// Reports whether `path` should be treated as a filesystem root.
///
/// A path counts as a root when `Path::parent` yields `None`. On Windows
/// builds only, a path whose parent is the empty path is also treated as a
/// root.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
19
20fn is_safe_scan_root(path: &str) -> bool {
21    let normalized = normalize_project_root(path);
22    let p = Path::new(&normalized);
23
24    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
25        tracing::warn!("[graph_index: refusing to scan filesystem root]");
26        return false;
27    }
28
29    if normalized == "." || normalized.is_empty() {
30        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
31        return false;
32    }
33
34    if let Some(home) = dirs::home_dir() {
35        let home_norm = normalize_project_root(&home.to_string_lossy());
36        if normalized == home_norm {
37            use std::sync::Once;
38            static HOME_WARN: Once = Once::new();
39            HOME_WARN.call_once(|| {
40                tracing::warn!(
41                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
42                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
43                );
44            });
45            return false;
46        }
47        // Block common broad home subdirectories that are never valid project roots
48        let home_path = Path::new(&home_norm);
49        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
50            "Desktop",
51            "Documents",
52            "Downloads",
53            "Pictures",
54            "Music",
55            "Videos",
56            "Movies",
57            "Library",
58            ".local",
59            ".cache",
60            ".config",
61            "snap",
62            "Applications",
63        ];
64        for blocked in BLOCKED_HOME_SUBDIRS {
65            let blocked_path = home_path.join(blocked);
66            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
67            let has_project_marker = p.join(".git").exists()
68                || p.join("Cargo.toml").exists()
69                || p.join("package.json").exists();
70            if is_inside_blocked && !has_project_marker {
71                tracing::warn!(
72                    "[graph_index: refusing to scan {normalized} — \
73                     inside home/{blocked} without project markers]"
74                );
75                return false;
76            }
77        }
78
79        // Block directories that are direct children of home without project markers
80        if p.parent() == Some(home_path) {
81            let has_marker = p.join(".git").exists()
82                || p.join("Cargo.toml").exists()
83                || p.join("package.json").exists()
84                || p.join("go.mod").exists()
85                || p.join("pyproject.toml").exists();
86            if !has_marker {
87                tracing::warn!(
88                    "[graph_index: refusing to scan {normalized} — \
89                     direct child of home without project markers]"
90                );
91                return false;
92            }
93        }
94    }
95
96    let breadth_markers = [
97        ".git",
98        "Cargo.toml",
99        "package.json",
100        "go.mod",
101        "pyproject.toml",
102        "setup.py",
103        "Makefile",
104        "CMakeLists.txt",
105        "pnpm-workspace.yaml",
106        ".projectile",
107        "BUILD.bazel",
108        "go.work",
109    ];
110
111    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
112        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
113            rd.filter_map(Result::ok)
114                .filter(|e| e.path().is_dir())
115                .count()
116        });
117        if child_count > 50 {
118            tracing::warn!(
119                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
120                 skipping scan to avoid indexing broad directories]"
121            );
122            return false;
123        }
124    }
125
126    true
127}
128
/// Serializable graph index for one project: files, symbols and the edges
/// between files.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Format version; `load` rejects an index whose value differs from `INDEX_VERSION`.
    pub version: u32,
    /// Project root as returned by `normalize_project_root`.
    pub project_root: String,
    /// Local time the index was created, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by the path produced by `make_relative` during a scan.
    pub files: HashMap<String, FileEntry>,
    /// Relationships between files (import, module, ... edges).
    pub edges: Vec<IndexEdge>,
    /// Extracted symbols, keyed as `"{file}::{name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
138
/// Per-file record stored in `ProjectIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path of the file; a scan stores the same value it uses as the map key.
    pub path: String,
    /// Content hash from `compute_hash`; an unchanged hash lets a rescan reuse the entry.
    pub hash: String,
    /// The file extension (a scan stores the extension string here).
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are flagged as exported.
    pub exports: Vec<String>,
    /// Summary text produced by `extract_summary`.
    pub summary: String,
}
149
/// Per-symbol record stored in `ProjectIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Key of the owning file in `ProjectIndex::files`.
    pub file: String,
    /// Symbol name as reported by signature extraction.
    pub name: String,
    /// Stringified signature kind.
    pub kind: String,
    /// First line of the symbol (from the signature, or `find_symbol_range` as fallback).
    pub start_line: usize,
    /// Last line of the symbol (same source as `start_line`).
    pub end_line: usize,
    /// Whether signature extraction flagged the symbol as exported.
    pub is_exported: bool,
}
159
/// Directed relationship between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// File key the edge starts at (e.g. the importing file).
    pub from: String,
    /// File key the edge points to (e.g. the imported file).
    pub to: String,
    /// Edge category, e.g. `"import"` or `"module"`.
    pub kind: String,
    /// Edge weight; falls back to `default_edge_weight()` when absent from
    /// the serialized data.
    #[serde(default = "default_edge_weight")]
    pub weight: f32,
}
168
/// Weight given to an `IndexEdge` whose serialized form has no `weight` field.
fn default_edge_weight() -> f32 {
    const DEFAULT_WEIGHT: f32 = 1.0;
    DEFAULT_WEIGHT
}
172
173impl ProjectIndex {
174    pub fn new(project_root: &str) -> Self {
175        Self {
176            version: INDEX_VERSION,
177            project_root: normalize_project_root(project_root),
178            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
179            files: HashMap::new(),
180            edges: Vec::new(),
181            symbols: HashMap::new(),
182        }
183    }
184
185    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
186        let normalized = normalize_project_root(project_root);
187        let hash = crate::core::project_hash::hash_project_root(&normalized);
188        crate::core::data_dir::lean_ctx_data_dir()
189            .ok()
190            .map(|d| d.join("graphs").join(hash))
191    }
192
    /// Loads the persisted index for `project_root`, if a usable one exists.
    ///
    /// Lookup order:
    /// 1. `index.json.zst` in the index directory (zstd-compressed JSON),
    /// 2. plain `index.json` in the same directory,
    /// 3. `index.json` in the legacy directory keyed by `short_hash`; on a hit
    ///    the legacy directory is copied into the current one (best effort).
    ///
    /// A plain-JSON hit is re-saved as `index.json.zst` and the JSON file is
    /// removed once the compressed file is in place.
    ///
    /// Returns `None` when no file can be read or decoded, or when the stored
    /// `version` differs from `INDEX_VERSION`.
    pub fn load(project_root: &str) -> Option<Self> {
        let dir = Self::index_dir(project_root)?;

        let zst_path = dir.join("index.json.zst");
        if zst_path.exists() {
            // A compressed index that exists but cannot be decoded yields `None`;
            // the JSON fallbacks below are not consulted in that case.
            let compressed = std::fs::read(&zst_path).ok()?;
            let data = zstd::decode_all(compressed.as_slice()).ok()?;
            let content = String::from_utf8(data).ok()?;
            let index: Self = serde_json::from_str(&content).ok()?;
            if index.version != INDEX_VERSION {
                return None;
            }
            return Some(index);
        }

        let json_path = dir.join("index.json");
        let content = std::fs::read_to_string(&json_path)
            .or_else(|_| -> std::io::Result<String> {
                // Fall back to the directory name derived from the older `short_hash` scheme.
                let legacy_hash = short_hash(&normalize_project_root(project_root));
                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
                    .join("graphs")
                    .join(legacy_hash);
                let legacy_path = legacy_dir.join("index.json");
                let data = std::fs::read_to_string(&legacy_path)?;
                // Migration is best effort: a failed copy is only logged.
                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
                    tracing::debug!("graph index migration: {e}");
                }
                Ok(data)
            })
            .ok()?;
        let index: Self = serde_json::from_str(&content).ok()?;
        if index.version != INDEX_VERSION {
            return None;
        }
        // Auto-migrate: compress legacy JSON to zstd
        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
            // Write to a temp file and rename so a partial file never sits at `zst_path`.
            let zst_tmp = zst_path.with_extension("zst.tmp");
            if std::fs::write(&zst_tmp, &compressed).is_ok()
                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
            {
                let _ = std::fs::remove_file(&json_path);
            }
        }
        Some(index)
    }
239
240    pub fn save(&self) -> Result<(), String> {
241        let dir = Self::index_dir(&self.project_root)
242            .ok_or_else(|| "Cannot determine data directory".to_string())?;
243        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
244        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
245        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
246        let target = dir.join("index.json.zst");
247        let tmp = target.with_extension("zst.tmp");
248        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
249        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
250        let _ = std::fs::remove_file(dir.join("index.json"));
251        Ok(())
252    }
253
254    /// Remove all cached graph indices that are older than max_age_hours.
255    /// Called on startup/update to prevent stale data from persisting.
256    pub fn purge_stale_indices() {
257        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
258            return;
259        };
260        let graphs_dir = data_dir.join("graphs");
261        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
262            return;
263        };
264        let cfg = crate::core::config::Config::load();
265        let max_age_secs = cfg.archive.max_age_hours * 3600;
266
267        for entry in entries.filter_map(Result::ok) {
268            let path = entry.path();
269            if !path.is_dir() {
270                continue;
271            }
272            let zst = path.join("index.json.zst");
273            let json = path.join("index.json");
274            let index_file = if zst.exists() {
275                &zst
276            } else if json.exists() {
277                &json
278            } else {
279                continue;
280            };
281
282            let is_old = index_file
283                .metadata()
284                .and_then(|m| m.modified())
285                .is_ok_and(|mtime| {
286                    mtime
287                        .elapsed()
288                        .is_ok_and(|age| age.as_secs() > max_age_secs)
289                });
290
291            if is_old {
292                tracing::info!("[graph_index: purging stale index at {}]", path.display());
293                let _ = std::fs::remove_dir_all(&path);
294            }
295        }
296    }
297
    /// Number of entries in `files`.
    pub fn file_count(&self) -> usize {
        self.files.len()
    }
301
    /// Number of entries in `symbols`.
    pub fn symbol_count(&self) -> usize {
        self.symbols.len()
    }
305
    /// Number of entries in `edges`.
    pub fn edge_count(&self) -> usize {
        self.edges.len()
    }
309
    /// Looks up a symbol by its map key (a scan stores keys as `"{file}::{name}"`).
    ///
    /// Returns `None` when no symbol is stored under `key`.
    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
        self.symbols.get(key)
    }
313
314    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
315        let mut result = Vec::new();
316        let mut visited = std::collections::HashSet::new();
317        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
318
319        while let Some((current, d)) = queue.pop() {
320            if d > depth || visited.contains(&current) {
321                continue;
322            }
323            visited.insert(current.clone());
324            if current != path {
325                result.push(current.clone());
326            }
327
328            for edge in &self.edges {
329                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
330                    queue.push((edge.from.clone(), d + 1));
331                }
332            }
333        }
334        result
335    }
336
337    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
338        let mut result = Vec::new();
339        let mut visited = std::collections::HashSet::new();
340        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
341
342        while let Some((current, d)) = queue.pop() {
343            if d > depth || visited.contains(&current) {
344                continue;
345            }
346            visited.insert(current.clone());
347            if current != path {
348                result.push(current.clone());
349            }
350
351            for edge in &self.edges {
352                if edge.from == current && !visited.contains(&edge.to) {
353                    queue.push((edge.to.clone(), d + 1));
354                }
355                if edge.to == current && !visited.contains(&edge.from) {
356                    queue.push((edge.from.clone(), d + 1));
357                }
358            }
359        }
360        result
361    }
362}
363
364/// Load the best available graph index, trying multiple root path variants.
365/// If no valid index exists, automatically scans the project to build one.
366/// This is the primary entry point — ensures zero-config usage.
367pub fn load_or_build(project_root: &str) -> ProjectIndex {
368    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
369        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
370    }
371
372    // Prefer stable absolute roots. Using "." as a cache key is fragile because
373    // it depends on the process cwd and can accidentally load the wrong project.
374    let root_abs = if project_root.trim().is_empty() || project_root == "." {
375        std::env::current_dir().ok().map_or_else(
376            || ".".to_string(),
377            |p| normalize_project_root(&p.to_string_lossy()),
378        )
379    } else {
380        normalize_project_root(project_root)
381    };
382
383    if !is_safe_scan_root(&root_abs) {
384        return ProjectIndex::new(&root_abs);
385    }
386
387    // Try the absolute/root-normalized path first.
388    if let Some(idx) = ProjectIndex::load(&root_abs) {
389        if !idx.files.is_empty() {
390            if index_looks_stale(&idx, &root_abs) {
391                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
392                return scan(&root_abs);
393            }
394            return idx;
395        }
396    }
397
398    // CWD fallback: only use if CWD is a subdirectory of root_abs (same project)
399    if let Ok(cwd) = std::env::current_dir() {
400        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
401        if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
402            if let Some(idx) = ProjectIndex::load(&cwd_str) {
403                if !idx.files.is_empty() {
404                    if index_looks_stale(&idx, &cwd_str) {
405                        return scan(&cwd_str);
406                    }
407                    return idx;
408                }
409            }
410        }
411    }
412
413    scan(&root_abs)
414}
415
416fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
417    if index.files.is_empty() {
418        return true;
419    }
420
421    // TTL check: rebuild if index is older than configured max_age_hours
422    if let Ok(scan_time) =
423        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
424    {
425        let cfg = crate::core::config::Config::load();
426        let max_age = chrono::Duration::hours(cfg.archive.max_age_hours as i64);
427        let now = chrono::Local::now().naive_local();
428        if now.signed_duration_since(scan_time) > max_age {
429            tracing::info!(
430                "[graph_index: index is older than {}h — marking stale]",
431                cfg.archive.max_age_hours
432            );
433            return true;
434        }
435    }
436
437    // Contamination check: if index contains paths from common user directories,
438    // it was built from a too-broad root and must be rebuilt
439    const CONTAMINATION_MARKERS: &[&str] = &[
440        "Desktop/",
441        "Documents/",
442        "Downloads/",
443        "Pictures/",
444        "Music/",
445        "Videos/",
446        "Movies/",
447        "Library/",
448        ".cache/",
449        "snap/",
450    ];
451    let contaminated = index.files.keys().take(200).any(|rel| {
452        CONTAMINATION_MARKERS
453            .iter()
454            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
455    });
456    if contaminated {
457        tracing::warn!(
458            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
459             marking stale to force clean rebuild]"
460        );
461        return true;
462    }
463
464    let root_path = Path::new(root_abs);
465    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
466    let sample_size = index.files.len().min(20);
467    for rel in index.files.keys().take(sample_size) {
468        let rel = rel.trim_start_matches(['/', '\\']);
469        if rel.is_empty() {
470            continue;
471        }
472        let abs = root_path.join(rel);
473        if !abs.exists() {
474            return true;
475        }
476    }
477
478    false
479}
480
481pub fn scan(project_root: &str) -> ProjectIndex {
482    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
483        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
484        return ProjectIndex::new(project_root);
485    }
486
487    let project_root = normalize_project_root(project_root);
488
489    if !is_safe_scan_root(&project_root) {
490        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
491        return ProjectIndex::new(&project_root);
492    }
493
494    let lock_name = format!(
495        "graph-idx-{}",
496        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
497    );
498    let _lock = crate::core::startup_guard::try_acquire_lock(
499        &lock_name,
500        std::time::Duration::from_millis(800),
501        std::time::Duration::from_mins(3),
502    );
503    if _lock.is_none() {
504        tracing::info!(
505            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
506        );
507        return ProjectIndex::load(&project_root)
508            .unwrap_or_else(|| ProjectIndex::new(&project_root));
509    }
510
511    let existing = ProjectIndex::load(&project_root);
512    let mut index = ProjectIndex::new(&project_root);
513
514    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
515        if let Some(ref prev) = existing {
516            prev.files
517                .iter()
518                .map(|(path, entry)| {
519                    let syms: Vec<(String, SymbolEntry)> = prev
520                        .symbols
521                        .iter()
522                        .filter(|(_, s)| s.file == *path)
523                        .map(|(k, v)| (k.clone(), v.clone()))
524                        .collect();
525                    (path.clone(), (entry.hash.clone(), syms))
526                })
527                .collect()
528        } else {
529            HashMap::new()
530        };
531
532    let walker = ignore::WalkBuilder::new(&project_root)
533        .hidden(true)
534        .git_ignore(true)
535        .git_global(true)
536        .git_exclude(true)
537        .max_depth(Some(20))
538        .build();
539
540    let cfg = crate::core::config::Config::load();
541    let extra_ignores: Vec<glob::Pattern> = cfg
542        .extra_ignore_patterns
543        .iter()
544        .filter_map(|p| glob::Pattern::new(p).ok())
545        .collect();
546
547    let mut scanned = 0usize;
548    let mut reused = 0usize;
549    let mut entries_visited = 0usize;
550    let max_files = if cfg.graph_index_max_files == 0 {
551        usize::MAX // unlimited
552    } else {
553        cfg.graph_index_max_files as usize
554    };
555    const MAX_ENTRIES_VISITED: usize = 500_000;
556    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
557    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
558
559    for entry in walker.filter_map(std::result::Result::ok) {
560        entries_visited += 1;
561        if entries_visited > MAX_ENTRIES_VISITED {
562            tracing::warn!(
563                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
564                 runaway traversal. Indexed {} files so far.]",
565                index.files.len()
566            );
567            break;
568        }
569        if entries_visited.is_multiple_of(5000) {
570            if std::time::Instant::now() > scan_deadline {
571                tracing::warn!(
572                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
573                     saving partial index with {} files]",
574                    index.files.len()
575                );
576                break;
577            }
578            if crate::core::memory_guard::abort_requested() {
579                tracing::warn!(
580                    "[graph_index: memory pressure abort after {entries_visited} entries — \
581                     saving partial index with {} files]",
582                    index.files.len()
583                );
584                break;
585            }
586            if crate::core::memory_guard::is_under_pressure() {
587                tracing::warn!(
588                    "[graph_index: memory pressure detected at {entries_visited} entries — \
589                     stopping scan with {} files]",
590                    index.files.len()
591                );
592                break;
593            }
594            if let Some(ref g) = _lock {
595                g.touch();
596            }
597        }
598
599        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
600            continue;
601        }
602        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
603
604        // Prevent indexing files that escaped the project root (symlinks, mount points)
605        if !file_path.starts_with(&project_root) {
606            continue;
607        }
608
609        // Skip special files (devices, FIFOs, sockets) that can stream infinite data
610        if let Ok(meta) = std::fs::metadata(&file_path) {
611            if !meta.is_file() {
612                continue;
613            }
614            if meta.len() > MAX_FILE_SIZE_BYTES {
615                tracing::debug!(
616                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
617                    meta.len() as f64 / 1_048_576.0,
618                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
619                );
620                continue;
621            }
622        }
623
624        let ext = Path::new(&file_path)
625            .extension()
626            .and_then(|e| e.to_str())
627            .unwrap_or("");
628
629        if !is_indexable_ext(ext) {
630            continue;
631        }
632
633        let rel = make_relative(&file_path, &project_root);
634        if extra_ignores.iter().any(|p| p.matches(&rel)) {
635            continue;
636        }
637
638        if max_files != usize::MAX && index.files.len() >= max_files {
639            tracing::info!(
640                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
641                max_files
642            );
643            break;
644        }
645
646        let Ok(content) = std::fs::read_to_string(&file_path) else {
647            continue;
648        };
649
650        let hash = compute_hash(&content);
651        let rel_path = make_relative(&file_path, &project_root);
652
653        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
654            if *old_hash == hash {
655                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
656                    index.files.insert(rel_path.clone(), old_entry.clone());
657                    for (key, sym) in old_syms {
658                        index.symbols.insert(key.clone(), sym.clone());
659                    }
660                    reused += 1;
661                    continue;
662                }
663            }
664        }
665
666        let sigs = signatures::extract_signatures(&content, ext);
667        let line_count = content.lines().count();
668        let token_count = crate::core::tokens::count_tokens(&content);
669        let summary = extract_summary(&content);
670
671        let exports: Vec<String> = sigs
672            .iter()
673            .filter(|s| s.is_exported)
674            .map(|s| s.name.clone())
675            .collect();
676
677        index.files.insert(
678            rel_path.clone(),
679            FileEntry {
680                path: rel_path.clone(),
681                hash,
682                language: ext.to_string(),
683                line_count,
684                token_count,
685                exports,
686                summary,
687            },
688        );
689
690        for sig in &sigs {
691            let (start, end) = sig
692                .start_line
693                .zip(sig.end_line)
694                .unwrap_or_else(|| find_symbol_range(&content, sig));
695            let key = format!("{}::{}", rel_path, sig.name);
696            index.symbols.insert(
697                key,
698                SymbolEntry {
699                    file: rel_path.clone(),
700                    name: sig.name.clone(),
701                    kind: sig.kind.to_string(),
702                    start_line: start,
703                    end_line: end,
704                    is_exported: sig.is_exported,
705                },
706            );
707        }
708
709        scanned += 1;
710    }
711
712    build_edges(&mut index);
713
714    if let Err(e) = index.save() {
715        tracing::warn!("could not save graph index: {e}");
716    }
717
718    tracing::warn!(
719        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
720        index.file_count(),
721        scanned,
722        reused,
723        index.symbol_count(),
724        index.edge_count()
725    );
726
727    index
728}
729
/// Rebuilds all edges of `index`.
///
/// Order matters: `build_edges_with_cache` clears the existing edges before
/// adding import edges (here with an empty content cache, so files are read
/// from disk); the implicit, co-change and sibling passes then run on top of
/// that result.
fn build_edges(index: &mut ProjectIndex) {
    build_edges_with_cache(index, &HashMap::new());
    build_implicit_edges(index);
    build_cochange_edges(index);
    build_sibling_edges(index);
}
736
737fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
738    index.edges.clear();
739
740    if crate::core::memory_guard::abort_requested() {
741        tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
742        return;
743    }
744
745    let root = normalize_project_root(&index.project_root);
746    let root_path = Path::new(&root);
747
748    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
749    file_paths.sort();
750
751    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
752
753    const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
754
755    for (i, rel_path) in file_paths.iter().enumerate() {
756        if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
757            tracing::warn!(
758                "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
759                file_paths.len()
760            );
761            break;
762        }
763
764        let content = if let Some(cached) = content_cache.get(rel_path) {
765            std::borrow::Cow::Borrowed(cached.as_str())
766        } else {
767            let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
768            if let Ok(meta) = abs_path.metadata() {
769                if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
770                    continue;
771                }
772            }
773            match std::fs::read_to_string(&abs_path) {
774                Ok(c) => std::borrow::Cow::Owned(c),
775                Err(_) => continue,
776            }
777        };
778
779        let ext = Path::new(rel_path)
780            .extension()
781            .and_then(|e| e.to_str())
782            .unwrap_or("");
783
784        let resolve_ext = match ext {
785            "vue" | "svelte" => "ts",
786            _ => ext,
787        };
788
789        let analysis_content = if ext == "vue" || ext == "svelte" {
790            if let Some(script) = crate::core::signatures_ts::sfc::extract_script_block(&content) {
791                std::borrow::Cow::Owned(script)
792            } else {
793                content
794            }
795        } else {
796            content
797        };
798
799        let imports = crate::core::deep_queries::analyze(&analysis_content, resolve_ext).imports;
800        if imports.is_empty() {
801            continue;
802        }
803
804        let resolved =
805            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
806        for r in resolved {
807            if r.is_external {
808                continue;
809            }
810            if let Some(to) = r.resolved_path {
811                index.edges.push(IndexEdge {
812                    from: rel_path.clone(),
813                    to,
814                    kind: "import".to_string(),
815                    weight: 1.0,
816                });
817            }
818        }
819    }
820
821    index.edges.sort_by(|a, b| {
822        a.from
823            .cmp(&b.from)
824            .then_with(|| a.to.cmp(&b.to))
825            .then_with(|| a.kind.cmp(&b.kind))
826    });
827    index
828        .edges
829        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
830}
831
832// ---------------------------------------------------------------------------
833// Layer 2: Implicit Language Edges (weight 0.8)
834// ---------------------------------------------------------------------------
835
836fn build_implicit_edges(index: &mut ProjectIndex) {
837    let file_paths: Vec<String> = index.files.keys().cloned().collect();
838    let file_set: std::collections::HashSet<&str> = file_paths.iter().map(String::as_str).collect();
839
840    let mut new_edges: Vec<IndexEdge> = Vec::new();
841
842    for file in &file_paths {
843        let ext = Path::new(file.as_str())
844            .extension()
845            .and_then(|e| e.to_str())
846            .unwrap_or("");
847
848        match ext {
849            "rs" => collect_rust_mod_edges(file, &file_set, index, &mut new_edges),
850            "go" => collect_go_package_edges(file, &file_paths, &mut new_edges),
851            "py" => collect_python_init_edges(file, &file_paths, &mut new_edges),
852            "ts" | "js" | "tsx" | "jsx" => {
853                collect_barrel_edges(file, &file_set, index, &mut new_edges);
854            }
855            _ => {}
856        }
857    }
858
859    index.edges.extend(new_edges);
860}
861
862fn collect_rust_mod_edges(
863    file: &str,
864    file_set: &std::collections::HashSet<&str>,
865    index: &ProjectIndex,
866    edges: &mut Vec<IndexEdge>,
867) {
868    if !index.files.contains_key(file) {
869        return;
870    }
871
872    let full_path = Path::new(&index.project_root).join(file);
873    let Ok(content) = std::fs::read_to_string(&full_path) else {
874        return;
875    };
876
877    let dir = Path::new(file)
878        .parent()
879        .map(|p| p.to_string_lossy().to_string());
880
881    for line in content.lines() {
882        let trimmed = line.trim();
883        if !trimmed.starts_with("mod ") || trimmed.contains('{') {
884            continue;
885        }
886        let mod_name = trimmed
887            .trim_start_matches("mod ")
888            .trim_start_matches("pub mod ")
889            .trim_start_matches("pub(crate) mod ")
890            .trim_end_matches(';')
891            .trim();
892
893        if mod_name.is_empty() || mod_name.contains(' ') {
894            continue;
895        }
896
897        let candidates = if let Some(ref d) = dir {
898            vec![
899                format!("{d}/{mod_name}.rs"),
900                format!("{d}/{mod_name}/mod.rs"),
901            ]
902        } else {
903            vec![format!("{mod_name}.rs"), format!("{mod_name}/mod.rs")]
904        };
905
906        for candidate in candidates {
907            if file_set.contains(candidate.as_str()) {
908                edges.push(IndexEdge {
909                    from: file.to_string(),
910                    to: candidate,
911                    kind: "module".to_string(),
912                    weight: 0.8,
913                });
914                break;
915            }
916        }
917    }
918}
919
920fn collect_go_package_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
921    let p = Path::new(file);
922    if p.extension().and_then(|e| e.to_str()) != Some("go") {
923        return;
924    }
925    if file.ends_with("_test.go") {
926        return;
927    }
928
929    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
930        return;
931    };
932
933    for other in file_paths {
934        if other == file {
935            continue;
936        }
937        let op = Path::new(other.as_str());
938        if op.extension().and_then(|e| e.to_str()) != Some("go") {
939            continue;
940        }
941        if other.ends_with("_test.go") {
942            continue;
943        }
944        let other_dir = op
945            .parent()
946            .map(|d| d.to_string_lossy().to_string())
947            .unwrap_or_default();
948        if other_dir == dir {
949            edges.push(IndexEdge {
950                from: file.to_string(),
951                to: other.clone(),
952                kind: "package".to_string(),
953                weight: 0.5,
954            });
955            break;
956        }
957    }
958}
959
960fn collect_python_init_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
961    let p = Path::new(file);
962    if p.file_name().and_then(|n| n.to_str()) != Some("__init__.py") {
963        return;
964    }
965
966    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
967        return;
968    };
969
970    for other in file_paths {
971        if other == file {
972            continue;
973        }
974        let op = Path::new(other.as_str());
975        if op.extension().and_then(|e| e.to_str()) != Some("py") {
976            continue;
977        }
978        let other_dir = op
979            .parent()
980            .map(|d| d.to_string_lossy().to_string())
981            .unwrap_or_default();
982        if other_dir == dir {
983            edges.push(IndexEdge {
984                from: file.to_string(),
985                to: other.clone(),
986                kind: "module".to_string(),
987                weight: 0.8,
988            });
989        }
990    }
991}
992
993fn collect_barrel_edges(
994    file: &str,
995    file_set: &std::collections::HashSet<&str>,
996    index: &ProjectIndex,
997    edges: &mut Vec<IndexEdge>,
998) {
999    let basename = Path::new(file)
1000        .file_stem()
1001        .and_then(|s| s.to_str())
1002        .unwrap_or("");
1003    if basename != "index" {
1004        return;
1005    }
1006
1007    let full_path = Path::new(&index.project_root).join(file);
1008    let Ok(content) = std::fs::read_to_string(&full_path) else {
1009        return;
1010    };
1011
1012    let dir = Path::new(file)
1013        .parent()
1014        .map(|p| p.to_string_lossy().to_string())
1015        .unwrap_or_default();
1016
1017    let ext = Path::new(file)
1018        .extension()
1019        .and_then(|e| e.to_str())
1020        .unwrap_or("ts");
1021
1022    for line in content.lines() {
1023        let trimmed = line.trim();
1024        if !trimmed.starts_with("export") || !trimmed.contains("from") {
1025            continue;
1026        }
1027        if let Some(from_pos) = trimmed.find("from") {
1028            let after = &trimmed[from_pos + 4..];
1029            let source = after
1030                .trim()
1031                .trim_start_matches(['\'', '"'])
1032                .trim_end_matches([';', '\'', '"'])
1033                .trim_end_matches(['\'', '"']);
1034
1035            if source.starts_with("./") || source.starts_with("../") {
1036                let resolved = if dir.is_empty() {
1037                    source.trim_start_matches("./").to_string()
1038                } else {
1039                    format!("{dir}/{}", source.trim_start_matches("./"))
1040                };
1041
1042                let candidates = vec![
1043                    format!("{resolved}.{ext}"),
1044                    format!("{resolved}/index.{ext}"),
1045                    resolved.clone(),
1046                ];
1047
1048                for candidate in candidates {
1049                    if file_set.contains(candidate.as_str()) {
1050                        edges.push(IndexEdge {
1051                            from: file.to_string(),
1052                            to: candidate,
1053                            kind: "reexport".to_string(),
1054                            weight: 0.8,
1055                        });
1056                        break;
1057                    }
1058                }
1059            }
1060        }
1061    }
1062}
1063
1064// ---------------------------------------------------------------------------
1065// Layer 3: Co-Change Edges (weight 0.5)
1066// ---------------------------------------------------------------------------
1067
1068fn build_cochange_edges(index: &mut ProjectIndex) {
1069    let project_root = &index.project_root;
1070
1071    let output = match std::process::Command::new("git")
1072        .args([
1073            "log",
1074            "--name-only",
1075            "--pretty=format:---",
1076            "--since=6 months",
1077            "--",
1078            ".",
1079        ])
1080        .current_dir(project_root)
1081        .output()
1082    {
1083        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
1084        _ => return,
1085    };
1086
1087    let file_set: std::collections::HashSet<&str> =
1088        index.files.keys().map(String::as_str).collect();
1089
1090    let connected: std::collections::HashSet<&str> = index
1091        .edges
1092        .iter()
1093        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1094        .collect();
1095
1096    // Parse commits into groups of files
1097    let mut cooccurrence: HashMap<(String, String), u32> = HashMap::new();
1098    let mut current_commit: Vec<&str> = Vec::new();
1099
1100    for line in output.lines() {
1101        if line == "---" {
1102            if current_commit.len() >= 2 && current_commit.len() <= 20 {
1103                for i in 0..current_commit.len() {
1104                    for j in (i + 1)..current_commit.len() {
1105                        let a = current_commit[i];
1106                        let b = current_commit[j];
1107                        if !file_set.contains(a) || !file_set.contains(b) {
1108                            continue;
1109                        }
1110                        // Only add if at least one is currently isolated
1111                        if connected.contains(a) && connected.contains(b) {
1112                            continue;
1113                        }
1114                        let key = if a < b {
1115                            (a.to_string(), b.to_string())
1116                        } else {
1117                            (b.to_string(), a.to_string())
1118                        };
1119                        *cooccurrence.entry(key).or_insert(0) += 1;
1120                    }
1121                }
1122            }
1123            current_commit.clear();
1124        } else if !line.is_empty() {
1125            current_commit.push(line.trim());
1126        }
1127    }
1128
1129    // Filter: min 5 shared commits
1130    let mut cochange_edges: Vec<IndexEdge> = cooccurrence
1131        .into_iter()
1132        .filter(|(_, count)| *count >= 5)
1133        .map(|((from, to), _)| IndexEdge {
1134            from,
1135            to,
1136            kind: "cochange".to_string(),
1137            weight: 0.5,
1138        })
1139        .collect();
1140
1141    // Cap at 500 to prevent noise
1142    cochange_edges.sort_by(|a, b| a.from.cmp(&b.from).then_with(|| a.to.cmp(&b.to)));
1143    cochange_edges.truncate(500);
1144
1145    index.edges.extend(cochange_edges);
1146}
1147
1148// ---------------------------------------------------------------------------
1149// Layer 4: Sibling Edges (weight 0.2)
1150// ---------------------------------------------------------------------------
1151
1152fn build_sibling_edges(index: &mut ProjectIndex) {
1153    let connected: std::collections::HashSet<&str> = index
1154        .edges
1155        .iter()
1156        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1157        .collect();
1158
1159    let file_paths: Vec<String> = index.files.keys().cloned().collect();
1160    let mut new_edges: Vec<IndexEdge> = Vec::new();
1161
1162    for file in &file_paths {
1163        if connected.contains(file.as_str()) {
1164            continue;
1165        }
1166
1167        let ext = Path::new(file.as_str())
1168            .extension()
1169            .and_then(|e| e.to_str())
1170            .unwrap_or("");
1171        let dir = Path::new(file.as_str())
1172            .parent()
1173            .map(|p| p.to_string_lossy().to_string())
1174            .unwrap_or_default();
1175
1176        // Find one sibling with same extension
1177        for other in &file_paths {
1178            if other == file {
1179                continue;
1180            }
1181            let other_ext = Path::new(other.as_str())
1182                .extension()
1183                .and_then(|e| e.to_str())
1184                .unwrap_or("");
1185            let other_dir = Path::new(other.as_str())
1186                .parent()
1187                .map(|p| p.to_string_lossy().to_string())
1188                .unwrap_or_default();
1189
1190            if other_ext == ext && other_dir == dir {
1191                new_edges.push(IndexEdge {
1192                    from: file.clone(),
1193                    to: other.clone(),
1194                    kind: "sibling".to_string(),
1195                    weight: 0.2,
1196                });
1197                break; // Max 1 sibling edge per isolate
1198            }
1199        }
1200    }
1201
1202    index.edges.extend(new_edges);
1203}
1204
1205fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
1206    let lines: Vec<&str> = content.lines().collect();
1207    let mut start = 0;
1208
1209    for (i, line) in lines.iter().enumerate() {
1210        if line.contains(&sig.name) {
1211            let trimmed = line.trim();
1212            let is_def = trimmed.starts_with("fn ")
1213                || trimmed.starts_with("pub fn ")
1214                || trimmed.starts_with("pub(crate) fn ")
1215                || trimmed.starts_with("async fn ")
1216                || trimmed.starts_with("pub async fn ")
1217                || trimmed.starts_with("struct ")
1218                || trimmed.starts_with("pub struct ")
1219                || trimmed.starts_with("enum ")
1220                || trimmed.starts_with("pub enum ")
1221                || trimmed.starts_with("trait ")
1222                || trimmed.starts_with("pub trait ")
1223                || trimmed.starts_with("impl ")
1224                || trimmed.starts_with("class ")
1225                || trimmed.starts_with("export class ")
1226                || trimmed.starts_with("export function ")
1227                || trimmed.starts_with("export async function ")
1228                || trimmed.starts_with("function ")
1229                || trimmed.starts_with("async function ")
1230                || trimmed.starts_with("def ")
1231                || trimmed.starts_with("async def ")
1232                || trimmed.starts_with("func ")
1233                || trimmed.starts_with("interface ")
1234                || trimmed.starts_with("export interface ")
1235                || trimmed.starts_with("type ")
1236                || trimmed.starts_with("export type ")
1237                || trimmed.starts_with("const ")
1238                || trimmed.starts_with("export const ")
1239                || trimmed.starts_with("fun ")
1240                || trimmed.starts_with("private fun ")
1241                || trimmed.starts_with("public fun ")
1242                || trimmed.starts_with("internal fun ")
1243                || trimmed.starts_with("class ")
1244                || trimmed.starts_with("data class ")
1245                || trimmed.starts_with("sealed class ")
1246                || trimmed.starts_with("sealed interface ")
1247                || trimmed.starts_with("enum class ")
1248                || trimmed.starts_with("object ")
1249                || trimmed.starts_with("private object ")
1250                || trimmed.starts_with("interface ")
1251                || trimmed.starts_with("typealias ")
1252                || trimmed.starts_with("private typealias ");
1253            if is_def {
1254                start = i + 1;
1255                break;
1256            }
1257        }
1258    }
1259
1260    if start == 0 {
1261        return (1, lines.len().min(20));
1262    }
1263
1264    let base_indent = lines
1265        .get(start - 1)
1266        .map_or(0, |l| l.len() - l.trim_start().len());
1267
1268    let mut end = start;
1269    let mut brace_depth: i32 = 0;
1270    let mut found_open = false;
1271
1272    for (i, line) in lines.iter().enumerate().skip(start - 1) {
1273        for ch in line.chars() {
1274            if ch == '{' {
1275                brace_depth += 1;
1276                found_open = true;
1277            } else if ch == '}' {
1278                brace_depth -= 1;
1279            }
1280        }
1281
1282        end = i + 1;
1283
1284        if found_open && brace_depth <= 0 {
1285            break;
1286        }
1287
1288        if !found_open && i > start {
1289            let indent = line.len() - line.trim_start().len();
1290            if indent <= base_indent && !line.trim().is_empty() && i > start {
1291                end = i;
1292                break;
1293            }
1294        }
1295
1296        if end - start > 200 {
1297            break;
1298        }
1299    }
1300
1301    (start, end)
1302}
1303
/// Returns the first "meaningful" line among the first 20 lines of `content`,
/// trimmed and cut to at most 120 characters.
///
/// Blank lines, comment-looking lines (`//`, `#`, `/*`, `*`) and
/// import/package lines (`use `, `import `, `from `, `require(`, `package `)
/// are skipped. An empty string is returned when nothing qualifies.
fn extract_summary(content: &str) -> String {
    const SKIPPED_PREFIXES: &[&str] = &[
        "//",
        "#",
        "/*",
        "*",
        "use ",
        "import ",
        "from ",
        "require(",
        "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| line.starts_with(prefix))
        })
        .map(|line| line.chars().take(120).collect())
        .unwrap_or_default()
}
1324
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit result as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    content.hash(&mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
1333
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// low 32 bits of the result as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    input.hash(&mut state);
    let low_bits = state.finish() & 0xFFFF_FFFF;
    format!("{low_bits:08x}")
}
1342
/// Recursively copies the directory tree at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Directories are
/// recursed into and every other entry is copied with `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered: creating a directory, listing
/// `src`, reading an individual directory entry, or copying a file. Entries
/// already copied before the failure are left in place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Propagate per-entry errors instead of silently skipping the entry,
        // which would report success for an incomplete copy.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
1356
1357fn normalize_absolute_path(path: &str) -> String {
1358    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
1359        return canon.to_string_lossy().to_string();
1360    }
1361
1362    let mut normalized = path.to_string();
1363    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
1364        normalized.truncate(normalized.len() - 2);
1365    }
1366    while normalized.len() > 1
1367        && (normalized.ends_with('\\') || normalized.ends_with('/'))
1368        && !normalized.ends_with(":\\")
1369        && !normalized.ends_with(":/")
1370        && normalized != "\\"
1371        && normalized != "/"
1372    {
1373        normalized.pop();
1374    }
1375    normalized
1376}
1377
/// Normalizes a project root path.
///
/// Public entry point that delegates to `normalize_absolute_path`, so a root
/// is normalized exactly like any other path.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
1381
1382pub fn graph_match_key(path: &str) -> String {
1383    let stripped =
1384        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1385    stripped.trim_start_matches('/').to_string()
1386}
1387
1388pub fn graph_relative_key(path: &str, root: &str) -> String {
1389    let root_norm = normalize_project_root(root);
1390    let path_norm = normalize_absolute_path(path);
1391    let root_path = Path::new(&root_norm);
1392    let path_path = Path::new(&path_norm);
1393
1394    if let Ok(rel) = path_path.strip_prefix(root_path) {
1395        let rel = rel.to_string_lossy().to_string();
1396        return rel.trim_start_matches(['/', '\\']).to_string();
1397    }
1398
1399    path.trim_start_matches(['/', '\\'])
1400        .replace('/', std::path::MAIN_SEPARATOR_STR)
1401}
1402
/// Module-private alias for `graph_relative_key`: returns `path` relative to
/// `root` using exactly the same rules.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
1406
/// Reports whether files with extension `ext` should be indexed.
///
/// Delegates entirely to `crate::core::language_capabilities::is_indexable_ext`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
1410
/// Test-only helper: returns the name from the first line that, once trimmed,
/// starts with `package `. The remainder is trimmed and any trailing `;`
/// characters are removed. Returns `None` when no such line exists.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(rest) = raw_line.trim().strip_prefix("package ") {
            return Some(rest.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
1418
// Unit tests for hashing, path normalization, staleness detection, symbol
// range lookup and scan-root safety checks.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // `short_hash` must return the same 8-character value for the same input.
    #[test]
    fn test_short_hash_deterministic() {
        let h1 = short_hash("/Users/test/project");
        let h2 = short_hash("/Users/test/project");
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 8);
    }

    // `make_relative` must agree with `graph_relative_key` for Unix, relative,
    // Windows and verbatim-prefixed inputs.
    #[test]
    fn test_make_relative() {
        assert_eq!(
            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
            graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("src/main.rs", "/foo/bar"),
            graph_relative_key("src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
        );
        assert_eq!(
            make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
            graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
        );
    }

    // Trailing separators and a trailing `.` segment are removed from roots.
    #[test]
    fn test_normalize_project_root() {
        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
    }

    // Backslash, verbatim-prefixed and root-relative Windows paths all map to
    // forward-slash keys without a leading separator.
    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        assert_eq!(
            graph_match_key(r"C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(
            graph_match_key(r"\\?\C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
    }

    // Comments, `use` lines and blank lines are skipped when picking a summary.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        let summary = extract_summary(content);
        assert_eq!(summary, "pub fn main() {");
    }

    // Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_compute_hash_deterministic() {
        let h1 = compute_hash("hello world");
        let h2 = compute_hash("hello world");
        assert_eq!(h1, h2);
        assert_ne!(h1, compute_hash("hello world!"));
    }

    // A fresh index carries the current version, the given root and no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    // Builds a `FileEntry` for `path` whose derived fields (hash, line count,
    // token count, summary) are computed from `content`.
    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
        FileEntry {
            path: path.to_string(),
            hash: compute_hash(content),
            language: language.to_string(),
            line_count: content.lines().count(),
            token_count: crate::core::tokens::count_tokens(content),
            exports: Vec::new(),
            summary: extract_summary(content),
        }
    }

    // An index that lists a file absent from disk is reported as stale.
    #[test]
    fn test_index_looks_stale_when_any_file_missing() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
        idx.files.insert(
            "missing.rs".to_string(),
            fe("missing.rs", "pub fn m() {}\n", "rs"),
        );

        assert!(index_looks_stale(&idx, &root_s));
    }

    // An index whose files all exist on disk is not reported as stale.
    #[test]
    fn test_index_looks_fresh_when_all_files_exist() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));

        assert!(!index_looks_stale(&idx, &root_s));
    }

    // Both importers of `b.rs` are returned as its reverse dependencies.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges.push(IndexEdge {
            from: "a.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
            weight: 1.0,
        });
        idx.edges.push(IndexEdge {
            from: "c.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
            weight: 1.0,
        });

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"a.rs".to_string()));
        assert!(deps.contains(&"c.rs".to_string()));
    }

    // The Kotlin method `greet` sits on line 5 of the fixture (the raw string
    // begins with an empty line).
    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
    fun greet(name: String): String {
        return "hi $name"
    }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };
        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    // When a signature carries explicit start/end lines, those are used and
    // the text-based fallback is never consulted.
    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        let (start, end) = sig
            .start_line
            .zip(sig.end_line)
            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
        assert_eq!((start, end), (42, 43));
    }

    // An index serialized with an older version still deserializes, and its
    // version differs from the current `INDEX_VERSION`.
    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    // The package name is read from the leading `package` line.
    #[test]
    fn test_kotlin_package_name() {
        let content = "package com.example.feature\n\nclass UserService";
        assert_eq!(
            kotlin_package_name(content).as_deref(),
            Some("com.example.feature")
        );
    }

    // Filesystem roots (and, on Windows, drive roots) are never scanned.
    #[test]
    fn safe_scan_root_rejects_fs_root() {
        assert!(!is_safe_scan_root("/"));
        assert!(!is_safe_scan_root("\\"));
        #[cfg(windows)]
        {
            assert!(!is_safe_scan_root("C:\\"));
            assert!(!is_safe_scan_root("D:\\"));
        }
    }

    // The user's home directory is rejected; skipped when no home is known.
    #[test]
    fn safe_scan_root_rejects_home() {
        if let Some(home) = dirs::home_dir() {
            let home_str = home.to_string_lossy().to_string();
            assert!(
                !is_safe_scan_root(&home_str),
                "home dir should be rejected: {home_str}"
            );
        }
    }

    // A temporary directory containing a `Cargo.toml` is accepted as a root.
    #[test]
    fn safe_scan_root_accepts_project_dir() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[package]\nname = \"test\"\n",
        )
        .unwrap();
        let root = tmp.path().to_string_lossy().to_string();
        assert!(is_safe_scan_root(&root));
    }

    // A directory with 55 subdirectories and no project marker is rejected.
    #[test]
    fn safe_scan_root_rejects_broad_dir() {
        let tmp = tempdir().unwrap();
        for i in 0..55 {
            std::fs::create_dir(tmp.path().join(format!("dir{i}"))).unwrap();
        }
        let root = tmp.path().to_string_lossy().to_string();
        assert!(!is_safe_scan_root(&root));
    }

    // With `LEAN_CTX_NO_INDEX` set, `scan` returns an index without files.
    // The shared env lock is held because the test mutates process-wide state.
    #[test]
    fn no_index_env_skips_scan() {
        let _env = crate::core::data_dir::test_env_lock();
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "").unwrap();
        std::fs::write(tmp.path().join("main.rs"), "fn main() {}").unwrap();

        std::env::set_var("LEAN_CTX_NO_INDEX", "1");
        let idx = scan(&tmp.path().to_string_lossy());
        // Unset before asserting so a failure does not leak the variable.
        std::env::remove_var("LEAN_CTX_NO_INDEX");
        assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
    }

    // An index holding `Desktop/` entries under a project root is stale.
    #[test]
    fn stale_index_detected_by_contamination() {
        let root_s = "/home/testuser/myproject";
        let mut idx = ProjectIndex::new(root_s);
        // Simulate a contaminated index with Desktop files
        idx.files.insert(
            "Desktop/random.py".to_string(),
            fe("Desktop/random.py", "x = 1\n", "py"),
        );
        idx.files.insert(
            "src/main.rs".to_string(),
            fe("src/main.rs", "fn main() {}\n", "rs"),
        );
        assert!(
            index_looks_stale(&idx, root_s),
            "Index with Desktop/ files should be considered stale"
        );
    }

    // An index whose `last_scan` is 100 hours old is stale even though all of
    // its files still exist.
    #[test]
    fn stale_index_detected_by_age() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));
        // Set last_scan to 100 hours ago (default max_age_hours is 48)
        let old_time = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
        idx.last_scan = old_time.format("%Y-%m-%d %H:%M:%S").to_string();

        assert!(
            index_looks_stale(&idx, &root_s),
            "Index older than max_age_hours should be stale"
        );
    }

    // `~/Downloads` is rejected as a scan root unless it is a git checkout.
    #[test]
    fn safe_scan_root_rejects_home_downloads() {
        if let Some(home) = dirs::home_dir() {
            let downloads = home.join("Downloads");
            // Only test if Downloads doesn't contain a .git (unlikely but possible)
            if !downloads.join(".git").exists() {
                let downloads_str = downloads.to_string_lossy().to_string();
                assert!(
                    !is_safe_scan_root(&downloads_str),
                    "~/Downloads should be rejected without project markers"
                );
            }
        }
    }
}