Skip to main content

lean_ctx/core/
graph_index.rs

1// DEPRECATED: This module is being replaced by PropertyGraph (core/property_graph/).
2// New code should use GraphProvider (core/graph_provider.rs) instead of accessing
3// ProjectIndex directly. Remaining direct consumers: call_graph, graph_enricher,
4// ctx_callgraph, ctx_graph_diagram, ctx_routes, autonomy, dashboard/callgraph.
5// See OPT-14/15 plan for the full migration path.
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14
/// Schema version written into every freshly created `ProjectIndex`.
/// `ProjectIndex::load` rejects any persisted index whose `version` field
/// differs from this value.
const INDEX_VERSION: u32 = 6;
16
/// Public wrapper around the private `is_safe_scan_root` check.
///
/// Returns the result of `is_safe_scan_root(path)` unchanged.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
20
/// Reports whether `path` has no parent component, i.e. looks like a
/// filesystem root.
///
/// On Windows builds a path whose parent is the empty path is treated as a
/// root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
25
26fn is_safe_scan_root(path: &str) -> bool {
27    let normalized = normalize_project_root(path);
28    let p = Path::new(&normalized);
29
30    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
31        tracing::warn!("[graph_index: refusing to scan filesystem root]");
32        return false;
33    }
34
35    if normalized == "." || normalized.is_empty() {
36        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
37        return false;
38    }
39
40    if let Some(home) = dirs::home_dir() {
41        let home_norm = normalize_project_root(&home.to_string_lossy());
42        if normalized == home_norm {
43            use std::sync::Once;
44            static HOME_WARN: Once = Once::new();
45            HOME_WARN.call_once(|| {
46                tracing::warn!(
47                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
48                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
49                );
50            });
51            return false;
52        }
53        // Block common broad home subdirectories that are never valid project roots
54        let home_path = Path::new(&home_norm);
55        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
56            "Desktop",
57            "Documents",
58            "Downloads",
59            "Pictures",
60            "Music",
61            "Videos",
62            "Movies",
63            "Library",
64            ".local",
65            ".cache",
66            ".config",
67            "snap",
68            "Applications",
69        ];
70        for blocked in BLOCKED_HOME_SUBDIRS {
71            let blocked_path = home_path.join(blocked);
72            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
73            let has_project_marker = p.join(".git").exists()
74                || p.join("Cargo.toml").exists()
75                || p.join("package.json").exists();
76            if is_inside_blocked && !has_project_marker {
77                tracing::warn!(
78                    "[graph_index: refusing to scan {normalized} — \
79                     inside home/{blocked} without project markers]"
80                );
81                return false;
82            }
83        }
84
85        // Block directories that are direct children of home without project markers
86        if p.parent() == Some(home_path) {
87            let has_marker = p.join(".git").exists()
88                || p.join("Cargo.toml").exists()
89                || p.join("package.json").exists()
90                || p.join("go.mod").exists()
91                || p.join("pyproject.toml").exists();
92            if !has_marker {
93                tracing::warn!(
94                    "[graph_index: refusing to scan {normalized} — \
95                     direct child of home without project markers]"
96                );
97                return false;
98            }
99        }
100    }
101
102    let breadth_markers = [
103        ".git",
104        "Cargo.toml",
105        "package.json",
106        "go.mod",
107        "pyproject.toml",
108        "setup.py",
109        "Makefile",
110        "CMakeLists.txt",
111        "pnpm-workspace.yaml",
112        ".projectile",
113        "BUILD.bazel",
114        "go.work",
115    ];
116
117    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
118        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
119            rd.filter_map(Result::ok)
120                .filter(|e| e.path().is_dir())
121                .count()
122        });
123        if child_count > 50 {
124            tracing::warn!(
125                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
126                 skipping scan to avoid indexing broad directories]"
127            );
128            return false;
129        }
130    }
131
132    true
133}
134
/// Serializable graph index for one project: its files, their symbols and
/// the edges between files.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Schema version; `ProjectIndex::new` sets it to `INDEX_VERSION`.
    pub version: u32,
    /// Project root as returned by `normalize_project_root`.
    pub project_root: String,
    /// Local time the index was created, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files; the scanner keys them by path relative to the project root.
    pub files: HashMap<String, FileEntry>,
    /// Edges between files.
    pub edges: Vec<IndexEdge>,
    /// Extracted symbols; the scanner keys them as `"{relative_path}::{name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
144
/// Per-file record stored in `ProjectIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path of the file relative to the project root.
    pub path: String,
    /// Hash of the file content; the scanner compares it to decide whether a
    /// previously indexed entry can be reused.
    pub hash: String,
    /// The scanner stores the file extension here (e.g. `rs`).
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count of the file content as computed by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are marked as exported.
    pub exports: Vec<String>,
    /// Short summary produced by `extract_summary` from the file content.
    pub summary: String,
}
155
/// Per-symbol record stored in `ProjectIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by signature extraction.
    pub name: String,
    /// Stringified signature kind.
    pub kind: String,
    /// First line of the symbol's range.
    /// NOTE(review): whether line numbers are 0- or 1-based is not visible here — confirm.
    pub start_line: usize,
    /// Last line of the symbol's range.
    pub end_line: usize,
    /// Whether the signature was marked as exported.
    pub is_exported: bool,
}
165
/// Edge between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Relative path of the file the edge starts at (for `"import"` edges, the importer).
    pub from: String,
    /// Relative path of the file the edge points to (for `"import"` edges, the resolved import).
    pub to: String,
    /// Edge category, e.g. `"import"`.
    pub kind: String,
    /// Edge weight; when the field is missing from serialized data it
    /// deserializes to the value of `default_edge_weight` (1.0).
    #[serde(default = "default_edge_weight")]
    pub weight: f32,
}
174
/// Serde default for `IndexEdge::weight`, used when the field is absent
/// from the serialized data.
fn default_edge_weight() -> f32 {
    1.0
}
178
179impl ProjectIndex {
180    pub fn new(project_root: &str) -> Self {
181        Self {
182            version: INDEX_VERSION,
183            project_root: normalize_project_root(project_root),
184            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
185            files: HashMap::new(),
186            edges: Vec::new(),
187            symbols: HashMap::new(),
188        }
189    }
190
191    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
192        let normalized = normalize_project_root(project_root);
193        let hash = crate::core::project_hash::hash_project_root(&normalized);
194        crate::core::data_dir::lean_ctx_data_dir()
195            .ok()
196            .map(|d| d.join("graphs").join(hash))
197    }
198
199    pub fn load(project_root: &str) -> Option<Self> {
200        let dir = Self::index_dir(project_root)?;
201
202        let zst_path = dir.join("index.json.zst");
203        if zst_path.exists() {
204            let compressed = std::fs::read(&zst_path).ok()?;
205            let data = zstd::decode_all(compressed.as_slice()).ok()?;
206            let content = String::from_utf8(data).ok()?;
207            let index: Self = serde_json::from_str(&content).ok()?;
208            if index.version != INDEX_VERSION {
209                return None;
210            }
211            return Some(index);
212        }
213
214        let json_path = dir.join("index.json");
215        let content = std::fs::read_to_string(&json_path)
216            .or_else(|_| -> std::io::Result<String> {
217                let legacy_hash = short_hash(&normalize_project_root(project_root));
218                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
219                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
220                    .join("graphs")
221                    .join(legacy_hash);
222                let legacy_path = legacy_dir.join("index.json");
223                let data = std::fs::read_to_string(&legacy_path)?;
224                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
225                    tracing::debug!("graph index migration: {e}");
226                }
227                Ok(data)
228            })
229            .ok()?;
230        let index: Self = serde_json::from_str(&content).ok()?;
231        if index.version != INDEX_VERSION {
232            return None;
233        }
234        // Auto-migrate: compress legacy JSON to zstd
235        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
236            let zst_tmp = zst_path.with_extension("zst.tmp");
237            if std::fs::write(&zst_tmp, &compressed).is_ok()
238                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
239            {
240                let _ = std::fs::remove_file(&json_path);
241            }
242        }
243        Some(index)
244    }
245
246    pub fn save(&self) -> Result<(), String> {
247        let dir = Self::index_dir(&self.project_root)
248            .ok_or_else(|| "Cannot determine data directory".to_string())?;
249        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
250        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
251        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
252        let target = dir.join("index.json.zst");
253        let tmp = target.with_extension("zst.tmp");
254        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
255        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
256        let _ = std::fs::remove_file(dir.join("index.json"));
257        Ok(())
258    }
259
260    /// Remove all cached graph indices that are older than max_age_hours.
261    /// Called on startup/update to prevent stale data from persisting.
262    pub fn purge_stale_indices() {
263        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
264            return;
265        };
266        let graphs_dir = data_dir.join("graphs");
267        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
268            return;
269        };
270        let cfg = crate::core::config::Config::load();
271        let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
272
273        for entry in entries.filter_map(Result::ok) {
274            let path = entry.path();
275            if !path.is_dir() {
276                continue;
277            }
278            let zst = path.join("index.json.zst");
279            let json = path.join("index.json");
280            let index_file = if zst.exists() {
281                &zst
282            } else if json.exists() {
283                &json
284            } else {
285                continue;
286            };
287
288            let is_old = index_file
289                .metadata()
290                .and_then(|m| m.modified())
291                .is_ok_and(|mtime| {
292                    mtime
293                        .elapsed()
294                        .is_ok_and(|age| age.as_secs() > max_age_secs)
295                });
296
297            if is_old {
298                tracing::info!("[graph_index: purging stale index at {}]", path.display());
299                let _ = std::fs::remove_dir_all(&path);
300            }
301        }
302    }
303
304    pub fn file_count(&self) -> usize {
305        self.files.len()
306    }
307
308    pub fn symbol_count(&self) -> usize {
309        self.symbols.len()
310    }
311
312    pub fn edge_count(&self) -> usize {
313        self.edges.len()
314    }
315
316    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
317        self.symbols.get(key)
318    }
319
320    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
321        let mut result = Vec::new();
322        let mut visited = std::collections::HashSet::new();
323        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
324
325        while let Some((current, d)) = queue.pop() {
326            if d > depth || visited.contains(&current) {
327                continue;
328            }
329            visited.insert(current.clone());
330            if current != path {
331                result.push(current.clone());
332            }
333
334            for edge in &self.edges {
335                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
336                    queue.push((edge.from.clone(), d + 1));
337                }
338            }
339        }
340        result
341    }
342
343    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
344        let mut result = Vec::new();
345        let mut visited = std::collections::HashSet::new();
346        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
347
348        while let Some((current, d)) = queue.pop() {
349            if d > depth || visited.contains(&current) {
350                continue;
351            }
352            visited.insert(current.clone());
353            if current != path {
354                result.push(current.clone());
355            }
356
357            for edge in &self.edges {
358                if edge.from == current && !visited.contains(&edge.to) {
359                    queue.push((edge.to.clone(), d + 1));
360                }
361                if edge.to == current && !visited.contains(&edge.from) {
362                    queue.push((edge.from.clone(), d + 1));
363                }
364            }
365        }
366        result
367    }
368}
369
370/// Load the best available graph index, trying multiple root path variants.
371/// If no valid index exists, automatically scans the project to build one.
372/// This is the primary entry point — ensures zero-config usage.
373pub fn load_or_build(project_root: &str) -> ProjectIndex {
374    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
375        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
376    }
377
378    // Prefer stable absolute roots. Using "." as a cache key is fragile because
379    // it depends on the process cwd and can accidentally load the wrong project.
380    let root_abs = if project_root.trim().is_empty() || project_root == "." {
381        std::env::current_dir().ok().map_or_else(
382            || ".".to_string(),
383            |p| normalize_project_root(&p.to_string_lossy()),
384        )
385    } else {
386        normalize_project_root(project_root)
387    };
388
389    if !is_safe_scan_root(&root_abs) {
390        return ProjectIndex::new(&root_abs);
391    }
392
393    // Try the absolute/root-normalized path first.
394    if let Some(idx) = ProjectIndex::load(&root_abs) {
395        if !idx.files.is_empty() {
396            if index_looks_stale(&idx, &root_abs) {
397                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
398                return scan(&root_abs);
399            }
400            return idx;
401        }
402    }
403
404    // CWD fallback: only use if CWD is a subdirectory of root_abs (same project)
405    if let Ok(cwd) = std::env::current_dir() {
406        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
407        if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
408            if let Some(idx) = ProjectIndex::load(&cwd_str) {
409                if !idx.files.is_empty() {
410                    if index_looks_stale(&idx, &cwd_str) {
411                        return scan(&cwd_str);
412                    }
413                    return idx;
414                }
415            }
416        }
417    }
418
419    scan(&root_abs)
420}
421
422fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
423    if index.files.is_empty() {
424        return true;
425    }
426
427    // TTL check: rebuild if index is older than configured max_age_hours
428    if let Ok(scan_time) =
429        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
430    {
431        let cfg = crate::core::config::Config::load();
432        let effective_hours = cfg.archive_max_age_hours_effective();
433        let max_age = chrono::Duration::hours(effective_hours as i64);
434        let now = chrono::Local::now().naive_local();
435        if now.signed_duration_since(scan_time) > max_age {
436            tracing::info!(
437                "[graph_index: index is older than {}h — marking stale]",
438                effective_hours
439            );
440            return true;
441        }
442    }
443
444    // Contamination check: if index contains paths from common user directories,
445    // it was built from a too-broad root and must be rebuilt
446    const CONTAMINATION_MARKERS: &[&str] = &[
447        "Desktop/",
448        "Documents/",
449        "Downloads/",
450        "Pictures/",
451        "Music/",
452        "Videos/",
453        "Movies/",
454        "Library/",
455        ".cache/",
456        "snap/",
457    ];
458    let contaminated = index.files.keys().take(200).any(|rel| {
459        CONTAMINATION_MARKERS
460            .iter()
461            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
462    });
463    if contaminated {
464        tracing::warn!(
465            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
466             marking stale to force clean rebuild]"
467        );
468        return true;
469    }
470
471    let root_path = Path::new(root_abs);
472    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
473    let sample_size = index.files.len().min(20);
474    for rel in index.files.keys().take(sample_size) {
475        let rel = rel.trim_start_matches(['/', '\\']);
476        if rel.is_empty() {
477            continue;
478        }
479        let abs = root_path.join(rel);
480        if !abs.exists() {
481            return true;
482        }
483    }
484
485    false
486}
487
/// Scans `project_root` and returns the resulting index.
///
/// Delegates to `scan_inner` and discards the content cache it also returns.
pub fn scan(project_root: &str) -> ProjectIndex {
    scan_inner(project_root).0
}
491
/// Scans `project_root` and returns both the index and the content cache
/// produced by `scan_inner` (file contents keyed by relative path).
pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
    scan_inner(project_root)
}
495
496fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
497    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
498        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
499        return (ProjectIndex::new(project_root), HashMap::new());
500    }
501
502    let project_root = normalize_project_root(project_root);
503
504    if !is_safe_scan_root(&project_root) {
505        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
506        return (ProjectIndex::new(&project_root), HashMap::new());
507    }
508
509    let lock_name = format!(
510        "graph-idx-{}",
511        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
512    );
513    let _lock = crate::core::startup_guard::try_acquire_lock(
514        &lock_name,
515        std::time::Duration::from_millis(800),
516        std::time::Duration::from_mins(3),
517    );
518    if _lock.is_none() {
519        tracing::info!(
520            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
521        );
522        return (
523            ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
524            HashMap::new(),
525        );
526    }
527
528    let existing = ProjectIndex::load(&project_root);
529    let mut index = ProjectIndex::new(&project_root);
530
531    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
532        if let Some(ref prev) = existing {
533            prev.files
534                .iter()
535                .map(|(path, entry)| {
536                    let syms: Vec<(String, SymbolEntry)> = prev
537                        .symbols
538                        .iter()
539                        .filter(|(_, s)| s.file == *path)
540                        .map(|(k, v)| (k.clone(), v.clone()))
541                        .collect();
542                    (path.clone(), (entry.hash.clone(), syms))
543                })
544                .collect()
545        } else {
546            HashMap::new()
547        };
548
549    let walker = ignore::WalkBuilder::new(&project_root)
550        .hidden(true)
551        .git_ignore(true)
552        .git_global(true)
553        .git_exclude(true)
554        .max_depth(Some(20))
555        .build();
556
557    let cfg = crate::core::config::Config::load();
558    let extra_ignores: Vec<glob::Pattern> = cfg
559        .extra_ignore_patterns
560        .iter()
561        .filter_map(|p| glob::Pattern::new(p).ok())
562        .collect();
563
564    let mut scanned = 0usize;
565    let mut reused = 0usize;
566    let mut entries_visited = 0usize;
567    let mut content_cache: HashMap<String, String> = HashMap::new();
568    let max_files = if cfg.graph_index_max_files == 0 {
569        usize::MAX // unlimited
570    } else {
571        cfg.graph_index_max_files as usize
572    };
573    const MAX_ENTRIES_VISITED: usize = 500_000;
574    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
575    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
576
577    for entry in walker.filter_map(std::result::Result::ok) {
578        entries_visited += 1;
579        if entries_visited > MAX_ENTRIES_VISITED {
580            tracing::warn!(
581                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
582                 runaway traversal. Indexed {} files so far.]",
583                index.files.len()
584            );
585            break;
586        }
587        if entries_visited.is_multiple_of(5000) {
588            if std::time::Instant::now() > scan_deadline {
589                tracing::warn!(
590                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
591                     saving partial index with {} files]",
592                    index.files.len()
593                );
594                break;
595            }
596            if crate::core::memory_guard::abort_requested() {
597                tracing::warn!(
598                    "[graph_index: memory pressure abort after {entries_visited} entries — \
599                     saving partial index with {} files]",
600                    index.files.len()
601                );
602                break;
603            }
604            if crate::core::memory_guard::is_under_pressure() {
605                tracing::warn!(
606                    "[graph_index: memory pressure detected at {entries_visited} entries — \
607                     stopping scan with {} files]",
608                    index.files.len()
609                );
610                break;
611            }
612            if let Some(ref g) = _lock {
613                g.touch();
614            }
615        }
616
617        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
618            continue;
619        }
620        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
621
622        // Prevent indexing files that escaped the project root (symlinks, mount points)
623        if !file_path.starts_with(&project_root) {
624            continue;
625        }
626
627        // Skip special files (devices, FIFOs, sockets) that can stream infinite data
628        if let Ok(meta) = std::fs::metadata(&file_path) {
629            if !meta.is_file() {
630                continue;
631            }
632            if meta.len() > MAX_FILE_SIZE_BYTES {
633                tracing::debug!(
634                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
635                    meta.len() as f64 / 1_048_576.0,
636                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
637                );
638                continue;
639            }
640        }
641
642        let ext = Path::new(&file_path)
643            .extension()
644            .and_then(|e| e.to_str())
645            .unwrap_or("");
646
647        if !is_indexable_ext(ext) {
648            continue;
649        }
650
651        let rel = make_relative(&file_path, &project_root);
652        if extra_ignores.iter().any(|p| p.matches(&rel)) {
653            continue;
654        }
655
656        if max_files != usize::MAX && index.files.len() >= max_files {
657            tracing::info!(
658                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
659                max_files
660            );
661            break;
662        }
663
664        let Ok(content) = std::fs::read_to_string(&file_path) else {
665            continue;
666        };
667
668        let hash = compute_hash(&content);
669        let rel_path = make_relative(&file_path, &project_root);
670
671        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
672            if *old_hash == hash {
673                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
674                    index.files.insert(rel_path.clone(), old_entry.clone());
675                    for (key, sym) in old_syms {
676                        index.symbols.insert(key.clone(), sym.clone());
677                    }
678                    content_cache.insert(rel_path, content);
679                    reused += 1;
680                    continue;
681                }
682            }
683        }
684
685        let sigs = signatures::extract_signatures(&content, ext);
686        let line_count = content.lines().count();
687        let token_count = crate::core::tokens::count_tokens(&content);
688        let summary = extract_summary(&content);
689
690        let exports: Vec<String> = sigs
691            .iter()
692            .filter(|s| s.is_exported)
693            .map(|s| s.name.clone())
694            .collect();
695
696        index.files.insert(
697            rel_path.clone(),
698            FileEntry {
699                path: rel_path.clone(),
700                hash,
701                language: ext.to_string(),
702                line_count,
703                token_count,
704                exports,
705                summary,
706            },
707        );
708
709        for sig in &sigs {
710            let (start, end) = sig
711                .start_line
712                .zip(sig.end_line)
713                .unwrap_or_else(|| find_symbol_range(&content, sig));
714            let key = format!("{}::{}", rel_path, sig.name);
715            index.symbols.insert(
716                key,
717                SymbolEntry {
718                    file: rel_path.clone(),
719                    name: sig.name.clone(),
720                    kind: sig.kind.to_string(),
721                    start_line: start,
722                    end_line: end,
723                    is_exported: sig.is_exported,
724                },
725            );
726        }
727
728        content_cache.insert(rel_path, content);
729        scanned += 1;
730    }
731
732    build_edges_cached(&mut index, &content_cache);
733
734    if let Err(e) = index.save() {
735        tracing::warn!("could not save graph index: {e}");
736    }
737
738    tracing::warn!(
739        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
740        index.file_count(),
741        scanned,
742        reused,
743        index.symbol_count(),
744        index.edge_count()
745    );
746
747    (index, content_cache)
748}
749
/// Rebuilds all edges of `index` by running the edge passes in a fixed order.
///
/// `build_edges_with_cache` runs first because it clears `index.edges`
/// before adding import edges; `build_implicit_edges_with_cache` then appends
/// to that list. The co-change and sibling passes run last.
/// NOTE(review): those two passes are defined outside this view — presumably
/// they append as well; confirm before reordering.
fn build_edges_cached(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
    build_edges_with_cache(index, content_cache);
    build_implicit_edges_with_cache(index, content_cache);
    build_cochange_edges(index);
    build_sibling_edges(index);
}
756
757fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
758    index.edges.clear();
759
760    if crate::core::memory_guard::abort_requested() {
761        tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
762        return;
763    }
764
765    let root = normalize_project_root(&index.project_root);
766    let root_path = Path::new(&root);
767
768    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
769    file_paths.sort();
770
771    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
772
773    const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
774
775    for (i, rel_path) in file_paths.iter().enumerate() {
776        if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
777            tracing::warn!(
778                "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
779                file_paths.len()
780            );
781            break;
782        }
783
784        let content = if let Some(cached) = content_cache.get(rel_path) {
785            std::borrow::Cow::Borrowed(cached.as_str())
786        } else {
787            let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
788            if let Ok(meta) = abs_path.metadata() {
789                if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
790                    continue;
791                }
792            }
793            match std::fs::read_to_string(&abs_path) {
794                Ok(c) => std::borrow::Cow::Owned(c),
795                Err(_) => continue,
796            }
797        };
798
799        let ext = Path::new(rel_path)
800            .extension()
801            .and_then(|e| e.to_str())
802            .unwrap_or("");
803
804        let resolve_ext = match ext {
805            "vue" | "svelte" => "ts",
806            _ => ext,
807        };
808
809        let analysis_content = if ext == "vue" || ext == "svelte" {
810            if let Some(script) = crate::core::signatures_ts::sfc::extract_script_block(&content) {
811                std::borrow::Cow::Owned(script)
812            } else {
813                content
814            }
815        } else {
816            content
817        };
818
819        let imports = crate::core::deep_queries::analyze(&analysis_content, resolve_ext).imports;
820        if imports.is_empty() {
821            continue;
822        }
823
824        let resolved =
825            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
826        for r in resolved {
827            if r.is_external {
828                continue;
829            }
830            if let Some(to) = r.resolved_path {
831                index.edges.push(IndexEdge {
832                    from: rel_path.clone(),
833                    to,
834                    kind: "import".to_string(),
835                    weight: 1.0,
836                });
837            }
838        }
839    }
840
841    index.edges.sort_by(|a, b| {
842        a.from
843            .cmp(&b.from)
844            .then_with(|| a.to.cmp(&b.to))
845            .then_with(|| a.kind.cmp(&b.kind))
846    });
847    index
848        .edges
849        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
850}
851
852// ---------------------------------------------------------------------------
853// Layer 2: Implicit Language Edges (weight 0.8)
854// ---------------------------------------------------------------------------
855
856fn build_implicit_edges_with_cache(
857    index: &mut ProjectIndex,
858    content_cache: &HashMap<String, String>,
859) {
860    let file_paths: Vec<String> = index.files.keys().cloned().collect();
861    let file_set: std::collections::HashSet<&str> = file_paths.iter().map(String::as_str).collect();
862
863    let mut new_edges: Vec<IndexEdge> = Vec::new();
864
865    for file in &file_paths {
866        let ext = Path::new(file.as_str())
867            .extension()
868            .and_then(|e| e.to_str())
869            .unwrap_or("");
870
871        match ext {
872            "rs" => {
873                collect_rust_mod_edges_cached(
874                    file,
875                    &file_set,
876                    index,
877                    &mut new_edges,
878                    content_cache,
879                );
880            }
881            "go" => collect_go_package_edges(file, &file_paths, &mut new_edges),
882            "py" => collect_python_init_edges(file, &file_paths, &mut new_edges),
883            "ts" | "js" | "tsx" | "jsx" => {
884                collect_barrel_edges_cached(file, &file_set, index, &mut new_edges, content_cache);
885            }
886            _ => {}
887        }
888    }
889
890    index.edges.extend(new_edges);
891}
892
893fn collect_rust_mod_edges_cached(
894    file: &str,
895    file_set: &std::collections::HashSet<&str>,
896    index: &ProjectIndex,
897    edges: &mut Vec<IndexEdge>,
898    content_cache: &HashMap<String, String>,
899) {
900    if !index.files.contains_key(file) {
901        return;
902    }
903
904    let content = if let Some(cached) = content_cache.get(file) {
905        std::borrow::Cow::Borrowed(cached.as_str())
906    } else {
907        let full_path = Path::new(&index.project_root).join(file);
908        match std::fs::read_to_string(&full_path) {
909            Ok(c) => std::borrow::Cow::Owned(c),
910            Err(_) => return,
911        }
912    };
913
914    let dir = Path::new(file)
915        .parent()
916        .map(|p| p.to_string_lossy().to_string());
917
918    for line in content.lines() {
919        let trimmed = line.trim();
920        if !trimmed.starts_with("mod ") || trimmed.contains('{') {
921            continue;
922        }
923        let mod_name = trimmed
924            .trim_start_matches("mod ")
925            .trim_start_matches("pub mod ")
926            .trim_start_matches("pub(crate) mod ")
927            .trim_end_matches(';')
928            .trim();
929
930        if mod_name.is_empty() || mod_name.contains(' ') {
931            continue;
932        }
933
934        let candidates = if let Some(ref d) = dir {
935            vec![
936                format!("{d}/{mod_name}.rs"),
937                format!("{d}/{mod_name}/mod.rs"),
938            ]
939        } else {
940            vec![format!("{mod_name}.rs"), format!("{mod_name}/mod.rs")]
941        };
942
943        for candidate in candidates {
944            if file_set.contains(candidate.as_str()) {
945                edges.push(IndexEdge {
946                    from: file.to_string(),
947                    to: candidate,
948                    kind: "module".to_string(),
949                    weight: 0.8,
950                });
951                break;
952            }
953        }
954    }
955}
956
957fn collect_go_package_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
958    let p = Path::new(file);
959    if p.extension().and_then(|e| e.to_str()) != Some("go") {
960        return;
961    }
962    if file.ends_with("_test.go") {
963        return;
964    }
965
966    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
967        return;
968    };
969
970    for other in file_paths {
971        if other == file {
972            continue;
973        }
974        let op = Path::new(other.as_str());
975        if op.extension().and_then(|e| e.to_str()) != Some("go") {
976            continue;
977        }
978        if other.ends_with("_test.go") {
979            continue;
980        }
981        let other_dir = op
982            .parent()
983            .map(|d| d.to_string_lossy().to_string())
984            .unwrap_or_default();
985        if other_dir == dir {
986            edges.push(IndexEdge {
987                from: file.to_string(),
988                to: other.clone(),
989                kind: "package".to_string(),
990                weight: 0.5,
991            });
992            break;
993        }
994    }
995}
996
997fn collect_python_init_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
998    let p = Path::new(file);
999    if p.file_name().and_then(|n| n.to_str()) != Some("__init__.py") {
1000        return;
1001    }
1002
1003    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
1004        return;
1005    };
1006
1007    for other in file_paths {
1008        if other == file {
1009            continue;
1010        }
1011        let op = Path::new(other.as_str());
1012        if op.extension().and_then(|e| e.to_str()) != Some("py") {
1013            continue;
1014        }
1015        let other_dir = op
1016            .parent()
1017            .map(|d| d.to_string_lossy().to_string())
1018            .unwrap_or_default();
1019        if other_dir == dir {
1020            edges.push(IndexEdge {
1021                from: file.to_string(),
1022                to: other.clone(),
1023                kind: "module".to_string(),
1024                weight: 0.8,
1025            });
1026        }
1027    }
1028}
1029
1030fn collect_barrel_edges_cached(
1031    file: &str,
1032    file_set: &std::collections::HashSet<&str>,
1033    index: &ProjectIndex,
1034    edges: &mut Vec<IndexEdge>,
1035    content_cache: &HashMap<String, String>,
1036) {
1037    let basename = Path::new(file)
1038        .file_stem()
1039        .and_then(|s| s.to_str())
1040        .unwrap_or("");
1041    if basename != "index" {
1042        return;
1043    }
1044
1045    let content = if let Some(cached) = content_cache.get(file) {
1046        std::borrow::Cow::Borrowed(cached.as_str())
1047    } else {
1048        let full_path = Path::new(&index.project_root).join(file);
1049        match std::fs::read_to_string(&full_path) {
1050            Ok(c) => std::borrow::Cow::Owned(c),
1051            Err(_) => return,
1052        }
1053    };
1054
1055    let dir = Path::new(file)
1056        .parent()
1057        .map(|p| p.to_string_lossy().to_string())
1058        .unwrap_or_default();
1059
1060    let ext = Path::new(file)
1061        .extension()
1062        .and_then(|e| e.to_str())
1063        .unwrap_or("ts");
1064
1065    for line in content.lines() {
1066        let trimmed = line.trim();
1067        if !trimmed.starts_with("export") || !trimmed.contains("from") {
1068            continue;
1069        }
1070        if let Some(from_pos) = trimmed.find("from") {
1071            let after = &trimmed[from_pos + 4..];
1072            let source = after
1073                .trim()
1074                .trim_start_matches(['\'', '"'])
1075                .trim_end_matches([';', '\'', '"'])
1076                .trim_end_matches(['\'', '"']);
1077
1078            if source.starts_with("./") || source.starts_with("../") {
1079                let resolved = if dir.is_empty() {
1080                    source.trim_start_matches("./").to_string()
1081                } else {
1082                    format!("{dir}/{}", source.trim_start_matches("./"))
1083                };
1084
1085                let candidates = vec![
1086                    format!("{resolved}.{ext}"),
1087                    format!("{resolved}/index.{ext}"),
1088                    resolved.clone(),
1089                ];
1090
1091                for candidate in candidates {
1092                    if file_set.contains(candidate.as_str()) {
1093                        edges.push(IndexEdge {
1094                            from: file.to_string(),
1095                            to: candidate,
1096                            kind: "reexport".to_string(),
1097                            weight: 0.8,
1098                        });
1099                        break;
1100                    }
1101                }
1102            }
1103        }
1104    }
1105}
1106
1107// ---------------------------------------------------------------------------
1108// Layer 3: Co-Change Edges (weight 0.5)
1109// ---------------------------------------------------------------------------
1110
1111fn build_cochange_edges(index: &mut ProjectIndex) {
1112    let project_root = &index.project_root;
1113
1114    let output = match std::process::Command::new("git")
1115        .args([
1116            "log",
1117            "--name-only",
1118            "--pretty=format:---",
1119            "--since=6 months",
1120            "--",
1121            ".",
1122        ])
1123        .current_dir(project_root)
1124        .output()
1125    {
1126        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
1127        _ => return,
1128    };
1129
1130    let file_set: std::collections::HashSet<&str> =
1131        index.files.keys().map(String::as_str).collect();
1132
1133    let connected: std::collections::HashSet<&str> = index
1134        .edges
1135        .iter()
1136        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1137        .collect();
1138
1139    // Parse commits into groups of files
1140    let mut cooccurrence: HashMap<(String, String), u32> = HashMap::new();
1141    let mut current_commit: Vec<&str> = Vec::new();
1142
1143    for line in output.lines() {
1144        if line == "---" {
1145            if current_commit.len() >= 2 && current_commit.len() <= 20 {
1146                for i in 0..current_commit.len() {
1147                    for j in (i + 1)..current_commit.len() {
1148                        let a = current_commit[i];
1149                        let b = current_commit[j];
1150                        if !file_set.contains(a) || !file_set.contains(b) {
1151                            continue;
1152                        }
1153                        // Only add if at least one is currently isolated
1154                        if connected.contains(a) && connected.contains(b) {
1155                            continue;
1156                        }
1157                        let key = if a < b {
1158                            (a.to_string(), b.to_string())
1159                        } else {
1160                            (b.to_string(), a.to_string())
1161                        };
1162                        *cooccurrence.entry(key).or_insert(0) += 1;
1163                    }
1164                }
1165            }
1166            current_commit.clear();
1167        } else if !line.is_empty() {
1168            current_commit.push(line.trim());
1169        }
1170    }
1171
1172    // Filter: min 5 shared commits
1173    let mut cochange_edges: Vec<IndexEdge> = cooccurrence
1174        .into_iter()
1175        .filter(|(_, count)| *count >= 5)
1176        .map(|((from, to), _)| IndexEdge {
1177            from,
1178            to,
1179            kind: "cochange".to_string(),
1180            weight: 0.5,
1181        })
1182        .collect();
1183
1184    // Cap at 500 to prevent noise
1185    cochange_edges.sort_by(|a, b| a.from.cmp(&b.from).then_with(|| a.to.cmp(&b.to)));
1186    cochange_edges.truncate(500);
1187
1188    index.edges.extend(cochange_edges);
1189}
1190
1191// ---------------------------------------------------------------------------
1192// Layer 4: Sibling Edges (weight 0.2)
1193// ---------------------------------------------------------------------------
1194
1195fn build_sibling_edges(index: &mut ProjectIndex) {
1196    let connected: std::collections::HashSet<&str> = index
1197        .edges
1198        .iter()
1199        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1200        .collect();
1201
1202    let file_paths: Vec<String> = index.files.keys().cloned().collect();
1203    let mut new_edges: Vec<IndexEdge> = Vec::new();
1204
1205    for file in &file_paths {
1206        if connected.contains(file.as_str()) {
1207            continue;
1208        }
1209
1210        let ext = Path::new(file.as_str())
1211            .extension()
1212            .and_then(|e| e.to_str())
1213            .unwrap_or("");
1214        let dir = Path::new(file.as_str())
1215            .parent()
1216            .map(|p| p.to_string_lossy().to_string())
1217            .unwrap_or_default();
1218
1219        // Find one sibling with same extension
1220        for other in &file_paths {
1221            if other == file {
1222                continue;
1223            }
1224            let other_ext = Path::new(other.as_str())
1225                .extension()
1226                .and_then(|e| e.to_str())
1227                .unwrap_or("");
1228            let other_dir = Path::new(other.as_str())
1229                .parent()
1230                .map(|p| p.to_string_lossy().to_string())
1231                .unwrap_or_default();
1232
1233            if other_ext == ext && other_dir == dir {
1234                new_edges.push(IndexEdge {
1235                    from: file.clone(),
1236                    to: other.clone(),
1237                    kind: "sibling".to_string(),
1238                    weight: 0.2,
1239                });
1240                break; // Max 1 sibling edge per isolate
1241            }
1242        }
1243    }
1244
1245    index.edges.extend(new_edges);
1246}
1247
1248fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
1249    let lines: Vec<&str> = content.lines().collect();
1250    let mut start = 0;
1251
1252    for (i, line) in lines.iter().enumerate() {
1253        if line.contains(&sig.name) {
1254            let trimmed = line.trim();
1255            let is_def = trimmed.starts_with("fn ")
1256                || trimmed.starts_with("pub fn ")
1257                || trimmed.starts_with("pub(crate) fn ")
1258                || trimmed.starts_with("async fn ")
1259                || trimmed.starts_with("pub async fn ")
1260                || trimmed.starts_with("struct ")
1261                || trimmed.starts_with("pub struct ")
1262                || trimmed.starts_with("enum ")
1263                || trimmed.starts_with("pub enum ")
1264                || trimmed.starts_with("trait ")
1265                || trimmed.starts_with("pub trait ")
1266                || trimmed.starts_with("impl ")
1267                || trimmed.starts_with("class ")
1268                || trimmed.starts_with("export class ")
1269                || trimmed.starts_with("export function ")
1270                || trimmed.starts_with("export async function ")
1271                || trimmed.starts_with("function ")
1272                || trimmed.starts_with("async function ")
1273                || trimmed.starts_with("def ")
1274                || trimmed.starts_with("async def ")
1275                || trimmed.starts_with("func ")
1276                || trimmed.starts_with("interface ")
1277                || trimmed.starts_with("export interface ")
1278                || trimmed.starts_with("type ")
1279                || trimmed.starts_with("export type ")
1280                || trimmed.starts_with("const ")
1281                || trimmed.starts_with("export const ")
1282                || trimmed.starts_with("fun ")
1283                || trimmed.starts_with("private fun ")
1284                || trimmed.starts_with("public fun ")
1285                || trimmed.starts_with("internal fun ")
1286                || trimmed.starts_with("class ")
1287                || trimmed.starts_with("data class ")
1288                || trimmed.starts_with("sealed class ")
1289                || trimmed.starts_with("sealed interface ")
1290                || trimmed.starts_with("enum class ")
1291                || trimmed.starts_with("object ")
1292                || trimmed.starts_with("private object ")
1293                || trimmed.starts_with("interface ")
1294                || trimmed.starts_with("typealias ")
1295                || trimmed.starts_with("private typealias ");
1296            if is_def {
1297                start = i + 1;
1298                break;
1299            }
1300        }
1301    }
1302
1303    if start == 0 {
1304        return (1, lines.len().min(20));
1305    }
1306
1307    let base_indent = lines
1308        .get(start - 1)
1309        .map_or(0, |l| l.len() - l.trim_start().len());
1310
1311    let mut end = start;
1312    let mut brace_depth: i32 = 0;
1313    let mut found_open = false;
1314
1315    for (i, line) in lines.iter().enumerate().skip(start - 1) {
1316        for ch in line.chars() {
1317            if ch == '{' {
1318                brace_depth += 1;
1319                found_open = true;
1320            } else if ch == '}' {
1321                brace_depth -= 1;
1322            }
1323        }
1324
1325        end = i + 1;
1326
1327        if found_open && brace_depth <= 0 {
1328            break;
1329        }
1330
1331        if !found_open && i > start {
1332            let indent = line.len() - line.trim_start().len();
1333            if indent <= base_indent && !line.trim().is_empty() && i > start {
1334                end = i;
1335                break;
1336            }
1337        }
1338
1339        if end - start > 200 {
1340            break;
1341        }
1342    }
1343
1344    (start, end)
1345}
1346
/// Returns a one-line summary of a file: the first meaningful line among the
/// first 20, trimmed and cut to at most 120 characters.
///
/// Blank lines, comment-like lines (`//`, `#`, `/*`, `*`) and import-like
/// lines (`use `, `import `, `from `, `require(`, `package `) are skipped.
/// An empty string is returned when nothing qualifies.
fn extract_summary(content: &str) -> String {
    const SCAN_LINES: usize = 20;
    const MAX_CHARS: usize = 120;
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(SCAN_LINES)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| line.starts_with(*prefix))
        })
        .map(|line| line.chars().take(MAX_CHARS).collect::<String>())
        .unwrap_or_default()
}
1367
/// Hashes `content` with the standard library's `DefaultHasher` and renders
/// the 64-bit result as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = {
        let mut state = DefaultHasher::new();
        content.hash(&mut state);
        state.finish()
    };
    format!("{digest:016x}")
}
1376
/// Hashes `input` with the standard library's `DefaultHasher` and renders
/// the low 32 bits of the result as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    input.hash(&mut state);
    let low_bits = state.finish() & 0xFFFF_FFFF;
    format!("{low_bits:08x}")
}
1385
/// Recursively copies the directory `src` into `dst`, creating `dst` (and
/// any missing parents) first.
///
/// # Errors
///
/// Returns the first I/O error encountered: creating a directory, listing
/// `src`, reading an individual directory entry, or copying a file. Entry
/// errors are propagated rather than skipped, so `Ok(())` is only returned
/// when every entry was read and copied.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Each item is an `io::Result<DirEntry>`; surface failures to the caller.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
1399
1400fn normalize_absolute_path(path: &str) -> String {
1401    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
1402        return canon.to_string_lossy().to_string();
1403    }
1404
1405    let mut normalized = path.to_string();
1406    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
1407        normalized.truncate(normalized.len() - 2);
1408    }
1409    while normalized.len() > 1
1410        && (normalized.ends_with('\\') || normalized.ends_with('/'))
1411        && !normalized.ends_with(":\\")
1412        && !normalized.ends_with(":/")
1413        && normalized != "\\"
1414        && normalized != "/"
1415    {
1416        normalized.pop();
1417    }
1418    normalized
1419}
1420
1421pub fn normalize_project_root(path: &str) -> String {
1422    normalize_absolute_path(path)
1423}
1424
1425pub fn graph_match_key(path: &str) -> String {
1426    let stripped =
1427        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1428    stripped.trim_start_matches('/').to_string()
1429}
1430
1431pub fn graph_relative_key(path: &str, root: &str) -> String {
1432    let root_norm = normalize_project_root(root);
1433    let path_norm = normalize_absolute_path(path);
1434    let root_path = Path::new(&root_norm);
1435    let path_path = Path::new(&path_norm);
1436
1437    if let Ok(rel) = path_path.strip_prefix(root_path) {
1438        let rel = rel.to_string_lossy().to_string();
1439        return rel.trim_start_matches(['/', '\\']).to_string();
1440    }
1441
1442    path.trim_start_matches(['/', '\\'])
1443        .replace('/', std::path::MAIN_SEPARATOR_STR)
1444}
1445
1446fn make_relative(path: &str, root: &str) -> String {
1447    graph_relative_key(path, root)
1448}
1449
1450fn is_indexable_ext(ext: &str) -> bool {
1451    crate::core::language_capabilities::is_indexable_ext(ext)
1452}
1453
/// Test-only helper: returns the package named by the first line that,
/// once trimmed, starts with `package `. A trailing `;` is removed.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(rest) = raw_line.trim().strip_prefix("package ") {
            return Some(rest.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
1461
// Unit tests for this module: hashing helpers, path normalisation and key
// building, summary extraction, index staleness checks, scan-root safety
// checks, and symbol range lookup.
1462#[cfg(test)]
1463mod tests {
1464    use super::*;
1465    use tempfile::tempdir;
1466
    // short_hash must be stable for equal input and always 8 characters long.
1467    #[test]
1468    fn test_short_hash_deterministic() {
1469        let h1 = short_hash("/Users/test/project");
1470        let h2 = short_hash("/Users/test/project");
1471        assert_eq!(h1, h2);
1472        assert_eq!(h1.len(), 8);
1473    }
1474
    // NOTE(review): make_relative simply calls graph_relative_key, so each
    // assertion below compares a value with itself and only proves that the
    // calls do not panic. Consider asserting concrete expected strings.
1475    #[test]
1476    fn test_make_relative() {
1477        assert_eq!(
1478            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
1479            graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
1480        );
1481        assert_eq!(
1482            make_relative("src/main.rs", "/foo/bar"),
1483            graph_relative_key("src/main.rs", "/foo/bar")
1484        );
1485        assert_eq!(
1486            make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
1487            graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
1488        );
1489        assert_eq!(
1490            make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
1491            graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
1492        );
1493    }
1494
    // Trailing separators and a trailing "." segment are removed from roots.
    // NOTE(review): presumably these paths do not exist on the test machine,
    // so the textual fallback of normalize_absolute_path is what is exercised.
1495    #[test]
1496    fn test_normalize_project_root() {
1497        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
1498        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
1499        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
1500    }
1501
    // Match keys use forward slashes, drop the verbatim prefix, and have no
    // leading slash.
1502    #[test]
1503    fn test_graph_match_key_normalizes_windows_forms() {
1504        assert_eq!(
1505            graph_match_key(r"C:\repo\src\main.rs"),
1506            "C:/repo/src/main.rs"
1507        );
1508        assert_eq!(
1509            graph_match_key(r"\\?\C:\repo\src\main.rs"),
1510            "C:/repo/src/main.rs"
1511        );
1512        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
1513    }
1514
    // The comment, the `use` line and the blank line are skipped; the first
    // remaining line becomes the summary.
1515    #[test]
1516    fn test_extract_summary() {
1517        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
1518        let summary = extract_summary(content);
1519        assert_eq!(summary, "pub fn main() {");
1520    }
1521
    // Equal content hashes equally; a one-character change alters the hash.
1522    #[test]
1523    fn test_compute_hash_deterministic() {
1524        let h1 = compute_hash("hello world");
1525        let h2 = compute_hash("hello world");
1526        assert_eq!(h1, h2);
1527        assert_ne!(h1, compute_hash("hello world!"));
1528    }
1529
    // A fresh index carries the current INDEX_VERSION, the given root, and
    // no files.
1530    #[test]
1531    fn test_project_index_new() {
1532        let idx = ProjectIndex::new("/test");
1533        assert_eq!(idx.version, INDEX_VERSION);
1534        assert_eq!(idx.project_root, "/test");
1535        assert!(idx.files.is_empty());
1536    }
1537
    // Test helper: builds a FileEntry for `path` whose hash, line count,
    // token count and summary are derived from `content`; exports stay empty.
1538    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
1539        FileEntry {
1540            path: path.to_string(),
1541            hash: compute_hash(content),
1542            language: language.to_string(),
1543            line_count: content.lines().count(),
1544            token_count: crate::core::tokens::count_tokens(content),
1545            exports: Vec::new(),
1546            summary: extract_summary(content),
1547        }
1548    }
1549
    // The index lists "missing.rs", which is never written to disk, so the
    // index must be reported as stale.
1550    #[test]
1551    fn test_index_looks_stale_when_any_file_missing() {
1552        let td = tempdir().expect("tempdir");
1553        let root = td.path();
1554        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");
1555
1556        let root_s = normalize_project_root(&root.to_string_lossy());
1557        let mut idx = ProjectIndex::new(&root_s);
1558        idx.files
1559            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
1560        idx.files.insert(
1561            "missing.rs".to_string(),
1562            fe("missing.rs", "pub fn m() {}\n", "rs"),
1563        );
1564
1565        assert!(index_looks_stale(&idx, &root_s));
1566    }
1567
    // Every indexed file exists on disk, so the index is not stale.
1568    #[test]
1569    fn test_index_looks_fresh_when_all_files_exist() {
1570        let td = tempdir().expect("tempdir");
1571        let root = td.path();
1572        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");
1573
1574        let root_s = normalize_project_root(&root.to_string_lossy());
1575        let mut idx = ProjectIndex::new(&root_s);
1576        idx.files
1577            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
1578
1579        assert!(!index_looks_stale(&idx, &root_s));
1580    }
1581
    // Both a.rs and c.rs import b.rs, so both must be returned as reverse
    // dependencies of b.rs.
1582    #[test]
1583    fn test_reverse_deps() {
1584        let mut idx = ProjectIndex::new("/test");
1585        idx.edges.push(IndexEdge {
1586            from: "a.rs".to_string(),
1587            to: "b.rs".to_string(),
1588            kind: "import".to_string(),
1589            weight: 1.0,
1590        });
1591        idx.edges.push(IndexEdge {
1592            from: "c.rs".to_string(),
1593            to: "b.rs".to_string(),
1594            kind: "import".to_string(),
1595            weight: 1.0,
1596        });
1597
1598        let deps = idx.get_reverse_deps("b.rs", 1);
1599        assert_eq!(deps.len(), 2);
1600        assert!(deps.contains(&"a.rs".to_string()));
1601        assert!(deps.contains(&"c.rs".to_string()));
1602    }
1603
    // The raw string begins with a newline, so `fun greet` sits on line 5 of
    // the content; that is the expected 1-based start.
1604    #[test]
1605    fn test_find_symbol_range_kotlin_function() {
1606        let content = r#"
1607package com.example
1608
1609class UserService {
1610    fun greet(name: String): String {
1611        return "hi $name"
1612    }
1613}
1614"#;
1615        let sig = signatures::Signature {
1616            kind: "method",
1617            name: "greet".to_string(),
1618            params: "name:String".to_string(),
1619            return_type: "String".to_string(),
1620            is_async: false,
1621            is_exported: true,
1622            indent: 2,
1623            ..signatures::Signature::no_span()
1624        };
1625        let (start, end) = find_symbol_range(content, &sig);
1626        assert_eq!(start, 5);
1627        assert!(end >= start);
1628    }
1629
    // When the signature carries both start_line and end_line, `zip` yields
    // them and the find_symbol_range fallback closure is never invoked.
1630    #[test]
1631    fn test_signature_spans_override_fallback_range() {
1632        let sig = signatures::Signature {
1633            kind: "method",
1634            name: "release".to_string(),
1635            params: "id:String".to_string(),
1636            return_type: "Boolean".to_string(),
1637            is_async: true,
1638            is_exported: true,
1639            indent: 2,
1640            start_line: Some(42),
1641            end_line: Some(43),
1642        };
1643
1644        let (start, end) = sig
1645            .start_line
1646            .zip(sig.end_line)
1647            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
1648        assert_eq!((start, end), (42, 43));
1649    }
1650
    // An index serialised with an older version number still deserialises;
    // the version mismatch stays visible on the parsed value.
1651    #[test]
1652    fn test_parse_stale_index_version() {
1653        let json = format!(
1654            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
1655            INDEX_VERSION - 1
1656        );
1657        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
1658        assert_ne!(parsed.version, INDEX_VERSION);
1659    }
1660
1661    #[test]
1662    fn test_kotlin_package_name() {
1663        let content = "package com.example.feature\n\nclass UserService";
1664        assert_eq!(
1665            kotlin_package_name(content).as_deref(),
1666            Some("com.example.feature")
1667        );
1668    }
1669
    // Filesystem roots must never be scanned; drive roots are only checked
    // on Windows builds.
1670    #[test]
1671    fn safe_scan_root_rejects_fs_root() {
1672        assert!(!is_safe_scan_root("/"));
1673        assert!(!is_safe_scan_root("\\"));
1674        #[cfg(windows)]
1675        {
1676            assert!(!is_safe_scan_root("C:\\"));
1677            assert!(!is_safe_scan_root("D:\\"));
1678        }
1679    }
1680
    // Skipped silently when the platform reports no home directory.
1681    #[test]
1682    fn safe_scan_root_rejects_home() {
1683        if let Some(home) = dirs::home_dir() {
1684            let home_str = home.to_string_lossy().to_string();
1685            assert!(
1686                !is_safe_scan_root(&home_str),
1687                "home dir should be rejected: {home_str}"
1688            );
1689        }
1690    }
1691
    // A temp directory containing a Cargo.toml is expected to be accepted.
1692    #[test]
1693    fn safe_scan_root_accepts_project_dir() {
1694        let tmp = tempdir().unwrap();
1695        std::fs::write(
1696            tmp.path().join("Cargo.toml"),
1697            "[package]\nname = \"test\"\n",
1698        )
1699        .unwrap();
1700        let root = tmp.path().to_string_lossy().to_string();
1701        assert!(is_safe_scan_root(&root));
1702    }
1703
    // 55 subdirectories and no project marker file: expected to be rejected.
    // NOTE(review): the breadth threshold lives in is_safe_scan_root, outside
    // this view — confirm that 55 is above it.
1704    #[test]
1705    fn safe_scan_root_rejects_broad_dir() {
1706        let tmp = tempdir().unwrap();
1707        for i in 0..55 {
1708            std::fs::create_dir(tmp.path().join(format!("dir{i}"))).unwrap();
1709        }
1710        let root = tmp.path().to_string_lossy().to_string();
1711        assert!(!is_safe_scan_root(&root));
1712    }
1713
    // Holds the shared env lock while LEAN_CTX_NO_INDEX is set.
    // NOTE(review): if `scan` panics, the variable is never removed; a drop
    // guard would make the cleanup unconditional.
1714    #[test]
1715    fn no_index_env_skips_scan() {
1716        let _env = crate::core::data_dir::test_env_lock();
1717        let tmp = tempdir().unwrap();
1718        std::fs::write(tmp.path().join("Cargo.toml"), "").unwrap();
1719        std::fs::write(tmp.path().join("main.rs"), "fn main() {}").unwrap();
1720
1721        std::env::set_var("LEAN_CTX_NO_INDEX", "1");
1722        let idx = scan(&tmp.path().to_string_lossy());
1723        std::env::remove_var("LEAN_CTX_NO_INDEX");
1724        assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
1725    }
1726
1727    #[test]
1728    fn stale_index_detected_by_contamination() {
1729        let root_s = "/home/testuser/myproject";
1730        let mut idx = ProjectIndex::new(root_s);
1731        // Simulate a contaminated index with Desktop files
1732        idx.files.insert(
1733            "Desktop/random.py".to_string(),
1734            fe("Desktop/random.py", "x = 1\n", "py"),
1735        );
1736        idx.files.insert(
1737            "src/main.rs".to_string(),
1738            fe("src/main.rs", "fn main() {}\n", "rs"),
1739        );
1740        assert!(
1741            index_looks_stale(&idx, root_s),
1742            "Index with Desktop/ files should be considered stale"
1743        );
1744    }
1745
1746    #[test]
1747    fn stale_index_detected_by_age() {
1748        let td = tempdir().expect("tempdir");
1749        let root = td.path();
1750        std::fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();
1751
1752        let root_s = normalize_project_root(&root.to_string_lossy());
1753        let mut idx = ProjectIndex::new(&root_s);
1754        idx.files
1755            .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));
1756        // Set last_scan to 100 hours ago (default max_age_hours is 48)
1757        let old_time = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
1758        idx.last_scan = old_time.format("%Y-%m-%d %H:%M:%S").to_string();
1759
1760        assert!(
1761            index_looks_stale(&idx, &root_s),
1762            "Index older than max_age_hours should be stale"
1763        );
1764    }
1765
    // NOTE(review): depends on the machine running the test — it inspects
    // the real ~/Downloads and asserts nothing when there is no home
    // directory or when ~/Downloads contains a .git entry.
1766    #[test]
1767    fn safe_scan_root_rejects_home_downloads() {
1768        if let Some(home) = dirs::home_dir() {
1769            let downloads = home.join("Downloads");
1770            // Only test if Downloads doesn't contain a .git (unlikely but possible)
1771            if !downloads.join(".git").exists() {
1772                let downloads_str = downloads.to_string_lossy().to_string();
1773                assert!(
1774                    !is_safe_scan_root(&downloads_str),
1775                    "~/Downloads should be rejected without project markers"
1776                );
1777            }
1778        }
1779    }
1780}