Skip to main content

lean_ctx/core/
graph_index.rs

1// DEPRECATED: This module is being replaced by PropertyGraph (core/property_graph/).
2// New code should use GraphProvider (core/graph_provider.rs) instead of accessing
3// ProjectIndex directly. Remaining direct consumers: call_graph, graph_enricher,
4// ctx_callgraph, ctx_graph_diagram, ctx_routes, autonomy, dashboard/callgraph.
5// See OPT-14/15 plan for the full migration path.
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14
/// Schema version stamped into every index created by `ProjectIndex::new`.
/// `ProjectIndex::load` returns `None` for a stored index whose version differs.
const INDEX_VERSION: u32 = 6;
16
/// Public wrapper around the private `is_safe_scan_root` check.
///
/// Passes `path` through unchanged and returns the same verdict: `true` when the
/// path is considered safe to scan, `false` otherwise.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
20
/// Reports whether `path` has no parent directory.
///
/// `Path::parent` yields `None` both for a root/prefix and for the empty string, so
/// either counts as a root here. On Windows builds a parent equal to the empty path
/// is treated the same way.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
25
26fn is_safe_scan_root(path: &str) -> bool {
27    let normalized = normalize_project_root(path);
28    let p = Path::new(&normalized);
29
30    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
31        tracing::warn!("[graph_index: refusing to scan filesystem root]");
32        return false;
33    }
34
35    if normalized == "." || normalized.is_empty() {
36        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
37        return false;
38    }
39
40    if let Some(home) = dirs::home_dir() {
41        let home_norm = normalize_project_root(&home.to_string_lossy());
42        if normalized == home_norm {
43            use std::sync::Once;
44            static HOME_WARN: Once = Once::new();
45            HOME_WARN.call_once(|| {
46                tracing::warn!(
47                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
48                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
49                );
50            });
51            return false;
52        }
53        // Block common broad home subdirectories that are never valid project roots
54        let home_path = Path::new(&home_norm);
55        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
56            "Desktop",
57            "Documents",
58            "Downloads",
59            "Pictures",
60            "Music",
61            "Videos",
62            "Movies",
63            "Library",
64            ".local",
65            ".cache",
66            ".config",
67            "snap",
68            "Applications",
69        ];
70        for blocked in BLOCKED_HOME_SUBDIRS {
71            let blocked_path = home_path.join(blocked);
72            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
73            let has_project_marker = p.join(".git").exists()
74                || p.join("Cargo.toml").exists()
75                || p.join("package.json").exists();
76            if is_inside_blocked && !has_project_marker {
77                tracing::warn!(
78                    "[graph_index: refusing to scan {normalized} — \
79                     inside home/{blocked} without project markers]"
80                );
81                return false;
82            }
83        }
84
85        // Block directories that are direct children of home without project markers
86        if p.parent() == Some(home_path) {
87            let has_marker = p.join(".git").exists()
88                || p.join("Cargo.toml").exists()
89                || p.join("package.json").exists()
90                || p.join("go.mod").exists()
91                || p.join("pyproject.toml").exists();
92            if !has_marker {
93                tracing::warn!(
94                    "[graph_index: refusing to scan {normalized} — \
95                     direct child of home without project markers]"
96                );
97                return false;
98            }
99        }
100    }
101
102    let breadth_markers = [
103        ".git",
104        "Cargo.toml",
105        "package.json",
106        "go.mod",
107        "pyproject.toml",
108        "setup.py",
109        "Makefile",
110        "CMakeLists.txt",
111        "pnpm-workspace.yaml",
112        ".projectile",
113        "BUILD.bazel",
114        "go.work",
115    ];
116
117    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
118        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
119            rd.filter_map(Result::ok)
120                .filter(|e| e.path().is_dir())
121                .count()
122        });
123        if child_count > 50 {
124            tracing::warn!(
125                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
126                 skipping scan to avoid indexing broad directories]"
127            );
128            return false;
129        }
130    }
131
132    true
133}
134
/// Serializable snapshot of a scanned project: its files, symbols and the edges
/// between files.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Schema version; `new` sets it to `INDEX_VERSION`.
    pub version: u32,
    /// Project root, normalized by `normalize_project_root` in `new`.
    pub project_root: String,
    /// Local creation time, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files; the scanner keys them by project-relative path.
    pub files: HashMap<String, FileEntry>,
    /// Relationships between files.
    pub edges: Vec<IndexEdge>,
    /// Symbols; the scanner keys them as `"{relative_path}::{symbol_name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
144
/// Per-file record stored in `ProjectIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Project-relative path (the scanner uses the same string as the map key).
    pub path: String,
    /// Content hash; the scanner compares it to decide whether an entry can be reused.
    pub hash: String,
    /// The scanner stores the file extension here.
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are flagged as exported.
    pub exports: Vec<String>,
    /// Summary text produced by `extract_summary`.
    pub summary: String,
}
155
/// Per-symbol record stored in `ProjectIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Project-relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by signature extraction.
    pub name: String,
    /// Stringified signature kind.
    pub kind: String,
    /// First line of the symbol's range.
    pub start_line: usize,
    /// Last line of the symbol's range.
    pub end_line: usize,
    /// Whether the signature was flagged as exported.
    pub is_exported: bool,
}
165
/// Directed relationship between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Source file of the edge.
    pub from: String,
    /// Target file of the edge.
    pub to: String,
    /// Edge category, e.g. `"import"` for resolved import statements.
    pub kind: String,
    /// Edge weight; falls back to `default_edge_weight()` when the serialized form
    /// has no `weight` field.
    #[serde(default = "default_edge_weight")]
    pub weight: f32,
}
174
/// Serde fallback for `IndexEdge::weight` when the field is absent from the input.
fn default_edge_weight() -> f32 {
    1.0_f32
}
178
179impl ProjectIndex {
180    pub fn new(project_root: &str) -> Self {
181        Self {
182            version: INDEX_VERSION,
183            project_root: normalize_project_root(project_root),
184            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
185            files: HashMap::new(),
186            edges: Vec::new(),
187            symbols: HashMap::new(),
188        }
189    }
190
191    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
192        let normalized = normalize_project_root(project_root);
193        let hash = crate::core::project_hash::hash_project_root(&normalized);
194        crate::core::data_dir::lean_ctx_data_dir()
195            .ok()
196            .map(|d| d.join("graphs").join(hash))
197    }
198
199    pub fn load(project_root: &str) -> Option<Self> {
200        let dir = Self::index_dir(project_root)?;
201
202        let zst_path = dir.join("index.json.zst");
203        if zst_path.exists() {
204            let compressed = std::fs::read(&zst_path).ok()?;
205            let data = zstd::decode_all(compressed.as_slice()).ok()?;
206            let content = String::from_utf8(data).ok()?;
207            let index: Self = serde_json::from_str(&content).ok()?;
208            if index.version != INDEX_VERSION {
209                return None;
210            }
211            return Some(index);
212        }
213
214        let json_path = dir.join("index.json");
215        let content = std::fs::read_to_string(&json_path)
216            .or_else(|_| -> std::io::Result<String> {
217                let legacy_hash = short_hash(&normalize_project_root(project_root));
218                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
219                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
220                    .join("graphs")
221                    .join(legacy_hash);
222                let legacy_path = legacy_dir.join("index.json");
223                let data = std::fs::read_to_string(&legacy_path)?;
224                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
225                    tracing::debug!("graph index migration: {e}");
226                }
227                Ok(data)
228            })
229            .ok()?;
230        let index: Self = serde_json::from_str(&content).ok()?;
231        if index.version != INDEX_VERSION {
232            return None;
233        }
234        // Auto-migrate: compress legacy JSON to zstd
235        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
236            let zst_tmp = zst_path.with_extension("zst.tmp");
237            if std::fs::write(&zst_tmp, &compressed).is_ok()
238                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
239            {
240                let _ = std::fs::remove_file(&json_path);
241            }
242        }
243        Some(index)
244    }
245
246    pub fn save(&self) -> Result<(), String> {
247        let dir = Self::index_dir(&self.project_root)
248            .ok_or_else(|| "Cannot determine data directory".to_string())?;
249        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
250        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
251        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
252        let target = dir.join("index.json.zst");
253        let tmp = target.with_extension("zst.tmp");
254        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
255        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
256        let _ = std::fs::remove_file(dir.join("index.json"));
257        Ok(())
258    }
259
260    /// Remove all cached graph indices that are older than max_age_hours.
261    /// Called on startup/update to prevent stale data from persisting.
262    pub fn purge_stale_indices() {
263        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
264            return;
265        };
266        let graphs_dir = data_dir.join("graphs");
267        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
268            return;
269        };
270        let cfg = crate::core::config::Config::load();
271        let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
272
273        for entry in entries.filter_map(Result::ok) {
274            let path = entry.path();
275            if !path.is_dir() {
276                continue;
277            }
278            let zst = path.join("index.json.zst");
279            let json = path.join("index.json");
280            let index_file = if zst.exists() {
281                &zst
282            } else if json.exists() {
283                &json
284            } else {
285                continue;
286            };
287
288            let is_old = index_file
289                .metadata()
290                .and_then(|m| m.modified())
291                .is_ok_and(|mtime| {
292                    mtime
293                        .elapsed()
294                        .is_ok_and(|age| age.as_secs() > max_age_secs)
295                });
296
297            if is_old {
298                tracing::info!("[graph_index: purging stale index at {}]", path.display());
299                let _ = std::fs::remove_dir_all(&path);
300            }
301        }
302    }
303
304    pub fn file_count(&self) -> usize {
305        self.files.len()
306    }
307
308    pub fn symbol_count(&self) -> usize {
309        self.symbols.len()
310    }
311
312    pub fn edge_count(&self) -> usize {
313        self.edges.len()
314    }
315
316    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
317        self.symbols.get(key)
318    }
319
320    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
321        let mut result = Vec::new();
322        let mut visited = std::collections::HashSet::new();
323        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
324
325        while let Some((current, d)) = queue.pop() {
326            if d > depth || visited.contains(&current) {
327                continue;
328            }
329            visited.insert(current.clone());
330            if current != path {
331                result.push(current.clone());
332            }
333
334            for edge in &self.edges {
335                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
336                    queue.push((edge.from.clone(), d + 1));
337                }
338            }
339        }
340        result
341    }
342
343    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
344        let mut result = Vec::new();
345        let mut visited = std::collections::HashSet::new();
346        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
347
348        while let Some((current, d)) = queue.pop() {
349            if d > depth || visited.contains(&current) {
350                continue;
351            }
352            visited.insert(current.clone());
353            if current != path {
354                result.push(current.clone());
355            }
356
357            for edge in &self.edges {
358                if edge.from == current && !visited.contains(&edge.to) {
359                    queue.push((edge.to.clone(), d + 1));
360                }
361                if edge.to == current && !visited.contains(&edge.from) {
362                    queue.push((edge.from.clone(), d + 1));
363                }
364            }
365        }
366        result
367    }
368}
369
370/// Load the best available graph index, trying multiple root path variants.
371/// If no valid index exists, automatically scans the project to build one.
372/// This is the primary entry point — ensures zero-config usage.
373pub fn load_or_build(project_root: &str) -> ProjectIndex {
374    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
375        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
376    }
377
378    // Prefer stable absolute roots. Using "." as a cache key is fragile because
379    // it depends on the process cwd and can accidentally load the wrong project.
380    let root_abs = if project_root.trim().is_empty() || project_root == "." {
381        std::env::current_dir().ok().map_or_else(
382            || ".".to_string(),
383            |p| normalize_project_root(&p.to_string_lossy()),
384        )
385    } else {
386        normalize_project_root(project_root)
387    };
388
389    if !is_safe_scan_root(&root_abs) {
390        return ProjectIndex::new(&root_abs);
391    }
392
393    // Try the absolute/root-normalized path first.
394    if let Some(idx) = ProjectIndex::load(&root_abs) {
395        if !idx.files.is_empty() {
396            if index_looks_stale(&idx, &root_abs) {
397                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
398                return scan(&root_abs);
399            }
400            return idx;
401        }
402    }
403
404    // CWD fallback: only use if CWD is a subdirectory of root_abs (same project)
405    if let Ok(cwd) = std::env::current_dir() {
406        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
407        if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
408            if let Some(idx) = ProjectIndex::load(&cwd_str) {
409                if !idx.files.is_empty() {
410                    if index_looks_stale(&idx, &cwd_str) {
411                        return scan(&cwd_str);
412                    }
413                    return idx;
414                }
415            }
416        }
417    }
418
419    scan(&root_abs)
420}
421
422fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
423    if index.files.is_empty() {
424        return true;
425    }
426
427    // TTL check: rebuild if index is older than configured max_age_hours
428    if let Ok(scan_time) =
429        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
430    {
431        let cfg = crate::core::config::Config::load();
432        let effective_hours = cfg.archive_max_age_hours_effective();
433        let max_age = chrono::Duration::hours(effective_hours as i64);
434        let now = chrono::Local::now().naive_local();
435        if now.signed_duration_since(scan_time) > max_age {
436            tracing::info!(
437                "[graph_index: index is older than {}h — marking stale]",
438                effective_hours
439            );
440            return true;
441        }
442    }
443
444    // Contamination check: if index contains paths from common user directories,
445    // it was built from a too-broad root and must be rebuilt
446    const CONTAMINATION_MARKERS: &[&str] = &[
447        "Desktop/",
448        "Documents/",
449        "Downloads/",
450        "Pictures/",
451        "Music/",
452        "Videos/",
453        "Movies/",
454        "Library/",
455        ".cache/",
456        "snap/",
457    ];
458    let contaminated = index.files.keys().take(200).any(|rel| {
459        CONTAMINATION_MARKERS
460            .iter()
461            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
462    });
463    if contaminated {
464        tracing::warn!(
465            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
466             marking stale to force clean rebuild]"
467        );
468        return true;
469    }
470
471    let root_path = Path::new(root_abs);
472    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
473    let sample_size = index.files.len().min(20);
474    for rel in index.files.keys().take(sample_size) {
475        let rel = rel.trim_start_matches(['/', '\\']);
476        if rel.is_empty() {
477            continue;
478        }
479        let abs = root_path.join(rel);
480        if !abs.exists() {
481            return true;
482        }
483    }
484
485    false
486}
487
488pub fn scan(project_root: &str) -> ProjectIndex {
489    scan_inner(project_root).0
490}
491
/// Scans `project_root` and returns both the index and the map of file contents
/// collected by `scan_inner`, unchanged.
pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
    scan_inner(project_root)
}
495
496fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
497    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
498        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
499        return (ProjectIndex::new(project_root), HashMap::new());
500    }
501
502    let project_root = normalize_project_root(project_root);
503
504    if !is_safe_scan_root(&project_root) {
505        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
506        return (ProjectIndex::new(&project_root), HashMap::new());
507    }
508
509    let lock_name = format!(
510        "graph-idx-{}",
511        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
512    );
513    let _lock = crate::core::startup_guard::try_acquire_lock(
514        &lock_name,
515        std::time::Duration::from_millis(800),
516        std::time::Duration::from_mins(3),
517    );
518    if _lock.is_none() {
519        tracing::info!(
520            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
521        );
522        return (
523            ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
524            HashMap::new(),
525        );
526    }
527
528    let existing = ProjectIndex::load(&project_root);
529    let mut index = ProjectIndex::new(&project_root);
530
531    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
532        if let Some(ref prev) = existing {
533            prev.files
534                .iter()
535                .map(|(path, entry)| {
536                    let syms: Vec<(String, SymbolEntry)> = prev
537                        .symbols
538                        .iter()
539                        .filter(|(_, s)| s.file == *path)
540                        .map(|(k, v)| (k.clone(), v.clone()))
541                        .collect();
542                    (path.clone(), (entry.hash.clone(), syms))
543                })
544                .collect()
545        } else {
546            HashMap::new()
547        };
548
549    let walker = ignore::WalkBuilder::new(&project_root)
550        .hidden(true)
551        .git_ignore(true)
552        .git_global(true)
553        .git_exclude(true)
554        .max_depth(Some(20))
555        .build();
556
557    let cfg = crate::core::config::Config::load();
558    let extra_ignores: Vec<glob::Pattern> = cfg
559        .extra_ignore_patterns
560        .iter()
561        .filter_map(|p| glob::Pattern::new(p).ok())
562        .collect();
563
564    let mut scanned = 0usize;
565    let mut reused = 0usize;
566    let mut entries_visited = 0usize;
567    let mut content_cache: HashMap<String, String> = HashMap::new();
568    let max_files = if cfg.graph_index_max_files == 0 {
569        usize::MAX // unlimited
570    } else {
571        cfg.graph_index_max_files as usize
572    };
573    const MAX_ENTRIES_VISITED: usize = 500_000;
574    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
575    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
576
577    for entry in walker.filter_map(std::result::Result::ok) {
578        entries_visited += 1;
579        if entries_visited > MAX_ENTRIES_VISITED {
580            tracing::warn!(
581                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
582                 runaway traversal. Indexed {} files so far.]",
583                index.files.len()
584            );
585            break;
586        }
587        if entries_visited.is_multiple_of(5000) {
588            if std::time::Instant::now() > scan_deadline {
589                tracing::warn!(
590                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
591                     saving partial index with {} files]",
592                    index.files.len()
593                );
594                break;
595            }
596            if crate::core::memory_guard::abort_requested() {
597                tracing::warn!(
598                    "[graph_index: memory pressure abort after {entries_visited} entries — \
599                     saving partial index with {} files]",
600                    index.files.len()
601                );
602                break;
603            }
604            if crate::core::memory_guard::is_under_pressure() {
605                tracing::warn!(
606                    "[graph_index: memory pressure detected at {entries_visited} entries — \
607                     stopping scan with {} files]",
608                    index.files.len()
609                );
610                break;
611            }
612            if let Some(ref g) = _lock {
613                g.touch();
614            }
615        }
616
617        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
618            continue;
619        }
620
621        if entry.path_is_symlink() {
622            continue;
623        }
624        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
625
626        if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
627            continue;
628        }
629
630        if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
631            if meta.file_type().is_symlink() || !meta.is_file() {
632                continue;
633            }
634            if meta.len() > MAX_FILE_SIZE_BYTES {
635                tracing::debug!(
636                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
637                    meta.len() as f64 / 1_048_576.0,
638                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
639                );
640                continue;
641            }
642        }
643
644        let ext = Path::new(&file_path)
645            .extension()
646            .and_then(|e| e.to_str())
647            .unwrap_or("");
648
649        if !is_indexable_ext(ext) {
650            continue;
651        }
652
653        let rel = make_relative(&file_path, &project_root);
654        if extra_ignores.iter().any(|p| p.matches(&rel)) {
655            continue;
656        }
657
658        if max_files != usize::MAX && index.files.len() >= max_files {
659            tracing::info!(
660                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
661                max_files
662            );
663            break;
664        }
665
666        let Ok(content) = std::fs::read_to_string(&file_path) else {
667            continue;
668        };
669
670        let hash = compute_hash(&content);
671        let rel_path = make_relative(&file_path, &project_root);
672
673        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
674            if *old_hash == hash {
675                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
676                    index.files.insert(rel_path.clone(), old_entry.clone());
677                    for (key, sym) in old_syms {
678                        index.symbols.insert(key.clone(), sym.clone());
679                    }
680                    content_cache.insert(rel_path, content);
681                    reused += 1;
682                    continue;
683                }
684            }
685        }
686
687        let sigs = signatures::extract_signatures(&content, ext);
688        let line_count = content.lines().count();
689        let token_count = crate::core::tokens::count_tokens(&content);
690        let summary = extract_summary(&content);
691
692        let exports: Vec<String> = sigs
693            .iter()
694            .filter(|s| s.is_exported)
695            .map(|s| s.name.clone())
696            .collect();
697
698        index.files.insert(
699            rel_path.clone(),
700            FileEntry {
701                path: rel_path.clone(),
702                hash,
703                language: ext.to_string(),
704                line_count,
705                token_count,
706                exports,
707                summary,
708            },
709        );
710
711        for sig in &sigs {
712            let (start, end) = sig
713                .start_line
714                .zip(sig.end_line)
715                .unwrap_or_else(|| find_symbol_range(&content, sig));
716            let key = format!("{}::{}", rel_path, sig.name);
717            index.symbols.insert(
718                key,
719                SymbolEntry {
720                    file: rel_path.clone(),
721                    name: sig.name.clone(),
722                    kind: sig.kind.to_string(),
723                    start_line: start,
724                    end_line: end,
725                    is_exported: sig.is_exported,
726                },
727            );
728        }
729
730        content_cache.insert(rel_path, content);
731        scanned += 1;
732    }
733
734    build_edges_cached(&mut index, &content_cache);
735
736    if let Err(e) = index.save() {
737        tracing::warn!("could not save graph index: {e}");
738    }
739
740    tracing::warn!(
741        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
742        index.file_count(),
743        scanned,
744        reused,
745        index.symbol_count(),
746        index.edge_count()
747    );
748
749    (index, content_cache)
750}
751
/// Rebuilds all edge layers of `index`, in this order: import edges, implicit
/// language edges, co-change edges, sibling edges.
///
/// `content_cache` (relative path → file content) is handed to the first two steps.
fn build_edges_cached(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
    build_edges_with_cache(index, content_cache);
    build_implicit_edges_with_cache(index, content_cache);
    build_cochange_edges(index);
    build_sibling_edges(index);
}
758
759fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
760    index.edges.clear();
761
762    if crate::core::memory_guard::abort_requested() {
763        tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
764        return;
765    }
766
767    let root = normalize_project_root(&index.project_root);
768    let root_path = Path::new(&root);
769
770    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
771    file_paths.sort();
772
773    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
774
775    const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
776
777    for (i, rel_path) in file_paths.iter().enumerate() {
778        if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
779            tracing::warn!(
780                "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
781                file_paths.len()
782            );
783            break;
784        }
785
786        let content = if let Some(cached) = content_cache.get(rel_path) {
787            std::borrow::Cow::Borrowed(cached.as_str())
788        } else {
789            let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
790            if let Ok(meta) = abs_path.metadata() {
791                if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
792                    continue;
793                }
794            }
795            match std::fs::read_to_string(&abs_path) {
796                Ok(c) => std::borrow::Cow::Owned(c),
797                Err(_) => continue,
798            }
799        };
800
801        let ext = Path::new(rel_path)
802            .extension()
803            .and_then(|e| e.to_str())
804            .unwrap_or("");
805
806        let resolve_ext = match ext {
807            "vue" | "svelte" => "ts",
808            _ => ext,
809        };
810
811        let analysis_content = if ext == "vue" || ext == "svelte" {
812            if let Some(script) = crate::core::signatures_ts::sfc::extract_script_block(&content) {
813                std::borrow::Cow::Owned(script)
814            } else {
815                content
816            }
817        } else {
818            content
819        };
820
821        let imports = crate::core::deep_queries::analyze(&analysis_content, resolve_ext).imports;
822        if imports.is_empty() {
823            continue;
824        }
825
826        let resolved =
827            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
828        for r in resolved {
829            if r.is_external {
830                continue;
831            }
832            if let Some(to) = r.resolved_path {
833                index.edges.push(IndexEdge {
834                    from: rel_path.clone(),
835                    to,
836                    kind: "import".to_string(),
837                    weight: 1.0,
838                });
839            }
840        }
841    }
842
843    index.edges.sort_by(|a, b| {
844        a.from
845            .cmp(&b.from)
846            .then_with(|| a.to.cmp(&b.to))
847            .then_with(|| a.kind.cmp(&b.kind))
848    });
849    index
850        .edges
851        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
852}
853
854// ---------------------------------------------------------------------------
855// Layer 2: Implicit Language Edges (weight 0.8)
856// ---------------------------------------------------------------------------
857
858fn build_implicit_edges_with_cache(
859    index: &mut ProjectIndex,
860    content_cache: &HashMap<String, String>,
861) {
862    let file_paths: Vec<String> = index.files.keys().cloned().collect();
863    let file_set: std::collections::HashSet<&str> = file_paths.iter().map(String::as_str).collect();
864
865    let mut new_edges: Vec<IndexEdge> = Vec::new();
866
867    for file in &file_paths {
868        let ext = Path::new(file.as_str())
869            .extension()
870            .and_then(|e| e.to_str())
871            .unwrap_or("");
872
873        match ext {
874            "rs" => {
875                collect_rust_mod_edges_cached(
876                    file,
877                    &file_set,
878                    index,
879                    &mut new_edges,
880                    content_cache,
881                );
882            }
883            "go" => collect_go_package_edges(file, &file_paths, &mut new_edges),
884            "py" => collect_python_init_edges(file, &file_paths, &mut new_edges),
885            "ts" | "js" | "tsx" | "jsx" => {
886                collect_barrel_edges_cached(file, &file_set, index, &mut new_edges, content_cache);
887            }
888            _ => {}
889        }
890    }
891
892    index.edges.extend(new_edges);
893}
894
895fn collect_rust_mod_edges_cached(
896    file: &str,
897    file_set: &std::collections::HashSet<&str>,
898    index: &ProjectIndex,
899    edges: &mut Vec<IndexEdge>,
900    content_cache: &HashMap<String, String>,
901) {
902    if !index.files.contains_key(file) {
903        return;
904    }
905
906    let content = if let Some(cached) = content_cache.get(file) {
907        std::borrow::Cow::Borrowed(cached.as_str())
908    } else {
909        let full_path = Path::new(&index.project_root).join(file);
910        match std::fs::read_to_string(&full_path) {
911            Ok(c) => std::borrow::Cow::Owned(c),
912            Err(_) => return,
913        }
914    };
915
916    let dir = Path::new(file)
917        .parent()
918        .map(|p| p.to_string_lossy().to_string());
919
920    for line in content.lines() {
921        let trimmed = line.trim();
922        if !trimmed.starts_with("mod ") || trimmed.contains('{') {
923            continue;
924        }
925        let mod_name = trimmed
926            .trim_start_matches("mod ")
927            .trim_start_matches("pub mod ")
928            .trim_start_matches("pub(crate) mod ")
929            .trim_end_matches(';')
930            .trim();
931
932        if mod_name.is_empty() || mod_name.contains(' ') {
933            continue;
934        }
935
936        let candidates = if let Some(ref d) = dir {
937            vec![
938                format!("{d}/{mod_name}.rs"),
939                format!("{d}/{mod_name}/mod.rs"),
940            ]
941        } else {
942            vec![format!("{mod_name}.rs"), format!("{mod_name}/mod.rs")]
943        };
944
945        for candidate in candidates {
946            if file_set.contains(candidate.as_str()) {
947                edges.push(IndexEdge {
948                    from: file.to_string(),
949                    to: candidate,
950                    kind: "module".to_string(),
951                    weight: 0.8,
952                });
953                break;
954            }
955        }
956    }
957}
958
959fn collect_go_package_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
960    let p = Path::new(file);
961    if p.extension().and_then(|e| e.to_str()) != Some("go") {
962        return;
963    }
964    if file.ends_with("_test.go") {
965        return;
966    }
967
968    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
969        return;
970    };
971
972    for other in file_paths {
973        if other == file {
974            continue;
975        }
976        let op = Path::new(other.as_str());
977        if op.extension().and_then(|e| e.to_str()) != Some("go") {
978            continue;
979        }
980        if other.ends_with("_test.go") {
981            continue;
982        }
983        let other_dir = op
984            .parent()
985            .map(|d| d.to_string_lossy().to_string())
986            .unwrap_or_default();
987        if other_dir == dir {
988            edges.push(IndexEdge {
989                from: file.to_string(),
990                to: other.clone(),
991                kind: "package".to_string(),
992                weight: 0.5,
993            });
994            break;
995        }
996    }
997}
998
999fn collect_python_init_edges(file: &str, file_paths: &[String], edges: &mut Vec<IndexEdge>) {
1000    let p = Path::new(file);
1001    if p.file_name().and_then(|n| n.to_str()) != Some("__init__.py") {
1002        return;
1003    }
1004
1005    let Some(dir) = p.parent().map(|d| d.to_string_lossy().to_string()) else {
1006        return;
1007    };
1008
1009    for other in file_paths {
1010        if other == file {
1011            continue;
1012        }
1013        let op = Path::new(other.as_str());
1014        if op.extension().and_then(|e| e.to_str()) != Some("py") {
1015            continue;
1016        }
1017        let other_dir = op
1018            .parent()
1019            .map(|d| d.to_string_lossy().to_string())
1020            .unwrap_or_default();
1021        if other_dir == dir {
1022            edges.push(IndexEdge {
1023                from: file.to_string(),
1024                to: other.clone(),
1025                kind: "module".to_string(),
1026                weight: 0.8,
1027            });
1028        }
1029    }
1030}
1031
1032fn collect_barrel_edges_cached(
1033    file: &str,
1034    file_set: &std::collections::HashSet<&str>,
1035    index: &ProjectIndex,
1036    edges: &mut Vec<IndexEdge>,
1037    content_cache: &HashMap<String, String>,
1038) {
1039    let basename = Path::new(file)
1040        .file_stem()
1041        .and_then(|s| s.to_str())
1042        .unwrap_or("");
1043    if basename != "index" {
1044        return;
1045    }
1046
1047    let content = if let Some(cached) = content_cache.get(file) {
1048        std::borrow::Cow::Borrowed(cached.as_str())
1049    } else {
1050        let full_path = Path::new(&index.project_root).join(file);
1051        match std::fs::read_to_string(&full_path) {
1052            Ok(c) => std::borrow::Cow::Owned(c),
1053            Err(_) => return,
1054        }
1055    };
1056
1057    let dir = Path::new(file)
1058        .parent()
1059        .map(|p| p.to_string_lossy().to_string())
1060        .unwrap_or_default();
1061
1062    let ext = Path::new(file)
1063        .extension()
1064        .and_then(|e| e.to_str())
1065        .unwrap_or("ts");
1066
1067    for line in content.lines() {
1068        let trimmed = line.trim();
1069        if !trimmed.starts_with("export") || !trimmed.contains("from") {
1070            continue;
1071        }
1072        if let Some(from_pos) = trimmed.find("from") {
1073            let after = &trimmed[from_pos + 4..];
1074            let source = after
1075                .trim()
1076                .trim_start_matches(['\'', '"'])
1077                .trim_end_matches([';', '\'', '"'])
1078                .trim_end_matches(['\'', '"']);
1079
1080            if source.starts_with("./") || source.starts_with("../") {
1081                let resolved = if dir.is_empty() {
1082                    source.trim_start_matches("./").to_string()
1083                } else {
1084                    format!("{dir}/{}", source.trim_start_matches("./"))
1085                };
1086
1087                let candidates = vec![
1088                    format!("{resolved}.{ext}"),
1089                    format!("{resolved}/index.{ext}"),
1090                    resolved.clone(),
1091                ];
1092
1093                for candidate in candidates {
1094                    if file_set.contains(candidate.as_str()) {
1095                        edges.push(IndexEdge {
1096                            from: file.to_string(),
1097                            to: candidate,
1098                            kind: "reexport".to_string(),
1099                            weight: 0.8,
1100                        });
1101                        break;
1102                    }
1103                }
1104            }
1105        }
1106    }
1107}
1108
1109// ---------------------------------------------------------------------------
1110// Layer 3: Co-Change Edges (weight 0.5)
1111// ---------------------------------------------------------------------------
1112
1113fn build_cochange_edges(index: &mut ProjectIndex) {
1114    let project_root = &index.project_root;
1115
1116    let output = match std::process::Command::new("git")
1117        .args([
1118            "log",
1119            "--name-only",
1120            "--pretty=format:---",
1121            "--since=6 months",
1122            "--",
1123            ".",
1124        ])
1125        .current_dir(project_root)
1126        .output()
1127    {
1128        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
1129        _ => return,
1130    };
1131
1132    let file_set: std::collections::HashSet<&str> =
1133        index.files.keys().map(String::as_str).collect();
1134
1135    let connected: std::collections::HashSet<&str> = index
1136        .edges
1137        .iter()
1138        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1139        .collect();
1140
1141    // Parse commits into groups of files
1142    let mut cooccurrence: HashMap<(String, String), u32> = HashMap::new();
1143    let mut current_commit: Vec<&str> = Vec::new();
1144
1145    for line in output.lines() {
1146        if line == "---" {
1147            if current_commit.len() >= 2 && current_commit.len() <= 20 {
1148                for i in 0..current_commit.len() {
1149                    for j in (i + 1)..current_commit.len() {
1150                        let a = current_commit[i];
1151                        let b = current_commit[j];
1152                        if !file_set.contains(a) || !file_set.contains(b) {
1153                            continue;
1154                        }
1155                        // Only add if at least one is currently isolated
1156                        if connected.contains(a) && connected.contains(b) {
1157                            continue;
1158                        }
1159                        let key = if a < b {
1160                            (a.to_string(), b.to_string())
1161                        } else {
1162                            (b.to_string(), a.to_string())
1163                        };
1164                        *cooccurrence.entry(key).or_insert(0) += 1;
1165                    }
1166                }
1167            }
1168            current_commit.clear();
1169        } else if !line.is_empty() {
1170            current_commit.push(line.trim());
1171        }
1172    }
1173
1174    // Filter: min 5 shared commits
1175    let mut cochange_edges: Vec<IndexEdge> = cooccurrence
1176        .into_iter()
1177        .filter(|(_, count)| *count >= 5)
1178        .map(|((from, to), _)| IndexEdge {
1179            from,
1180            to,
1181            kind: "cochange".to_string(),
1182            weight: 0.5,
1183        })
1184        .collect();
1185
1186    // Cap at 500 to prevent noise
1187    cochange_edges.sort_by(|a, b| a.from.cmp(&b.from).then_with(|| a.to.cmp(&b.to)));
1188    cochange_edges.truncate(500);
1189
1190    index.edges.extend(cochange_edges);
1191}
1192
1193// ---------------------------------------------------------------------------
1194// Layer 4: Sibling Edges (weight 0.2)
1195// ---------------------------------------------------------------------------
1196
1197fn build_sibling_edges(index: &mut ProjectIndex) {
1198    let connected: std::collections::HashSet<&str> = index
1199        .edges
1200        .iter()
1201        .flat_map(|e| [e.from.as_str(), e.to.as_str()])
1202        .collect();
1203
1204    let file_paths: Vec<String> = index.files.keys().cloned().collect();
1205    let mut new_edges: Vec<IndexEdge> = Vec::new();
1206
1207    for file in &file_paths {
1208        if connected.contains(file.as_str()) {
1209            continue;
1210        }
1211
1212        let ext = Path::new(file.as_str())
1213            .extension()
1214            .and_then(|e| e.to_str())
1215            .unwrap_or("");
1216        let dir = Path::new(file.as_str())
1217            .parent()
1218            .map(|p| p.to_string_lossy().to_string())
1219            .unwrap_or_default();
1220
1221        // Find one sibling with same extension
1222        for other in &file_paths {
1223            if other == file {
1224                continue;
1225            }
1226            let other_ext = Path::new(other.as_str())
1227                .extension()
1228                .and_then(|e| e.to_str())
1229                .unwrap_or("");
1230            let other_dir = Path::new(other.as_str())
1231                .parent()
1232                .map(|p| p.to_string_lossy().to_string())
1233                .unwrap_or_default();
1234
1235            if other_ext == ext && other_dir == dir {
1236                new_edges.push(IndexEdge {
1237                    from: file.clone(),
1238                    to: other.clone(),
1239                    kind: "sibling".to_string(),
1240                    weight: 0.2,
1241                });
1242                break; // Max 1 sibling edge per isolate
1243            }
1244        }
1245    }
1246
1247    index.edges.extend(new_edges);
1248}
1249
1250fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
1251    let lines: Vec<&str> = content.lines().collect();
1252    let mut start = 0;
1253
1254    for (i, line) in lines.iter().enumerate() {
1255        if line.contains(&sig.name) {
1256            let trimmed = line.trim();
1257            let is_def = trimmed.starts_with("fn ")
1258                || trimmed.starts_with("pub fn ")
1259                || trimmed.starts_with("pub(crate) fn ")
1260                || trimmed.starts_with("async fn ")
1261                || trimmed.starts_with("pub async fn ")
1262                || trimmed.starts_with("struct ")
1263                || trimmed.starts_with("pub struct ")
1264                || trimmed.starts_with("enum ")
1265                || trimmed.starts_with("pub enum ")
1266                || trimmed.starts_with("trait ")
1267                || trimmed.starts_with("pub trait ")
1268                || trimmed.starts_with("impl ")
1269                || trimmed.starts_with("class ")
1270                || trimmed.starts_with("export class ")
1271                || trimmed.starts_with("export function ")
1272                || trimmed.starts_with("export async function ")
1273                || trimmed.starts_with("function ")
1274                || trimmed.starts_with("async function ")
1275                || trimmed.starts_with("def ")
1276                || trimmed.starts_with("async def ")
1277                || trimmed.starts_with("func ")
1278                || trimmed.starts_with("interface ")
1279                || trimmed.starts_with("export interface ")
1280                || trimmed.starts_with("type ")
1281                || trimmed.starts_with("export type ")
1282                || trimmed.starts_with("const ")
1283                || trimmed.starts_with("export const ")
1284                || trimmed.starts_with("fun ")
1285                || trimmed.starts_with("private fun ")
1286                || trimmed.starts_with("public fun ")
1287                || trimmed.starts_with("internal fun ")
1288                || trimmed.starts_with("class ")
1289                || trimmed.starts_with("data class ")
1290                || trimmed.starts_with("sealed class ")
1291                || trimmed.starts_with("sealed interface ")
1292                || trimmed.starts_with("enum class ")
1293                || trimmed.starts_with("object ")
1294                || trimmed.starts_with("private object ")
1295                || trimmed.starts_with("interface ")
1296                || trimmed.starts_with("typealias ")
1297                || trimmed.starts_with("private typealias ");
1298            if is_def {
1299                start = i + 1;
1300                break;
1301            }
1302        }
1303    }
1304
1305    if start == 0 {
1306        return (1, lines.len().min(20));
1307    }
1308
1309    let base_indent = lines
1310        .get(start - 1)
1311        .map_or(0, |l| l.len() - l.trim_start().len());
1312
1313    let mut end = start;
1314    let mut brace_depth: i32 = 0;
1315    let mut found_open = false;
1316
1317    for (i, line) in lines.iter().enumerate().skip(start - 1) {
1318        for ch in line.chars() {
1319            if ch == '{' {
1320                brace_depth += 1;
1321                found_open = true;
1322            } else if ch == '}' {
1323                brace_depth -= 1;
1324            }
1325        }
1326
1327        end = i + 1;
1328
1329        if found_open && brace_depth <= 0 {
1330            break;
1331        }
1332
1333        if !found_open && i > start {
1334            let indent = line.len() - line.trim_start().len();
1335            if indent <= base_indent && !line.trim().is_empty() && i > start {
1336                end = i;
1337                break;
1338            }
1339        }
1340
1341        if end - start > 200 {
1342            break;
1343        }
1344    }
1345
1346    (start, end)
1347}
1348
/// Returns a one-line summary of `content`: the first of its first 20 lines
/// that is neither blank, a comment, nor an import/package statement, trimmed
/// and cut to at most 120 characters. Yields an empty string when no such
/// line exists.
fn extract_summary(content: &str) -> String {
    // Line starts that mark comments or import/package boilerplate.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| line.starts_with(*prefix))
        })
        .map(|line| line.chars().take(120).collect::<String>())
        .unwrap_or_default()
}
1369
/// Hashes `content` with the standard library's `DefaultHasher` and renders
/// the 64-bit result as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
1378
/// Hashes `input` with the standard library's `DefaultHasher` and renders the
/// low 32 bits of the result as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let truncated = state.finish() & LOW_32_BITS;
    format!("{truncated:08x}")
}
1387
/// Recursively copies the directory `src` into `dst`, creating `dst` (and any
/// missing parents) first.
///
/// # Errors
///
/// Returns the first I/O error encountered: creating a directory, listing
/// `src`, reading an individual directory entry, or copying a file. Entry
/// errors are propagated rather than skipped so that an `Ok(())` result never
/// hides a partially copied tree.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
1401
1402fn normalize_absolute_path(path: &str) -> String {
1403    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
1404        return canon.to_string_lossy().to_string();
1405    }
1406
1407    let mut normalized = path.to_string();
1408    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
1409        normalized.truncate(normalized.len() - 2);
1410    }
1411    while normalized.len() > 1
1412        && (normalized.ends_with('\\') || normalized.ends_with('/'))
1413        && !normalized.ends_with(":\\")
1414        && !normalized.ends_with(":/")
1415        && normalized != "\\"
1416        && normalized != "/"
1417    {
1418        normalized.pop();
1419    }
1420    normalized
1421}
1422
1423pub fn normalize_project_root(path: &str) -> String {
1424    normalize_absolute_path(path)
1425}
1426
1427pub fn graph_match_key(path: &str) -> String {
1428    let stripped =
1429        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1430    stripped.trim_start_matches('/').to_string()
1431}
1432
1433pub fn graph_relative_key(path: &str, root: &str) -> String {
1434    let root_norm = normalize_project_root(root);
1435    let path_norm = normalize_absolute_path(path);
1436    let root_path = Path::new(&root_norm);
1437    let path_path = Path::new(&path_norm);
1438
1439    if let Ok(rel) = path_path.strip_prefix(root_path) {
1440        let rel = rel.to_string_lossy().to_string();
1441        return rel.trim_start_matches(['/', '\\']).to_string();
1442    }
1443
1444    path.trim_start_matches(['/', '\\'])
1445        .replace('/', std::path::MAIN_SEPARATOR_STR)
1446}
1447
1448fn make_relative(path: &str, root: &str) -> String {
1449    graph_relative_key(path, root)
1450}
1451
1452fn is_indexable_ext(ext: &str) -> bool {
1453    crate::core::language_capabilities::is_indexable_ext(ext)
1454}
1455
/// Test-only helper: returns the name declared by the first `package ...`
/// line of `content`, with surrounding whitespace and trailing `;` removed,
/// or `None` when there is no such line.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for line in content.lines() {
        if let Some(declared) = line.trim().strip_prefix("package ") {
            return Some(declared.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
1463
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a `FileEntry` for `path` whose hash, line count, token count
    /// and summary are derived from `content`.
    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
        FileEntry {
            path: path.to_string(),
            hash: compute_hash(content),
            language: language.to_string(),
            line_count: content.lines().count(),
            token_count: crate::core::tokens::count_tokens(content),
            exports: Vec::new(),
            summary: extract_summary(content),
        }
    }

    /// `short_hash` is stable for equal input and always 8 characters long.
    #[test]
    fn test_short_hash_deterministic() {
        let input = "/Users/test/project";
        let first = short_hash(input);
        assert_eq!(first, short_hash(input));
        assert_eq!(first.len(), 8);
    }

    /// `make_relative` agrees with `graph_relative_key` for Unix, relative,
    /// Windows and verbatim-style inputs.
    #[test]
    fn test_make_relative() {
        let cases = [
            ("/foo/bar/src/main.rs", "/foo/bar"),
            ("src/main.rs", "/foo/bar"),
            ("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            ("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
        ];
        for (path, root) in cases {
            assert_eq!(make_relative(path, root), graph_relative_key(path, root));
        }
    }

    /// Trailing separators and `.` segments are stripped from project roots.
    #[test]
    fn test_normalize_project_root() {
        let cases = [
            ("C:\\repo\\", "C:\\repo"),
            ("C:\\repo\\.", "C:\\repo"),
            ("//?/C:/repo/", "//?/C:/repo"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_project_root(input), expected);
        }
    }

    /// Windows-style and verbatim paths map to forward-slash match keys.
    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        let cases = [
            (r"C:\repo\src\main.rs", "C:/repo/src/main.rs"),
            (r"\\?\C:\repo\src\main.rs", "C:/repo/src/main.rs"),
            (r"\src\main.rs", "src/main.rs"),
        ];
        for (input, expected) in cases {
            assert_eq!(graph_match_key(input), expected);
        }
    }

    /// Comments, imports and blank lines are skipped when summarizing.
    #[test]
    fn test_extract_summary() {
        let source = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        assert_eq!(extract_summary(source), "pub fn main() {");
    }

    /// Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_compute_hash_deterministic() {
        let baseline = compute_hash("hello world");
        assert_eq!(baseline, compute_hash("hello world"));
        assert_ne!(baseline, compute_hash("hello world!"));
    }

    /// A fresh index carries the current version, its root and no files.
    #[test]
    fn test_project_index_new() {
        let index = ProjectIndex::new("/test");
        assert_eq!(index.version, INDEX_VERSION);
        assert_eq!(index.project_root, "/test");
        assert!(index.files.is_empty());
    }

    /// An index that references a file missing on disk is stale.
    #[test]
    fn test_index_looks_stale_when_any_file_missing() {
        let workspace = tempdir().expect("tempdir");
        std::fs::write(workspace.path().join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root = normalize_project_root(&workspace.path().to_string_lossy());
        let mut index = ProjectIndex::new(&root);
        for (name, body) in [("a.rs", "pub fn a() {}\n"), ("missing.rs", "pub fn m() {}\n")] {
            index.files.insert(name.to_string(), fe(name, body, "rs"));
        }

        assert!(index_looks_stale(&index, &root));
    }

    /// An index whose files all exist on disk is not stale.
    #[test]
    fn test_index_looks_fresh_when_all_files_exist() {
        let workspace = tempdir().expect("tempdir");
        std::fs::write(workspace.path().join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root = normalize_project_root(&workspace.path().to_string_lossy());
        let mut index = ProjectIndex::new(&root);
        index
            .files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));

        assert!(!index_looks_stale(&index, &root));
    }

    /// Both importers of `b.rs` are reported as reverse dependencies.
    #[test]
    fn test_reverse_deps() {
        let mut index = ProjectIndex::new("/test");
        for importer in ["a.rs", "c.rs"] {
            index.edges.push(IndexEdge {
                from: importer.to_string(),
                to: "b.rs".to_string(),
                kind: "import".to_string(),
                weight: 1.0,
            });
        }

        let dependents = index.get_reverse_deps("b.rs", 1);
        assert_eq!(dependents.len(), 2);
        for importer in ["a.rs", "c.rs"] {
            assert!(dependents.contains(&importer.to_string()));
        }
    }

    /// A Kotlin member function is found on the line where it is declared.
    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = [
            "",
            "package com.example",
            "",
            "class UserService {",
            "    fun greet(name: String): String {",
            "        return \"hi $name\"",
            "    }",
            "}",
            "",
        ]
        .join("\n");
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };

        let (start, end) = find_symbol_range(&content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    /// Explicit span information on a signature wins over the text search.
    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        let range = match (sig.start_line, sig.end_line) {
            (Some(start), Some(end)) => (start, end),
            _ => find_symbol_range("ignored", &sig),
        };
        assert_eq!(range, (42, 43));
    }

    /// An index serialized with an older version still parses and is
    /// distinguishable from the current version.
    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    /// The package declaration is extracted from Kotlin source.
    #[test]
    fn test_kotlin_package_name() {
        let source = "package com.example.feature\n\nclass UserService";
        let package = kotlin_package_name(source);
        assert_eq!(package.as_deref(), Some("com.example.feature"));
    }

    /// Filesystem roots are never accepted as scan roots.
    #[test]
    fn safe_scan_root_rejects_fs_root() {
        for root in ["/", "\\"] {
            assert!(!is_safe_scan_root(root));
        }
        #[cfg(windows)]
        {
            for drive in ["C:\\", "D:\\"] {
                assert!(!is_safe_scan_root(drive));
            }
        }
    }

    /// The user's home directory is never accepted as a scan root.
    #[test]
    fn safe_scan_root_rejects_home() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let home_str = home.to_string_lossy().to_string();
        assert!(
            !is_safe_scan_root(&home_str),
            "home dir should be rejected: {home_str}"
        );
    }

    /// A directory containing a `Cargo.toml` is accepted.
    #[test]
    fn safe_scan_root_accepts_project_dir() {
        let project = tempdir().unwrap();
        let manifest = project.path().join("Cargo.toml");
        std::fs::write(manifest, "[package]\nname = \"test\"\n").unwrap();

        let root = project.path().to_string_lossy().to_string();
        assert!(is_safe_scan_root(&root));
    }

    /// A directory with 55 subdirectories and no project marker is rejected.
    #[test]
    fn safe_scan_root_rejects_broad_dir() {
        let broad = tempdir().unwrap();
        (0..55).for_each(|i| {
            std::fs::create_dir(broad.path().join(format!("dir{i}"))).unwrap();
        });

        let root = broad.path().to_string_lossy().to_string();
        assert!(!is_safe_scan_root(&root));
    }

    /// With `LEAN_CTX_NO_INDEX` set, `scan` returns an index without files.
    #[test]
    fn no_index_env_skips_scan() {
        // Hold the shared lock while the process environment is modified.
        let _env = crate::core::data_dir::test_env_lock();
        let project = tempdir().unwrap();
        std::fs::write(project.path().join("Cargo.toml"), "").unwrap();
        std::fs::write(project.path().join("main.rs"), "fn main() {}").unwrap();

        std::env::set_var("LEAN_CTX_NO_INDEX", "1");
        let index = scan(&project.path().to_string_lossy());
        std::env::remove_var("LEAN_CTX_NO_INDEX");

        assert!(index.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
    }

    /// An index that picked up `Desktop/` files is treated as stale.
    #[test]
    fn stale_index_detected_by_contamination() {
        let root = "/home/testuser/myproject";
        let mut index = ProjectIndex::new(root);
        // Simulate a contaminated index with Desktop files
        let entries = [
            ("Desktop/random.py", "x = 1\n", "py"),
            ("src/main.rs", "fn main() {}\n", "rs"),
        ];
        for (path, body, language) in entries {
            index
                .files
                .insert(path.to_string(), fe(path, body, language));
        }

        assert!(
            index_looks_stale(&index, root),
            "Index with Desktop/ files should be considered stale"
        );
    }

    /// An index whose last scan is 100 hours old is treated as stale.
    #[test]
    fn stale_index_detected_by_age() {
        let workspace = tempdir().expect("tempdir");
        std::fs::write(workspace.path().join("a.rs"), "fn a() {}\n").unwrap();

        let root = normalize_project_root(&workspace.path().to_string_lossy());
        let mut index = ProjectIndex::new(&root);
        index
            .files
            .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));

        // Set last_scan to 100 hours ago (default max_age_hours is 48)
        let scanned_at = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
        index.last_scan = scanned_at.format("%Y-%m-%d %H:%M:%S").to_string();

        assert!(
            index_looks_stale(&index, &root),
            "Index older than max_age_hours should be stale"
        );
    }

    /// `~/Downloads` is rejected unless it happens to be a git repository.
    #[test]
    fn safe_scan_root_rejects_home_downloads() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let downloads = home.join("Downloads");
        // Only test if Downloads doesn't contain a .git (unlikely but possible)
        if downloads.join(".git").exists() {
            return;
        }

        let downloads_str = downloads.to_string_lossy().to_string();
        assert!(
            !is_safe_scan_root(&downloads_str),
            "~/Downloads should be rejected without project markers"
        );
    }
}