Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
/// On-disk schema version stamped into every `ProjectIndex` by `ProjectIndex::new`.
/// `ProjectIndex::load` returns `None` for a persisted index whose `version`
/// differs from this value.
const INDEX_VERSION: u32 = 6;
10
11pub fn is_safe_scan_root_public(path: &str) -> bool {
12    is_safe_scan_root(path)
13}
14
/// Returns `true` when `path` has no parent component.
///
/// `Path::parent` yields `None` for a path that terminates in a root or
/// prefix and for the empty string, so `""` is also reported as a root.
/// On Windows builds a path whose parent is the empty path is additionally
/// treated as a root.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
19
20fn is_safe_scan_root(path: &str) -> bool {
21    let normalized = normalize_project_root(path);
22    let p = Path::new(&normalized);
23
24    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
25        tracing::warn!("[graph_index: refusing to scan filesystem root]");
26        return false;
27    }
28
29    if normalized == "." || normalized.is_empty() {
30        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
31        return false;
32    }
33
34    if let Some(home) = dirs::home_dir() {
35        let home_norm = normalize_project_root(&home.to_string_lossy());
36        if normalized == home_norm {
37            use std::sync::Once;
38            static HOME_WARN: Once = Once::new();
39            HOME_WARN.call_once(|| {
40                tracing::warn!(
41                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
42                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
43                );
44            });
45            return false;
46        }
47        // Block common broad home subdirectories that are never valid project roots
48        let home_path = Path::new(&home_norm);
49        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
50            "Desktop",
51            "Documents",
52            "Downloads",
53            "Pictures",
54            "Music",
55            "Videos",
56            "Movies",
57            "Library",
58            ".local",
59            ".cache",
60            ".config",
61            "snap",
62            "Applications",
63        ];
64        for blocked in BLOCKED_HOME_SUBDIRS {
65            let blocked_path = home_path.join(blocked);
66            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
67            let has_project_marker = p.join(".git").exists()
68                || p.join("Cargo.toml").exists()
69                || p.join("package.json").exists();
70            if is_inside_blocked && !has_project_marker {
71                tracing::warn!(
72                    "[graph_index: refusing to scan {normalized} — \
73                     inside home/{blocked} without project markers]"
74                );
75                return false;
76            }
77        }
78
79        // Block directories that are direct children of home without project markers
80        if p.parent() == Some(home_path) {
81            let has_marker = p.join(".git").exists()
82                || p.join("Cargo.toml").exists()
83                || p.join("package.json").exists()
84                || p.join("go.mod").exists()
85                || p.join("pyproject.toml").exists();
86            if !has_marker {
87                tracing::warn!(
88                    "[graph_index: refusing to scan {normalized} — \
89                     direct child of home without project markers]"
90                );
91                return false;
92            }
93        }
94    }
95
96    let breadth_markers = [
97        ".git",
98        "Cargo.toml",
99        "package.json",
100        "go.mod",
101        "pyproject.toml",
102        "setup.py",
103        "Makefile",
104        "CMakeLists.txt",
105        "pnpm-workspace.yaml",
106        ".projectile",
107        "BUILD.bazel",
108        "go.work",
109    ];
110
111    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
112        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
113            rd.filter_map(Result::ok)
114                .filter(|e| e.path().is_dir())
115                .count()
116        });
117        if child_count > 50 {
118            tracing::warn!(
119                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
120                 skipping scan to avoid indexing broad directories]"
121            );
122            return false;
123        }
124    }
125
126    true
127}
128
/// Persisted graph index for one project: files, symbols and import edges.
///
/// Serialized as JSON and stored zstd-compressed by `ProjectIndex::save`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Schema version; `load` rejects indices whose version differs from `INDEX_VERSION`.
    pub version: u32,
    /// Project root as returned by `normalize_project_root`.
    pub project_root: String,
    /// Local time of creation, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by the path relative to `project_root`.
    pub files: HashMap<String, FileEntry>,
    /// Edges between files; `scan` rebuilds them via `build_edges`.
    pub edges: Vec<IndexEdge>,
    /// Extracted symbols; `scan` keys them as `"{relative_path}::{symbol_name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
138
/// Per-file record stored in `ProjectIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same value as the map key written by `scan`).
    pub path: String,
    /// Content hash from `compute_hash`; `scan` reuses the entry when it is unchanged.
    pub hash: String,
    /// The file extension as seen by `scan` (e.g. `rs`), not a language name.
    pub language: String,
    /// Number of lines reported by `str::lines` on the file content.
    pub line_count: usize,
    /// Token count from `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures flagged `is_exported`.
    pub exports: Vec<String>,
    /// Output of `extract_summary` for the file content.
    pub summary: String,
}
149
/// Per-symbol record stored in `ProjectIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// String form of the signature's kind (`sig.kind.to_string()` in `scan`).
    pub kind: String,
    /// First line of the symbol; taken from the signature when available,
    /// otherwise from `find_symbol_range`. NOTE(review): looks 1-based — confirm.
    pub start_line: usize,
    /// Last line of the symbol, obtained the same way as `start_line`.
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
159
/// Directed edge between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Relative path of the file the edge originates from (the importing file).
    pub from: String,
    /// Resolved path of the edge target (the imported file).
    pub to: String,
    /// Edge kind; `build_edges_with_cache` only ever writes `"import"`.
    pub kind: String,
}
166
167impl ProjectIndex {
168    pub fn new(project_root: &str) -> Self {
169        Self {
170            version: INDEX_VERSION,
171            project_root: normalize_project_root(project_root),
172            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
173            files: HashMap::new(),
174            edges: Vec::new(),
175            symbols: HashMap::new(),
176        }
177    }
178
179    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
180        let normalized = normalize_project_root(project_root);
181        let hash = crate::core::project_hash::hash_project_root(&normalized);
182        crate::core::data_dir::lean_ctx_data_dir()
183            .ok()
184            .map(|d| d.join("graphs").join(hash))
185    }
186
187    pub fn load(project_root: &str) -> Option<Self> {
188        let dir = Self::index_dir(project_root)?;
189
190        let zst_path = dir.join("index.json.zst");
191        if zst_path.exists() {
192            let compressed = std::fs::read(&zst_path).ok()?;
193            let data = zstd::decode_all(compressed.as_slice()).ok()?;
194            let content = String::from_utf8(data).ok()?;
195            let index: Self = serde_json::from_str(&content).ok()?;
196            if index.version != INDEX_VERSION {
197                return None;
198            }
199            return Some(index);
200        }
201
202        let json_path = dir.join("index.json");
203        let content = std::fs::read_to_string(&json_path)
204            .or_else(|_| -> std::io::Result<String> {
205                let legacy_hash = short_hash(&normalize_project_root(project_root));
206                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
207                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
208                    .join("graphs")
209                    .join(legacy_hash);
210                let legacy_path = legacy_dir.join("index.json");
211                let data = std::fs::read_to_string(&legacy_path)?;
212                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
213                    tracing::debug!("graph index migration: {e}");
214                }
215                Ok(data)
216            })
217            .ok()?;
218        let index: Self = serde_json::from_str(&content).ok()?;
219        if index.version != INDEX_VERSION {
220            return None;
221        }
222        // Auto-migrate: compress legacy JSON to zstd
223        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
224            let zst_tmp = zst_path.with_extension("zst.tmp");
225            if std::fs::write(&zst_tmp, &compressed).is_ok()
226                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
227            {
228                let _ = std::fs::remove_file(&json_path);
229            }
230        }
231        Some(index)
232    }
233
234    pub fn save(&self) -> Result<(), String> {
235        let dir = Self::index_dir(&self.project_root)
236            .ok_or_else(|| "Cannot determine data directory".to_string())?;
237        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
238        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
239        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
240        let target = dir.join("index.json.zst");
241        let tmp = target.with_extension("zst.tmp");
242        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
243        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
244        let _ = std::fs::remove_file(dir.join("index.json"));
245        Ok(())
246    }
247
248    /// Remove all cached graph indices that are older than max_age_hours.
249    /// Called on startup/update to prevent stale data from persisting.
250    pub fn purge_stale_indices() {
251        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
252            return;
253        };
254        let graphs_dir = data_dir.join("graphs");
255        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
256            return;
257        };
258        let cfg = crate::core::config::Config::load();
259        let max_age_secs = cfg.archive.max_age_hours * 3600;
260
261        for entry in entries.filter_map(Result::ok) {
262            let path = entry.path();
263            if !path.is_dir() {
264                continue;
265            }
266            let zst = path.join("index.json.zst");
267            let json = path.join("index.json");
268            let index_file = if zst.exists() {
269                &zst
270            } else if json.exists() {
271                &json
272            } else {
273                continue;
274            };
275
276            let is_old = index_file
277                .metadata()
278                .and_then(|m| m.modified())
279                .is_ok_and(|mtime| {
280                    mtime
281                        .elapsed()
282                        .is_ok_and(|age| age.as_secs() > max_age_secs)
283                });
284
285            if is_old {
286                tracing::info!("[graph_index: purging stale index at {}]", path.display());
287                let _ = std::fs::remove_dir_all(&path);
288            }
289        }
290    }
291
292    pub fn file_count(&self) -> usize {
293        self.files.len()
294    }
295
296    pub fn symbol_count(&self) -> usize {
297        self.symbols.len()
298    }
299
300    pub fn edge_count(&self) -> usize {
301        self.edges.len()
302    }
303
304    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
305        self.symbols.get(key)
306    }
307
308    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
309        let mut result = Vec::new();
310        let mut visited = std::collections::HashSet::new();
311        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
312
313        while let Some((current, d)) = queue.pop() {
314            if d > depth || visited.contains(&current) {
315                continue;
316            }
317            visited.insert(current.clone());
318            if current != path {
319                result.push(current.clone());
320            }
321
322            for edge in &self.edges {
323                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
324                    queue.push((edge.from.clone(), d + 1));
325                }
326            }
327        }
328        result
329    }
330
331    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
332        let mut result = Vec::new();
333        let mut visited = std::collections::HashSet::new();
334        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
335
336        while let Some((current, d)) = queue.pop() {
337            if d > depth || visited.contains(&current) {
338                continue;
339            }
340            visited.insert(current.clone());
341            if current != path {
342                result.push(current.clone());
343            }
344
345            for edge in &self.edges {
346                if edge.from == current && !visited.contains(&edge.to) {
347                    queue.push((edge.to.clone(), d + 1));
348                }
349                if edge.to == current && !visited.contains(&edge.from) {
350                    queue.push((edge.from.clone(), d + 1));
351                }
352            }
353        }
354        result
355    }
356}
357
358/// Load the best available graph index, trying multiple root path variants.
359/// If no valid index exists, automatically scans the project to build one.
360/// This is the primary entry point — ensures zero-config usage.
361pub fn load_or_build(project_root: &str) -> ProjectIndex {
362    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
363        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
364    }
365
366    // Prefer stable absolute roots. Using "." as a cache key is fragile because
367    // it depends on the process cwd and can accidentally load the wrong project.
368    let root_abs = if project_root.trim().is_empty() || project_root == "." {
369        std::env::current_dir().ok().map_or_else(
370            || ".".to_string(),
371            |p| normalize_project_root(&p.to_string_lossy()),
372        )
373    } else {
374        normalize_project_root(project_root)
375    };
376
377    if !is_safe_scan_root(&root_abs) {
378        return ProjectIndex::new(&root_abs);
379    }
380
381    // Try the absolute/root-normalized path first.
382    if let Some(idx) = ProjectIndex::load(&root_abs) {
383        if !idx.files.is_empty() {
384            if index_looks_stale(&idx, &root_abs) {
385                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
386                return scan(&root_abs);
387            }
388            return idx;
389        }
390    }
391
392    // Legacy: older builds may have cached the index under ".". Only accept it if it
393    // actually refers to the current cwd project, then migrate it to `root_abs`.
394    if let Some(idx) = ProjectIndex::load(".") {
395        if !idx.files.is_empty() {
396            let mut migrated = idx;
397            migrated.project_root.clone_from(&root_abs);
398            let _ = migrated.save();
399            if index_looks_stale(&migrated, &root_abs) {
400                tracing::warn!(
401                    "[graph_index: stale legacy index detected for {root_abs}; rebuilding]"
402                );
403                return scan(&root_abs);
404            }
405            return migrated;
406        }
407    }
408
409    // Try absolute cwd
410    if let Ok(cwd) = std::env::current_dir() {
411        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
412        if cwd_str != root_abs {
413            if let Some(idx) = ProjectIndex::load(&cwd_str) {
414                if !idx.files.is_empty() {
415                    if index_looks_stale(&idx, &cwd_str) {
416                        tracing::warn!(
417                            "[graph_index: stale index detected for {cwd_str}; rebuilding]"
418                        );
419                        return scan(&cwd_str);
420                    }
421                    return idx;
422                }
423            }
424        }
425    }
426
427    // No existing index found anywhere — auto-build
428    scan(&root_abs)
429}
430
431fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
432    if index.files.is_empty() {
433        return true;
434    }
435
436    // TTL check: rebuild if index is older than configured max_age_hours
437    if let Ok(scan_time) =
438        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
439    {
440        let cfg = crate::core::config::Config::load();
441        let max_age = chrono::Duration::hours(cfg.archive.max_age_hours as i64);
442        let now = chrono::Local::now().naive_local();
443        if now.signed_duration_since(scan_time) > max_age {
444            tracing::info!(
445                "[graph_index: index is older than {}h — marking stale]",
446                cfg.archive.max_age_hours
447            );
448            return true;
449        }
450    }
451
452    // Contamination check: if index contains paths from common user directories,
453    // it was built from a too-broad root and must be rebuilt
454    const CONTAMINATION_MARKERS: &[&str] = &[
455        "Desktop/",
456        "Documents/",
457        "Downloads/",
458        "Pictures/",
459        "Music/",
460        "Videos/",
461        "Movies/",
462        "Library/",
463        ".cache/",
464        "snap/",
465    ];
466    let contaminated = index.files.keys().take(200).any(|rel| {
467        CONTAMINATION_MARKERS
468            .iter()
469            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
470    });
471    if contaminated {
472        tracing::warn!(
473            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
474             marking stale to force clean rebuild]"
475        );
476        return true;
477    }
478
479    let root_path = Path::new(root_abs);
480    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
481    let sample_size = index.files.len().min(20);
482    for rel in index.files.keys().take(sample_size) {
483        let rel = rel.trim_start_matches(['/', '\\']);
484        if rel.is_empty() {
485            continue;
486        }
487        let abs = root_path.join(rel);
488        if !abs.exists() {
489            return true;
490        }
491    }
492
493    false
494}
495
496pub fn scan(project_root: &str) -> ProjectIndex {
497    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
498        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
499        return ProjectIndex::new(project_root);
500    }
501
502    let project_root = normalize_project_root(project_root);
503
504    if !is_safe_scan_root(&project_root) {
505        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
506        return ProjectIndex::new(&project_root);
507    }
508
509    let lock_name = format!(
510        "graph-idx-{}",
511        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
512    );
513    let _lock = crate::core::startup_guard::try_acquire_lock(
514        &lock_name,
515        std::time::Duration::from_millis(800),
516        std::time::Duration::from_mins(3),
517    );
518    if _lock.is_none() {
519        tracing::info!(
520            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
521        );
522        return ProjectIndex::load(&project_root)
523            .unwrap_or_else(|| ProjectIndex::new(&project_root));
524    }
525
526    let existing = ProjectIndex::load(&project_root);
527    let mut index = ProjectIndex::new(&project_root);
528
529    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
530        if let Some(ref prev) = existing {
531            prev.files
532                .iter()
533                .map(|(path, entry)| {
534                    let syms: Vec<(String, SymbolEntry)> = prev
535                        .symbols
536                        .iter()
537                        .filter(|(_, s)| s.file == *path)
538                        .map(|(k, v)| (k.clone(), v.clone()))
539                        .collect();
540                    (path.clone(), (entry.hash.clone(), syms))
541                })
542                .collect()
543        } else {
544            HashMap::new()
545        };
546
547    let walker = ignore::WalkBuilder::new(&project_root)
548        .hidden(true)
549        .git_ignore(true)
550        .git_global(true)
551        .git_exclude(true)
552        .max_depth(Some(20))
553        .build();
554
555    let cfg = crate::core::config::Config::load();
556    let extra_ignores: Vec<glob::Pattern> = cfg
557        .extra_ignore_patterns
558        .iter()
559        .filter_map(|p| glob::Pattern::new(p).ok())
560        .collect();
561
562    let mut scanned = 0usize;
563    let mut reused = 0usize;
564    let mut entries_visited = 0usize;
565    let max_files = if cfg.graph_index_max_files == 0 {
566        usize::MAX // unlimited
567    } else {
568        cfg.graph_index_max_files as usize
569    };
570    const MAX_ENTRIES_VISITED: usize = 500_000;
571    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
572    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
573
574    for entry in walker.filter_map(std::result::Result::ok) {
575        entries_visited += 1;
576        if entries_visited > MAX_ENTRIES_VISITED {
577            tracing::warn!(
578                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
579                 runaway traversal. Indexed {} files so far.]",
580                index.files.len()
581            );
582            break;
583        }
584        if entries_visited.is_multiple_of(5000) {
585            if std::time::Instant::now() > scan_deadline {
586                tracing::warn!(
587                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
588                     saving partial index with {} files]",
589                    index.files.len()
590                );
591                break;
592            }
593            if crate::core::memory_guard::abort_requested() {
594                tracing::warn!(
595                    "[graph_index: memory pressure abort after {entries_visited} entries — \
596                     saving partial index with {} files]",
597                    index.files.len()
598                );
599                break;
600            }
601            if crate::core::memory_guard::is_under_pressure() {
602                tracing::warn!(
603                    "[graph_index: memory pressure detected at {entries_visited} entries — \
604                     stopping scan with {} files]",
605                    index.files.len()
606                );
607                break;
608            }
609            if let Some(ref g) = _lock {
610                g.touch();
611            }
612        }
613
614        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
615            continue;
616        }
617        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
618
619        // Prevent indexing files that escaped the project root (symlinks, mount points)
620        if !file_path.starts_with(&project_root) {
621            continue;
622        }
623
624        // Skip special files (devices, FIFOs, sockets) that can stream infinite data
625        if let Ok(meta) = std::fs::metadata(&file_path) {
626            if !meta.is_file() {
627                continue;
628            }
629            if meta.len() > MAX_FILE_SIZE_BYTES {
630                tracing::debug!(
631                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
632                    meta.len() as f64 / 1_048_576.0,
633                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
634                );
635                continue;
636            }
637        }
638
639        let ext = Path::new(&file_path)
640            .extension()
641            .and_then(|e| e.to_str())
642            .unwrap_or("");
643
644        if !is_indexable_ext(ext) {
645            continue;
646        }
647
648        let rel = make_relative(&file_path, &project_root);
649        if extra_ignores.iter().any(|p| p.matches(&rel)) {
650            continue;
651        }
652
653        if max_files != usize::MAX && index.files.len() >= max_files {
654            tracing::info!(
655                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
656                max_files
657            );
658            break;
659        }
660
661        let Ok(content) = std::fs::read_to_string(&file_path) else {
662            continue;
663        };
664
665        let hash = compute_hash(&content);
666        let rel_path = make_relative(&file_path, &project_root);
667
668        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
669            if *old_hash == hash {
670                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
671                    index.files.insert(rel_path.clone(), old_entry.clone());
672                    for (key, sym) in old_syms {
673                        index.symbols.insert(key.clone(), sym.clone());
674                    }
675                    reused += 1;
676                    continue;
677                }
678            }
679        }
680
681        let sigs = signatures::extract_signatures(&content, ext);
682        let line_count = content.lines().count();
683        let token_count = crate::core::tokens::count_tokens(&content);
684        let summary = extract_summary(&content);
685
686        let exports: Vec<String> = sigs
687            .iter()
688            .filter(|s| s.is_exported)
689            .map(|s| s.name.clone())
690            .collect();
691
692        index.files.insert(
693            rel_path.clone(),
694            FileEntry {
695                path: rel_path.clone(),
696                hash,
697                language: ext.to_string(),
698                line_count,
699                token_count,
700                exports,
701                summary,
702            },
703        );
704
705        for sig in &sigs {
706            let (start, end) = sig
707                .start_line
708                .zip(sig.end_line)
709                .unwrap_or_else(|| find_symbol_range(&content, sig));
710            let key = format!("{}::{}", rel_path, sig.name);
711            index.symbols.insert(
712                key,
713                SymbolEntry {
714                    file: rel_path.clone(),
715                    name: sig.name.clone(),
716                    kind: sig.kind.to_string(),
717                    start_line: start,
718                    end_line: end,
719                    is_exported: sig.is_exported,
720                },
721            );
722        }
723
724        scanned += 1;
725    }
726
727    build_edges(&mut index);
728
729    if let Err(e) = index.save() {
730        tracing::warn!("could not save graph index: {e}");
731    }
732
733    tracing::warn!(
734        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
735        index.file_count(),
736        scanned,
737        reused,
738        index.symbol_count(),
739        index.edge_count()
740    );
741
742    index
743}
744
745fn build_edges(index: &mut ProjectIndex) {
746    build_edges_with_cache(index, &HashMap::new());
747}
748
749fn build_edges_with_cache(index: &mut ProjectIndex, content_cache: &HashMap<String, String>) {
750    index.edges.clear();
751
752    if crate::core::memory_guard::abort_requested() {
753        tracing::warn!("[graph_index: skipping edge-building due to memory pressure]");
754        return;
755    }
756
757    let root = normalize_project_root(&index.project_root);
758    let root_path = Path::new(&root);
759
760    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
761    file_paths.sort();
762
763    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
764
765    const MAX_FILE_SIZE_FOR_EDGES: u64 = 2 * 1024 * 1024;
766
767    for (i, rel_path) in file_paths.iter().enumerate() {
768        if i.is_multiple_of(1000) && crate::core::memory_guard::is_under_pressure() {
769            tracing::warn!(
770                "[graph_index: stopping edge-building at file {i}/{} due to memory pressure]",
771                file_paths.len()
772            );
773            break;
774        }
775
776        let content = if let Some(cached) = content_cache.get(rel_path) {
777            std::borrow::Cow::Borrowed(cached.as_str())
778        } else {
779            let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
780            if let Ok(meta) = abs_path.metadata() {
781                if meta.len() > MAX_FILE_SIZE_FOR_EDGES {
782                    continue;
783                }
784            }
785            match std::fs::read_to_string(&abs_path) {
786                Ok(c) => std::borrow::Cow::Owned(c),
787                Err(_) => continue,
788            }
789        };
790
791        let ext = Path::new(rel_path)
792            .extension()
793            .and_then(|e| e.to_str())
794            .unwrap_or("");
795
796        let resolve_ext = match ext {
797            "vue" | "svelte" => "ts",
798            _ => ext,
799        };
800
801        let imports = crate::core::deep_queries::analyze(&content, resolve_ext).imports;
802        if imports.is_empty() {
803            continue;
804        }
805
806        let resolved =
807            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
808        for r in resolved {
809            if r.is_external {
810                continue;
811            }
812            if let Some(to) = r.resolved_path {
813                index.edges.push(IndexEdge {
814                    from: rel_path.clone(),
815                    to,
816                    kind: "import".to_string(),
817                });
818            }
819        }
820    }
821
822    index.edges.sort_by(|a, b| {
823        a.from
824            .cmp(&b.from)
825            .then_with(|| a.to.cmp(&b.to))
826            .then_with(|| a.kind.cmp(&b.kind))
827    });
828    index
829        .edges
830        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
831}
832
833fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
834    let lines: Vec<&str> = content.lines().collect();
835    let mut start = 0;
836
837    for (i, line) in lines.iter().enumerate() {
838        if line.contains(&sig.name) {
839            let trimmed = line.trim();
840            let is_def = trimmed.starts_with("fn ")
841                || trimmed.starts_with("pub fn ")
842                || trimmed.starts_with("pub(crate) fn ")
843                || trimmed.starts_with("async fn ")
844                || trimmed.starts_with("pub async fn ")
845                || trimmed.starts_with("struct ")
846                || trimmed.starts_with("pub struct ")
847                || trimmed.starts_with("enum ")
848                || trimmed.starts_with("pub enum ")
849                || trimmed.starts_with("trait ")
850                || trimmed.starts_with("pub trait ")
851                || trimmed.starts_with("impl ")
852                || trimmed.starts_with("class ")
853                || trimmed.starts_with("export class ")
854                || trimmed.starts_with("export function ")
855                || trimmed.starts_with("export async function ")
856                || trimmed.starts_with("function ")
857                || trimmed.starts_with("async function ")
858                || trimmed.starts_with("def ")
859                || trimmed.starts_with("async def ")
860                || trimmed.starts_with("func ")
861                || trimmed.starts_with("interface ")
862                || trimmed.starts_with("export interface ")
863                || trimmed.starts_with("type ")
864                || trimmed.starts_with("export type ")
865                || trimmed.starts_with("const ")
866                || trimmed.starts_with("export const ")
867                || trimmed.starts_with("fun ")
868                || trimmed.starts_with("private fun ")
869                || trimmed.starts_with("public fun ")
870                || trimmed.starts_with("internal fun ")
871                || trimmed.starts_with("class ")
872                || trimmed.starts_with("data class ")
873                || trimmed.starts_with("sealed class ")
874                || trimmed.starts_with("sealed interface ")
875                || trimmed.starts_with("enum class ")
876                || trimmed.starts_with("object ")
877                || trimmed.starts_with("private object ")
878                || trimmed.starts_with("interface ")
879                || trimmed.starts_with("typealias ")
880                || trimmed.starts_with("private typealias ");
881            if is_def {
882                start = i + 1;
883                break;
884            }
885        }
886    }
887
888    if start == 0 {
889        return (1, lines.len().min(20));
890    }
891
892    let base_indent = lines
893        .get(start - 1)
894        .map_or(0, |l| l.len() - l.trim_start().len());
895
896    let mut end = start;
897    let mut brace_depth: i32 = 0;
898    let mut found_open = false;
899
900    for (i, line) in lines.iter().enumerate().skip(start - 1) {
901        for ch in line.chars() {
902            if ch == '{' {
903                brace_depth += 1;
904                found_open = true;
905            } else if ch == '}' {
906                brace_depth -= 1;
907            }
908        }
909
910        end = i + 1;
911
912        if found_open && brace_depth <= 0 {
913            break;
914        }
915
916        if !found_open && i > start {
917            let indent = line.len() - line.trim_start().len();
918            if indent <= base_indent && !line.trim().is_empty() && i > start {
919                end = i;
920                break;
921            }
922        }
923
924        if end - start > 200 {
925            break;
926        }
927    }
928
929    (start, end)
930}
931
/// Produces a one-line summary of a file: the first "meaningful" line among the
/// first 20 lines, trimmed and capped at 120 characters.
///
/// Blank lines and lines that start (after trimming) with a comment marker or
/// an import/package style keyword are skipped. Returns an empty string when
/// none of the first 20 lines qualifies.
fn extract_summary(content: &str) -> String {
    /// Trimmed-line prefixes that never count as a summary.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|candidate| {
            !candidate.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| candidate.starts_with(*prefix))
        })
        .map(|first| first.chars().take(120).collect())
        .unwrap_or_default()
}
952
/// Hashes `content` with the standard library's `DefaultHasher` and renders the
/// 64-bit digest as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(content, &mut state);
    let digest = std::hash::Hasher::finish(&state);
    format!("{digest:016x}")
}
961
/// Hashes `input` with the standard library's `DefaultHasher` and renders the
/// low 32 bits of the digest as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(input, &mut state);
    let low_bits = std::hash::Hasher::finish(&state) & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
970
/// Recursively copies the directory tree at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Entries for which
/// `Path::is_dir` is true are recursed into; every other entry is copied with
/// `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered: creating `dst`, opening `src`,
/// reading an individual directory entry, or copying a file. Entry-level read
/// errors are propagated rather than skipped, so `Ok(())` means every entry
/// that was listed has been copied.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Do not drop unreadable entries silently: a partial copy must not be
        // reported as success.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
984
985fn normalize_absolute_path(path: &str) -> String {
986    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
987        return canon.to_string_lossy().to_string();
988    }
989
990    let mut normalized = path.to_string();
991    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
992        normalized.truncate(normalized.len() - 2);
993    }
994    while normalized.len() > 1
995        && (normalized.ends_with('\\') || normalized.ends_with('/'))
996        && !normalized.ends_with(":\\")
997        && !normalized.ends_with(":/")
998        && normalized != "\\"
999        && normalized != "/"
1000    {
1001        normalized.pop();
1002    }
1003    normalized
1004}
1005
1006pub fn normalize_project_root(path: &str) -> String {
1007    normalize_absolute_path(path)
1008}
1009
1010pub fn graph_match_key(path: &str) -> String {
1011    let stripped =
1012        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
1013    stripped.trim_start_matches('/').to_string()
1014}
1015
1016pub fn graph_relative_key(path: &str, root: &str) -> String {
1017    let root_norm = normalize_project_root(root);
1018    let path_norm = normalize_absolute_path(path);
1019    let root_path = Path::new(&root_norm);
1020    let path_path = Path::new(&path_norm);
1021
1022    if let Ok(rel) = path_path.strip_prefix(root_path) {
1023        let rel = rel.to_string_lossy().to_string();
1024        return rel.trim_start_matches(['/', '\\']).to_string();
1025    }
1026
1027    path.trim_start_matches(['/', '\\'])
1028        .replace('/', std::path::MAIN_SEPARATOR_STR)
1029}
1030
1031fn make_relative(path: &str, root: &str) -> String {
1032    graph_relative_key(path, root)
1033}
1034
1035fn is_indexable_ext(ext: &str) -> bool {
1036    crate::core::language_capabilities::is_indexable_ext(ext)
1037}
1038
/// Test-only helper: returns the name declared by the first `package ` line in
/// `content`, with surrounding whitespace and trailing `;` characters removed,
/// or `None` when no line starts with `package ` after trimming.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            return Some(declared.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
1046
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a `FileEntry` for `path` whose derived fields (hash, line count,
    /// token count, summary) are computed from `content`.
    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
        FileEntry {
            path: path.to_string(),
            hash: compute_hash(content),
            language: language.to_string(),
            line_count: content.lines().count(),
            token_count: crate::core::tokens::count_tokens(content),
            exports: Vec::new(),
            summary: extract_summary(content),
        }
    }

    // ---- hashing -------------------------------------------------------

    #[test]
    fn test_short_hash_deterministic() {
        let input = "/Users/test/project";
        let first = short_hash(input);
        let second = short_hash(input);
        assert_eq!(first, second);
        assert_eq!(first.len(), 8);
    }

    #[test]
    fn test_compute_hash_deterministic() {
        let first = compute_hash("hello world");
        let second = compute_hash("hello world");
        assert_eq!(first, second);
        assert_ne!(first, compute_hash("hello world!"));
    }

    // ---- path normalization and keys -----------------------------------

    #[test]
    fn test_make_relative() {
        // `make_relative` must agree with `graph_relative_key` for every form.
        let cases = [
            ("/foo/bar/src/main.rs", "/foo/bar"),
            ("src/main.rs", "/foo/bar"),
            ("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            ("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
        ];
        for (path, root) in cases {
            assert_eq!(make_relative(path, root), graph_relative_key(path, root));
        }
    }

    #[test]
    fn test_normalize_project_root() {
        let cases = [
            ("C:\\repo\\", "C:\\repo"),
            ("C:\\repo\\.", "C:\\repo"),
            ("//?/C:/repo/", "//?/C:/repo"),
        ];
        for (input, expected) in cases {
            assert_eq!(normalize_project_root(input), expected);
        }
    }

    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        let cases = [
            (r"C:\repo\src\main.rs", "C:/repo/src/main.rs"),
            (r"\\?\C:\repo\src\main.rs", "C:/repo/src/main.rs"),
            (r"\src\main.rs", "src/main.rs"),
        ];
        for (input, expected) in cases {
            assert_eq!(graph_match_key(input), expected);
        }
    }

    // ---- summaries and index basics ------------------------------------

    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        assert_eq!(extract_summary(content), "pub fn main() {");
    }

    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    #[test]
    fn test_parse_stale_index_version() {
        let stale_version = INDEX_VERSION - 1;
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            stale_version
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    #[test]
    fn test_reverse_deps() {
        let importers = ["a.rs", "c.rs"];
        let mut idx = ProjectIndex::new("/test");
        for importer in importers {
            idx.edges.push(IndexEdge {
                from: importer.to_string(),
                to: "b.rs".to_string(),
                kind: "import".to_string(),
            });
        }

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        for importer in importers {
            assert!(deps.contains(&importer.to_string()));
        }
    }

    // ---- staleness detection -------------------------------------------

    #[test]
    fn test_index_looks_stale_when_any_file_missing() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        // `a.rs` exists on disk, `missing.rs` is only present in the index.
        for (name, body) in [("a.rs", "pub fn a() {}\n"), ("missing.rs", "pub fn m() {}\n")] {
            idx.files.insert(name.to_string(), fe(name, body, "rs"));
        }

        assert!(index_looks_stale(&idx, &root_s));
    }

    #[test]
    fn test_index_looks_fresh_when_all_files_exist() {
        let body = "pub fn a() {}\n";
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), body).expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files.insert("a.rs".to_string(), fe("a.rs", body, "rs"));

        assert!(!index_looks_stale(&idx, &root_s));
    }

    #[test]
    fn stale_index_detected_by_contamination() {
        let root_s = "/home/testuser/myproject";
        let mut idx = ProjectIndex::new(root_s);
        // The first entry simulates contamination by a file under Desktop/.
        let entries = [
            ("Desktop/random.py", "x = 1\n", "py"),
            ("src/main.rs", "fn main() {}\n", "rs"),
        ];
        for (path, body, language) in entries {
            idx.files.insert(path.to_string(), fe(path, body, language));
        }

        assert!(
            index_looks_stale(&idx, root_s),
            "Index with Desktop/ files should be considered stale"
        );
    }

    #[test]
    fn stale_index_detected_by_age() {
        let body = "fn a() {}\n";
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), body).unwrap();

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files.insert("a.rs".to_string(), fe("a.rs", body, "rs"));

        // Backdate the scan by 100 hours (default max_age_hours is 48).
        let scanned_at = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
        idx.last_scan = scanned_at.format("%Y-%m-%d %H:%M:%S").to_string();

        assert!(
            index_looks_stale(&idx, &root_s),
            "Index older than max_age_hours should be stale"
        );
    }

    // ---- symbol ranges --------------------------------------------------

    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
    fun greet(name: String): String {
        return "hi $name"
    }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };

        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        // Spans recorded on the signature win; the text scan is only a fallback.
        let resolved = match (sig.start_line, sig.end_line) {
            (Some(first), Some(last)) => (first, last),
            _ => find_symbol_range("ignored", &sig),
        };
        assert_eq!(resolved, (42, 43));
    }

    #[test]
    fn test_kotlin_package_name() {
        let content = "package com.example.feature\n\nclass UserService";
        let package = kotlin_package_name(content);
        assert_eq!(package.as_deref(), Some("com.example.feature"));
    }

    // ---- scan-root safety ----------------------------------------------

    #[test]
    fn safe_scan_root_rejects_fs_root() {
        for root in ["/", "\\"] {
            assert!(!is_safe_scan_root(root));
        }
        #[cfg(windows)]
        {
            for drive_root in ["C:\\", "D:\\"] {
                assert!(!is_safe_scan_root(drive_root));
            }
        }
    }

    #[test]
    fn safe_scan_root_rejects_home() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let home_str = home.to_string_lossy().to_string();
        assert!(
            !is_safe_scan_root(&home_str),
            "home dir should be rejected: {home_str}"
        );
    }

    #[test]
    fn safe_scan_root_rejects_home_downloads() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let downloads = home.join("Downloads");
        // Skip the check when Downloads contains a .git (unlikely but possible).
        if downloads.join(".git").exists() {
            return;
        }
        let downloads_str = downloads.to_string_lossy().to_string();
        assert!(
            !is_safe_scan_root(&downloads_str),
            "~/Downloads should be rejected without project markers"
        );
    }

    #[test]
    fn safe_scan_root_accepts_project_dir() {
        let tmp = tempdir().unwrap();
        let manifest = tmp.path().join("Cargo.toml");
        std::fs::write(manifest, "[package]\nname = \"test\"\n").unwrap();

        let root = tmp.path().to_string_lossy().to_string();
        assert!(is_safe_scan_root(&root));
    }

    #[test]
    fn safe_scan_root_rejects_broad_dir() {
        let tmp = tempdir().unwrap();
        (0..55).for_each(|i| {
            std::fs::create_dir(tmp.path().join(format!("dir{i}"))).unwrap();
        });

        let root = tmp.path().to_string_lossy().to_string();
        assert!(!is_safe_scan_root(&root));
    }

    #[test]
    fn no_index_env_skips_scan() {
        // Hold the shared lock for as long as the environment is modified.
        let _env = crate::core::data_dir::test_env_lock();
        let tmp = tempdir().unwrap();
        for (name, body) in [("Cargo.toml", ""), ("main.rs", "fn main() {}")] {
            std::fs::write(tmp.path().join(name), body).unwrap();
        }

        std::env::set_var("LEAN_CTX_NO_INDEX", "1");
        let idx = scan(&tmp.path().to_string_lossy());
        std::env::remove_var("LEAN_CTX_NO_INDEX");

        assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
    }
}