Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
/// Schema version of the persisted graph index.
///
/// `ProjectIndex::new` stamps this value into every index it creates, and
/// `ProjectIndex::load` returns `None` for any stored index whose `version` differs.
const INDEX_VERSION: u32 = 6;
10
/// Public entry point to the scan-root safety check.
///
/// Forwards `path` unchanged to the private `is_safe_scan_root` and returns its verdict:
/// `true` when the directory may be scanned, `false` when it was rejected.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
14
/// Reports whether `path` has no parent directory, which is the case for a
/// filesystem root (and for the empty path).
///
/// On Windows builds a path whose parent is the empty path is treated as a root too.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
19
20fn is_safe_scan_root(path: &str) -> bool {
21    let normalized = normalize_project_root(path);
22    let p = Path::new(&normalized);
23
24    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
25        tracing::warn!("[graph_index: refusing to scan filesystem root]");
26        return false;
27    }
28
29    if let Some(home) = dirs::home_dir() {
30        let home_norm = normalize_project_root(&home.to_string_lossy());
31        if normalized == home_norm {
32            tracing::warn!(
33                "[graph_index: refusing to scan home directory {normalized} — \
34                 set LEAN_CTX_PROJECT_ROOT or run from inside a project]"
35            );
36            return false;
37        }
38        // Block common broad home subdirectories that are never valid project roots
39        let home_path = Path::new(&home_norm);
40        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
41            "Desktop",
42            "Documents",
43            "Downloads",
44            "Pictures",
45            "Music",
46            "Videos",
47            "Movies",
48            "Library",
49            ".local",
50            ".cache",
51            ".config",
52            "snap",
53            "Applications",
54        ];
55        for blocked in BLOCKED_HOME_SUBDIRS {
56            let blocked_path = home_path.join(blocked);
57            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
58            let has_project_marker = p.join(".git").exists()
59                || p.join("Cargo.toml").exists()
60                || p.join("package.json").exists();
61            if is_inside_blocked && !has_project_marker {
62                tracing::warn!(
63                    "[graph_index: refusing to scan {normalized} — \
64                     inside home/{blocked} without project markers]"
65                );
66                return false;
67            }
68        }
69    }
70
71    let breadth_markers = [
72        ".git",
73        "Cargo.toml",
74        "package.json",
75        "go.mod",
76        "pyproject.toml",
77        "setup.py",
78        "Makefile",
79        "CMakeLists.txt",
80        "pnpm-workspace.yaml",
81        ".projectile",
82        "BUILD.bazel",
83        "go.work",
84    ];
85
86    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
87        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
88            rd.filter_map(Result::ok)
89                .filter(|e| e.path().is_dir())
90                .count()
91        });
92        if child_count > 50 {
93            tracing::warn!(
94                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
95                 skipping scan to avoid indexing broad directories]"
96            );
97            return false;
98        }
99    }
100
101    true
102}
103
/// Persisted file/symbol/import graph for a single project root.
///
/// Serialized to JSON by `save` and parsed back by `load`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Schema version; `new` sets it to `INDEX_VERSION` and `load` rejects other values.
    pub version: u32,
    /// Project root as stored by `new`, i.e. after `normalize_project_root`.
    pub project_root: String,
    /// Local time at which the index value was created, formatted `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by their path relative to the project root.
    pub files: HashMap<String, FileEntry>,
    /// Edges between files; `build_edges` fills this with resolved, non-external imports.
    pub edges: Vec<IndexEdge>,
    /// Symbols, keyed by `"{relative_path}::{symbol_name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
113
/// Per-file record stored in `ProjectIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same value as the map key in `ProjectIndex::files`).
    pub path: String,
    /// Content hash produced by `compute_hash`; `scan` reuses an entry when it is unchanged.
    pub hash: String,
    /// File extension the file was indexed under (without the leading dot).
    pub language: String,
    /// Number of lines in the file content at scan time.
    pub line_count: usize,
    /// Token count of the file content as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are flagged as exported.
    pub exports: Vec<String>,
    /// One-line summary produced by `extract_summary` (may be empty).
    pub summary: String,
}
124
/// Per-symbol record stored in `ProjectIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Symbol kind, stored as the string form of the extracted signature's kind.
    pub kind: String,
    /// First line of the symbol: taken from the signature when it carries both bounds,
    /// otherwise estimated by `find_symbol_range` (which yields 1-based line numbers).
    /// NOTE(review): the base of signature-provided line numbers is not visible here — confirm.
    pub start_line: usize,
    /// Last line of the symbol; same provenance as `start_line`.
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
134
/// Directed relationship between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Relative path of the file the edge starts at (for imports: the importing file).
    pub from: String,
    /// Path the edge points to (for imports: the path the import resolver resolved to).
    pub to: String,
    /// Edge kind; `build_edges` only ever writes `"import"`.
    pub kind: String,
}
141
142impl ProjectIndex {
143    pub fn new(project_root: &str) -> Self {
144        Self {
145            version: INDEX_VERSION,
146            project_root: normalize_project_root(project_root),
147            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
148            files: HashMap::new(),
149            edges: Vec::new(),
150            symbols: HashMap::new(),
151        }
152    }
153
154    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
155        let normalized = normalize_project_root(project_root);
156        let hash = crate::core::project_hash::hash_project_root(&normalized);
157        crate::core::data_dir::lean_ctx_data_dir()
158            .ok()
159            .map(|d| d.join("graphs").join(hash))
160    }
161
162    pub fn load(project_root: &str) -> Option<Self> {
163        let dir = Self::index_dir(project_root)?;
164
165        let zst_path = dir.join("index.json.zst");
166        if zst_path.exists() {
167            let compressed = std::fs::read(&zst_path).ok()?;
168            let data = zstd::decode_all(compressed.as_slice()).ok()?;
169            let content = String::from_utf8(data).ok()?;
170            let index: Self = serde_json::from_str(&content).ok()?;
171            if index.version != INDEX_VERSION {
172                return None;
173            }
174            return Some(index);
175        }
176
177        let json_path = dir.join("index.json");
178        let content = std::fs::read_to_string(&json_path)
179            .or_else(|_| -> std::io::Result<String> {
180                let legacy_hash = short_hash(&normalize_project_root(project_root));
181                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
182                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
183                    .join("graphs")
184                    .join(legacy_hash);
185                let legacy_path = legacy_dir.join("index.json");
186                let data = std::fs::read_to_string(&legacy_path)?;
187                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
188                    tracing::debug!("graph index migration: {e}");
189                }
190                Ok(data)
191            })
192            .ok()?;
193        let index: Self = serde_json::from_str(&content).ok()?;
194        if index.version != INDEX_VERSION {
195            return None;
196        }
197        // Auto-migrate: compress legacy JSON to zstd
198        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
199            let zst_tmp = zst_path.with_extension("zst.tmp");
200            if std::fs::write(&zst_tmp, &compressed).is_ok()
201                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
202            {
203                let _ = std::fs::remove_file(&json_path);
204            }
205        }
206        Some(index)
207    }
208
209    pub fn save(&self) -> Result<(), String> {
210        let dir = Self::index_dir(&self.project_root)
211            .ok_or_else(|| "Cannot determine data directory".to_string())?;
212        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
213        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
214        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
215        let target = dir.join("index.json.zst");
216        let tmp = target.with_extension("zst.tmp");
217        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
218        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
219        let _ = std::fs::remove_file(dir.join("index.json"));
220        Ok(())
221    }
222
223    /// Remove all cached graph indices that are older than max_age_hours.
224    /// Called on startup/update to prevent stale data from persisting.
225    pub fn purge_stale_indices() {
226        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
227            return;
228        };
229        let graphs_dir = data_dir.join("graphs");
230        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
231            return;
232        };
233        let cfg = crate::core::config::Config::load();
234        let max_age_secs = cfg.archive.max_age_hours * 3600;
235
236        for entry in entries.filter_map(Result::ok) {
237            let path = entry.path();
238            if !path.is_dir() {
239                continue;
240            }
241            let zst = path.join("index.json.zst");
242            let json = path.join("index.json");
243            let index_file = if zst.exists() {
244                &zst
245            } else if json.exists() {
246                &json
247            } else {
248                continue;
249            };
250
251            let is_old = index_file
252                .metadata()
253                .and_then(|m| m.modified())
254                .is_ok_and(|mtime| {
255                    mtime
256                        .elapsed()
257                        .is_ok_and(|age| age.as_secs() > max_age_secs)
258                });
259
260            if is_old {
261                tracing::info!("[graph_index: purging stale index at {}]", path.display());
262                let _ = std::fs::remove_dir_all(&path);
263            }
264        }
265    }
266
267    pub fn file_count(&self) -> usize {
268        self.files.len()
269    }
270
271    pub fn symbol_count(&self) -> usize {
272        self.symbols.len()
273    }
274
275    pub fn edge_count(&self) -> usize {
276        self.edges.len()
277    }
278
279    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
280        self.symbols.get(key)
281    }
282
283    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
284        let mut result = Vec::new();
285        let mut visited = std::collections::HashSet::new();
286        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
287
288        while let Some((current, d)) = queue.pop() {
289            if d > depth || visited.contains(&current) {
290                continue;
291            }
292            visited.insert(current.clone());
293            if current != path {
294                result.push(current.clone());
295            }
296
297            for edge in &self.edges {
298                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
299                    queue.push((edge.from.clone(), d + 1));
300                }
301            }
302        }
303        result
304    }
305
306    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
307        let mut result = Vec::new();
308        let mut visited = std::collections::HashSet::new();
309        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
310
311        while let Some((current, d)) = queue.pop() {
312            if d > depth || visited.contains(&current) {
313                continue;
314            }
315            visited.insert(current.clone());
316            if current != path {
317                result.push(current.clone());
318            }
319
320            for edge in &self.edges {
321                if edge.from == current && !visited.contains(&edge.to) {
322                    queue.push((edge.to.clone(), d + 1));
323                }
324                if edge.to == current && !visited.contains(&edge.from) {
325                    queue.push((edge.from.clone(), d + 1));
326                }
327            }
328        }
329        result
330    }
331}
332
333/// Load the best available graph index, trying multiple root path variants.
334/// If no valid index exists, automatically scans the project to build one.
335/// This is the primary entry point — ensures zero-config usage.
336pub fn load_or_build(project_root: &str) -> ProjectIndex {
337    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
338        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
339    }
340
341    // Prefer stable absolute roots. Using "." as a cache key is fragile because
342    // it depends on the process cwd and can accidentally load the wrong project.
343    let root_abs = if project_root.trim().is_empty() || project_root == "." {
344        std::env::current_dir().ok().map_or_else(
345            || ".".to_string(),
346            |p| normalize_project_root(&p.to_string_lossy()),
347        )
348    } else {
349        normalize_project_root(project_root)
350    };
351
352    if !is_safe_scan_root(&root_abs) {
353        return ProjectIndex::new(&root_abs);
354    }
355
356    // Try the absolute/root-normalized path first.
357    if let Some(idx) = ProjectIndex::load(&root_abs) {
358        if !idx.files.is_empty() {
359            if index_looks_stale(&idx, &root_abs) {
360                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
361                return scan(&root_abs);
362            }
363            return idx;
364        }
365    }
366
367    // Legacy: older builds may have cached the index under ".". Only accept it if it
368    // actually refers to the current cwd project, then migrate it to `root_abs`.
369    if let Some(idx) = ProjectIndex::load(".") {
370        if !idx.files.is_empty() {
371            let mut migrated = idx;
372            migrated.project_root.clone_from(&root_abs);
373            let _ = migrated.save();
374            if index_looks_stale(&migrated, &root_abs) {
375                tracing::warn!(
376                    "[graph_index: stale legacy index detected for {root_abs}; rebuilding]"
377                );
378                return scan(&root_abs);
379            }
380            return migrated;
381        }
382    }
383
384    // Try absolute cwd
385    if let Ok(cwd) = std::env::current_dir() {
386        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
387        if cwd_str != root_abs {
388            if let Some(idx) = ProjectIndex::load(&cwd_str) {
389                if !idx.files.is_empty() {
390                    if index_looks_stale(&idx, &cwd_str) {
391                        tracing::warn!(
392                            "[graph_index: stale index detected for {cwd_str}; rebuilding]"
393                        );
394                        return scan(&cwd_str);
395                    }
396                    return idx;
397                }
398            }
399        }
400    }
401
402    // No existing index found anywhere — auto-build
403    scan(&root_abs)
404}
405
406fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
407    if index.files.is_empty() {
408        return true;
409    }
410
411    // TTL check: rebuild if index is older than configured max_age_hours
412    if let Ok(scan_time) =
413        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
414    {
415        let cfg = crate::core::config::Config::load();
416        let max_age = chrono::Duration::hours(cfg.archive.max_age_hours as i64);
417        let now = chrono::Local::now().naive_local();
418        if now.signed_duration_since(scan_time) > max_age {
419            tracing::info!(
420                "[graph_index: index is older than {}h — marking stale]",
421                cfg.archive.max_age_hours
422            );
423            return true;
424        }
425    }
426
427    // Contamination check: if index contains paths from common user directories,
428    // it was built from a too-broad root and must be rebuilt
429    const CONTAMINATION_MARKERS: &[&str] = &[
430        "Desktop/",
431        "Documents/",
432        "Downloads/",
433        "Pictures/",
434        "Music/",
435        "Videos/",
436        "Movies/",
437        "Library/",
438        ".cache/",
439        "snap/",
440    ];
441    let contaminated = index.files.keys().take(200).any(|rel| {
442        CONTAMINATION_MARKERS
443            .iter()
444            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
445    });
446    if contaminated {
447        tracing::warn!(
448            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
449             marking stale to force clean rebuild]"
450        );
451        return true;
452    }
453
454    let root_path = Path::new(root_abs);
455    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
456    let sample_size = index.files.len().min(20);
457    for rel in index.files.keys().take(sample_size) {
458        let rel = rel.trim_start_matches(['/', '\\']);
459        if rel.is_empty() {
460            continue;
461        }
462        let abs = root_path.join(rel);
463        if !abs.exists() {
464            return true;
465        }
466    }
467
468    false
469}
470
471pub fn scan(project_root: &str) -> ProjectIndex {
472    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
473        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
474        return ProjectIndex::new(project_root);
475    }
476
477    let project_root = normalize_project_root(project_root);
478
479    if !is_safe_scan_root(&project_root) {
480        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
481        return ProjectIndex::new(&project_root);
482    }
483
484    let lock_name = format!(
485        "graph-idx-{}",
486        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
487    );
488    let _lock = crate::core::startup_guard::try_acquire_lock(
489        &lock_name,
490        std::time::Duration::from_millis(800),
491        std::time::Duration::from_mins(3),
492    );
493    if _lock.is_none() {
494        tracing::info!(
495            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
496        );
497        return ProjectIndex::load(&project_root)
498            .unwrap_or_else(|| ProjectIndex::new(&project_root));
499    }
500
501    let existing = ProjectIndex::load(&project_root);
502    let mut index = ProjectIndex::new(&project_root);
503
504    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
505        if let Some(ref prev) = existing {
506            prev.files
507                .iter()
508                .map(|(path, entry)| {
509                    let syms: Vec<(String, SymbolEntry)> = prev
510                        .symbols
511                        .iter()
512                        .filter(|(_, s)| s.file == *path)
513                        .map(|(k, v)| (k.clone(), v.clone()))
514                        .collect();
515                    (path.clone(), (entry.hash.clone(), syms))
516                })
517                .collect()
518        } else {
519            HashMap::new()
520        };
521
522    let walker = ignore::WalkBuilder::new(&project_root)
523        .hidden(true)
524        .git_ignore(true)
525        .git_global(true)
526        .git_exclude(true)
527        .max_depth(Some(20))
528        .build();
529
530    let cfg = crate::core::config::Config::load();
531    let extra_ignores: Vec<glob::Pattern> = cfg
532        .extra_ignore_patterns
533        .iter()
534        .filter_map(|p| glob::Pattern::new(p).ok())
535        .collect();
536
537    let mut scanned = 0usize;
538    let mut reused = 0usize;
539    let mut entries_visited = 0usize;
540    let max_files = if cfg.graph_index_max_files == 0 {
541        usize::MAX // unlimited
542    } else {
543        cfg.graph_index_max_files as usize
544    };
545    const MAX_ENTRIES_VISITED: usize = 500_000;
546    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
547
548    for entry in walker.filter_map(std::result::Result::ok) {
549        entries_visited += 1;
550        if entries_visited > MAX_ENTRIES_VISITED {
551            tracing::warn!(
552                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
553                 runaway traversal. Indexed {} files so far.]",
554                index.files.len()
555            );
556            break;
557        }
558        if entries_visited.is_multiple_of(5000) {
559            if std::time::Instant::now() > scan_deadline {
560                tracing::warn!(
561                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
562                     saving partial index with {} files]",
563                    index.files.len()
564                );
565                break;
566            }
567            if let Some(ref g) = _lock {
568                g.touch();
569            }
570        }
571
572        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
573            continue;
574        }
575        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
576
577        // Prevent indexing files that escaped the project root (symlinks, mount points)
578        if !file_path.starts_with(&project_root) {
579            continue;
580        }
581
582        let ext = Path::new(&file_path)
583            .extension()
584            .and_then(|e| e.to_str())
585            .unwrap_or("");
586
587        if !is_indexable_ext(ext) {
588            continue;
589        }
590
591        let rel = make_relative(&file_path, &project_root);
592        if extra_ignores.iter().any(|p| p.matches(&rel)) {
593            continue;
594        }
595
596        if max_files != usize::MAX && index.files.len() >= max_files {
597            tracing::info!(
598                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
599                max_files
600            );
601            break;
602        }
603
604        let Ok(content) = std::fs::read_to_string(&file_path) else {
605            continue;
606        };
607
608        let hash = compute_hash(&content);
609        let rel_path = make_relative(&file_path, &project_root);
610
611        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
612            if *old_hash == hash {
613                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
614                    index.files.insert(rel_path.clone(), old_entry.clone());
615                    for (key, sym) in old_syms {
616                        index.symbols.insert(key.clone(), sym.clone());
617                    }
618                    reused += 1;
619                    continue;
620                }
621            }
622        }
623
624        let sigs = signatures::extract_signatures(&content, ext);
625        let line_count = content.lines().count();
626        let token_count = crate::core::tokens::count_tokens(&content);
627        let summary = extract_summary(&content);
628
629        let exports: Vec<String> = sigs
630            .iter()
631            .filter(|s| s.is_exported)
632            .map(|s| s.name.clone())
633            .collect();
634
635        index.files.insert(
636            rel_path.clone(),
637            FileEntry {
638                path: rel_path.clone(),
639                hash,
640                language: ext.to_string(),
641                line_count,
642                token_count,
643                exports,
644                summary,
645            },
646        );
647
648        for sig in &sigs {
649            let (start, end) = sig
650                .start_line
651                .zip(sig.end_line)
652                .unwrap_or_else(|| find_symbol_range(&content, sig));
653            let key = format!("{}::{}", rel_path, sig.name);
654            index.symbols.insert(
655                key,
656                SymbolEntry {
657                    file: rel_path.clone(),
658                    name: sig.name.clone(),
659                    kind: sig.kind.to_string(),
660                    start_line: start,
661                    end_line: end,
662                    is_exported: sig.is_exported,
663                },
664            );
665        }
666
667        scanned += 1;
668    }
669
670    build_edges(&mut index);
671
672    if let Err(e) = index.save() {
673        tracing::warn!("could not save graph index: {e}");
674    }
675
676    tracing::warn!(
677        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
678        index.file_count(),
679        scanned,
680        reused,
681        index.symbol_count(),
682        index.edge_count()
683    );
684
685    index
686}
687
688fn build_edges(index: &mut ProjectIndex) {
689    index.edges.clear();
690
691    let root = normalize_project_root(&index.project_root);
692    let root_path = Path::new(&root);
693
694    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
695    file_paths.sort();
696
697    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
698
699    for rel_path in &file_paths {
700        let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
701        let Ok(content) = std::fs::read_to_string(&abs_path) else {
702            continue;
703        };
704
705        let ext = Path::new(rel_path)
706            .extension()
707            .and_then(|e| e.to_str())
708            .unwrap_or("");
709
710        let resolve_ext = match ext {
711            "vue" | "svelte" => "ts",
712            _ => ext,
713        };
714
715        let imports = crate::core::deep_queries::analyze(&content, resolve_ext).imports;
716        if imports.is_empty() {
717            continue;
718        }
719
720        let resolved =
721            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
722        for r in resolved {
723            if r.is_external {
724                continue;
725            }
726            if let Some(to) = r.resolved_path {
727                index.edges.push(IndexEdge {
728                    from: rel_path.clone(),
729                    to,
730                    kind: "import".to_string(),
731                });
732            }
733        }
734    }
735
736    index.edges.sort_by(|a, b| {
737        a.from
738            .cmp(&b.from)
739            .then_with(|| a.to.cmp(&b.to))
740            .then_with(|| a.kind.cmp(&b.kind))
741    });
742    index
743        .edges
744        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
745}
746
747fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
748    let lines: Vec<&str> = content.lines().collect();
749    let mut start = 0;
750
751    for (i, line) in lines.iter().enumerate() {
752        if line.contains(&sig.name) {
753            let trimmed = line.trim();
754            let is_def = trimmed.starts_with("fn ")
755                || trimmed.starts_with("pub fn ")
756                || trimmed.starts_with("pub(crate) fn ")
757                || trimmed.starts_with("async fn ")
758                || trimmed.starts_with("pub async fn ")
759                || trimmed.starts_with("struct ")
760                || trimmed.starts_with("pub struct ")
761                || trimmed.starts_with("enum ")
762                || trimmed.starts_with("pub enum ")
763                || trimmed.starts_with("trait ")
764                || trimmed.starts_with("pub trait ")
765                || trimmed.starts_with("impl ")
766                || trimmed.starts_with("class ")
767                || trimmed.starts_with("export class ")
768                || trimmed.starts_with("export function ")
769                || trimmed.starts_with("export async function ")
770                || trimmed.starts_with("function ")
771                || trimmed.starts_with("async function ")
772                || trimmed.starts_with("def ")
773                || trimmed.starts_with("async def ")
774                || trimmed.starts_with("func ")
775                || trimmed.starts_with("interface ")
776                || trimmed.starts_with("export interface ")
777                || trimmed.starts_with("type ")
778                || trimmed.starts_with("export type ")
779                || trimmed.starts_with("const ")
780                || trimmed.starts_with("export const ")
781                || trimmed.starts_with("fun ")
782                || trimmed.starts_with("private fun ")
783                || trimmed.starts_with("public fun ")
784                || trimmed.starts_with("internal fun ")
785                || trimmed.starts_with("class ")
786                || trimmed.starts_with("data class ")
787                || trimmed.starts_with("sealed class ")
788                || trimmed.starts_with("sealed interface ")
789                || trimmed.starts_with("enum class ")
790                || trimmed.starts_with("object ")
791                || trimmed.starts_with("private object ")
792                || trimmed.starts_with("interface ")
793                || trimmed.starts_with("typealias ")
794                || trimmed.starts_with("private typealias ");
795            if is_def {
796                start = i + 1;
797                break;
798            }
799        }
800    }
801
802    if start == 0 {
803        return (1, lines.len().min(20));
804    }
805
806    let base_indent = lines
807        .get(start - 1)
808        .map_or(0, |l| l.len() - l.trim_start().len());
809
810    let mut end = start;
811    let mut brace_depth: i32 = 0;
812    let mut found_open = false;
813
814    for (i, line) in lines.iter().enumerate().skip(start - 1) {
815        for ch in line.chars() {
816            if ch == '{' {
817                brace_depth += 1;
818                found_open = true;
819            } else if ch == '}' {
820                brace_depth -= 1;
821            }
822        }
823
824        end = i + 1;
825
826        if found_open && brace_depth <= 0 {
827            break;
828        }
829
830        if !found_open && i > start {
831            let indent = line.len() - line.trim_start().len();
832            if indent <= base_indent && !line.trim().is_empty() && i > start {
833                end = i;
834                break;
835            }
836        }
837
838        if end - start > 200 {
839            break;
840        }
841    }
842
843    (start, end)
844}
845
/// Picks a one-line summary for a file: the first "meaningful" line among its first 20.
///
/// Blank lines and lines that, once trimmed, start with a comment marker (`//`, `#`,
/// `/*`, `*`) or an import-like keyword (`use `, `import `, `from `, `require(`,
/// `package `) are skipped. The chosen line is trimmed and cut to 120 characters. Returns
/// an empty string when no line qualifies.
fn extract_summary(content: &str) -> String {
    const SKIPPED_PREFIXES: &[&str] = &[
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty() && !SKIPPED_PREFIXES.iter().any(|prefix| line.starts_with(prefix))
        })
        .map(|line| line.chars().take(120).collect::<String>())
        .unwrap_or_default()
}
866
/// Hashes `content` with the standard library's `DefaultHasher` and returns the 64-bit
/// digest as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
875
/// Hashes `input` with the standard library's `DefaultHasher` and returns the low 32 bits
/// of the digest as 8 zero-padded lowercase hex digits.
///
/// `ProjectIndex::load` uses this to locate legacy index directories.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let low_bits = state.finish() & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
884
/// Recursively copies the directory tree rooted at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Each entry of `src` is
/// then copied under the same file name: directories by recursion, everything
/// else with `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered, including errors produced while
/// iterating the directory. Such entry errors are propagated rather than
/// skipped, so an `Ok(())` result means every entry was visited; files copied
/// before a failure are left in place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Propagate unreadable entries instead of dropping them silently.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        // `Path::is_dir` follows symlinks, so a link to a directory is recursed into.
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
898
899fn normalize_absolute_path(path: &str) -> String {
900    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
901        return canon.to_string_lossy().to_string();
902    }
903
904    let mut normalized = path.to_string();
905    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
906        normalized.truncate(normalized.len() - 2);
907    }
908    while normalized.len() > 1
909        && (normalized.ends_with('\\') || normalized.ends_with('/'))
910        && !normalized.ends_with(":\\")
911        && !normalized.ends_with(":/")
912        && normalized != "\\"
913        && normalized != "/"
914    {
915        normalized.pop();
916    }
917    normalized
918}
919
920pub fn normalize_project_root(path: &str) -> String {
921    normalize_absolute_path(path)
922}
923
924pub fn graph_match_key(path: &str) -> String {
925    let stripped =
926        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
927    stripped.trim_start_matches('/').to_string()
928}
929
930pub fn graph_relative_key(path: &str, root: &str) -> String {
931    let root_norm = normalize_project_root(root);
932    let path_norm = normalize_absolute_path(path);
933    let root_path = Path::new(&root_norm);
934    let path_path = Path::new(&path_norm);
935
936    if let Ok(rel) = path_path.strip_prefix(root_path) {
937        let rel = rel.to_string_lossy().to_string();
938        return rel.trim_start_matches(['/', '\\']).to_string();
939    }
940
941    path.trim_start_matches(['/', '\\'])
942        .replace('/', std::path::MAIN_SEPARATOR_STR)
943}
944
945fn make_relative(path: &str, root: &str) -> String {
946    graph_relative_key(path, root)
947}
948
949fn is_indexable_ext(ext: &str) -> bool {
950    crate::core::language_capabilities::is_indexable_ext(ext)
951}
952
/// Test-only helper: returns the name from the first line of `content` that,
/// once trimmed, starts with `package `.
///
/// The text after the keyword is trimmed and any trailing `;` characters are
/// removed. Returns `None` when no such line exists.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declaration) = raw_line.trim().strip_prefix("package ") {
            let name = declaration.trim().trim_end_matches(';');
            return Some(name.to_string());
        }
    }
    None
}
960
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // `short_hash` must give the same 8-character value for the same input.
    #[test]
    fn test_short_hash_deterministic() {
        let h1 = short_hash("/Users/test/project");
        let h2 = short_hash("/Users/test/project");
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 8);
    }

    // NOTE(review): `make_relative` forwards directly to `graph_relative_key`,
    // so these assertions only check that the two stay in agreement; they do
    // not pin concrete expected keys.
    #[test]
    fn test_make_relative() {
        assert_eq!(
            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
            graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("src/main.rs", "/foo/bar"),
            graph_relative_key("src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
        );
        assert_eq!(
            make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
            graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
        );
    }

    // Trailing separators and a trailing `.` component are stripped from roots.
    #[test]
    fn test_normalize_project_root() {
        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
    }

    // Backslash, verbatim-prefixed and leading-separator forms all map to the
    // same forward-slash key.
    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        assert_eq!(
            graph_match_key(r"C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(
            graph_match_key(r"\\?\C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
    }

    // Comments, imports and blank lines are skipped when picking the summary.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        let summary = extract_summary(content);
        assert_eq!(summary, "pub fn main() {");
    }

    // Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_compute_hash_deterministic() {
        let h1 = compute_hash("hello world");
        let h2 = compute_hash("hello world");
        assert_eq!(h1, h2);
        assert_ne!(h1, compute_hash("hello world!"));
    }

    // A fresh index carries the current version, the given root and no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    /// Builds a `FileEntry` for `path` whose hash, line count, token count and
    /// summary are derived from `content`; `exports` is left empty.
    fn fe(path: &str, content: &str, language: &str) -> FileEntry {
        FileEntry {
            path: path.to_string(),
            hash: compute_hash(content),
            language: language.to_string(),
            line_count: content.lines().count(),
            token_count: crate::core::tokens::count_tokens(content),
            exports: Vec::new(),
            summary: extract_summary(content),
        }
    }

    // An index that lists a file missing from disk is reported as stale.
    #[test]
    fn test_index_looks_stale_when_any_file_missing() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));
        idx.files.insert(
            "missing.rs".to_string(),
            fe("missing.rs", "pub fn m() {}\n", "rs"),
        );

        assert!(index_looks_stale(&idx, &root_s));
    }

    // An index whose only listed file exists on disk is reported as fresh.
    #[test]
    fn test_index_looks_fresh_when_all_files_exist() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "pub fn a() {}\n", "rs"));

        assert!(!index_looks_stale(&idx, &root_s));
    }

    // Both files that import `b.rs` are returned as its reverse dependencies.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges.push(IndexEdge {
            from: "a.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });
        idx.edges.push(IndexEdge {
            from: "c.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"a.rs".to_string()));
        assert!(deps.contains(&"c.rs".to_string()));
    }

    // Without span information the range is located by searching the text;
    // `greet` is declared on the fifth line of `content`.
    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
    fun greet(name: String): String {
        return "hi $name"
    }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };
        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    // When a signature carries both span ends, they are used as-is and the
    // text-based fallback is never consulted.
    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        let (start, end) = sig
            .start_line
            .zip(sig.end_line)
            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
        assert_eq!((start, end), (42, 43));
    }

    // An index serialized with an older version still parses, and its version
    // differs from the current `INDEX_VERSION`.
    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    // The package name is read from the `package` line.
    #[test]
    fn test_kotlin_package_name() {
        let content = "package com.example.feature\n\nclass UserService";
        assert_eq!(
            kotlin_package_name(content).as_deref(),
            Some("com.example.feature")
        );
    }

    // Filesystem roots are never accepted as scan roots.
    #[test]
    fn safe_scan_root_rejects_fs_root() {
        assert!(!is_safe_scan_root("/"));
        assert!(!is_safe_scan_root("\\"));
        #[cfg(windows)]
        {
            assert!(!is_safe_scan_root("C:\\"));
            assert!(!is_safe_scan_root("D:\\"));
        }
    }

    // The user's home directory is never accepted as a scan root.
    #[test]
    fn safe_scan_root_rejects_home() {
        if let Some(home) = dirs::home_dir() {
            let home_str = home.to_string_lossy().to_string();
            assert!(
                !is_safe_scan_root(&home_str),
                "home dir should be rejected: {home_str}"
            );
        }
    }

    // A temporary directory containing a `Cargo.toml` is accepted.
    #[test]
    fn safe_scan_root_accepts_project_dir() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[package]\nname = \"test\"\n",
        )
        .unwrap();
        let root = tmp.path().to_string_lossy().to_string();
        assert!(is_safe_scan_root(&root));
    }

    // A directory with 55 subdirectories and no project files is rejected.
    #[test]
    fn safe_scan_root_rejects_broad_dir() {
        let tmp = tempdir().unwrap();
        for i in 0..55 {
            std::fs::create_dir(tmp.path().join(format!("dir{i}"))).unwrap();
        }
        let root = tmp.path().to_string_lossy().to_string();
        assert!(!is_safe_scan_root(&root));
    }

    // With `LEAN_CTX_NO_INDEX` set, `scan` returns an index without files.
    #[test]
    fn no_index_env_skips_scan() {
        /// Sets an environment variable and removes it again when dropped, so
        /// the variable cannot leak into other tests even if `scan` panics.
        struct EnvVarGuard(&'static str);

        impl EnvVarGuard {
            fn set(key: &'static str, value: &str) -> Self {
                std::env::set_var(key, value);
                Self(key)
            }
        }

        impl Drop for EnvVarGuard {
            fn drop(&mut self) {
                std::env::remove_var(self.0);
            }
        }

        let _env = crate::core::data_dir::test_env_lock();
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "").unwrap();
        std::fs::write(tmp.path().join("main.rs"), "fn main() {}").unwrap();

        let idx = {
            // The guard is dropped at the end of this block, before `_env`.
            let _no_index = EnvVarGuard::set("LEAN_CTX_NO_INDEX", "1");
            scan(&tmp.path().to_string_lossy())
        };
        assert!(idx.files.is_empty(), "LEAN_CTX_NO_INDEX should skip scan");
    }

    // An index that contains entries under `Desktop/` is treated as stale.
    #[test]
    fn stale_index_detected_by_contamination() {
        let root_s = "/home/testuser/myproject";
        let mut idx = ProjectIndex::new(root_s);
        // Simulate a contaminated index with Desktop files
        idx.files.insert(
            "Desktop/random.py".to_string(),
            fe("Desktop/random.py", "x = 1\n", "py"),
        );
        idx.files.insert(
            "src/main.rs".to_string(),
            fe("src/main.rs", "fn main() {}\n", "rs"),
        );
        assert!(
            index_looks_stale(&idx, root_s),
            "Index with Desktop/ files should be considered stale"
        );
    }

    // An index whose `last_scan` timestamp is 100 hours old is treated as stale.
    #[test]
    fn stale_index_detected_by_age() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "fn a() {}\n").unwrap();

        let root_s = normalize_project_root(&root.to_string_lossy());
        let mut idx = ProjectIndex::new(&root_s);
        idx.files
            .insert("a.rs".to_string(), fe("a.rs", "fn a() {}\n", "rs"));
        // Set last_scan to 100 hours ago (default max_age_hours is 48)
        let old_time = chrono::Local::now().naive_local() - chrono::Duration::hours(100);
        idx.last_scan = old_time.format("%Y-%m-%d %H:%M:%S").to_string();

        assert!(
            index_looks_stale(&idx, &root_s),
            "Index older than max_age_hours should be stale"
        );
    }

    // `~/Downloads` is rejected as a scan root unless it is itself a git checkout.
    #[test]
    fn safe_scan_root_rejects_home_downloads() {
        if let Some(home) = dirs::home_dir() {
            let downloads = home.join("Downloads");
            // Only test if Downloads doesn't contain a .git (unlikely but possible)
            if !downloads.join(".git").exists() {
                let downloads_str = downloads.to_string_lossy().to_string();
                assert!(
                    !is_safe_scan_root(&downloads_str),
                    "~/Downloads should be rejected without project markers"
                );
            }
        }
    }
}