Skip to main content

lean_ctx/core/graph_index/
mod.rs

1// DEPRECATED: This module is being replaced by PropertyGraph (core/property_graph/).
2// New code should use GraphProvider (core/graph_provider.rs) instead of accessing
3// ProjectIndex directly. Remaining direct consumers: call_graph, graph_enricher,
4// ctx_callgraph, ctx_graph_diagram, ctx_routes, autonomy, dashboard/callgraph.
5// See OPT-14/15 plan for the full migration path.
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14mod edges;
15pub(crate) use edges::*;
16#[cfg(test)]
17mod tests;
18
19const INDEX_VERSION: u32 = 6;
20
/// Public entry point for the scan-root safety check.
///
/// Forwards `path` unchanged to the private `is_safe_scan_root` and returns
/// its verdict: `true` when the directory may be scanned, `false` otherwise.
pub fn is_safe_scan_root_public(path: &str) -> bool {
    is_safe_scan_root(path)
}
24
/// Reports whether `path` has no parent component (for example `/` or the
/// empty path).
///
/// On Windows builds a path whose parent is the empty path is additionally
/// treated as a root.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
29
30fn is_safe_scan_root(path: &str) -> bool {
31    let normalized = normalize_project_root(path);
32    let p = Path::new(&normalized);
33
34    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
35        tracing::warn!("[graph_index: refusing to scan filesystem root]");
36        return false;
37    }
38
39    if normalized == "." || normalized.is_empty() {
40        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
41        return false;
42    }
43
44    if let Some(home) = dirs::home_dir() {
45        let home_norm = normalize_project_root(&home.to_string_lossy());
46        if normalized == home_norm {
47            use std::sync::Once;
48            static HOME_WARN: Once = Once::new();
49            HOME_WARN.call_once(|| {
50                tracing::warn!(
51                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
52                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
53                );
54            });
55            return false;
56        }
57        // Block common broad home subdirectories that are never valid project roots
58        let home_path = Path::new(&home_norm);
59        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
60            "Desktop",
61            "Documents",
62            "Downloads",
63            "Pictures",
64            "Music",
65            "Videos",
66            "Movies",
67            "Library",
68            ".local",
69            ".cache",
70            ".config",
71            "snap",
72            "Applications",
73        ];
74        for blocked in BLOCKED_HOME_SUBDIRS {
75            let blocked_path = home_path.join(blocked);
76            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
77            let has_marker = p.join(".git").exists()
78                || p.join("Cargo.toml").exists()
79                || p.join("package.json").exists();
80            if is_inside_blocked
81                && !has_marker
82                && !crate::core::pathutil::has_multi_repo_children(p)
83            {
84                tracing::warn!(
85                    "[graph_index: refusing to scan {normalized} — \
86                     inside home/{blocked} without project markers]"
87                );
88                return false;
89            }
90        }
91
92        // Block directories that are direct children of home without project markers
93        // (but allow multi-repo workspace parents like ~/code/)
94        if p.parent() == Some(home_path) {
95            let has_marker = p.join(".git").exists()
96                || p.join("Cargo.toml").exists()
97                || p.join("package.json").exists()
98                || p.join("go.mod").exists()
99                || p.join("pyproject.toml").exists();
100            if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
101                tracing::warn!(
102                    "[graph_index: refusing to scan {normalized} — \
103                     direct child of home without project markers]"
104                );
105                return false;
106            }
107        }
108    }
109
110    let breadth_markers = [
111        ".git",
112        "Cargo.toml",
113        "package.json",
114        "go.mod",
115        "pyproject.toml",
116        "setup.py",
117        "Makefile",
118        "CMakeLists.txt",
119        "pnpm-workspace.yaml",
120        ".projectile",
121        "BUILD.bazel",
122        "go.work",
123    ];
124
125    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
126        // Multi-repo workspace parent: >=2 children with project markers is always safe
127        if crate::core::pathutil::has_multi_repo_children(p) {
128            return true;
129        }
130
131        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
132            rd.filter_map(Result::ok)
133                .filter(|e| e.path().is_dir())
134                .count()
135        });
136        if child_count > 50 {
137            tracing::warn!(
138                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
139                 skipping scan to avoid indexing broad directories]"
140            );
141            return false;
142        }
143    }
144
145    true
146}
147
/// Serializable snapshot of one project's files, symbols and edges.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Format version; `load` rejects snapshots whose value differs from `INDEX_VERSION`.
    pub version: u32,
    /// Project root as returned by `normalize_project_root`.
    pub project_root: String,
    /// Local creation time of this snapshot, formatted as `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by their path relative to `project_root`.
    pub files: HashMap<String, FileEntry>,
    /// Edges between indexed entries.
    pub edges: Vec<IndexEdge>,
    /// Symbols, keyed by `"<relative file path>::<symbol name>"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
157
/// Per-file record stored in [`ProjectIndex::files`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same value as the map key).
    pub path: String,
    /// Content hash produced by `compute_hash`; used to detect unchanged files on rescan.
    pub hash: String,
    /// The file extension the scanner saw (stored verbatim, without a dot).
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are flagged as exported.
    pub exports: Vec<String>,
    /// Short summary line produced by `extract_summary`.
    pub summary: String,
}
168
/// Per-symbol record stored in [`ProjectIndex::symbols`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by signature extraction.
    pub name: String,
    /// Signature kind rendered as a string.
    pub kind: String,
    /// First line of the symbol (1-based when derived by `find_symbol_range`).
    pub start_line: usize,
    /// Last line of the symbol.
    pub end_line: usize,
    /// Whether signature extraction flagged the symbol as exported.
    pub is_exported: bool,
}
178
/// Directed edge between two index entries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Source endpoint of the edge.
    pub from: String,
    /// Target endpoint of the edge.
    pub to: String,
    /// Edge kind; `"import"` is the kind followed by `get_reverse_deps`.
    pub kind: String,
    /// Edge weight; falls back to `default_edge_weight()` when absent from serialized data.
    #[serde(default = "default_edge_weight")]
    pub weight: f32,
}
187
/// Serde default for `IndexEdge::weight` when the field is missing: `1.0`.
fn default_edge_weight() -> f32 {
    1.0
}
191
192impl ProjectIndex {
193    pub fn new(project_root: &str) -> Self {
194        Self {
195            version: INDEX_VERSION,
196            project_root: normalize_project_root(project_root),
197            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
198            files: HashMap::new(),
199            edges: Vec::new(),
200            symbols: HashMap::new(),
201        }
202    }
203
204    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
205        let normalized = normalize_project_root(project_root);
206        let hash = crate::core::project_hash::hash_project_root(&normalized);
207        crate::core::data_dir::lean_ctx_data_dir()
208            .ok()
209            .map(|d| d.join("graphs").join(hash))
210    }
211
212    pub fn load(project_root: &str) -> Option<Self> {
213        let dir = Self::index_dir(project_root)?;
214
215        let zst_path = dir.join("index.json.zst");
216        if zst_path.exists() {
217            let compressed = std::fs::read(&zst_path).ok()?;
218            let data = zstd::decode_all(compressed.as_slice()).ok()?;
219            let content = String::from_utf8(data).ok()?;
220            let index: Self = serde_json::from_str(&content).ok()?;
221            if index.version != INDEX_VERSION {
222                return None;
223            }
224            return Some(index);
225        }
226
227        let json_path = dir.join("index.json");
228        let content = std::fs::read_to_string(&json_path)
229            .or_else(|_| -> std::io::Result<String> {
230                let legacy_hash = short_hash(&normalize_project_root(project_root));
231                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
232                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
233                    .join("graphs")
234                    .join(legacy_hash);
235                let legacy_path = legacy_dir.join("index.json");
236                let data = std::fs::read_to_string(&legacy_path)?;
237                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
238                    tracing::debug!("graph index migration: {e}");
239                }
240                Ok(data)
241            })
242            .ok()?;
243        let index: Self = serde_json::from_str(&content).ok()?;
244        if index.version != INDEX_VERSION {
245            return None;
246        }
247        // Auto-migrate: compress legacy JSON to zstd
248        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
249            let zst_tmp = zst_path.with_extension("zst.tmp");
250            if std::fs::write(&zst_tmp, &compressed).is_ok()
251                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
252            {
253                let _ = std::fs::remove_file(&json_path);
254            }
255        }
256        Some(index)
257    }
258
259    pub fn save(&self) -> Result<(), String> {
260        let dir = Self::index_dir(&self.project_root)
261            .ok_or_else(|| "Cannot determine data directory".to_string())?;
262        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
263        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
264        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
265        let target = dir.join("index.json.zst");
266        let tmp = target.with_extension("zst.tmp");
267        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
268        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
269        let _ = std::fs::remove_file(dir.join("index.json"));
270        Ok(())
271    }
272
273    /// Remove all cached graph indices that are older than max_age_hours.
274    /// Called on startup/update to prevent stale data from persisting.
275    pub fn purge_stale_indices() {
276        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
277            return;
278        };
279        let graphs_dir = data_dir.join("graphs");
280        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
281            return;
282        };
283        let cfg = crate::core::config::Config::load();
284        let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
285
286        for entry in entries.filter_map(Result::ok) {
287            let path = entry.path();
288            if !path.is_dir() {
289                continue;
290            }
291            let zst = path.join("index.json.zst");
292            let json = path.join("index.json");
293            let index_file = if zst.exists() {
294                &zst
295            } else if json.exists() {
296                &json
297            } else {
298                continue;
299            };
300
301            let is_old = index_file
302                .metadata()
303                .and_then(|m| m.modified())
304                .is_ok_and(|mtime| {
305                    mtime
306                        .elapsed()
307                        .is_ok_and(|age| age.as_secs() > max_age_secs)
308                });
309
310            if is_old {
311                tracing::info!("[graph_index: purging stale index at {}]", path.display());
312                let _ = std::fs::remove_dir_all(&path);
313            }
314        }
315    }
316
317    pub fn file_count(&self) -> usize {
318        self.files.len()
319    }
320
321    pub fn symbol_count(&self) -> usize {
322        self.symbols.len()
323    }
324
325    pub fn edge_count(&self) -> usize {
326        self.edges.len()
327    }
328
329    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
330        self.symbols.get(key)
331    }
332
333    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
334        let mut result = Vec::new();
335        let mut visited = std::collections::HashSet::new();
336        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
337
338        while let Some((current, d)) = queue.pop() {
339            if d > depth || visited.contains(&current) {
340                continue;
341            }
342            visited.insert(current.clone());
343            if current != path {
344                result.push(current.clone());
345            }
346
347            for edge in &self.edges {
348                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
349                    queue.push((edge.from.clone(), d + 1));
350                }
351            }
352        }
353        result
354    }
355
356    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
357        let mut result = Vec::new();
358        let mut visited = std::collections::HashSet::new();
359        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
360
361        while let Some((current, d)) = queue.pop() {
362            if d > depth || visited.contains(&current) {
363                continue;
364            }
365            visited.insert(current.clone());
366            if current != path {
367                result.push(current.clone());
368            }
369
370            for edge in &self.edges {
371                if edge.from == current && !visited.contains(&edge.to) {
372                    queue.push((edge.to.clone(), d + 1));
373                }
374                if edge.to == current && !visited.contains(&edge.from) {
375                    queue.push((edge.from.clone(), d + 1));
376                }
377            }
378        }
379        result
380    }
381}
382
383/// Load the best available graph index, trying multiple root path variants.
384/// If no valid index exists, automatically scans the project to build one.
385/// This is the primary entry point — ensures zero-config usage.
386pub fn load_or_build(project_root: &str) -> ProjectIndex {
387    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
388        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
389    }
390
391    // Prefer stable absolute roots. Using "." as a cache key is fragile because
392    // it depends on the process cwd and can accidentally load the wrong project.
393    let root_abs = if project_root.trim().is_empty() || project_root == "." {
394        std::env::current_dir().ok().map_or_else(
395            || ".".to_string(),
396            |p| normalize_project_root(&p.to_string_lossy()),
397        )
398    } else {
399        normalize_project_root(project_root)
400    };
401
402    if !is_safe_scan_root(&root_abs) {
403        return ProjectIndex::new(&root_abs);
404    }
405
406    // Try the absolute/root-normalized path first.
407    if let Some(idx) = ProjectIndex::load(&root_abs) {
408        if !idx.files.is_empty() {
409            if index_looks_stale(&idx, &root_abs) {
410                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
411                return scan(&root_abs);
412            }
413            return idx;
414        }
415    }
416
417    // CWD fallback: only use if CWD is a subdirectory of root_abs (same project)
418    if let Ok(cwd) = std::env::current_dir() {
419        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
420        if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
421            if let Some(idx) = ProjectIndex::load(&cwd_str) {
422                if !idx.files.is_empty() {
423                    if index_looks_stale(&idx, &cwd_str) {
424                        return scan(&cwd_str);
425                    }
426                    return idx;
427                }
428            }
429        }
430    }
431
432    scan(&root_abs)
433}
434
435fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
436    if index.files.is_empty() {
437        return true;
438    }
439
440    // TTL check: rebuild if index is older than configured max_age_hours
441    if let Ok(scan_time) =
442        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
443    {
444        let cfg = crate::core::config::Config::load();
445        let effective_hours = cfg.archive_max_age_hours_effective();
446        let max_age = chrono::Duration::hours(effective_hours as i64);
447        let now = chrono::Local::now().naive_local();
448        if now.signed_duration_since(scan_time) > max_age {
449            tracing::info!(
450                "[graph_index: index is older than {}h — marking stale]",
451                effective_hours
452            );
453            return true;
454        }
455    }
456
457    // Contamination check: if index contains paths from common user directories,
458    // it was built from a too-broad root and must be rebuilt
459    const CONTAMINATION_MARKERS: &[&str] = &[
460        "Desktop/",
461        "Documents/",
462        "Downloads/",
463        "Pictures/",
464        "Music/",
465        "Videos/",
466        "Movies/",
467        "Library/",
468        ".cache/",
469        "snap/",
470    ];
471    let contaminated = index.files.keys().take(200).any(|rel| {
472        CONTAMINATION_MARKERS
473            .iter()
474            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
475    });
476    if contaminated {
477        tracing::warn!(
478            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
479             marking stale to force clean rebuild]"
480        );
481        return true;
482    }
483
484    let root_path = Path::new(root_abs);
485    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
486    let sample_size = index.files.len().min(20);
487    for rel in index.files.keys().take(sample_size) {
488        let rel = rel.trim_start_matches(['/', '\\']);
489        if rel.is_empty() {
490            continue;
491        }
492        let abs = root_path.join(rel);
493        if !abs.exists() {
494            return true;
495        }
496    }
497
498    false
499}
500
/// Scans `project_root` and returns the resulting index.
///
/// Delegates to `scan_inner` and discards the content cache it returns.
pub fn scan(project_root: &str) -> ProjectIndex {
    scan_inner(project_root).0
}
504
/// Scans `project_root` and returns the index together with the map of
/// relative path to file content that `scan_inner` collected along the way.
pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
    scan_inner(project_root)
}
508
509fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
510    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
511        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
512        return (ProjectIndex::new(project_root), HashMap::new());
513    }
514
515    let project_root = normalize_project_root(project_root);
516
517    if !is_safe_scan_root(&project_root) {
518        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
519        return (ProjectIndex::new(&project_root), HashMap::new());
520    }
521
522    let lock_name = format!(
523        "graph-idx-{}",
524        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
525    );
526    let _lock = crate::core::startup_guard::try_acquire_lock(
527        &lock_name,
528        std::time::Duration::from_millis(800),
529        std::time::Duration::from_mins(3),
530    );
531    if _lock.is_none() {
532        tracing::info!(
533            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
534        );
535        return (
536            ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
537            HashMap::new(),
538        );
539    }
540
541    let existing = ProjectIndex::load(&project_root);
542    let mut index = ProjectIndex::new(&project_root);
543
544    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
545        if let Some(ref prev) = existing {
546            prev.files
547                .iter()
548                .map(|(path, entry)| {
549                    let syms: Vec<(String, SymbolEntry)> = prev
550                        .symbols
551                        .iter()
552                        .filter(|(_, s)| s.file == *path)
553                        .map(|(k, v)| (k.clone(), v.clone()))
554                        .collect();
555                    (path.clone(), (entry.hash.clone(), syms))
556                })
557                .collect()
558        } else {
559            HashMap::new()
560        };
561
562    let walker = ignore::WalkBuilder::new(&project_root)
563        .hidden(true)
564        .git_ignore(true)
565        .git_global(true)
566        .git_exclude(true)
567        .max_depth(Some(20))
568        .build();
569
570    let cfg = crate::core::config::Config::load();
571    let extra_ignores: Vec<glob::Pattern> = cfg
572        .extra_ignore_patterns
573        .iter()
574        .filter_map(|p| glob::Pattern::new(p).ok())
575        .collect();
576
577    let mut scanned = 0usize;
578    let mut reused = 0usize;
579    let mut entries_visited = 0usize;
580    let mut content_cache: HashMap<String, String> = HashMap::new();
581    let max_files = if cfg.graph_index_max_files == 0 {
582        usize::MAX // unlimited
583    } else {
584        cfg.graph_index_max_files as usize
585    };
586    const MAX_ENTRIES_VISITED: usize = 500_000;
587    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
588    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
589
590    for entry in walker.filter_map(std::result::Result::ok) {
591        entries_visited += 1;
592        if entries_visited > MAX_ENTRIES_VISITED {
593            tracing::warn!(
594                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
595                 runaway traversal. Indexed {} files so far.]",
596                index.files.len()
597            );
598            break;
599        }
600        if entries_visited.is_multiple_of(5000) {
601            if std::time::Instant::now() > scan_deadline {
602                tracing::warn!(
603                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
604                     saving partial index with {} files]",
605                    index.files.len()
606                );
607                break;
608            }
609            if crate::core::memory_guard::abort_requested() {
610                tracing::warn!(
611                    "[graph_index: memory pressure abort after {entries_visited} entries — \
612                     saving partial index with {} files]",
613                    index.files.len()
614                );
615                break;
616            }
617            if crate::core::memory_guard::is_under_pressure() {
618                tracing::warn!(
619                    "[graph_index: memory pressure detected at {entries_visited} entries — \
620                     stopping scan with {} files]",
621                    index.files.len()
622                );
623                break;
624            }
625            if let Some(ref g) = _lock {
626                g.touch();
627            }
628        }
629
630        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
631            continue;
632        }
633
634        if entry.path_is_symlink() {
635            continue;
636        }
637        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
638
639        if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
640            continue;
641        }
642
643        if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
644            if meta.file_type().is_symlink() || !meta.is_file() {
645                continue;
646            }
647            if meta.len() > MAX_FILE_SIZE_BYTES {
648                tracing::debug!(
649                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
650                    meta.len() as f64 / 1_048_576.0,
651                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
652                );
653                continue;
654            }
655        }
656
657        let ext = Path::new(&file_path)
658            .extension()
659            .and_then(|e| e.to_str())
660            .unwrap_or("");
661
662        if !is_indexable_ext(ext) {
663            continue;
664        }
665
666        let rel = make_relative(&file_path, &project_root);
667        if extra_ignores.iter().any(|p| p.matches(&rel)) {
668            continue;
669        }
670
671        if max_files != usize::MAX && index.files.len() >= max_files {
672            tracing::info!(
673                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
674                max_files
675            );
676            break;
677        }
678
679        let Ok(content) = std::fs::read_to_string(&file_path) else {
680            continue;
681        };
682
683        let hash = compute_hash(&content);
684        let rel_path = make_relative(&file_path, &project_root);
685
686        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
687            if *old_hash == hash {
688                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
689                    index.files.insert(rel_path.clone(), old_entry.clone());
690                    for (key, sym) in old_syms {
691                        index.symbols.insert(key.clone(), sym.clone());
692                    }
693                    content_cache.insert(rel_path, content);
694                    reused += 1;
695                    continue;
696                }
697            }
698        }
699
700        let sigs = signatures::extract_signatures(&content, ext);
701        let line_count = content.lines().count();
702        let token_count = crate::core::tokens::count_tokens(&content);
703        let summary = extract_summary(&content);
704
705        let exports: Vec<String> = sigs
706            .iter()
707            .filter(|s| s.is_exported)
708            .map(|s| s.name.clone())
709            .collect();
710
711        index.files.insert(
712            rel_path.clone(),
713            FileEntry {
714                path: rel_path.clone(),
715                hash,
716                language: ext.to_string(),
717                line_count,
718                token_count,
719                exports,
720                summary,
721            },
722        );
723
724        for sig in &sigs {
725            let (start, end) = sig
726                .start_line
727                .zip(sig.end_line)
728                .unwrap_or_else(|| find_symbol_range(&content, sig));
729            let key = format!("{}::{}", rel_path, sig.name);
730            index.symbols.insert(
731                key,
732                SymbolEntry {
733                    file: rel_path.clone(),
734                    name: sig.name.clone(),
735                    kind: sig.kind.to_string(),
736                    start_line: start,
737                    end_line: end,
738                    is_exported: sig.is_exported,
739                },
740            );
741        }
742
743        content_cache.insert(rel_path, content);
744        scanned += 1;
745    }
746
747    build_edges_cached(&mut index, &content_cache);
748
749    if let Err(e) = index.save() {
750        tracing::warn!("could not save graph index: {e}");
751    }
752
753    tracing::warn!(
754        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
755        index.file_count(),
756        scanned,
757        reused,
758        index.symbol_count(),
759        index.edge_count()
760    );
761
762    (index, content_cache)
763}
764
765fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
766    let lines: Vec<&str> = content.lines().collect();
767    let mut start = 0;
768
769    for (i, line) in lines.iter().enumerate() {
770        if line.contains(&sig.name) {
771            let trimmed = line.trim();
772            let is_def = trimmed.starts_with("fn ")
773                || trimmed.starts_with("pub fn ")
774                || trimmed.starts_with("pub(crate) fn ")
775                || trimmed.starts_with("async fn ")
776                || trimmed.starts_with("pub async fn ")
777                || trimmed.starts_with("struct ")
778                || trimmed.starts_with("pub struct ")
779                || trimmed.starts_with("enum ")
780                || trimmed.starts_with("pub enum ")
781                || trimmed.starts_with("trait ")
782                || trimmed.starts_with("pub trait ")
783                || trimmed.starts_with("impl ")
784                || trimmed.starts_with("class ")
785                || trimmed.starts_with("export class ")
786                || trimmed.starts_with("export function ")
787                || trimmed.starts_with("export async function ")
788                || trimmed.starts_with("function ")
789                || trimmed.starts_with("async function ")
790                || trimmed.starts_with("def ")
791                || trimmed.starts_with("async def ")
792                || trimmed.starts_with("func ")
793                || trimmed.starts_with("interface ")
794                || trimmed.starts_with("export interface ")
795                || trimmed.starts_with("type ")
796                || trimmed.starts_with("export type ")
797                || trimmed.starts_with("const ")
798                || trimmed.starts_with("export const ")
799                || trimmed.starts_with("fun ")
800                || trimmed.starts_with("private fun ")
801                || trimmed.starts_with("public fun ")
802                || trimmed.starts_with("internal fun ")
803                || trimmed.starts_with("class ")
804                || trimmed.starts_with("data class ")
805                || trimmed.starts_with("sealed class ")
806                || trimmed.starts_with("sealed interface ")
807                || trimmed.starts_with("enum class ")
808                || trimmed.starts_with("object ")
809                || trimmed.starts_with("private object ")
810                || trimmed.starts_with("interface ")
811                || trimmed.starts_with("typealias ")
812                || trimmed.starts_with("private typealias ");
813            if is_def {
814                start = i + 1;
815                break;
816            }
817        }
818    }
819
820    if start == 0 {
821        return (1, lines.len().min(20));
822    }
823
824    let base_indent = lines
825        .get(start - 1)
826        .map_or(0, |l| l.len() - l.trim_start().len());
827
828    let mut end = start;
829    let mut brace_depth: i32 = 0;
830    let mut found_open = false;
831
832    for (i, line) in lines.iter().enumerate().skip(start - 1) {
833        for ch in line.chars() {
834            if ch == '{' {
835                brace_depth += 1;
836                found_open = true;
837            } else if ch == '}' {
838                brace_depth -= 1;
839            }
840        }
841
842        end = i + 1;
843
844        if found_open && brace_depth <= 0 {
845            break;
846        }
847
848        if !found_open && i > start {
849            let indent = line.len() - line.trim_start().len();
850            if indent <= base_indent && !line.trim().is_empty() && i > start {
851                end = i;
852                break;
853            }
854        }
855
856        if end - start > 200 {
857            break;
858        }
859    }
860
861    (start, end)
862}
863
/// Picks a one-line summary for a file: the first of its first 20 lines that
/// is neither blank, a comment/attribute-style line, nor an import/package
/// line, truncated to 120 characters.
///
/// Returns an empty string when none of the first 20 lines qualifies.
fn extract_summary(content: &str) -> String {
    // Line starts that mark comments, preprocessor/attribute lines and imports.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//",
        "#",
        "/*",
        "*",
        "use ",
        "import ",
        "from ",
        "require(",
        "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|candidate| {
            !candidate.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| candidate.starts_with(*prefix))
        })
        .map(|candidate| candidate.chars().take(120).collect())
        .unwrap_or_default()
}
884
/// Hashes `content` with the standard library's `DefaultHasher` and renders
/// the 64-bit result as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
893
/// Hashes `input` with the standard library's `DefaultHasher` and renders the
/// low 32 bits of the result as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    // Truncating to u32 keeps exactly the low 32 bits.
    let low_bits = state.finish() as u32;
    format!("{low_bits:08x}")
}
902
/// Recursively copies the contents of `src` into `dst`, creating `dst` (and
/// any missing parents) first.
///
/// Directory entries that cannot be read are skipped; any other I/O failure
/// (creating a directory, listing `src`, copying a file) aborts the copy and
/// is returned to the caller.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    let readable_entries = std::fs::read_dir(src)?.filter_map(Result::ok);
    for item in readable_entries {
        let source = item.path();
        let target = dst.join(item.file_name());
        if !source.is_dir() {
            std::fs::copy(&source, &target)?;
            continue;
        }
        copy_dir_fallible(&source, &target)?;
    }
    Ok(())
}
916
917fn normalize_absolute_path(path: &str) -> String {
918    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
919        return canon.to_string_lossy().to_string();
920    }
921
922    let mut normalized = path.to_string();
923    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
924        normalized.truncate(normalized.len() - 2);
925    }
926    while normalized.len() > 1
927        && (normalized.ends_with('\\') || normalized.ends_with('/'))
928        && !normalized.ends_with(":\\")
929        && !normalized.ends_with(":/")
930        && normalized != "\\"
931        && normalized != "/"
932    {
933        normalized.pop();
934    }
935    normalized
936}
937
/// Normalizes a project root path.
///
/// Currently identical to `normalize_absolute_path`, to which it delegates.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
941
942pub fn graph_match_key(path: &str) -> String {
943    let stripped =
944        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
945    stripped.trim_start_matches('/').to_string()
946}
947
948pub fn graph_relative_key(path: &str, root: &str) -> String {
949    let root_norm = normalize_project_root(root);
950    let path_norm = normalize_absolute_path(path);
951    let root_path = Path::new(&root_norm);
952    let path_path = Path::new(&path_norm);
953
954    if let Ok(rel) = path_path.strip_prefix(root_path) {
955        let rel = rel.to_string_lossy().to_string();
956        return rel.trim_start_matches(['/', '\\']).to_string();
957    }
958
959    path.trim_start_matches(['/', '\\'])
960        .replace('/', std::path::MAIN_SEPARATOR_STR)
961}
962
/// Module-private shorthand for `graph_relative_key(path, root)`.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
966
/// Module-private shorthand for
/// `crate::core::language_capabilities::is_indexable_ext(ext)`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
970
/// Test helper: returns the name from the first `package ...` line of
/// `content`, with surrounding whitespace and trailing `;` removed, or `None`
/// when no line (after trimming) starts with `package `.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            let name = declared.trim().trim_end_matches(';');
            return Some(name.to_string());
        }
    }
    None
}