Skip to main content

lean_ctx/core/graph_index/
mod.rs

1// DEPRECATED: This module is being replaced by PropertyGraph (core/property_graph/).
2// New code should use GraphProvider (core/graph_provider.rs) instead of accessing
3// ProjectIndex directly. Remaining direct consumers: call_graph, graph_enricher,
4// ctx_callgraph, ctx_graph_diagram, ctx_routes, autonomy, dashboard/callgraph.
5// See OPT-14/15 plan for the full migration path.
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14mod edges;
15pub(crate) use edges::*;
16#[cfg(test)]
17mod tests;
18
19const INDEX_VERSION: u32 = 6;
20
21pub fn is_safe_scan_root_public(path: &str) -> bool {
22    is_safe_scan_root(path)
23}
24
/// Returns `true` when `path` has no parent component.
///
/// On Windows builds, a path whose parent is the empty path is reported as a
/// root as well.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
29
30fn is_safe_scan_root(path: &str) -> bool {
31    let normalized = normalize_project_root(path);
32    let p = Path::new(&normalized);
33
34    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
35        tracing::warn!("[graph_index: refusing to scan filesystem root]");
36        return false;
37    }
38
39    if normalized == "." || normalized.is_empty() {
40        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
41        return false;
42    }
43
44    if let Some(home) = dirs::home_dir() {
45        let home_norm = normalize_project_root(&home.to_string_lossy());
46        if normalized == home_norm {
47            use std::sync::Once;
48            static HOME_WARN: Once = Once::new();
49            HOME_WARN.call_once(|| {
50                tracing::warn!(
51                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
52                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
53                );
54            });
55            return false;
56        }
57        // macOS TCC: Documents/Desktop/Downloads pop a privacy prompt the moment
58        // we stat or enumerate inside them (#356). They are never valid scan roots,
59        // so refuse here before any has_marker stat or read_dir runs.
60        if crate::core::pathutil::is_tcc_sensitive_home_dir(p) {
61            tracing::warn!(
62                "[graph_index: refusing to scan {normalized} — macOS TCC-protected home dir]"
63            );
64            return false;
65        }
66        // Block common broad home subdirectories that are never valid project roots
67        let home_path = Path::new(&home_norm);
68        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
69            "Desktop",
70            "Documents",
71            "Downloads",
72            "Pictures",
73            "Music",
74            "Videos",
75            "Movies",
76            "Library",
77            ".local",
78            ".cache",
79            ".config",
80            "snap",
81            "Applications",
82        ];
83        for blocked in BLOCKED_HOME_SUBDIRS {
84            let blocked_path = home_path.join(blocked);
85            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
86            let has_marker = p.join(".git").exists()
87                || p.join("Cargo.toml").exists()
88                || p.join("package.json").exists();
89            if is_inside_blocked
90                && !has_marker
91                && !crate::core::pathutil::has_multi_repo_children(p)
92            {
93                tracing::warn!(
94                    "[graph_index: refusing to scan {normalized} — \
95                     inside home/{blocked} without project markers]"
96                );
97                return false;
98            }
99        }
100
101        // Block directories that are direct children of home without project markers
102        // (but allow multi-repo workspace parents like ~/code/)
103        if p.parent() == Some(home_path) {
104            let has_marker = p.join(".git").exists()
105                || p.join("Cargo.toml").exists()
106                || p.join("package.json").exists()
107                || p.join("go.mod").exists()
108                || p.join("pyproject.toml").exists();
109            if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
110                tracing::warn!(
111                    "[graph_index: refusing to scan {normalized} — \
112                     direct child of home without project markers]"
113                );
114                return false;
115            }
116        }
117    }
118
119    let breadth_markers = [
120        ".git",
121        "Cargo.toml",
122        "package.json",
123        "go.mod",
124        "pyproject.toml",
125        "setup.py",
126        "Makefile",
127        "CMakeLists.txt",
128        "pnpm-workspace.yaml",
129        ".projectile",
130        "BUILD.bazel",
131        "go.work",
132    ];
133
134    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
135        // Multi-repo workspace parent: >=2 children with project markers is always safe
136        if crate::core::pathutil::has_multi_repo_children(p) {
137            return true;
138        }
139
140        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
141            rd.filter_map(Result::ok)
142                .filter(|e| e.path().is_dir())
143                .count()
144        });
145        if child_count > 50 {
146            tracing::warn!(
147                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
148                 skipping scan to avoid indexing broad directories]"
149            );
150            return false;
151        }
152    }
153
154    true
155}
156
157#[derive(Debug, Clone, Serialize, Deserialize)]
158pub struct ProjectIndex {
159    pub version: u32,
160    pub project_root: String,
161    pub last_scan: String,
162    pub files: HashMap<String, FileEntry>,
163    pub edges: Vec<IndexEdge>,
164    pub symbols: HashMap<String, SymbolEntry>,
165}
166
167#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct FileEntry {
169    pub path: String,
170    pub hash: String,
171    pub language: String,
172    pub line_count: usize,
173    pub token_count: usize,
174    pub exports: Vec<String>,
175    pub summary: String,
176}
177
178#[derive(Debug, Clone, Serialize, Deserialize)]
179pub struct SymbolEntry {
180    pub file: String,
181    pub name: String,
182    pub kind: String,
183    pub start_line: usize,
184    pub end_line: usize,
185    pub is_exported: bool,
186}
187
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct IndexEdge {
190    pub from: String,
191    pub to: String,
192    pub kind: String,
193    #[serde(default = "default_edge_weight")]
194    pub weight: f32,
195}
196
/// Serde default for `IndexEdge::weight`: edges without a stored weight get `1.0`.
fn default_edge_weight() -> f32 {
    1.0_f32
}
200
201impl ProjectIndex {
202    pub fn new(project_root: &str) -> Self {
203        Self {
204            version: INDEX_VERSION,
205            project_root: normalize_project_root(project_root),
206            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
207            files: HashMap::new(),
208            edges: Vec::new(),
209            symbols: HashMap::new(),
210        }
211    }
212
213    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
214        let normalized = normalize_project_root(project_root);
215        let hash = crate::core::project_hash::hash_project_root(&normalized);
216        crate::core::data_dir::lean_ctx_data_dir()
217            .ok()
218            .map(|d| d.join("graphs").join(hash))
219    }
220
221    pub fn load(project_root: &str) -> Option<Self> {
222        let dir = Self::index_dir(project_root)?;
223
224        let zst_path = dir.join("index.json.zst");
225        if zst_path.exists() {
226            let compressed = std::fs::read(&zst_path).ok()?;
227            let data = zstd::decode_all(compressed.as_slice()).ok()?;
228            let content = String::from_utf8(data).ok()?;
229            let index: Self = serde_json::from_str(&content).ok()?;
230            if index.version != INDEX_VERSION {
231                return None;
232            }
233            return Some(index);
234        }
235
236        let json_path = dir.join("index.json");
237        let content = std::fs::read_to_string(&json_path)
238            .or_else(|_| -> std::io::Result<String> {
239                let legacy_hash = short_hash(&normalize_project_root(project_root));
240                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
241                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
242                    .join("graphs")
243                    .join(legacy_hash);
244                let legacy_path = legacy_dir.join("index.json");
245                let data = std::fs::read_to_string(&legacy_path)?;
246                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
247                    tracing::debug!("graph index migration: {e}");
248                }
249                Ok(data)
250            })
251            .ok()?;
252        let index: Self = serde_json::from_str(&content).ok()?;
253        if index.version != INDEX_VERSION {
254            return None;
255        }
256        // Auto-migrate: compress legacy JSON to zstd
257        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
258            let zst_tmp = zst_path.with_extension("zst.tmp");
259            if std::fs::write(&zst_tmp, &compressed).is_ok()
260                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
261            {
262                let _ = std::fs::remove_file(&json_path);
263            }
264        }
265        Some(index)
266    }
267
268    pub fn save(&self) -> Result<(), String> {
269        let dir = Self::index_dir(&self.project_root)
270            .ok_or_else(|| "Cannot determine data directory".to_string())?;
271        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
272        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
273        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
274        let target = dir.join("index.json.zst");
275        let tmp = target.with_extension("zst.tmp");
276        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
277        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
278        let _ = std::fs::remove_file(dir.join("index.json"));
279        Ok(())
280    }
281
282    /// Remove all cached graph indices that are older than max_age_hours.
283    /// Called on startup/update to prevent stale data from persisting.
284    pub fn purge_stale_indices() {
285        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
286            return;
287        };
288        let graphs_dir = data_dir.join("graphs");
289        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
290            return;
291        };
292        let cfg = crate::core::config::Config::load();
293        let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
294
295        for entry in entries.filter_map(Result::ok) {
296            let path = entry.path();
297            if !path.is_dir() {
298                continue;
299            }
300            let zst = path.join("index.json.zst");
301            let json = path.join("index.json");
302            let index_file = if zst.exists() {
303                &zst
304            } else if json.exists() {
305                &json
306            } else {
307                continue;
308            };
309
310            let is_old = index_file
311                .metadata()
312                .and_then(|m| m.modified())
313                .is_ok_and(|mtime| {
314                    mtime
315                        .elapsed()
316                        .is_ok_and(|age| age.as_secs() > max_age_secs)
317                });
318
319            if is_old {
320                tracing::info!("[graph_index: purging stale index at {}]", path.display());
321                let _ = std::fs::remove_dir_all(&path);
322            }
323        }
324    }
325
326    pub fn file_count(&self) -> usize {
327        self.files.len()
328    }
329
330    pub fn symbol_count(&self) -> usize {
331        self.symbols.len()
332    }
333
334    pub fn edge_count(&self) -> usize {
335        self.edges.len()
336    }
337
338    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
339        self.symbols.get(key)
340    }
341
342    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
343        let mut result = Vec::new();
344        let mut visited = std::collections::HashSet::new();
345        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
346
347        while let Some((current, d)) = queue.pop() {
348            if d > depth || visited.contains(&current) {
349                continue;
350            }
351            visited.insert(current.clone());
352            if current != path {
353                result.push(current.clone());
354            }
355
356            for edge in &self.edges {
357                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
358                    queue.push((edge.from.clone(), d + 1));
359                }
360            }
361        }
362        result
363    }
364
365    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
366        let mut result = Vec::new();
367        let mut visited = std::collections::HashSet::new();
368        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
369
370        while let Some((current, d)) = queue.pop() {
371            if d > depth || visited.contains(&current) {
372                continue;
373            }
374            visited.insert(current.clone());
375            if current != path {
376                result.push(current.clone());
377            }
378
379            for edge in &self.edges {
380                if edge.from == current && !visited.contains(&edge.to) {
381                    queue.push((edge.to.clone(), d + 1));
382                }
383                if edge.to == current && !visited.contains(&edge.from) {
384                    queue.push((edge.from.clone(), d + 1));
385                }
386            }
387        }
388        result
389    }
390}
391
392/// Load the best available graph index, trying multiple root path variants.
393/// If no valid index exists, automatically scans the project to build one.
394/// This is the primary entry point — ensures zero-config usage.
395pub fn load_or_build(project_root: &str) -> ProjectIndex {
396    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
397        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
398    }
399
400    // Prefer stable absolute roots. Using "." as a cache key is fragile because
401    // it depends on the process cwd and can accidentally load the wrong project.
402    let root_abs = if project_root.trim().is_empty() || project_root == "." {
403        std::env::current_dir().ok().map_or_else(
404            || ".".to_string(),
405            |p| normalize_project_root(&p.to_string_lossy()),
406        )
407    } else {
408        normalize_project_root(project_root)
409    };
410
411    if !is_safe_scan_root(&root_abs) {
412        return ProjectIndex::new(&root_abs);
413    }
414
415    // Try the absolute/root-normalized path first.
416    if let Some(idx) = ProjectIndex::load(&root_abs) {
417        if !idx.files.is_empty() {
418            if index_looks_stale(&idx, &root_abs) {
419                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
420                return scan(&root_abs);
421            }
422            return idx;
423        }
424    }
425
426    // CWD fallback: only use if CWD is a subdirectory of root_abs (same project)
427    if let Ok(cwd) = std::env::current_dir() {
428        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
429        if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
430            if let Some(idx) = ProjectIndex::load(&cwd_str) {
431                if !idx.files.is_empty() {
432                    if index_looks_stale(&idx, &cwd_str) {
433                        return scan(&cwd_str);
434                    }
435                    return idx;
436                }
437            }
438        }
439    }
440
441    scan(&root_abs)
442}
443
444fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
445    if index.files.is_empty() {
446        return true;
447    }
448
449    // TTL check: rebuild if index is older than configured max_age_hours
450    if let Ok(scan_time) =
451        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
452    {
453        let cfg = crate::core::config::Config::load();
454        let effective_hours = cfg.archive_max_age_hours_effective();
455        let max_age = chrono::Duration::hours(effective_hours as i64);
456        let now = chrono::Local::now().naive_local();
457        if now.signed_duration_since(scan_time) > max_age {
458            tracing::info!(
459                "[graph_index: index is older than {}h — marking stale]",
460                effective_hours
461            );
462            return true;
463        }
464    }
465
466    // Contamination check: if index contains paths from common user directories,
467    // it was built from a too-broad root and must be rebuilt
468    const CONTAMINATION_MARKERS: &[&str] = &[
469        "Desktop/",
470        "Documents/",
471        "Downloads/",
472        "Pictures/",
473        "Music/",
474        "Videos/",
475        "Movies/",
476        "Library/",
477        ".cache/",
478        "snap/",
479    ];
480    let contaminated = index.files.keys().take(200).any(|rel| {
481        CONTAMINATION_MARKERS
482            .iter()
483            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
484    });
485    if contaminated {
486        tracing::warn!(
487            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
488             marking stale to force clean rebuild]"
489        );
490        return true;
491    }
492
493    let root_path = Path::new(root_abs);
494    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
495    let sample_size = index.files.len().min(20);
496    for rel in index.files.keys().take(sample_size) {
497        let rel = rel.trim_start_matches(['/', '\\']);
498        if rel.is_empty() {
499            continue;
500        }
501        let abs = root_path.join(rel);
502        if !abs.exists() {
503            return true;
504        }
505    }
506
507    false
508}
509
510pub fn scan(project_root: &str) -> ProjectIndex {
511    scan_inner(project_root).0
512}
513
514pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
515    scan_inner(project_root)
516}
517
518fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
519    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
520        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
521        return (ProjectIndex::new(project_root), HashMap::new());
522    }
523
524    let project_root = normalize_project_root(project_root);
525
526    if !is_safe_scan_root(&project_root) {
527        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
528        return (ProjectIndex::new(&project_root), HashMap::new());
529    }
530
531    let lock_name = format!(
532        "graph-idx-{}",
533        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
534    );
535    let _lock = crate::core::startup_guard::try_acquire_lock(
536        &lock_name,
537        std::time::Duration::from_millis(800),
538        std::time::Duration::from_mins(3),
539    );
540    if _lock.is_none() {
541        tracing::info!(
542            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
543        );
544        return (
545            ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
546            HashMap::new(),
547        );
548    }
549
550    let existing = ProjectIndex::load(&project_root);
551    let mut index = ProjectIndex::new(&project_root);
552
553    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
554        if let Some(ref prev) = existing {
555            prev.files
556                .iter()
557                .map(|(path, entry)| {
558                    let syms: Vec<(String, SymbolEntry)> = prev
559                        .symbols
560                        .iter()
561                        .filter(|(_, s)| s.file == *path)
562                        .map(|(k, v)| (k.clone(), v.clone()))
563                        .collect();
564                    (path.clone(), (entry.hash.clone(), syms))
565                })
566                .collect()
567        } else {
568            HashMap::new()
569        };
570
571    let walker = ignore::WalkBuilder::new(&project_root)
572        .hidden(true)
573        .git_ignore(true)
574        .git_global(true)
575        .git_exclude(true)
576        .max_depth(Some(20))
577        .build();
578
579    let cfg = crate::core::config::Config::load();
580    let extra_ignores: Vec<glob::Pattern> = cfg
581        .extra_ignore_patterns
582        .iter()
583        .filter_map(|p| glob::Pattern::new(p).ok())
584        .collect();
585
586    let mut scanned = 0usize;
587    let mut reused = 0usize;
588    let mut entries_visited = 0usize;
589    let mut content_cache: HashMap<String, String> = HashMap::new();
590    let max_files = if cfg.graph_index_max_files == 0 {
591        usize::MAX // unlimited
592    } else {
593        cfg.graph_index_max_files as usize
594    };
595    const MAX_ENTRIES_VISITED: usize = 500_000;
596    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
597    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
598
599    for entry in walker.filter_map(std::result::Result::ok) {
600        entries_visited += 1;
601        if entries_visited > MAX_ENTRIES_VISITED {
602            tracing::warn!(
603                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
604                 runaway traversal. Indexed {} files so far.]",
605                index.files.len()
606            );
607            break;
608        }
609        if entries_visited.is_multiple_of(5000) {
610            if std::time::Instant::now() > scan_deadline {
611                tracing::warn!(
612                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
613                     saving partial index with {} files]",
614                    index.files.len()
615                );
616                break;
617            }
618            if crate::core::memory_guard::abort_requested() {
619                tracing::warn!(
620                    "[graph_index: memory pressure abort after {entries_visited} entries — \
621                     saving partial index with {} files]",
622                    index.files.len()
623                );
624                break;
625            }
626            if crate::core::memory_guard::is_under_pressure() {
627                tracing::warn!(
628                    "[graph_index: memory pressure detected at {entries_visited} entries — \
629                     stopping scan with {} files]",
630                    index.files.len()
631                );
632                break;
633            }
634            if let Some(ref g) = _lock {
635                g.touch();
636            }
637        }
638
639        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
640            continue;
641        }
642
643        if entry.path_is_symlink() {
644            continue;
645        }
646        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
647
648        if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
649            continue;
650        }
651
652        if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
653            if meta.file_type().is_symlink() || !meta.is_file() {
654                continue;
655            }
656            if meta.len() > MAX_FILE_SIZE_BYTES {
657                tracing::debug!(
658                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
659                    meta.len() as f64 / 1_048_576.0,
660                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
661                );
662                continue;
663            }
664        }
665
666        let ext = Path::new(&file_path)
667            .extension()
668            .and_then(|e| e.to_str())
669            .unwrap_or("");
670
671        if !is_indexable_ext(ext) {
672            continue;
673        }
674
675        let rel = make_relative(&file_path, &project_root);
676        if extra_ignores.iter().any(|p| p.matches(&rel)) {
677            continue;
678        }
679
680        if max_files != usize::MAX && index.files.len() >= max_files {
681            tracing::info!(
682                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
683                max_files
684            );
685            break;
686        }
687
688        let Ok(content) = std::fs::read_to_string(&file_path) else {
689            continue;
690        };
691
692        let hash = compute_hash(&content);
693        let rel_path = make_relative(&file_path, &project_root);
694
695        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
696            if *old_hash == hash {
697                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
698                    index.files.insert(rel_path.clone(), old_entry.clone());
699                    for (key, sym) in old_syms {
700                        index.symbols.insert(key.clone(), sym.clone());
701                    }
702                    content_cache.insert(rel_path, content);
703                    reused += 1;
704                    continue;
705                }
706            }
707        }
708
709        let sigs = signatures::extract_signatures(&content, ext);
710        let line_count = content.lines().count();
711        let token_count = crate::core::tokens::count_tokens(&content);
712        let summary = extract_summary(&content);
713
714        let exports: Vec<String> = sigs
715            .iter()
716            .filter(|s| s.is_exported)
717            .map(|s| s.name.clone())
718            .collect();
719
720        index.files.insert(
721            rel_path.clone(),
722            FileEntry {
723                path: rel_path.clone(),
724                hash,
725                language: ext.to_string(),
726                line_count,
727                token_count,
728                exports,
729                summary,
730            },
731        );
732
733        for sig in &sigs {
734            let (start, end) = sig
735                .start_line
736                .zip(sig.end_line)
737                .unwrap_or_else(|| find_symbol_range(&content, sig));
738            let key = format!("{}::{}", rel_path, sig.name);
739            index.symbols.insert(
740                key,
741                SymbolEntry {
742                    file: rel_path.clone(),
743                    name: sig.name.clone(),
744                    kind: sig.kind.to_string(),
745                    start_line: start,
746                    end_line: end,
747                    is_exported: sig.is_exported,
748                },
749            );
750        }
751
752        content_cache.insert(rel_path, content);
753        scanned += 1;
754    }
755
756    build_edges_cached(&mut index, &content_cache);
757
758    if let Err(e) = index.save() {
759        tracing::warn!("could not save graph index: {e}");
760    }
761
762    tracing::warn!(
763        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
764        index.file_count(),
765        scanned,
766        reused,
767        index.symbol_count(),
768        index.edge_count()
769    );
770
771    (index, content_cache)
772}
773
774fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
775    let lines: Vec<&str> = content.lines().collect();
776    let mut start = 0;
777
778    for (i, line) in lines.iter().enumerate() {
779        if line.contains(&sig.name) {
780            let trimmed = line.trim();
781            let is_def = trimmed.starts_with("fn ")
782                || trimmed.starts_with("pub fn ")
783                || trimmed.starts_with("pub(crate) fn ")
784                || trimmed.starts_with("async fn ")
785                || trimmed.starts_with("pub async fn ")
786                || trimmed.starts_with("struct ")
787                || trimmed.starts_with("pub struct ")
788                || trimmed.starts_with("enum ")
789                || trimmed.starts_with("pub enum ")
790                || trimmed.starts_with("trait ")
791                || trimmed.starts_with("pub trait ")
792                || trimmed.starts_with("impl ")
793                || trimmed.starts_with("class ")
794                || trimmed.starts_with("export class ")
795                || trimmed.starts_with("export function ")
796                || trimmed.starts_with("export async function ")
797                || trimmed.starts_with("function ")
798                || trimmed.starts_with("async function ")
799                || trimmed.starts_with("def ")
800                || trimmed.starts_with("async def ")
801                || trimmed.starts_with("func ")
802                || trimmed.starts_with("interface ")
803                || trimmed.starts_with("export interface ")
804                || trimmed.starts_with("type ")
805                || trimmed.starts_with("export type ")
806                || trimmed.starts_with("const ")
807                || trimmed.starts_with("export const ")
808                || trimmed.starts_with("fun ")
809                || trimmed.starts_with("private fun ")
810                || trimmed.starts_with("public fun ")
811                || trimmed.starts_with("internal fun ")
812                || trimmed.starts_with("class ")
813                || trimmed.starts_with("data class ")
814                || trimmed.starts_with("sealed class ")
815                || trimmed.starts_with("sealed interface ")
816                || trimmed.starts_with("enum class ")
817                || trimmed.starts_with("object ")
818                || trimmed.starts_with("private object ")
819                || trimmed.starts_with("interface ")
820                || trimmed.starts_with("typealias ")
821                || trimmed.starts_with("private typealias ");
822            if is_def {
823                start = i + 1;
824                break;
825            }
826        }
827    }
828
829    if start == 0 {
830        return (1, lines.len().min(20));
831    }
832
833    let base_indent = lines
834        .get(start - 1)
835        .map_or(0, |l| l.len() - l.trim_start().len());
836
837    let mut end = start;
838    let mut brace_depth: i32 = 0;
839    let mut found_open = false;
840
841    for (i, line) in lines.iter().enumerate().skip(start - 1) {
842        for ch in line.chars() {
843            if ch == '{' {
844                brace_depth += 1;
845                found_open = true;
846            } else if ch == '}' {
847                brace_depth -= 1;
848            }
849        }
850
851        end = i + 1;
852
853        if found_open && brace_depth <= 0 {
854            break;
855        }
856
857        if !found_open && i > start {
858            let indent = line.len() - line.trim_start().len();
859            if indent <= base_indent && !line.trim().is_empty() && i > start {
860                end = i;
861                break;
862            }
863        }
864
865        if end - start > 200 {
866            break;
867        }
868    }
869
870    (start, end)
871}
872
/// Picks a one-line summary for a file from its leading lines.
///
/// Only the first 20 lines of `content` are examined. Each line is trimmed and
/// skipped when it is blank or begins with a comment marker (`//`, `#`, `/*`,
/// `*`) or an import-like prefix (`use `, `import `, `from `, `require(`,
/// `package `). The first line that survives is returned, capped at 120
/// characters (counted as `char`s, not bytes).
///
/// Returns an empty string when none of the inspected lines qualifies.
fn extract_summary(content: &str) -> String {
    // Prefixes that mark a line as boilerplate rather than summary material.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];
    const MAX_LINES_SCANNED: usize = 20;
    const MAX_SUMMARY_CHARS: usize = 120;

    content
        .lines()
        .take(MAX_LINES_SCANNED)
        .map(str::trim)
        .find(|candidate| {
            !candidate.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| candidate.starts_with(prefix))
        })
        .map(|candidate| candidate.chars().take(MAX_SUMMARY_CHARS).collect::<String>())
        .unwrap_or_default()
}
893
/// Hashes `content` with the standard library's `DefaultHasher` and renders
/// the full 64-bit result as 16 zero-padded lowercase hex digits.
///
/// NOTE(review): the std docs describe `DefaultHasher`'s algorithm as
/// unspecified across Rust releases. If these digests are persisted, a
/// toolchain upgrade could change them — confirm that is acceptable.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest: u64 = state.finish();
    format!("{digest:016x}")
}
902
/// Hashes `input` with the standard library's `DefaultHasher` and renders the
/// low 32 bits of the result as 8 zero-padded lowercase hex digits.
///
/// NOTE(review): the std docs describe `DefaultHasher`'s algorithm as
/// unspecified across Rust releases. If these values end up in persisted
/// names or keys, a toolchain upgrade could change them — confirm.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Mask that keeps only the lower half of the 64-bit digest.
    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = DefaultHasher::new();
    input.hash(&mut state);
    let truncated = state.finish() & LOW_32_BITS;
    format!("{truncated:08x}")
}
911
/// Recursively copies the directory tree at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Every entry of `src` is
/// then copied under the same file name: directories recurse, everything else
/// goes through `std::fs::copy`. `Path::is_dir` follows symlinks, so a symlink
/// that points at a directory is copied by content.
///
/// # Errors
///
/// Returns the first `std::io::Error` encountered: creating `dst`, opening
/// `src`, reading an individual directory entry, or copying a file. Entries
/// copied before the failure are left in place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Propagate per-entry read failures instead of silently dropping the
        // entry; otherwise a partial copy would be reported as success.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
925
926fn normalize_absolute_path(path: &str) -> String {
927    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
928        return canon.to_string_lossy().to_string();
929    }
930
931    let mut normalized = path.to_string();
932    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
933        normalized.truncate(normalized.len() - 2);
934    }
935    while normalized.len() > 1
936        && (normalized.ends_with('\\') || normalized.ends_with('/'))
937        && !normalized.ends_with(":\\")
938        && !normalized.ends_with(":/")
939        && normalized != "\\"
940        && normalized != "/"
941    {
942        normalized.pop();
943    }
944    normalized
945}
946
947pub fn normalize_project_root(path: &str) -> String {
948    normalize_absolute_path(path)
949}
950
951pub fn graph_match_key(path: &str) -> String {
952    let stripped =
953        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
954    stripped.trim_start_matches('/').to_string()
955}
956
957pub fn graph_relative_key(path: &str, root: &str) -> String {
958    let root_norm = normalize_project_root(root);
959    let path_norm = normalize_absolute_path(path);
960    let root_path = Path::new(&root_norm);
961    let path_path = Path::new(&path_norm);
962
963    if let Ok(rel) = path_path.strip_prefix(root_path) {
964        let rel = rel.to_string_lossy().to_string();
965        return rel.trim_start_matches(['/', '\\']).to_string();
966    }
967
968    path.trim_start_matches(['/', '\\'])
969        .replace('/', std::path::MAIN_SEPARATOR_STR)
970}
971
972fn make_relative(path: &str, root: &str) -> String {
973    graph_relative_key(path, root)
974}
975
976fn is_indexable_ext(ext: &str) -> bool {
977    crate::core::language_capabilities::is_indexable_ext(ext)
978}
979
/// Test-only helper: extracts the package name from Kotlin-style source.
///
/// Scans `content` line by line and, for the first trimmed line that starts
/// with `package ` (keyword plus a space), returns the rest of that line with
/// surrounding whitespace and any trailing `;` characters removed.
/// Returns `None` when no such line exists.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            let name = declared.trim().trim_end_matches(';');
            return Some(name.to_string());
        }
    }
    None
}