Skip to main content

lean_ctx/core/graph_index/
mod.rs

1// DEPRECATED: This module is being replaced by PropertyGraph (core/property_graph/).
2// New code should use GraphProvider (core/graph_provider.rs) instead of accessing
3// ProjectIndex directly. Remaining direct consumers: call_graph, graph_enricher,
4// ctx_callgraph, ctx_graph_diagram, ctx_routes, autonomy, dashboard/callgraph.
5// See OPT-14/15 plan for the full migration path.
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12use crate::core::import_resolver;
13use crate::core::signatures;
14mod edges;
15pub(crate) use edges::*;
16#[cfg(test)]
17mod tests;
18
19const INDEX_VERSION: u32 = 6;
20
21pub fn is_safe_scan_root_public(path: &str) -> bool {
22    is_safe_scan_root(path)
23}
24
/// Returns `true` when `path` has no parent component.
///
/// On Windows builds a path whose parent is the empty path is also treated
/// as a root.
fn is_filesystem_root(path: &str) -> bool {
    match Path::new(path).parent() {
        None => true,
        Some(parent) => cfg!(windows) && parent == Path::new(""),
    }
}
29
30fn is_safe_scan_root(path: &str) -> bool {
31    let normalized = normalize_project_root(path);
32    let p = Path::new(&normalized);
33
34    if normalized == "/" || normalized == "\\" || is_filesystem_root(&normalized) {
35        tracing::warn!("[graph_index: refusing to scan filesystem root]");
36        return false;
37    }
38
39    if normalized == "." || normalized.is_empty() {
40        tracing::warn!("[graph_index: refusing to scan relative/empty root]");
41        return false;
42    }
43
44    if let Some(home) = dirs::home_dir() {
45        let home_norm = normalize_project_root(&home.to_string_lossy());
46        if normalized == home_norm {
47            use std::sync::Once;
48            static HOME_WARN: Once = Once::new();
49            HOME_WARN.call_once(|| {
50                tracing::warn!(
51                    "[graph_index: skipping — cannot index home directory {normalized}.\n  \
52                     Run from inside a project, or set LEAN_CTX_PROJECT_ROOT=/path/to/project]"
53                );
54            });
55            return false;
56        }
57        // macOS TCC: Documents/Desktop/Downloads pop a privacy prompt the moment
58        // we stat or enumerate inside them (#356). They are never valid scan roots,
59        // so refuse here before any has_marker stat or read_dir runs.
60        if crate::core::pathutil::is_tcc_sensitive_home_dir(p) {
61            tracing::warn!(
62                "[graph_index: refusing to scan {normalized} — macOS TCC-protected home dir]"
63            );
64            return false;
65        }
66        // Block common broad home subdirectories that are never valid project roots
67        let home_path = Path::new(&home_norm);
68        const BLOCKED_HOME_SUBDIRS: &[&str] = &[
69            "Desktop",
70            "Documents",
71            "Downloads",
72            "Pictures",
73            "Music",
74            "Videos",
75            "Movies",
76            "Library",
77            ".local",
78            ".cache",
79            ".config",
80            "snap",
81            "Applications",
82            // Cloud-sync roots: scanning these forces on-demand providers to
83            // hydrate (download) every placeholder file/folder (#363). iCloud's
84            // backing dir (~/Library/Mobile Documents) is already covered by
85            // "Library" above.
86            "OneDrive",
87            "Dropbox",
88            "Google Drive",
89        ];
90        for blocked in BLOCKED_HOME_SUBDIRS {
91            let blocked_path = home_path.join(blocked);
92            let is_inside_blocked = p == blocked_path || p.starts_with(&blocked_path);
93            let has_marker = p.join(".git").exists()
94                || p.join("Cargo.toml").exists()
95                || p.join("package.json").exists();
96            if is_inside_blocked
97                && !has_marker
98                && !crate::core::pathutil::has_multi_repo_children(p)
99            {
100                tracing::warn!(
101                    "[graph_index: refusing to scan {normalized} — \
102                     inside home/{blocked} without project markers]"
103                );
104                return false;
105            }
106        }
107
108        // Block directories that are direct children of home without project markers
109        // (but allow multi-repo workspace parents like ~/code/)
110        if p.parent() == Some(home_path) {
111            let has_marker = p.join(".git").exists()
112                || p.join("Cargo.toml").exists()
113                || p.join("package.json").exists()
114                || p.join("go.mod").exists()
115                || p.join("pyproject.toml").exists();
116            if !has_marker && !crate::core::pathutil::has_multi_repo_children(p) {
117                tracing::warn!(
118                    "[graph_index: refusing to scan {normalized} — \
119                     direct child of home without project markers]"
120                );
121                return false;
122            }
123        }
124    }
125
126    let breadth_markers = [
127        ".git",
128        "Cargo.toml",
129        "package.json",
130        "go.mod",
131        "pyproject.toml",
132        "setup.py",
133        "Makefile",
134        "CMakeLists.txt",
135        "pnpm-workspace.yaml",
136        ".projectile",
137        "BUILD.bazel",
138        "go.work",
139    ];
140
141    if !breadth_markers.iter().any(|m| p.join(m).exists()) {
142        // Multi-repo workspace parent: >=2 children with project markers is always safe
143        if crate::core::pathutil::has_multi_repo_children(p) {
144            return true;
145        }
146
147        let child_count = std::fs::read_dir(p).map_or(0, |rd| {
148            rd.filter_map(Result::ok)
149                .filter(|e| e.path().is_dir())
150                .count()
151        });
152        if child_count > 50 {
153            tracing::warn!(
154                "[graph_index: {normalized} has no project markers and {child_count} subdirectories — \
155                 skipping scan to avoid indexing broad directories]"
156            );
157            return false;
158        }
159    }
160
161    true
162}
163
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct ProjectIndex {
166    pub version: u32,
167    pub project_root: String,
168    pub last_scan: String,
169    pub files: HashMap<String, FileEntry>,
170    pub edges: Vec<IndexEdge>,
171    pub symbols: HashMap<String, SymbolEntry>,
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct FileEntry {
176    pub path: String,
177    pub hash: String,
178    pub language: String,
179    pub line_count: usize,
180    pub token_count: usize,
181    pub exports: Vec<String>,
182    pub summary: String,
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct SymbolEntry {
187    pub file: String,
188    pub name: String,
189    pub kind: String,
190    pub start_line: usize,
191    pub end_line: usize,
192    pub is_exported: bool,
193}
194
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct IndexEdge {
197    pub from: String,
198    pub to: String,
199    pub kind: String,
200    #[serde(default = "default_edge_weight")]
201    pub weight: f32,
202}
203
/// Serde default for [`IndexEdge::weight`] when the field is absent.
fn default_edge_weight() -> f32 {
    const DEFAULT_WEIGHT: f32 = 1.0;
    DEFAULT_WEIGHT
}
207
208impl ProjectIndex {
209    pub fn new(project_root: &str) -> Self {
210        Self {
211            version: INDEX_VERSION,
212            project_root: normalize_project_root(project_root),
213            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
214            files: HashMap::new(),
215            edges: Vec::new(),
216            symbols: HashMap::new(),
217        }
218    }
219
220    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
221        let normalized = normalize_project_root(project_root);
222        let hash = crate::core::project_hash::hash_project_root(&normalized);
223        crate::core::data_dir::lean_ctx_data_dir()
224            .ok()
225            .map(|d| d.join("graphs").join(hash))
226    }
227
228    pub fn load(project_root: &str) -> Option<Self> {
229        let dir = Self::index_dir(project_root)?;
230
231        let zst_path = dir.join("index.json.zst");
232        if zst_path.exists() {
233            let compressed = std::fs::read(&zst_path).ok()?;
234            let data = zstd::decode_all(compressed.as_slice()).ok()?;
235            let content = String::from_utf8(data).ok()?;
236            let index: Self = serde_json::from_str(&content).ok()?;
237            if index.version != INDEX_VERSION {
238                return None;
239            }
240            return Some(index);
241        }
242
243        let json_path = dir.join("index.json");
244        let content = std::fs::read_to_string(&json_path)
245            .or_else(|_| -> std::io::Result<String> {
246                let legacy_hash = short_hash(&normalize_project_root(project_root));
247                let legacy_dir = crate::core::data_dir::lean_ctx_data_dir()
248                    .map_err(|_| std::io::Error::new(std::io::ErrorKind::NotFound, "no data dir"))?
249                    .join("graphs")
250                    .join(legacy_hash);
251                let legacy_path = legacy_dir.join("index.json");
252                let data = std::fs::read_to_string(&legacy_path)?;
253                if let Err(e) = copy_dir_fallible(&legacy_dir, &dir) {
254                    tracing::debug!("graph index migration: {e}");
255                }
256                Ok(data)
257            })
258            .ok()?;
259        let index: Self = serde_json::from_str(&content).ok()?;
260        if index.version != INDEX_VERSION {
261            return None;
262        }
263        // Auto-migrate: compress legacy JSON to zstd
264        if let Ok(compressed) = zstd::encode_all(content.as_bytes(), 9) {
265            let zst_tmp = zst_path.with_extension("zst.tmp");
266            if std::fs::write(&zst_tmp, &compressed).is_ok()
267                && std::fs::rename(&zst_tmp, &zst_path).is_ok()
268            {
269                let _ = std::fs::remove_file(&json_path);
270            }
271        }
272        Some(index)
273    }
274
275    pub fn save(&self) -> Result<(), String> {
276        let dir = Self::index_dir(&self.project_root)
277            .ok_or_else(|| "Cannot determine data directory".to_string())?;
278        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
279        let json = serde_json::to_string(self).map_err(|e| e.to_string())?;
280        let compressed = zstd::encode_all(json.as_bytes(), 9).map_err(|e| format!("zstd: {e}"))?;
281        let target = dir.join("index.json.zst");
282        let tmp = target.with_extension("zst.tmp");
283        std::fs::write(&tmp, &compressed).map_err(|e| e.to_string())?;
284        std::fs::rename(&tmp, &target).map_err(|e| e.to_string())?;
285        let _ = std::fs::remove_file(dir.join("index.json"));
286        Ok(())
287    }
288
289    /// Remove all cached graph indices that are older than max_age_hours.
290    /// Called on startup/update to prevent stale data from persisting.
291    pub fn purge_stale_indices() {
292        let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
293            return;
294        };
295        let graphs_dir = data_dir.join("graphs");
296        let Ok(entries) = std::fs::read_dir(&graphs_dir) else {
297            return;
298        };
299        let cfg = crate::core::config::Config::load();
300        let max_age_secs = cfg.archive_max_age_hours_effective() * 3600;
301
302        for entry in entries.filter_map(Result::ok) {
303            let path = entry.path();
304            if !path.is_dir() {
305                continue;
306            }
307            let zst = path.join("index.json.zst");
308            let json = path.join("index.json");
309            let index_file = if zst.exists() {
310                &zst
311            } else if json.exists() {
312                &json
313            } else {
314                continue;
315            };
316
317            let is_old = index_file
318                .metadata()
319                .and_then(|m| m.modified())
320                .is_ok_and(|mtime| {
321                    mtime
322                        .elapsed()
323                        .is_ok_and(|age| age.as_secs() > max_age_secs)
324                });
325
326            if is_old {
327                tracing::info!("[graph_index: purging stale index at {}]", path.display());
328                let _ = std::fs::remove_dir_all(&path);
329            }
330        }
331    }
332
333    pub fn file_count(&self) -> usize {
334        self.files.len()
335    }
336
337    pub fn symbol_count(&self) -> usize {
338        self.symbols.len()
339    }
340
341    pub fn edge_count(&self) -> usize {
342        self.edges.len()
343    }
344
345    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
346        self.symbols.get(key)
347    }
348
349    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
350        let mut result = Vec::new();
351        let mut visited = std::collections::HashSet::new();
352        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
353
354        while let Some((current, d)) = queue.pop() {
355            if d > depth || visited.contains(&current) {
356                continue;
357            }
358            visited.insert(current.clone());
359            if current != path {
360                result.push(current.clone());
361            }
362
363            for edge in &self.edges {
364                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
365                    queue.push((edge.from.clone(), d + 1));
366                }
367            }
368        }
369        result
370    }
371
372    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
373        let mut result = Vec::new();
374        let mut visited = std::collections::HashSet::new();
375        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
376
377        while let Some((current, d)) = queue.pop() {
378            if d > depth || visited.contains(&current) {
379                continue;
380            }
381            visited.insert(current.clone());
382            if current != path {
383                result.push(current.clone());
384            }
385
386            for edge in &self.edges {
387                if edge.from == current && !visited.contains(&edge.to) {
388                    queue.push((edge.to.clone(), d + 1));
389                }
390                if edge.to == current && !visited.contains(&edge.from) {
391                    queue.push((edge.from.clone(), d + 1));
392                }
393            }
394        }
395        result
396    }
397}
398
399/// Load the best available graph index, trying multiple root path variants.
400/// If no valid index exists, automatically scans the project to build one.
401/// This is the primary entry point — ensures zero-config usage.
402pub fn load_or_build(project_root: &str) -> ProjectIndex {
403    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
404        return ProjectIndex::load(project_root).unwrap_or_else(|| ProjectIndex::new(project_root));
405    }
406
407    // Prefer stable absolute roots. Using "." as a cache key is fragile because
408    // it depends on the process cwd and can accidentally load the wrong project.
409    let root_abs = if project_root.trim().is_empty() || project_root == "." {
410        std::env::current_dir().ok().map_or_else(
411            || ".".to_string(),
412            |p| normalize_project_root(&p.to_string_lossy()),
413        )
414    } else {
415        normalize_project_root(project_root)
416    };
417
418    if !is_safe_scan_root(&root_abs) {
419        return ProjectIndex::new(&root_abs);
420    }
421
422    // Try the absolute/root-normalized path first.
423    if let Some(idx) = ProjectIndex::load(&root_abs) {
424        if !idx.files.is_empty() {
425            if index_looks_stale(&idx, &root_abs) {
426                tracing::warn!("[graph_index: stale index detected for {root_abs}; rebuilding]");
427                return scan(&root_abs);
428            }
429            return idx;
430        }
431    }
432
433    // CWD fallback: only use if CWD is a subdirectory of root_abs (same project)
434    if let Ok(cwd) = std::env::current_dir() {
435        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
436        if cwd_str != root_abs && cwd_str.starts_with(&root_abs) {
437            if let Some(idx) = ProjectIndex::load(&cwd_str) {
438                if !idx.files.is_empty() {
439                    if index_looks_stale(&idx, &cwd_str) {
440                        return scan(&cwd_str);
441                    }
442                    return idx;
443                }
444            }
445        }
446    }
447
448    scan(&root_abs)
449}
450
451fn index_looks_stale(index: &ProjectIndex, root_abs: &str) -> bool {
452    if index.files.is_empty() {
453        return true;
454    }
455
456    // TTL check: rebuild if index is older than configured max_age_hours
457    if let Ok(scan_time) =
458        chrono::NaiveDateTime::parse_from_str(&index.last_scan, "%Y-%m-%d %H:%M:%S")
459    {
460        let cfg = crate::core::config::Config::load();
461        let effective_hours = cfg.archive_max_age_hours_effective();
462        let max_age = chrono::Duration::hours(effective_hours as i64);
463        let now = chrono::Local::now().naive_local();
464        if now.signed_duration_since(scan_time) > max_age {
465            tracing::info!(
466                "[graph_index: index is older than {}h — marking stale]",
467                effective_hours
468            );
469            return true;
470        }
471    }
472
473    // Contamination check: if index contains paths from common user directories,
474    // it was built from a too-broad root and must be rebuilt
475    const CONTAMINATION_MARKERS: &[&str] = &[
476        "Desktop/",
477        "Documents/",
478        "Downloads/",
479        "Pictures/",
480        "Music/",
481        "Videos/",
482        "Movies/",
483        "Library/",
484        ".cache/",
485        "snap/",
486    ];
487    let contaminated = index.files.keys().take(200).any(|rel| {
488        CONTAMINATION_MARKERS
489            .iter()
490            .any(|m| rel.starts_with(m) || rel.contains(&format!("/{m}")))
491    });
492    if contaminated {
493        tracing::warn!(
494            "[graph_index: index contains files from user directories (Desktop/Documents/...) — \
495             marking stale to force clean rebuild]"
496        );
497        return true;
498    }
499
500    let root_path = Path::new(root_abs);
501    // Sample up to 20 files for existence check (avoid scanning all files in large indices)
502    let sample_size = index.files.len().min(20);
503    for rel in index.files.keys().take(sample_size) {
504        let rel = rel.trim_start_matches(['/', '\\']);
505        if rel.is_empty() {
506            continue;
507        }
508        let abs = root_path.join(rel);
509        if !abs.exists() {
510            return true;
511        }
512    }
513
514    false
515}
516
517pub fn scan(project_root: &str) -> ProjectIndex {
518    scan_inner(project_root).0
519}
520
521pub fn scan_with_content_cache(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
522    scan_inner(project_root)
523}
524
525fn scan_inner(project_root: &str) -> (ProjectIndex, HashMap<String, String>) {
526    if std::env::var("LEAN_CTX_NO_INDEX").is_ok() {
527        tracing::info!("[graph_index: LEAN_CTX_NO_INDEX set — skipping scan]");
528        return (ProjectIndex::new(project_root), HashMap::new());
529    }
530
531    let project_root = normalize_project_root(project_root);
532
533    if !is_safe_scan_root(&project_root) {
534        tracing::warn!("[graph_index: scan aborted for unsafe root {project_root}]");
535        return (ProjectIndex::new(&project_root), HashMap::new());
536    }
537
538    let lock_name = format!(
539        "graph-idx-{}",
540        &crate::core::index_namespace::namespace_hash(Path::new(&project_root))[..8]
541    );
542    let _lock = crate::core::startup_guard::try_acquire_lock(
543        &lock_name,
544        std::time::Duration::from_millis(800),
545        std::time::Duration::from_mins(3),
546    );
547    if _lock.is_none() {
548        tracing::info!(
549            "[graph_index: another process is scanning {project_root} — returning cached or empty]"
550        );
551        return (
552            ProjectIndex::load(&project_root).unwrap_or_else(|| ProjectIndex::new(&project_root)),
553            HashMap::new(),
554        );
555    }
556
557    let existing = ProjectIndex::load(&project_root);
558    let mut index = ProjectIndex::new(&project_root);
559
560    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
561        if let Some(ref prev) = existing {
562            prev.files
563                .iter()
564                .map(|(path, entry)| {
565                    let syms: Vec<(String, SymbolEntry)> = prev
566                        .symbols
567                        .iter()
568                        .filter(|(_, s)| s.file == *path)
569                        .map(|(k, v)| (k.clone(), v.clone()))
570                        .collect();
571                    (path.clone(), (entry.hash.clone(), syms))
572                })
573                .collect()
574        } else {
575            HashMap::new()
576        };
577
578    let walker = ignore::WalkBuilder::new(&project_root)
579        .hidden(true)
580        .git_ignore(true)
581        .git_global(true)
582        .git_exclude(true)
583        .max_depth(Some(20))
584        .filter_entry(crate::core::cloud_files::keep_entry)
585        .build();
586
587    let cfg = crate::core::config::Config::load();
588    let extra_ignores: Vec<glob::Pattern> = cfg
589        .extra_ignore_patterns
590        .iter()
591        .filter_map(|p| glob::Pattern::new(p).ok())
592        .collect();
593
594    let mut scanned = 0usize;
595    let mut reused = 0usize;
596    let mut entries_visited = 0usize;
597    let mut content_cache: HashMap<String, String> = HashMap::new();
598    let max_files = if cfg.graph_index_max_files == 0 {
599        usize::MAX // unlimited
600    } else {
601        cfg.graph_index_max_files as usize
602    };
603    const MAX_ENTRIES_VISITED: usize = 500_000;
604    const MAX_FILE_SIZE_BYTES: u64 = 2 * 1024 * 1024; // 2 MB per file
605    let scan_deadline = std::time::Instant::now() + std::time::Duration::from_mins(5);
606
607    for entry in walker.filter_map(std::result::Result::ok) {
608        entries_visited += 1;
609        if entries_visited > MAX_ENTRIES_VISITED {
610            tracing::warn!(
611                "[graph_index: walked {entries_visited} entries — aborting scan to prevent \
612                 runaway traversal. Indexed {} files so far.]",
613                index.files.len()
614            );
615            break;
616        }
617        if entries_visited.is_multiple_of(5000) {
618            if std::time::Instant::now() > scan_deadline {
619                tracing::warn!(
620                    "[graph_index: scan timeout (120s) after {entries_visited} entries — \
621                     saving partial index with {} files]",
622                    index.files.len()
623                );
624                break;
625            }
626            if crate::core::memory_guard::abort_requested() {
627                tracing::warn!(
628                    "[graph_index: memory pressure abort after {entries_visited} entries — \
629                     saving partial index with {} files]",
630                    index.files.len()
631                );
632                break;
633            }
634            if crate::core::memory_guard::is_under_pressure() {
635                tracing::warn!(
636                    "[graph_index: memory pressure detected at {entries_visited} entries — \
637                     stopping scan with {} files]",
638                    index.files.len()
639                );
640                break;
641            }
642            if let Some(ref g) = _lock {
643                g.touch();
644            }
645        }
646
647        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
648            continue;
649        }
650
651        if entry.path_is_symlink() {
652            continue;
653        }
654        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
655
656        if !std::path::Path::new(&file_path).starts_with(std::path::Path::new(&project_root)) {
657            continue;
658        }
659
660        if let Ok(meta) = std::fs::symlink_metadata(&file_path) {
661            if meta.file_type().is_symlink() || !meta.is_file() {
662                continue;
663            }
664            if meta.len() > MAX_FILE_SIZE_BYTES {
665                tracing::debug!(
666                    "[graph_index: skipping {file_path} — {:.1}MB exceeds {}MB limit]",
667                    meta.len() as f64 / 1_048_576.0,
668                    MAX_FILE_SIZE_BYTES / (1024 * 1024),
669                );
670                continue;
671            }
672        }
673
674        let ext = Path::new(&file_path)
675            .extension()
676            .and_then(|e| e.to_str())
677            .unwrap_or("");
678
679        if !is_indexable_ext(ext) {
680            continue;
681        }
682
683        let rel = make_relative(&file_path, &project_root);
684        if extra_ignores.iter().any(|p| p.matches(&rel)) {
685            continue;
686        }
687
688        if max_files != usize::MAX && index.files.len() >= max_files {
689            tracing::info!(
690                "[graph_index: reached configured limit of {} files. Set graph_index_max_files = 0 for unlimited.]",
691                max_files
692            );
693            break;
694        }
695
696        let Ok(content) = std::fs::read_to_string(&file_path) else {
697            continue;
698        };
699
700        let hash = compute_hash(&content);
701        let rel_path = make_relative(&file_path, &project_root);
702
703        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
704            if *old_hash == hash {
705                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
706                    index.files.insert(rel_path.clone(), old_entry.clone());
707                    for (key, sym) in old_syms {
708                        index.symbols.insert(key.clone(), sym.clone());
709                    }
710                    content_cache.insert(rel_path, content);
711                    reused += 1;
712                    continue;
713                }
714            }
715        }
716
717        let sigs = signatures::extract_signatures(&content, ext);
718        let line_count = content.lines().count();
719        let token_count = crate::core::tokens::count_tokens(&content);
720        let summary = extract_summary(&content);
721
722        let exports: Vec<String> = sigs
723            .iter()
724            .filter(|s| s.is_exported)
725            .map(|s| s.name.clone())
726            .collect();
727
728        index.files.insert(
729            rel_path.clone(),
730            FileEntry {
731                path: rel_path.clone(),
732                hash,
733                language: ext.to_string(),
734                line_count,
735                token_count,
736                exports,
737                summary,
738            },
739        );
740
741        for sig in &sigs {
742            let (start, end) = sig
743                .start_line
744                .zip(sig.end_line)
745                .unwrap_or_else(|| find_symbol_range(&content, sig));
746            let key = format!("{}::{}", rel_path, sig.name);
747            index.symbols.insert(
748                key,
749                SymbolEntry {
750                    file: rel_path.clone(),
751                    name: sig.name.clone(),
752                    kind: sig.kind.to_string(),
753                    start_line: start,
754                    end_line: end,
755                    is_exported: sig.is_exported,
756                },
757            );
758        }
759
760        content_cache.insert(rel_path, content);
761        scanned += 1;
762    }
763
764    build_edges_cached(&mut index, &content_cache);
765
766    if let Err(e) = index.save() {
767        tracing::warn!("could not save graph index: {e}");
768    }
769
770    tracing::warn!(
771        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
772        index.file_count(),
773        scanned,
774        reused,
775        index.symbol_count(),
776        index.edge_count()
777    );
778
779    (index, content_cache)
780}
781
782fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
783    let lines: Vec<&str> = content.lines().collect();
784    let mut start = 0;
785
786    for (i, line) in lines.iter().enumerate() {
787        if line.contains(&sig.name) {
788            let trimmed = line.trim();
789            let is_def = trimmed.starts_with("fn ")
790                || trimmed.starts_with("pub fn ")
791                || trimmed.starts_with("pub(crate) fn ")
792                || trimmed.starts_with("async fn ")
793                || trimmed.starts_with("pub async fn ")
794                || trimmed.starts_with("struct ")
795                || trimmed.starts_with("pub struct ")
796                || trimmed.starts_with("enum ")
797                || trimmed.starts_with("pub enum ")
798                || trimmed.starts_with("trait ")
799                || trimmed.starts_with("pub trait ")
800                || trimmed.starts_with("impl ")
801                || trimmed.starts_with("class ")
802                || trimmed.starts_with("export class ")
803                || trimmed.starts_with("export function ")
804                || trimmed.starts_with("export async function ")
805                || trimmed.starts_with("function ")
806                || trimmed.starts_with("async function ")
807                || trimmed.starts_with("def ")
808                || trimmed.starts_with("async def ")
809                || trimmed.starts_with("func ")
810                || trimmed.starts_with("interface ")
811                || trimmed.starts_with("export interface ")
812                || trimmed.starts_with("type ")
813                || trimmed.starts_with("export type ")
814                || trimmed.starts_with("const ")
815                || trimmed.starts_with("export const ")
816                || trimmed.starts_with("fun ")
817                || trimmed.starts_with("private fun ")
818                || trimmed.starts_with("public fun ")
819                || trimmed.starts_with("internal fun ")
820                || trimmed.starts_with("class ")
821                || trimmed.starts_with("data class ")
822                || trimmed.starts_with("sealed class ")
823                || trimmed.starts_with("sealed interface ")
824                || trimmed.starts_with("enum class ")
825                || trimmed.starts_with("object ")
826                || trimmed.starts_with("private object ")
827                || trimmed.starts_with("interface ")
828                || trimmed.starts_with("typealias ")
829                || trimmed.starts_with("private typealias ");
830            if is_def {
831                start = i + 1;
832                break;
833            }
834        }
835    }
836
837    if start == 0 {
838        return (1, lines.len().min(20));
839    }
840
841    let base_indent = lines
842        .get(start - 1)
843        .map_or(0, |l| l.len() - l.trim_start().len());
844
845    let mut end = start;
846    let mut brace_depth: i32 = 0;
847    let mut found_open = false;
848
849    for (i, line) in lines.iter().enumerate().skip(start - 1) {
850        for ch in line.chars() {
851            if ch == '{' {
852                brace_depth += 1;
853                found_open = true;
854            } else if ch == '}' {
855                brace_depth -= 1;
856            }
857        }
858
859        end = i + 1;
860
861        if found_open && brace_depth <= 0 {
862            break;
863        }
864
865        if !found_open && i > start {
866            let indent = line.len() - line.trim_start().len();
867            if indent <= base_indent && !line.trim().is_empty() && i > start {
868                end = i;
869                break;
870            }
871        }
872
873        if end - start > 200 {
874            break;
875        }
876    }
877
878    (start, end)
879}
880
881fn extract_summary(content: &str) -> String {
882    for line in content.lines().take(20) {
883        let trimmed = line.trim();
884        if trimmed.is_empty()
885            || trimmed.starts_with("//")
886            || trimmed.starts_with('#')
887            || trimmed.starts_with("/*")
888            || trimmed.starts_with('*')
889            || trimmed.starts_with("use ")
890            || trimmed.starts_with("import ")
891            || trimmed.starts_with("from ")
892            || trimmed.starts_with("require(")
893            || trimmed.starts_with("package ")
894        {
895            continue;
896        }
897        return trimmed.chars().take(120).collect();
898    }
899    String::new()
900}
901
/// Fingerprints `content` as a 16-digit, zero-padded, lower-case hex string.
///
/// The digest is the full 64-bit output of the standard library's
/// `DefaultHasher`.
// NOTE(review): the std docs leave `DefaultHasher`'s algorithm unspecified
// across Rust releases, so values stored by one toolchain may not match those
// produced by another — confirm whether these hashes are persisted.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = {
        let mut state = DefaultHasher::new();
        content.hash(&mut state);
        state.finish()
    };
    format!("{digest:016x}")
}
910
/// Produces an 8-digit, zero-padded, lower-case hex tag for `input`.
///
/// The tag is the low 32 bits of the standard library's `DefaultHasher`
/// digest of `input`.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Mask that keeps only the least-significant 32 bits of the 64-bit digest.
    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = DefaultHasher::new();
    input.hash(&mut state);
    let truncated = state.finish() & LOW_32_BITS;
    format!("{truncated:08x}")
}
919
/// Recursively copies the directory tree at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Every entry of `src` is
/// then copied under the same file name: directories are descended into
/// recursively, everything else is copied with `std::fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered — while creating `dst`, listing
/// `src`, reading an individual directory entry, or copying a file. Entries
/// copied before the failure are left in place.
fn copy_dir_fallible(src: &std::path::Path, dst: &std::path::Path) -> Result<(), std::io::Error> {
    std::fs::create_dir_all(dst)?;
    for entry in std::fs::read_dir(src)? {
        // Propagate per-entry read failures instead of silently skipping them;
        // otherwise an incomplete copy would be reported as success.
        let entry = entry?;
        let from = entry.path();
        let to = dst.join(entry.file_name());
        // `Path::is_dir` follows symlinks, so a link to a directory is copied as a tree.
        if from.is_dir() {
            copy_dir_fallible(&from, &to)?;
        } else {
            std::fs::copy(&from, &to)?;
        }
    }
    Ok(())
}
933
934fn normalize_absolute_path(path: &str) -> String {
935    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
936        return canon.to_string_lossy().to_string();
937    }
938
939    let mut normalized = path.to_string();
940    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
941        normalized.truncate(normalized.len() - 2);
942    }
943    while normalized.len() > 1
944        && (normalized.ends_with('\\') || normalized.ends_with('/'))
945        && !normalized.ends_with(":\\")
946        && !normalized.ends_with(":/")
947        && normalized != "\\"
948        && normalized != "/"
949    {
950        normalized.pop();
951    }
952    normalized
953}
954
/// Normalizes a project-root path string.
///
/// Delegates directly to `normalize_absolute_path` and returns its result
/// unchanged; no project-specific processing is applied here.
955pub fn normalize_project_root(path: &str) -> String {
956    normalize_absolute_path(path)
957}
958
959pub fn graph_match_key(path: &str) -> String {
960    let stripped =
961        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
962    stripped.trim_start_matches('/').to_string()
963}
964
965pub fn graph_relative_key(path: &str, root: &str) -> String {
966    let root_norm = normalize_project_root(root);
967    let path_norm = normalize_absolute_path(path);
968    let root_path = Path::new(&root_norm);
969    let path_path = Path::new(&path_norm);
970
971    if let Ok(rel) = path_path.strip_prefix(root_path) {
972        let rel = rel.to_string_lossy().to_string();
973        return rel.trim_start_matches(['/', '\\']).to_string();
974    }
975
976    path.trim_start_matches(['/', '\\'])
977        .replace('/', std::path::MAIN_SEPARATOR_STR)
978}
979
/// Module-private alias for `graph_relative_key`.
///
/// Forwards `path` and `root` unchanged and returns the resulting key.
980fn make_relative(path: &str, root: &str) -> String {
981    graph_relative_key(path, root)
982}
983
/// Reports whether files with extension `ext` should be indexed.
///
/// The decision is made entirely by
/// `crate::core::language_capabilities::is_indexable_ext`; this function only
/// forwards the call.
984fn is_indexable_ext(ext: &str) -> bool {
985    crate::core::language_capabilities::is_indexable_ext(ext)
986}
987
/// Test helper: extracts the package name from Kotlin-style source text.
///
/// Returns the text following the first line that (after trimming) starts
/// with `package `, with surrounding whitespace and any trailing `;`
/// characters removed. Returns `None` when no such line exists.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            return Some(declared.trim().trim_end_matches(';').to_owned());
        }
    }
    None
}