Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
/// On-disk format version of a serialized [`ProjectIndex`].
///
/// `ProjectIndex::load` rejects any cached index whose `version` differs from
/// this value, which forces a fresh scan.
const INDEX_VERSION: u32 = 6;
10
/// Persisted graph index of a project: its files, their symbols, and the
/// import edges between files. Serialized to `index.json` by `save`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Format version; set to `INDEX_VERSION` when the index is created.
    pub version: u32,
    /// Normalized project root this index was built for.
    pub project_root: String,
    /// Local timestamp (`%Y-%m-%d %H:%M:%S`) taken when the index was created.
    pub last_scan: String,
    /// Indexed files, keyed by their path relative to `project_root`.
    pub files: HashMap<String, FileEntry>,
    /// File-to-file edges; `build_edges` keeps them sorted and de-duplicated.
    pub edges: Vec<IndexEdge>,
    /// Symbols keyed by `"<relative path>::<symbol name>"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
20
/// Per-file record stored in [`ProjectIndex::files`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same value as the map key).
    pub path: String,
    /// Content hash produced by `compute_hash`; used to detect unchanged files.
    pub hash: String,
    /// The file extension as found on disk (e.g. `rs`), not a language name.
    pub language: String,
    /// Number of lines in the file (`str::lines` count).
    pub line_count: usize,
    /// Token count as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the signatures flagged as exported.
    pub exports: Vec<String>,
    /// First meaningful line of the file, as chosen by `extract_summary`.
    pub summary: String,
}
31
/// A single symbol (function, type, ...) discovered in an indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Signature kind rendered as a string (e.g. `"method"`).
    pub kind: String,
    /// First line of the symbol; taken from the signature span when available,
    /// otherwise estimated by `find_symbol_range` (1-based there).
    pub start_line: usize,
    /// Last line of the symbol, from the same source as `start_line`.
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
41
/// Directed relationship between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Relative path of the file the edge originates from (the importer).
    pub from: String,
    /// Resolved path of the file being referenced (the imported file).
    pub to: String,
    /// Edge kind; `build_edges` in this file only emits `"import"`.
    pub kind: String,
}
48
49impl ProjectIndex {
50    pub fn new(project_root: &str) -> Self {
51        Self {
52            version: INDEX_VERSION,
53            project_root: normalize_project_root(project_root),
54            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
55            files: HashMap::new(),
56            edges: Vec::new(),
57            symbols: HashMap::new(),
58        }
59    }
60
61    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
62        let hash = short_hash(&normalize_project_root(project_root));
63        crate::core::data_dir::lean_ctx_data_dir()
64            .ok()
65            .map(|d| d.join("graphs").join(hash))
66    }
67
68    pub fn load(project_root: &str) -> Option<Self> {
69        let dir = Self::index_dir(project_root)?;
70        let path = dir.join("index.json");
71        let content = std::fs::read_to_string(path).ok()?;
72        let index: Self = serde_json::from_str(&content).ok()?;
73        if index.version != INDEX_VERSION {
74            return None;
75        }
76        Some(index)
77    }
78
79    pub fn save(&self) -> Result<(), String> {
80        let dir = Self::index_dir(&self.project_root)
81            .ok_or_else(|| "Cannot determine data directory".to_string())?;
82        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
83        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
84        std::fs::write(dir.join("index.json"), json).map_err(|e| e.to_string())
85    }
86
87    pub fn file_count(&self) -> usize {
88        self.files.len()
89    }
90
91    pub fn symbol_count(&self) -> usize {
92        self.symbols.len()
93    }
94
95    pub fn edge_count(&self) -> usize {
96        self.edges.len()
97    }
98
99    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
100        self.symbols.get(key)
101    }
102
103    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
104        let mut result = Vec::new();
105        let mut visited = std::collections::HashSet::new();
106        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
107
108        while let Some((current, d)) = queue.pop() {
109            if d > depth || visited.contains(&current) {
110                continue;
111            }
112            visited.insert(current.clone());
113            if current != path {
114                result.push(current.clone());
115            }
116
117            for edge in &self.edges {
118                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
119                    queue.push((edge.from.clone(), d + 1));
120                }
121            }
122        }
123        result
124    }
125
126    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
127        let mut result = Vec::new();
128        let mut visited = std::collections::HashSet::new();
129        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
130
131        while let Some((current, d)) = queue.pop() {
132            if d > depth || visited.contains(&current) {
133                continue;
134            }
135            visited.insert(current.clone());
136            if current != path {
137                result.push(current.clone());
138            }
139
140            for edge in &self.edges {
141                if edge.from == current && !visited.contains(&edge.to) {
142                    queue.push((edge.to.clone(), d + 1));
143                }
144                if edge.to == current && !visited.contains(&edge.from) {
145                    queue.push((edge.from.clone(), d + 1));
146                }
147            }
148        }
149        result
150    }
151}
152
153/// Load the best available graph index, trying multiple root path variants.
154/// If no valid index exists, automatically scans the project to build one.
155/// This is the primary entry point — ensures zero-config usage.
156pub fn load_or_build(project_root: &str) -> ProjectIndex {
157    // Prefer stable absolute roots. Using "." as a cache key is fragile because
158    // it depends on the process cwd and can accidentally load the wrong project.
159    let root_abs = if project_root.trim().is_empty() || project_root == "." {
160        std::env::current_dir()
161            .ok()
162            .map(|p| normalize_project_root(&p.to_string_lossy()))
163            .unwrap_or_else(|| ".".to_string())
164    } else {
165        normalize_project_root(project_root)
166    };
167
168    // Try the absolute/root-normalized path first.
169    if let Some(idx) = ProjectIndex::load(&root_abs) {
170        if !idx.files.is_empty() {
171            return idx;
172        }
173    }
174
175    // Legacy: older builds may have cached the index under ".". Only accept it if it
176    // actually refers to the current cwd project, then migrate it to `root_abs`.
177    if let Some(idx) = ProjectIndex::load(".") {
178        if !idx.files.is_empty() {
179            let mut migrated = idx;
180            migrated.project_root = root_abs.clone();
181            let _ = migrated.save();
182            return migrated;
183        }
184    }
185
186    // Try absolute cwd
187    if let Ok(cwd) = std::env::current_dir() {
188        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
189        if cwd_str != root_abs {
190            if let Some(idx) = ProjectIndex::load(&cwd_str) {
191                if !idx.files.is_empty() {
192                    return idx;
193                }
194            }
195        }
196    }
197
198    // No existing index found anywhere — auto-build
199    scan(&root_abs)
200}
201
202pub fn scan(project_root: &str) -> ProjectIndex {
203    let project_root = normalize_project_root(project_root);
204    let existing = ProjectIndex::load(&project_root);
205    let mut index = ProjectIndex::new(&project_root);
206
207    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
208        if let Some(ref prev) = existing {
209            prev.files
210                .iter()
211                .map(|(path, entry)| {
212                    let syms: Vec<(String, SymbolEntry)> = prev
213                        .symbols
214                        .iter()
215                        .filter(|(_, s)| s.file == *path)
216                        .map(|(k, v)| (k.clone(), v.clone()))
217                        .collect();
218                    (path.clone(), (entry.hash.clone(), syms))
219                })
220                .collect()
221        } else {
222            HashMap::new()
223        };
224
225    let walker = ignore::WalkBuilder::new(&project_root)
226        .hidden(true)
227        .git_ignore(true)
228        .git_global(true)
229        .git_exclude(true)
230        .max_depth(Some(10))
231        .build();
232
233    let cfg = crate::core::config::Config::load();
234    let extra_ignores: Vec<glob::Pattern> = cfg
235        .extra_ignore_patterns
236        .iter()
237        .filter_map(|p| glob::Pattern::new(p).ok())
238        .collect();
239
240    let mut scanned = 0usize;
241    let mut reused = 0usize;
242    let max_files = 2000;
243
244    for entry in walker.filter_map(|e| e.ok()) {
245        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
246            continue;
247        }
248        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
249        let ext = Path::new(&file_path)
250            .extension()
251            .and_then(|e| e.to_str())
252            .unwrap_or("");
253
254        if !is_indexable_ext(ext) {
255            continue;
256        }
257
258        let rel = make_relative(&file_path, &project_root);
259        if extra_ignores.iter().any(|p| p.matches(&rel)) {
260            continue;
261        }
262
263        if index.files.len() >= max_files {
264            break;
265        }
266
267        let content = match std::fs::read_to_string(&file_path) {
268            Ok(c) => c,
269            Err(_) => continue,
270        };
271
272        let hash = compute_hash(&content);
273        let rel_path = make_relative(&file_path, &project_root);
274
275        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
276            if *old_hash == hash {
277                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
278                    index.files.insert(rel_path.clone(), old_entry.clone());
279                    for (key, sym) in old_syms {
280                        index.symbols.insert(key.clone(), sym.clone());
281                    }
282                    reused += 1;
283                    continue;
284                }
285            }
286        }
287
288        let sigs = signatures::extract_signatures(&content, ext);
289        let line_count = content.lines().count();
290        let token_count = crate::core::tokens::count_tokens(&content);
291        let summary = extract_summary(&content);
292
293        let exports: Vec<String> = sigs
294            .iter()
295            .filter(|s| s.is_exported)
296            .map(|s| s.name.clone())
297            .collect();
298
299        index.files.insert(
300            rel_path.clone(),
301            FileEntry {
302                path: rel_path.clone(),
303                hash,
304                language: ext.to_string(),
305                line_count,
306                token_count,
307                exports,
308                summary,
309            },
310        );
311
312        for sig in &sigs {
313            let (start, end) = sig
314                .start_line
315                .zip(sig.end_line)
316                .unwrap_or_else(|| find_symbol_range(&content, sig));
317            let key = format!("{}::{}", rel_path, sig.name);
318            index.symbols.insert(
319                key,
320                SymbolEntry {
321                    file: rel_path.clone(),
322                    name: sig.name.clone(),
323                    kind: sig.kind.to_string(),
324                    start_line: start,
325                    end_line: end,
326                    is_exported: sig.is_exported,
327                },
328            );
329        }
330
331        scanned += 1;
332    }
333
334    build_edges(&mut index);
335
336    if let Err(e) = index.save() {
337        eprintln!("Warning: could not save graph index: {e}");
338    }
339
340    eprintln!(
341        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
342        index.file_count(),
343        scanned,
344        reused,
345        index.symbol_count(),
346        index.edge_count()
347    );
348
349    index
350}
351
352fn build_edges(index: &mut ProjectIndex) {
353    index.edges.clear();
354
355    let root = normalize_project_root(&index.project_root);
356    let root_path = Path::new(&root);
357
358    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
359    file_paths.sort();
360
361    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
362
363    for rel_path in &file_paths {
364        let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
365        let content = match std::fs::read_to_string(&abs_path) {
366            Ok(c) => c,
367            Err(_) => continue,
368        };
369
370        let ext = Path::new(rel_path)
371            .extension()
372            .and_then(|e| e.to_str())
373            .unwrap_or("");
374
375        let resolve_ext = match ext {
376            "vue" | "svelte" => "ts",
377            _ => ext,
378        };
379
380        let imports = crate::core::deep_queries::analyze(&content, resolve_ext).imports;
381        if imports.is_empty() {
382            continue;
383        }
384
385        let resolved =
386            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
387        for r in resolved {
388            if r.is_external {
389                continue;
390            }
391            if let Some(to) = r.resolved_path {
392                index.edges.push(IndexEdge {
393                    from: rel_path.clone(),
394                    to,
395                    kind: "import".to_string(),
396                });
397            }
398        }
399    }
400
401    index.edges.sort_by(|a, b| {
402        a.from
403            .cmp(&b.from)
404            .then_with(|| a.to.cmp(&b.to))
405            .then_with(|| a.kind.cmp(&b.kind))
406    });
407    index
408        .edges
409        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
410}
411
412fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
413    let lines: Vec<&str> = content.lines().collect();
414    let mut start = 0;
415
416    for (i, line) in lines.iter().enumerate() {
417        if line.contains(&sig.name) {
418            let trimmed = line.trim();
419            let is_def = trimmed.starts_with("fn ")
420                || trimmed.starts_with("pub fn ")
421                || trimmed.starts_with("pub(crate) fn ")
422                || trimmed.starts_with("async fn ")
423                || trimmed.starts_with("pub async fn ")
424                || trimmed.starts_with("struct ")
425                || trimmed.starts_with("pub struct ")
426                || trimmed.starts_with("enum ")
427                || trimmed.starts_with("pub enum ")
428                || trimmed.starts_with("trait ")
429                || trimmed.starts_with("pub trait ")
430                || trimmed.starts_with("impl ")
431                || trimmed.starts_with("class ")
432                || trimmed.starts_with("export class ")
433                || trimmed.starts_with("export function ")
434                || trimmed.starts_with("export async function ")
435                || trimmed.starts_with("function ")
436                || trimmed.starts_with("async function ")
437                || trimmed.starts_with("def ")
438                || trimmed.starts_with("async def ")
439                || trimmed.starts_with("func ")
440                || trimmed.starts_with("interface ")
441                || trimmed.starts_with("export interface ")
442                || trimmed.starts_with("type ")
443                || trimmed.starts_with("export type ")
444                || trimmed.starts_with("const ")
445                || trimmed.starts_with("export const ")
446                || trimmed.starts_with("fun ")
447                || trimmed.starts_with("private fun ")
448                || trimmed.starts_with("public fun ")
449                || trimmed.starts_with("internal fun ")
450                || trimmed.starts_with("class ")
451                || trimmed.starts_with("data class ")
452                || trimmed.starts_with("sealed class ")
453                || trimmed.starts_with("sealed interface ")
454                || trimmed.starts_with("enum class ")
455                || trimmed.starts_with("object ")
456                || trimmed.starts_with("private object ")
457                || trimmed.starts_with("interface ")
458                || trimmed.starts_with("typealias ")
459                || trimmed.starts_with("private typealias ");
460            if is_def {
461                start = i + 1;
462                break;
463            }
464        }
465    }
466
467    if start == 0 {
468        return (1, lines.len().min(20));
469    }
470
471    let base_indent = lines
472        .get(start - 1)
473        .map(|l| l.len() - l.trim_start().len())
474        .unwrap_or(0);
475
476    let mut end = start;
477    let mut brace_depth: i32 = 0;
478    let mut found_open = false;
479
480    for (i, line) in lines.iter().enumerate().skip(start - 1) {
481        for ch in line.chars() {
482            if ch == '{' {
483                brace_depth += 1;
484                found_open = true;
485            } else if ch == '}' {
486                brace_depth -= 1;
487            }
488        }
489
490        end = i + 1;
491
492        if found_open && brace_depth <= 0 {
493            break;
494        }
495
496        if !found_open && i > start {
497            let indent = line.len() - line.trim_start().len();
498            if indent <= base_indent && !line.trim().is_empty() && i > start {
499                end = i;
500                break;
501            }
502        }
503
504        if end - start > 200 {
505            break;
506        }
507    }
508
509    (start, end)
510}
511
512fn extract_summary(content: &str) -> String {
513    for line in content.lines().take(20) {
514        let trimmed = line.trim();
515        if trimmed.is_empty()
516            || trimmed.starts_with("//")
517            || trimmed.starts_with('#')
518            || trimmed.starts_with("/*")
519            || trimmed.starts_with('*')
520            || trimmed.starts_with("use ")
521            || trimmed.starts_with("import ")
522            || trimmed.starts_with("from ")
523            || trimmed.starts_with("require(")
524            || trimmed.starts_with("package ")
525        {
526            continue;
527        }
528        return trimmed.chars().take(120).collect();
529    }
530    String::new()
531}
532
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit digest as 16 lowercase, zero-padded hex digits.
///
/// This is a change-detection fingerprint, not a cryptographic hash.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
541
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// low 32 bits of the digest as 8 lowercase, zero-padded hex digits.
///
/// Used to derive the per-project cache directory name.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    /// Keeps only the least-significant 32 bits of the 64-bit digest.
    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let truncated = state.finish() & LOW_32_BITS;
    format!("{truncated:08x}")
}
550
551fn normalize_absolute_path(path: &str) -> String {
552    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
553        return canon.to_string_lossy().to_string();
554    }
555
556    let mut normalized = path.to_string();
557    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
558        normalized.truncate(normalized.len() - 2);
559    }
560    while normalized.len() > 1
561        && (normalized.ends_with('\\') || normalized.ends_with('/'))
562        && !normalized.ends_with(":\\")
563        && !normalized.ends_with(":/")
564        && normalized != "\\"
565        && normalized != "/"
566    {
567        normalized.pop();
568    }
569    normalized
570}
571
/// Normalizes a project root so it can be used as a stable cache key.
///
/// Delegates to `normalize_absolute_path`; project roots and file paths share
/// the same normalization rules.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
575
576pub fn graph_match_key(path: &str) -> String {
577    let stripped =
578        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
579    stripped.trim_start_matches('/').to_string()
580}
581
582pub fn graph_relative_key(path: &str, root: &str) -> String {
583    let root_norm = normalize_project_root(root);
584    let path_norm = normalize_absolute_path(path);
585    let root_path = Path::new(&root_norm);
586    let path_path = Path::new(&path_norm);
587
588    if let Ok(rel) = path_path.strip_prefix(root_path) {
589        let rel = rel.to_string_lossy().to_string();
590        return rel.trim_start_matches(['/', '\\']).to_string();
591    }
592
593    path.trim_start_matches(['/', '\\'])
594        .replace('/', std::path::MAIN_SEPARATOR_STR)
595}
596
/// Module-internal shorthand for `graph_relative_key`: `path` relative to `root`.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
600
/// Whether files with extension `ext` (given without the dot) are indexed;
/// the decision is delegated to `language_capabilities::is_indexable_ext`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
604
/// Test-only helper: returns the name declared by the first `package ` line of
/// `content`, with surrounding whitespace and trailing `;` characters removed.
/// Returns `None` when no line (after trimming) starts with `package `.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(declared) = raw_line.trim().strip_prefix("package ") {
            return Some(declared.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
612
// Unit tests for the pure helpers of this module. None of them triggers a
// project scan; path-related expectations are written for inputs that do not
// exist on disk.
#[cfg(test)]
mod tests {
    use super::*;

    // The cache-directory hash must be stable and exactly 8 characters long.
    #[test]
    fn test_short_hash_deterministic() {
        let h1 = short_hash("/Users/test/project");
        let h2 = short_hash("/Users/test/project");
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 8);
    }

    // `make_relative` must stay a pure alias of `graph_relative_key` for Unix,
    // relative, Windows, and verbatim-prefixed inputs.
    #[test]
    fn test_make_relative() {
        assert_eq!(
            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
            graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("src/main.rs", "/foo/bar"),
            graph_relative_key("src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
        );
        assert_eq!(
            make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
            graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
        );
    }

    // Trailing separators and trailing `.` segments are stripped from roots.
    #[test]
    fn test_normalize_project_root() {
        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
    }

    // Backslash, verbatim-prefixed, and root-relative Windows paths all map to
    // the same forward-slash key form.
    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        assert_eq!(
            graph_match_key(r"C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(
            graph_match_key(r"\\?\C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
    }

    // Comments, imports, and blank lines are skipped when picking the summary.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        let summary = extract_summary(content);
        assert_eq!(summary, "pub fn main() {");
    }

    // Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_compute_hash_deterministic() {
        let h1 = compute_hash("hello world");
        let h2 = compute_hash("hello world");
        assert_eq!(h1, h2);
        assert_ne!(h1, compute_hash("hello world!"));
    }

    // A fresh index carries the current version, the given root, and no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    // Both direct importers of `b.rs` are reported at depth 1.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges.push(IndexEdge {
            from: "a.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });
        idx.edges.push(IndexEdge {
            from: "c.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"a.rs".to_string()));
        assert!(deps.contains(&"c.rs".to_string()));
    }

    // The textual fallback locates a Kotlin `fun` nested inside a class; the
    // raw string starts with a newline, so `fun greet` is on line 5.
    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
    fun greet(name: String): String {
        return "hi $name"
    }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };
        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    // When the signature carries its own span, the span is used and the
    // textual fallback is never consulted (same expression as in `scan`).
    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        let (start, end) = sig
            .start_line
            .zip(sig.end_line)
            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
        assert_eq!((start, end), (42, 43));
    }

    // An index written with an older version still deserializes; rejecting it
    // is the job of the version check in `ProjectIndex::load`.
    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    // The package declaration is extracted from the first `package` line.
    #[test]
    fn test_kotlin_package_name() {
        let content = "package com.example.feature\n\nclass UserService";
        assert_eq!(
            kotlin_package_name(content).as_deref(),
            Some("com.example.feature")
        );
    }
}