// Source: lean_ctx/core/graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
/// Schema version stamped into every index created by `ProjectIndex::new`.
/// `ProjectIndex::load` rejects a persisted index whose `version` differs.
const INDEX_VERSION: u32 = 6;
10
11#[derive(Debug, Serialize, Deserialize)]
12pub struct ProjectIndex {
13    pub version: u32,
14    pub project_root: String,
15    pub last_scan: String,
16    pub files: HashMap<String, FileEntry>,
17    pub edges: Vec<IndexEdge>,
18    pub symbols: HashMap<String, SymbolEntry>,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct FileEntry {
23    pub path: String,
24    pub hash: String,
25    pub language: String,
26    pub line_count: usize,
27    pub token_count: usize,
28    pub exports: Vec<String>,
29    pub summary: String,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct SymbolEntry {
34    pub file: String,
35    pub name: String,
36    pub kind: String,
37    pub start_line: usize,
38    pub end_line: usize,
39    pub is_exported: bool,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct IndexEdge {
44    pub from: String,
45    pub to: String,
46    pub kind: String,
47}
48
49impl ProjectIndex {
50    pub fn new(project_root: &str) -> Self {
51        Self {
52            version: INDEX_VERSION,
53            project_root: normalize_project_root(project_root),
54            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
55            files: HashMap::new(),
56            edges: Vec::new(),
57            symbols: HashMap::new(),
58        }
59    }
60
61    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
62        let hash = short_hash(&normalize_project_root(project_root));
63        crate::core::data_dir::lean_ctx_data_dir()
64            .ok()
65            .map(|d| d.join("graphs").join(hash))
66    }
67
68    pub fn load(project_root: &str) -> Option<Self> {
69        let dir = Self::index_dir(project_root)?;
70        let path = dir.join("index.json");
71        let content = std::fs::read_to_string(path).ok()?;
72        let index: Self = serde_json::from_str(&content).ok()?;
73        if index.version != INDEX_VERSION {
74            return None;
75        }
76        Some(index)
77    }
78
79    pub fn save(&self) -> Result<(), String> {
80        let dir = Self::index_dir(&self.project_root)
81            .ok_or_else(|| "Cannot determine data directory".to_string())?;
82        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
83        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
84        std::fs::write(dir.join("index.json"), json).map_err(|e| e.to_string())
85    }
86
87    pub fn file_count(&self) -> usize {
88        self.files.len()
89    }
90
91    pub fn symbol_count(&self) -> usize {
92        self.symbols.len()
93    }
94
95    pub fn edge_count(&self) -> usize {
96        self.edges.len()
97    }
98
99    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
100        self.symbols.get(key)
101    }
102
103    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
104        let mut result = Vec::new();
105        let mut visited = std::collections::HashSet::new();
106        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
107
108        while let Some((current, d)) = queue.pop() {
109            if d > depth || visited.contains(&current) {
110                continue;
111            }
112            visited.insert(current.clone());
113            if current != path {
114                result.push(current.clone());
115            }
116
117            for edge in &self.edges {
118                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
119                    queue.push((edge.from.clone(), d + 1));
120                }
121            }
122        }
123        result
124    }
125
126    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
127        let mut result = Vec::new();
128        let mut visited = std::collections::HashSet::new();
129        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
130
131        while let Some((current, d)) = queue.pop() {
132            if d > depth || visited.contains(&current) {
133                continue;
134            }
135            visited.insert(current.clone());
136            if current != path {
137                result.push(current.clone());
138            }
139
140            for edge in &self.edges {
141                if edge.from == current && !visited.contains(&edge.to) {
142                    queue.push((edge.to.clone(), d + 1));
143                }
144                if edge.to == current && !visited.contains(&edge.from) {
145                    queue.push((edge.from.clone(), d + 1));
146                }
147            }
148        }
149        result
150    }
151}
152
153/// Load the best available graph index, trying multiple root path variants.
154/// If no valid index exists, automatically scans the project to build one.
155/// This is the primary entry point — ensures zero-config usage.
156pub fn load_or_build(project_root: &str) -> ProjectIndex {
157    // Prefer stable absolute roots. Using "." as a cache key is fragile because
158    // it depends on the process cwd and can accidentally load the wrong project.
159    let root_abs = if project_root.trim().is_empty() || project_root == "." {
160        std::env::current_dir().ok().map_or_else(
161            || ".".to_string(),
162            |p| normalize_project_root(&p.to_string_lossy()),
163        )
164    } else {
165        normalize_project_root(project_root)
166    };
167
168    // Try the absolute/root-normalized path first.
169    if let Some(idx) = ProjectIndex::load(&root_abs) {
170        if !idx.files.is_empty() {
171            return idx;
172        }
173    }
174
175    // Legacy: older builds may have cached the index under ".". Only accept it if it
176    // actually refers to the current cwd project, then migrate it to `root_abs`.
177    if let Some(idx) = ProjectIndex::load(".") {
178        if !idx.files.is_empty() {
179            let mut migrated = idx;
180            migrated.project_root.clone_from(&root_abs);
181            let _ = migrated.save();
182            return migrated;
183        }
184    }
185
186    // Try absolute cwd
187    if let Ok(cwd) = std::env::current_dir() {
188        let cwd_str = normalize_project_root(&cwd.to_string_lossy());
189        if cwd_str != root_abs {
190            if let Some(idx) = ProjectIndex::load(&cwd_str) {
191                if !idx.files.is_empty() {
192                    return idx;
193                }
194            }
195        }
196    }
197
198    // No existing index found anywhere — auto-build
199    scan(&root_abs)
200}
201
202pub fn scan(project_root: &str) -> ProjectIndex {
203    let project_root = normalize_project_root(project_root);
204    let existing = ProjectIndex::load(&project_root);
205    let mut index = ProjectIndex::new(&project_root);
206
207    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
208        if let Some(ref prev) = existing {
209            prev.files
210                .iter()
211                .map(|(path, entry)| {
212                    let syms: Vec<(String, SymbolEntry)> = prev
213                        .symbols
214                        .iter()
215                        .filter(|(_, s)| s.file == *path)
216                        .map(|(k, v)| (k.clone(), v.clone()))
217                        .collect();
218                    (path.clone(), (entry.hash.clone(), syms))
219                })
220                .collect()
221        } else {
222            HashMap::new()
223        };
224
225    let walker = ignore::WalkBuilder::new(&project_root)
226        .hidden(true)
227        .git_ignore(true)
228        .git_global(true)
229        .git_exclude(true)
230        .max_depth(Some(10))
231        .build();
232
233    let cfg = crate::core::config::Config::load();
234    let extra_ignores: Vec<glob::Pattern> = cfg
235        .extra_ignore_patterns
236        .iter()
237        .filter_map(|p| glob::Pattern::new(p).ok())
238        .collect();
239
240    let mut scanned = 0usize;
241    let mut reused = 0usize;
242    let max_files = 2000;
243
244    for entry in walker.filter_map(std::result::Result::ok) {
245        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
246            continue;
247        }
248        let file_path = normalize_absolute_path(&entry.path().to_string_lossy());
249        let ext = Path::new(&file_path)
250            .extension()
251            .and_then(|e| e.to_str())
252            .unwrap_or("");
253
254        if !is_indexable_ext(ext) {
255            continue;
256        }
257
258        let rel = make_relative(&file_path, &project_root);
259        if extra_ignores.iter().any(|p| p.matches(&rel)) {
260            continue;
261        }
262
263        if index.files.len() >= max_files {
264            break;
265        }
266
267        let Ok(content) = std::fs::read_to_string(&file_path) else {
268            continue;
269        };
270
271        let hash = compute_hash(&content);
272        let rel_path = make_relative(&file_path, &project_root);
273
274        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
275            if *old_hash == hash {
276                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
277                    index.files.insert(rel_path.clone(), old_entry.clone());
278                    for (key, sym) in old_syms {
279                        index.symbols.insert(key.clone(), sym.clone());
280                    }
281                    reused += 1;
282                    continue;
283                }
284            }
285        }
286
287        let sigs = signatures::extract_signatures(&content, ext);
288        let line_count = content.lines().count();
289        let token_count = crate::core::tokens::count_tokens(&content);
290        let summary = extract_summary(&content);
291
292        let exports: Vec<String> = sigs
293            .iter()
294            .filter(|s| s.is_exported)
295            .map(|s| s.name.clone())
296            .collect();
297
298        index.files.insert(
299            rel_path.clone(),
300            FileEntry {
301                path: rel_path.clone(),
302                hash,
303                language: ext.to_string(),
304                line_count,
305                token_count,
306                exports,
307                summary,
308            },
309        );
310
311        for sig in &sigs {
312            let (start, end) = sig
313                .start_line
314                .zip(sig.end_line)
315                .unwrap_or_else(|| find_symbol_range(&content, sig));
316            let key = format!("{}::{}", rel_path, sig.name);
317            index.symbols.insert(
318                key,
319                SymbolEntry {
320                    file: rel_path.clone(),
321                    name: sig.name.clone(),
322                    kind: sig.kind.to_string(),
323                    start_line: start,
324                    end_line: end,
325                    is_exported: sig.is_exported,
326                },
327            );
328        }
329
330        scanned += 1;
331    }
332
333    build_edges(&mut index);
334
335    if let Err(e) = index.save() {
336        tracing::warn!("could not save graph index: {e}");
337    }
338
339    tracing::warn!(
340        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
341        index.file_count(),
342        scanned,
343        reused,
344        index.symbol_count(),
345        index.edge_count()
346    );
347
348    index
349}
350
351fn build_edges(index: &mut ProjectIndex) {
352    index.edges.clear();
353
354    let root = normalize_project_root(&index.project_root);
355    let root_path = Path::new(&root);
356
357    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
358    file_paths.sort();
359
360    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
361
362    for rel_path in &file_paths {
363        let abs_path = root_path.join(rel_path.trim_start_matches(['/', '\\']));
364        let Ok(content) = std::fs::read_to_string(&abs_path) else {
365            continue;
366        };
367
368        let ext = Path::new(rel_path)
369            .extension()
370            .and_then(|e| e.to_str())
371            .unwrap_or("");
372
373        let resolve_ext = match ext {
374            "vue" | "svelte" => "ts",
375            _ => ext,
376        };
377
378        let imports = crate::core::deep_queries::analyze(&content, resolve_ext).imports;
379        if imports.is_empty() {
380            continue;
381        }
382
383        let resolved =
384            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
385        for r in resolved {
386            if r.is_external {
387                continue;
388            }
389            if let Some(to) = r.resolved_path {
390                index.edges.push(IndexEdge {
391                    from: rel_path.clone(),
392                    to,
393                    kind: "import".to_string(),
394                });
395            }
396        }
397    }
398
399    index.edges.sort_by(|a, b| {
400        a.from
401            .cmp(&b.from)
402            .then_with(|| a.to.cmp(&b.to))
403            .then_with(|| a.kind.cmp(&b.kind))
404    });
405    index
406        .edges
407        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
408}
409
410fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
411    let lines: Vec<&str> = content.lines().collect();
412    let mut start = 0;
413
414    for (i, line) in lines.iter().enumerate() {
415        if line.contains(&sig.name) {
416            let trimmed = line.trim();
417            let is_def = trimmed.starts_with("fn ")
418                || trimmed.starts_with("pub fn ")
419                || trimmed.starts_with("pub(crate) fn ")
420                || trimmed.starts_with("async fn ")
421                || trimmed.starts_with("pub async fn ")
422                || trimmed.starts_with("struct ")
423                || trimmed.starts_with("pub struct ")
424                || trimmed.starts_with("enum ")
425                || trimmed.starts_with("pub enum ")
426                || trimmed.starts_with("trait ")
427                || trimmed.starts_with("pub trait ")
428                || trimmed.starts_with("impl ")
429                || trimmed.starts_with("class ")
430                || trimmed.starts_with("export class ")
431                || trimmed.starts_with("export function ")
432                || trimmed.starts_with("export async function ")
433                || trimmed.starts_with("function ")
434                || trimmed.starts_with("async function ")
435                || trimmed.starts_with("def ")
436                || trimmed.starts_with("async def ")
437                || trimmed.starts_with("func ")
438                || trimmed.starts_with("interface ")
439                || trimmed.starts_with("export interface ")
440                || trimmed.starts_with("type ")
441                || trimmed.starts_with("export type ")
442                || trimmed.starts_with("const ")
443                || trimmed.starts_with("export const ")
444                || trimmed.starts_with("fun ")
445                || trimmed.starts_with("private fun ")
446                || trimmed.starts_with("public fun ")
447                || trimmed.starts_with("internal fun ")
448                || trimmed.starts_with("class ")
449                || trimmed.starts_with("data class ")
450                || trimmed.starts_with("sealed class ")
451                || trimmed.starts_with("sealed interface ")
452                || trimmed.starts_with("enum class ")
453                || trimmed.starts_with("object ")
454                || trimmed.starts_with("private object ")
455                || trimmed.starts_with("interface ")
456                || trimmed.starts_with("typealias ")
457                || trimmed.starts_with("private typealias ");
458            if is_def {
459                start = i + 1;
460                break;
461            }
462        }
463    }
464
465    if start == 0 {
466        return (1, lines.len().min(20));
467    }
468
469    let base_indent = lines
470        .get(start - 1)
471        .map_or(0, |l| l.len() - l.trim_start().len());
472
473    let mut end = start;
474    let mut brace_depth: i32 = 0;
475    let mut found_open = false;
476
477    for (i, line) in lines.iter().enumerate().skip(start - 1) {
478        for ch in line.chars() {
479            if ch == '{' {
480                brace_depth += 1;
481                found_open = true;
482            } else if ch == '}' {
483                brace_depth -= 1;
484            }
485        }
486
487        end = i + 1;
488
489        if found_open && brace_depth <= 0 {
490            break;
491        }
492
493        if !found_open && i > start {
494            let indent = line.len() - line.trim_start().len();
495            if indent <= base_indent && !line.trim().is_empty() && i > start {
496                end = i;
497                break;
498            }
499        }
500
501        if end - start > 200 {
502            break;
503        }
504    }
505
506    (start, end)
507}
508
/// Returns a one-line summary of a file: the first of its leading 20 lines
/// that is neither blank, a comment/attribute-looking line, nor an
/// import/package statement — trimmed and cut to 120 characters.
///
/// Returns an empty string when none of the first 20 lines qualifies.
fn extract_summary(content: &str) -> String {
    // Line starts that mark comments, preprocessor/attribute lines and imports.
    const SKIPPED_PREFIXES: &[&str] = &[
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| line.starts_with(*prefix))
        })
        .map(|line| line.chars().take(120).collect::<String>())
        .unwrap_or_default()
}
529
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit result as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = {
        let mut state = DefaultHasher::new();
        content.hash(&mut state);
        state.finish()
    };
    format!("{digest:016x}")
}
538
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// low 32 bits of the result as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    input.hash(&mut state);
    // Keep only the low 32 bits of the 64-bit digest.
    let low_bits = state.finish() & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
547
548fn normalize_absolute_path(path: &str) -> String {
549    if let Ok(canon) = crate::core::pathutil::safe_canonicalize(std::path::Path::new(path)) {
550        return canon.to_string_lossy().to_string();
551    }
552
553    let mut normalized = path.to_string();
554    while normalized.ends_with("\\.") || normalized.ends_with("/.") {
555        normalized.truncate(normalized.len() - 2);
556    }
557    while normalized.len() > 1
558        && (normalized.ends_with('\\') || normalized.ends_with('/'))
559        && !normalized.ends_with(":\\")
560        && !normalized.ends_with(":/")
561        && normalized != "\\"
562        && normalized != "/"
563    {
564        normalized.pop();
565    }
566    normalized
567}
568
/// Normalises a project root path.
///
/// Thin public wrapper around `normalize_absolute_path`; the result is the
/// form stored in `ProjectIndex::project_root` and hashed for the cache key.
pub fn normalize_project_root(path: &str) -> String {
    normalize_absolute_path(path)
}
572
573pub fn graph_match_key(path: &str) -> String {
574    let stripped =
575        crate::core::pathutil::strip_verbatim_str(path).unwrap_or_else(|| path.replace('\\', "/"));
576    stripped.trim_start_matches('/').to_string()
577}
578
579pub fn graph_relative_key(path: &str, root: &str) -> String {
580    let root_norm = normalize_project_root(root);
581    let path_norm = normalize_absolute_path(path);
582    let root_path = Path::new(&root_norm);
583    let path_path = Path::new(&path_norm);
584
585    if let Ok(rel) = path_path.strip_prefix(root_path) {
586        let rel = rel.to_string_lossy().to_string();
587        return rel.trim_start_matches(['/', '\\']).to_string();
588    }
589
590    path.trim_start_matches(['/', '\\'])
591        .replace('/', std::path::MAIN_SEPARATOR_STR)
592}
593
/// Module-internal shorthand for `graph_relative_key`: turns `path` into the
/// project-relative key used for `root`.
fn make_relative(path: &str, root: &str) -> String {
    graph_relative_key(path, root)
}
597
/// Returns whether files with extension `ext` are indexed; delegates to
/// `language_capabilities::is_indexable_ext`.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
601
/// Test-only helper: returns the package named by the first line that, once
/// trimmed, starts with `package `. Surrounding whitespace and trailing
/// semicolons are removed from the name.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw in content.lines() {
        if let Some(rest) = raw.trim().strip_prefix("package ") {
            return Some(rest.trim().trim_end_matches(';').to_string());
        }
    }
    None
}
609
// Unit tests for the hashing, path-normalisation, summary and range helpers
// and for the in-memory behaviour of `ProjectIndex`.
#[cfg(test)]
mod tests {
    use super::*;

    // The cache-directory hash is stable for equal input and 8 characters long.
    #[test]
    fn test_short_hash_deterministic() {
        let h1 = short_hash("/Users/test/project");
        let h2 = short_hash("/Users/test/project");
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 8);
    }

    // `make_relative` must agree with `graph_relative_key` for Unix, relative,
    // Windows and verbatim-prefixed inputs.
    #[test]
    fn test_make_relative() {
        assert_eq!(
            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
            graph_relative_key("/foo/bar/src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("src/main.rs", "/foo/bar"),
            graph_relative_key("src/main.rs", "/foo/bar")
        );
        assert_eq!(
            make_relative("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo"),
            graph_relative_key("C:\\repo\\src\\main\\kotlin\\Example.kt", "C:\\repo")
        );
        assert_eq!(
            make_relative("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo"),
            graph_relative_key("//?/C:/repo/src/main/kotlin/Example.kt", "//?/C:/repo")
        );
    }

    // Trailing separators and a trailing `.` component are removed.
    // NOTE(review): the expected values assume canonicalisation of these
    // paths fails, so the lexical fallback is what is exercised.
    #[test]
    fn test_normalize_project_root() {
        assert_eq!(normalize_project_root("C:\\repo\\"), "C:\\repo");
        assert_eq!(normalize_project_root("C:\\repo\\."), "C:\\repo");
        assert_eq!(normalize_project_root("//?/C:/repo/"), "//?/C:/repo");
    }

    // Backslash, verbatim-prefixed and root-relative Windows forms all map to
    // forward-slash keys without a leading slash.
    #[test]
    fn test_graph_match_key_normalizes_windows_forms() {
        assert_eq!(
            graph_match_key(r"C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(
            graph_match_key(r"\\?\C:\repo\src\main.rs"),
            "C:/repo/src/main.rs"
        );
        assert_eq!(graph_match_key(r"\src\main.rs"), "src/main.rs");
    }

    // Comments, imports and blank lines are skipped when picking the summary.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        let summary = extract_summary(content);
        assert_eq!(summary, "pub fn main() {");
    }

    // Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_compute_hash_deterministic() {
        let h1 = compute_hash("hello world");
        let h2 = compute_hash("hello world");
        assert_eq!(h1, h2);
        assert_ne!(h1, compute_hash("hello world!"));
    }

    // A fresh index carries the current version, the given root and no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    // Both direct importers of `b.rs` are reported at depth 1.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges.push(IndexEdge {
            from: "a.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });
        idx.edges.push(IndexEdge {
            from: "c.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"a.rs".to_string()));
        assert!(deps.contains(&"c.rs".to_string()));
    }

    // The text fallback finds a Kotlin method; the raw string starts with a
    // newline, so `fun greet` sits on line 5.
    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
    fun greet(name: String): String {
        return "hi $name"
    }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };
        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    // When the signature carries its own span, the text fallback is not used.
    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        let (start, end) = sig
            .start_line
            .zip(sig.end_line)
            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
        assert_eq!((start, end), (42, 43));
    }

    // An index written with an older version still deserialises, so the
    // version check has to happen after parsing.
    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    // The package name is taken from the `package` line.
    #[test]
    fn test_kotlin_package_name() {
        let content = "package com.example.feature\n\nclass UserService";
        assert_eq!(
            kotlin_package_name(content).as_deref(),
            Some("com.example.feature")
        );
    }
}