Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::import_resolver;
7use crate::core::signatures;
8
/// On-disk schema version. `ProjectIndex::load` returns `None` for any stored
/// index whose `version` differs from this value.
const INDEX_VERSION: u32 = 2;
10
/// Serializable index of one project's files, symbols and import edges.
///
/// Stored as `index.json` inside the directory returned by
/// `ProjectIndex::index_dir`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Schema version; compared against `INDEX_VERSION` when loading.
    pub version: u32,
    /// Root this index was built for; also the string hashed to pick the
    /// storage directory.
    pub project_root: String,
    /// Local time at which the index value was created, formatted as
    /// `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by the path produced by `make_relative`.
    pub files: HashMap<String, FileEntry>,
    /// File-to-file edges; `build_edges` keeps them sorted and deduplicated.
    pub edges: Vec<IndexEdge>,
    /// Symbols keyed by `"{file}::{name}"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
20
/// Per-file metadata recorded by `scan`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path of the file as produced by `make_relative` (same as its map key).
    pub path: String,
    /// Content hash from `compute_hash`; an unchanged hash lets `scan` reuse
    /// the previous entry instead of re-extracting signatures.
    pub hash: String,
    /// The file extension (not a language name), e.g. `rs`.
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are flagged as exported.
    pub exports: Vec<String>,
    /// Short summary line chosen by `extract_summary` (may be empty).
    pub summary: String,
}
31
/// One symbol (function, type, ...) extracted from an indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Key of the owning file in `ProjectIndex::files`.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Signature kind rendered as a string (e.g. `"method"`).
    pub kind: String,
    /// First line of the symbol. Taken from the signature's own span when it
    /// has one, otherwise from `find_symbol_range`, which reports 1-based lines.
    /// NOTE(review): the base of extractor-provided spans is not visible here.
    pub start_line: usize,
    /// Last line of the symbol; same provenance as `start_line`.
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
41
/// Directed relationship between two indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// File that contains the reference (for imports: the importing file).
    pub from: String,
    /// File being referenced (for imports: the resolved import target).
    pub to: String,
    /// Edge type; `build_edges` only ever emits `"import"`.
    pub kind: String,
}
48
49impl ProjectIndex {
50    pub fn new(project_root: &str) -> Self {
51        Self {
52            version: INDEX_VERSION,
53            project_root: project_root.to_string(),
54            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
55            files: HashMap::new(),
56            edges: Vec::new(),
57            symbols: HashMap::new(),
58        }
59    }
60
61    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
62        let hash = short_hash(project_root);
63        crate::core::data_dir::lean_ctx_data_dir()
64            .ok()
65            .map(|d| d.join("graphs").join(hash))
66    }
67
68    pub fn load(project_root: &str) -> Option<Self> {
69        let dir = Self::index_dir(project_root)?;
70        let path = dir.join("index.json");
71        let content = std::fs::read_to_string(path).ok()?;
72        let index: Self = serde_json::from_str(&content).ok()?;
73        if index.version != INDEX_VERSION {
74            return None;
75        }
76        Some(index)
77    }
78
79    pub fn save(&self) -> Result<(), String> {
80        let dir = Self::index_dir(&self.project_root)
81            .ok_or_else(|| "Cannot determine data directory".to_string())?;
82        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
83        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
84        std::fs::write(dir.join("index.json"), json).map_err(|e| e.to_string())
85    }
86
87    pub fn file_count(&self) -> usize {
88        self.files.len()
89    }
90
91    pub fn symbol_count(&self) -> usize {
92        self.symbols.len()
93    }
94
95    pub fn edge_count(&self) -> usize {
96        self.edges.len()
97    }
98
99    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
100        self.symbols.get(key)
101    }
102
103    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
104        let mut result = Vec::new();
105        let mut visited = std::collections::HashSet::new();
106        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
107
108        while let Some((current, d)) = queue.pop() {
109            if d > depth || visited.contains(&current) {
110                continue;
111            }
112            visited.insert(current.clone());
113            if current != path {
114                result.push(current.clone());
115            }
116
117            for edge in &self.edges {
118                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
119                    queue.push((edge.from.clone(), d + 1));
120                }
121            }
122        }
123        result
124    }
125
126    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
127        let mut result = Vec::new();
128        let mut visited = std::collections::HashSet::new();
129        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
130
131        while let Some((current, d)) = queue.pop() {
132            if d > depth || visited.contains(&current) {
133                continue;
134            }
135            visited.insert(current.clone());
136            if current != path {
137                result.push(current.clone());
138            }
139
140            for edge in &self.edges {
141                if edge.from == current && !visited.contains(&edge.to) {
142                    queue.push((edge.to.clone(), d + 1));
143                }
144                if edge.to == current && !visited.contains(&edge.from) {
145                    queue.push((edge.from.clone(), d + 1));
146                }
147            }
148        }
149        result
150    }
151}
152
153/// Load the best available graph index, trying multiple root path variants.
154/// If no valid index exists, automatically scans the project to build one.
155/// This is the primary entry point — ensures zero-config usage.
156pub fn load_or_build(project_root: &str) -> ProjectIndex {
157    // Prefer stable absolute roots. Using "." as a cache key is fragile because
158    // it depends on the process cwd and can accidentally load the wrong project.
159    let root_abs = if project_root.trim().is_empty() || project_root == "." {
160        std::env::current_dir()
161            .ok()
162            .map(|p| p.to_string_lossy().to_string())
163            .unwrap_or_else(|| ".".to_string())
164    } else {
165        project_root.to_string()
166    };
167
168    // Try the absolute/root-normalized path first.
169    if let Some(idx) = ProjectIndex::load(&root_abs) {
170        if !idx.files.is_empty() {
171            return idx;
172        }
173    }
174
175    // Legacy: older builds may have cached the index under ".". Only accept it if it
176    // actually refers to the current cwd project, then migrate it to `root_abs`.
177    if let Some(idx) = ProjectIndex::load(".") {
178        if !idx.files.is_empty() {
179            let mut migrated = idx;
180            migrated.project_root = root_abs.clone();
181            let _ = migrated.save();
182            return migrated;
183        }
184    }
185
186    // Try absolute cwd
187    if let Ok(cwd) = std::env::current_dir() {
188        let cwd_str = cwd.to_string_lossy().to_string();
189        if cwd_str != root_abs {
190            if let Some(idx) = ProjectIndex::load(&cwd_str) {
191                if !idx.files.is_empty() {
192                    return idx;
193                }
194            }
195        }
196    }
197
198    // No existing index found anywhere — auto-build
199    scan(&root_abs)
200}
201
202pub fn scan(project_root: &str) -> ProjectIndex {
203    let existing = ProjectIndex::load(project_root);
204    let mut index = ProjectIndex::new(project_root);
205
206    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
207        if let Some(ref prev) = existing {
208            prev.files
209                .iter()
210                .map(|(path, entry)| {
211                    let syms: Vec<(String, SymbolEntry)> = prev
212                        .symbols
213                        .iter()
214                        .filter(|(_, s)| s.file == *path)
215                        .map(|(k, v)| (k.clone(), v.clone()))
216                        .collect();
217                    (path.clone(), (entry.hash.clone(), syms))
218                })
219                .collect()
220        } else {
221            HashMap::new()
222        };
223
224    let walker = ignore::WalkBuilder::new(project_root)
225        .hidden(true)
226        .git_ignore(true)
227        .git_global(true)
228        .git_exclude(true)
229        .max_depth(Some(10))
230        .build();
231
232    let mut scanned = 0usize;
233    let mut reused = 0usize;
234    let max_files = 2000;
235
236    for entry in walker.filter_map(|e| e.ok()) {
237        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
238            continue;
239        }
240        let file_path = entry.path().to_string_lossy().to_string();
241        let ext = Path::new(&file_path)
242            .extension()
243            .and_then(|e| e.to_str())
244            .unwrap_or("");
245
246        if !is_indexable_ext(ext) {
247            continue;
248        }
249
250        if index.files.len() >= max_files {
251            break;
252        }
253
254        let content = match std::fs::read_to_string(&file_path) {
255            Ok(c) => c,
256            Err(_) => continue,
257        };
258
259        let hash = compute_hash(&content);
260        let rel_path = make_relative(&file_path, project_root);
261
262        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
263            if *old_hash == hash {
264                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
265                    index.files.insert(rel_path.clone(), old_entry.clone());
266                    for (key, sym) in old_syms {
267                        index.symbols.insert(key.clone(), sym.clone());
268                    }
269                    reused += 1;
270                    continue;
271                }
272            }
273        }
274
275        let sigs = signatures::extract_signatures(&content, ext);
276        let line_count = content.lines().count();
277        let token_count = crate::core::tokens::count_tokens(&content);
278        let summary = extract_summary(&content);
279
280        let exports: Vec<String> = sigs
281            .iter()
282            .filter(|s| s.is_exported)
283            .map(|s| s.name.clone())
284            .collect();
285
286        index.files.insert(
287            rel_path.clone(),
288            FileEntry {
289                path: rel_path.clone(),
290                hash,
291                language: ext.to_string(),
292                line_count,
293                token_count,
294                exports,
295                summary,
296            },
297        );
298
299        for sig in &sigs {
300            let (start, end) = sig
301                .start_line
302                .zip(sig.end_line)
303                .unwrap_or_else(|| find_symbol_range(&content, sig));
304            let key = format!("{}::{}", rel_path, sig.name);
305            index.symbols.insert(
306                key,
307                SymbolEntry {
308                    file: rel_path.clone(),
309                    name: sig.name.clone(),
310                    kind: sig.kind.to_string(),
311                    start_line: start,
312                    end_line: end,
313                    is_exported: sig.is_exported,
314                },
315            );
316        }
317
318        scanned += 1;
319    }
320
321    build_edges(&mut index);
322
323    if let Err(e) = index.save() {
324        eprintln!("Warning: could not save graph index: {e}");
325    }
326
327    eprintln!(
328        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
329        index.file_count(),
330        scanned,
331        reused,
332        index.symbol_count(),
333        index.edge_count()
334    );
335
336    index
337}
338
339fn build_edges(index: &mut ProjectIndex) {
340    index.edges.clear();
341
342    let root = index.project_root.clone();
343    let root_path = Path::new(&root);
344
345    let mut file_paths: Vec<String> = index.files.keys().cloned().collect();
346    file_paths.sort();
347
348    let resolver_ctx = import_resolver::ResolverContext::new(root_path, file_paths.clone());
349
350    for rel_path in &file_paths {
351        let abs_path = root_path.join(rel_path);
352        let content = match std::fs::read_to_string(&abs_path) {
353            Ok(c) => c,
354            Err(_) => continue,
355        };
356
357        let ext = Path::new(rel_path)
358            .extension()
359            .and_then(|e| e.to_str())
360            .unwrap_or("");
361
362        // Vue/Svelte store JS/TS imports inside <script>; resolution is best-effort TS-like.
363        let resolve_ext = match ext {
364            "vue" | "svelte" => "ts",
365            _ => ext,
366        };
367
368        let imports = crate::core::deep_queries::analyze(&content, resolve_ext).imports;
369        if imports.is_empty() {
370            continue;
371        }
372
373        let resolved =
374            import_resolver::resolve_imports(&imports, rel_path, resolve_ext, &resolver_ctx);
375        for r in resolved {
376            if r.is_external {
377                continue;
378            }
379            if let Some(to) = r.resolved_path {
380                index.edges.push(IndexEdge {
381                    from: rel_path.clone(),
382                    to,
383                    kind: "import".to_string(),
384                });
385            }
386        }
387    }
388
389    index.edges.sort_by(|a, b| {
390        a.from
391            .cmp(&b.from)
392            .then_with(|| a.to.cmp(&b.to))
393            .then_with(|| a.kind.cmp(&b.kind))
394    });
395    index
396        .edges
397        .dedup_by(|a, b| a.from == b.from && a.to == b.to && a.kind == b.kind);
398}
399
400fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
401    let lines: Vec<&str> = content.lines().collect();
402    let mut start = 0;
403
404    for (i, line) in lines.iter().enumerate() {
405        if line.contains(&sig.name) {
406            let trimmed = line.trim();
407            let is_def = trimmed.starts_with("fn ")
408                || trimmed.starts_with("pub fn ")
409                || trimmed.starts_with("pub(crate) fn ")
410                || trimmed.starts_with("async fn ")
411                || trimmed.starts_with("pub async fn ")
412                || trimmed.starts_with("struct ")
413                || trimmed.starts_with("pub struct ")
414                || trimmed.starts_with("enum ")
415                || trimmed.starts_with("pub enum ")
416                || trimmed.starts_with("trait ")
417                || trimmed.starts_with("pub trait ")
418                || trimmed.starts_with("impl ")
419                || trimmed.starts_with("class ")
420                || trimmed.starts_with("export class ")
421                || trimmed.starts_with("export function ")
422                || trimmed.starts_with("export async function ")
423                || trimmed.starts_with("function ")
424                || trimmed.starts_with("async function ")
425                || trimmed.starts_with("def ")
426                || trimmed.starts_with("async def ")
427                || trimmed.starts_with("func ")
428                || trimmed.starts_with("interface ")
429                || trimmed.starts_with("export interface ")
430                || trimmed.starts_with("type ")
431                || trimmed.starts_with("export type ")
432                || trimmed.starts_with("const ")
433                || trimmed.starts_with("export const ")
434                || trimmed.starts_with("fun ")
435                || trimmed.starts_with("private fun ")
436                || trimmed.starts_with("public fun ")
437                || trimmed.starts_with("internal fun ")
438                || trimmed.starts_with("class ")
439                || trimmed.starts_with("data class ")
440                || trimmed.starts_with("sealed class ")
441                || trimmed.starts_with("sealed interface ")
442                || trimmed.starts_with("enum class ")
443                || trimmed.starts_with("object ")
444                || trimmed.starts_with("private object ")
445                || trimmed.starts_with("interface ")
446                || trimmed.starts_with("typealias ")
447                || trimmed.starts_with("private typealias ");
448            if is_def {
449                start = i + 1;
450                break;
451            }
452        }
453    }
454
455    if start == 0 {
456        return (1, lines.len().min(20));
457    }
458
459    let base_indent = lines
460        .get(start - 1)
461        .map(|l| l.len() - l.trim_start().len())
462        .unwrap_or(0);
463
464    let mut end = start;
465    let mut brace_depth: i32 = 0;
466    let mut found_open = false;
467
468    for (i, line) in lines.iter().enumerate().skip(start - 1) {
469        for ch in line.chars() {
470            if ch == '{' {
471                brace_depth += 1;
472                found_open = true;
473            } else if ch == '}' {
474                brace_depth -= 1;
475            }
476        }
477
478        end = i + 1;
479
480        if found_open && brace_depth <= 0 {
481            break;
482        }
483
484        if !found_open && i > start {
485            let indent = line.len() - line.trim_start().len();
486            if indent <= base_indent && !line.trim().is_empty() && i > start {
487                end = i;
488                break;
489            }
490        }
491
492        if end - start > 200 {
493            break;
494        }
495    }
496
497    (start, end)
498}
499
/// Picks a one-line summary for a file: the first of its first 20 lines that is
/// neither blank, a comment, nor an import/package style line, trimmed and cut
/// to at most 120 characters. Returns an empty string when no line qualifies.
fn extract_summary(content: &str) -> String {
    /// Trimmed-line prefixes that disqualify a line from being the summary.
    const SKIPPED_PREFIXES: [&str; 9] = [
        "//",
        "#",
        "/*",
        "*",
        "use ",
        "import ",
        "from ",
        "require(",
        "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|&prefix| line.starts_with(prefix))
        })
        .map(|line| line.chars().take(120).collect::<String>())
        .unwrap_or_default()
}
520
/// Hashes file content with the standard library's `DefaultHasher` and renders
/// the 64-bit result as 16 zero-padded lowercase hex digits.
///
/// Used only for change detection between scans.
/// NOTE(review): the standard library does not promise that `DefaultHasher`
/// output stays the same across Rust releases, so stored hashes may stop
/// matching after a toolchain upgrade (which merely forces a rescan).
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
529
/// Hashes `input` with the standard library's `DefaultHasher` and renders the
/// low 32 bits of the result as 8 zero-padded lowercase hex digits.
///
/// The result names the per-project index directory.
/// NOTE(review): `DefaultHasher` output is not promised to be stable across
/// Rust releases, so the directory name may change after a toolchain upgrade.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let truncated = state.finish() & LOW_32_BITS;
    format!("{truncated:08x}")
}
538
/// Returns `path` relative to `root`, without a leading path separator.
///
/// The prefix is removed component by component, so a root of `/foo/bar` does
/// not match `/foo/barbaz/...`, and trailing separators on `root` or the
/// platform's alternative separators do not matter. When `path` is not located
/// under `root` it is returned as is, minus any leading separators.
fn make_relative(path: &str, root: &str) -> String {
    let stripped = match Path::new(path).strip_prefix(root) {
        // `rest` is a sub-slice of `path`, which is valid UTF-8, so `to_str`
        // is not expected to fail; fall back to the input just in case.
        Ok(rest) => rest.to_str().unwrap_or(path),
        Err(_) => path,
    };
    stripped
        .trim_start_matches(std::path::is_separator)
        .to_string()
}
545
/// Whether files with extension `ext` should be indexed.
///
/// Thin local alias for `language_capabilities::is_indexable_ext`, which owns
/// the actual list of supported extensions.
fn is_indexable_ext(ext: &str) -> bool {
    crate::core::language_capabilities::is_indexable_ext(ext)
}
549
/// Test-only helper: returns the package named by the first line that, once
/// trimmed, starts with `package `. Surrounding whitespace and trailing
/// semicolons are removed from the name. Returns `None` when no such line exists.
#[cfg(test)]
fn kotlin_package_name(content: &str) -> Option<String> {
    for raw_line in content.lines() {
        if let Some(rest) = raw_line.trim().strip_prefix("package ") {
            let name = rest.trim().trim_end_matches(';');
            return Some(name.to_string());
        }
    }
    None
}
557
// Unit tests for the pure helpers and the in-memory graph queries. None of
// them touch the on-disk index.
#[cfg(test)]
mod tests {
    use super::*;

    // Same input must give the same 8-character directory hash.
    #[test]
    fn test_short_hash_deterministic() {
        let h1 = short_hash("/Users/test/project");
        let h2 = short_hash("/Users/test/project");
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 8);
    }

    // Paths under the root lose the root prefix; unrelated paths pass through.
    #[test]
    fn test_make_relative() {
        assert_eq!(
            make_relative("/foo/bar/src/main.rs", "/foo/bar"),
            "src/main.rs"
        );
        assert_eq!(make_relative("src/main.rs", "/foo/bar"), "src/main.rs");
    }

    // Comment, `use` and blank lines are skipped when choosing the summary.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        let summary = extract_summary(content);
        assert_eq!(summary, "pub fn main() {");
    }

    // Content hashes are repeatable and change when the content changes.
    #[test]
    fn test_compute_hash_deterministic() {
        let h1 = compute_hash("hello world");
        let h2 = compute_hash("hello world");
        assert_eq!(h1, h2);
        assert_ne!(h1, compute_hash("hello world!"));
    }

    // A fresh index carries the current version and root and has no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    // Both direct importers of `b.rs` are reported at depth 1.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges.push(IndexEdge {
            from: "a.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });
        idx.edges.push(IndexEdge {
            from: "c.rs".to_string(),
            to: "b.rs".to_string(),
            kind: "import".to_string(),
        });

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"a.rs".to_string()));
        assert!(deps.contains(&"c.rs".to_string()));
    }

    // The text fallback recognises a Kotlin `fun` definition. The raw string
    // starts with a newline, so `fun greet` sits on line 5.
    #[test]
    fn test_find_symbol_range_kotlin_function() {
        let content = r#"
package com.example

class UserService {
    fun greet(name: String): String {
        return "hi $name"
    }
}
"#;
        let sig = signatures::Signature {
            kind: "method",
            name: "greet".to_string(),
            params: "name:String".to_string(),
            return_type: "String".to_string(),
            is_async: false,
            is_exported: true,
            indent: 2,
            ..signatures::Signature::no_span()
        };
        let (start, end) = find_symbol_range(content, &sig);
        assert_eq!(start, 5);
        assert!(end >= start);
    }

    // When the signature carries its own span, the text fallback is not used.
    #[test]
    fn test_signature_spans_override_fallback_range() {
        let sig = signatures::Signature {
            kind: "method",
            name: "release".to_string(),
            params: "id:String".to_string(),
            return_type: "Boolean".to_string(),
            is_async: true,
            is_exported: true,
            indent: 2,
            start_line: Some(42),
            end_line: Some(43),
        };

        let (start, end) = sig
            .start_line
            .zip(sig.end_line)
            .unwrap_or_else(|| find_symbol_range("ignored", &sig));
        assert_eq!((start, end), (42, 43));
    }

    // An index written with an older version still parses; the version
    // mismatch is what marks it as stale.
    #[test]
    fn test_parse_stale_index_version() {
        let json = format!(
            r#"{{"version":{},"project_root":"/test","last_scan":"now","files":{{}},"edges":[],"symbols":{{}}}}"#,
            INDEX_VERSION - 1
        );
        let parsed: ProjectIndex = serde_json::from_str(&json).unwrap();
        assert_ne!(parsed.version, INDEX_VERSION);
    }

    // The package declaration is extracted without the `package ` keyword.
    #[test]
    fn test_kotlin_package_name() {
        let content = "package com.example.feature\n\nclass UserService";
        assert_eq!(
            kotlin_package_name(content).as_deref(),
            Some("com.example.feature")
        );
    }
}