Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::deps;
7use crate::core::signatures;
8
/// Format version stamped into every newly created [`ProjectIndex`].
const INDEX_VERSION: u32 = 1;
10
/// Persisted graph index of a project: its files, their symbols and the
/// import edges between them. Serialized to/from JSON via serde.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Index format version; set to `INDEX_VERSION` when the index is created.
    pub version: u32,
    /// Project root this index was built for; also hashed to pick the
    /// on-disk cache directory.
    pub project_root: String,
    /// Local timestamp of index creation, formatted `%Y-%m-%d %H:%M:%S`.
    pub last_scan: String,
    /// Indexed files, keyed by path relative to the project root.
    pub files: HashMap<String, FileEntry>,
    /// Dependency edges collected from every indexed file.
    pub edges: Vec<IndexEdge>,
    /// Symbols keyed by `"<relative path>::<symbol name>"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
20
/// Per-file metadata stored in a [`ProjectIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same string as the map key).
    pub path: String,
    /// Hex content hash, used to detect unchanged files between scans.
    pub hash: String,
    /// The file extension (e.g. `"rs"`), stored as the language tag.
    pub language: String,
    /// Number of lines in the file content.
    pub line_count: usize,
    /// Token count as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the signatures flagged as exported.
    pub exports: Vec<String>,
    /// First non-comment, non-import line near the top of the file
    /// (truncated), or empty when none was found.
    pub summary: String,
}
31
/// A single symbol (function, type, ...) extracted from an indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Symbol kind, stored as the string form of the signature's kind.
    pub kind: String,
    /// 1-based line where the definition was located (heuristic).
    pub start_line: usize,
    /// 1-based last line of the definition (heuristic, inclusive).
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
41
/// A directed dependency edge between two entries of the index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Relative path of the file that declares the dependency.
    pub from: String,
    /// Dependency target exactly as reported by `deps::extract_deps`
    /// (NOTE(review): may be a module/import string rather than a file path — confirm).
    pub to: String,
    /// Edge kind; the scanner in this file only ever writes `"import"`.
    pub kind: String,
}
48
49impl ProjectIndex {
50    pub fn new(project_root: &str) -> Self {
51        Self {
52            version: INDEX_VERSION,
53            project_root: project_root.to_string(),
54            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
55            files: HashMap::new(),
56            edges: Vec::new(),
57            symbols: HashMap::new(),
58        }
59    }
60
61    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
62        let hash = short_hash(project_root);
63        dirs::home_dir().map(|h| h.join(".lean-ctx").join("graphs").join(hash))
64    }
65
66    pub fn load(project_root: &str) -> Option<Self> {
67        let dir = Self::index_dir(project_root)?;
68        let path = dir.join("index.json");
69        let content = std::fs::read_to_string(path).ok()?;
70        serde_json::from_str(&content).ok()
71    }
72
73    pub fn save(&self) -> Result<(), String> {
74        let dir = Self::index_dir(&self.project_root)
75            .ok_or_else(|| "Cannot determine home directory".to_string())?;
76        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
77        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
78        std::fs::write(dir.join("index.json"), json).map_err(|e| e.to_string())
79    }
80
81    pub fn file_count(&self) -> usize {
82        self.files.len()
83    }
84
85    pub fn symbol_count(&self) -> usize {
86        self.symbols.len()
87    }
88
89    pub fn edge_count(&self) -> usize {
90        self.edges.len()
91    }
92
93    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
94        self.symbols.get(key)
95    }
96
97    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
98        let mut result = Vec::new();
99        let mut visited = std::collections::HashSet::new();
100        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
101
102        while let Some((current, d)) = queue.pop() {
103            if d > depth || visited.contains(&current) {
104                continue;
105            }
106            visited.insert(current.clone());
107            if current != path {
108                result.push(current.clone());
109            }
110
111            for edge in &self.edges {
112                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
113                    queue.push((edge.from.clone(), d + 1));
114                }
115            }
116        }
117        result
118    }
119
120    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
121        let mut result = Vec::new();
122        let mut visited = std::collections::HashSet::new();
123        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
124
125        while let Some((current, d)) = queue.pop() {
126            if d > depth || visited.contains(&current) {
127                continue;
128            }
129            visited.insert(current.clone());
130            if current != path {
131                result.push(current.clone());
132            }
133
134            for edge in &self.edges {
135                if edge.from == current && !visited.contains(&edge.to) {
136                    queue.push((edge.to.clone(), d + 1));
137                }
138                if edge.to == current && !visited.contains(&edge.from) {
139                    queue.push((edge.from.clone(), d + 1));
140                }
141            }
142        }
143        result
144    }
145}
146
147/// Load the best available graph index, trying multiple root path variants.
148/// If no valid index exists, automatically scans the project to build one.
149/// This is the primary entry point — ensures zero-config usage.
150pub fn load_or_build(project_root: &str) -> ProjectIndex {
151    // Prefer stable absolute roots. Using "." as a cache key is fragile because
152    // it depends on the process cwd and can accidentally load the wrong project.
153    let root_abs = if project_root.trim().is_empty() || project_root == "." {
154        std::env::current_dir()
155            .ok()
156            .map(|p| p.to_string_lossy().to_string())
157            .unwrap_or_else(|| ".".to_string())
158    } else {
159        project_root.to_string()
160    };
161
162    // Try the absolute/root-normalized path first.
163    if let Some(idx) = ProjectIndex::load(&root_abs) {
164        if !idx.files.is_empty() {
165            return idx;
166        }
167    }
168
169    // Legacy: older builds may have cached the index under ".". Only accept it if it
170    // actually refers to the current cwd project, then migrate it to `root_abs`.
171    if let Some(idx) = ProjectIndex::load(".") {
172        if !idx.files.is_empty() {
173            let mut migrated = idx;
174            migrated.project_root = root_abs.clone();
175            let _ = migrated.save();
176            return migrated;
177        }
178    }
179
180    // Try absolute cwd
181    if let Ok(cwd) = std::env::current_dir() {
182        let cwd_str = cwd.to_string_lossy().to_string();
183        if cwd_str != root_abs {
184            if let Some(idx) = ProjectIndex::load(&cwd_str) {
185                if !idx.files.is_empty() {
186                    return idx;
187                }
188            }
189        }
190    }
191
192    // No existing index found anywhere — auto-build
193    scan(&root_abs)
194}
195
196pub fn scan(project_root: &str) -> ProjectIndex {
197    let existing = ProjectIndex::load(project_root);
198    let mut index = ProjectIndex::new(project_root);
199
200    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
201        if let Some(ref prev) = existing {
202            prev.files
203                .iter()
204                .map(|(path, entry)| {
205                    let syms: Vec<(String, SymbolEntry)> = prev
206                        .symbols
207                        .iter()
208                        .filter(|(_, s)| s.file == *path)
209                        .map(|(k, v)| (k.clone(), v.clone()))
210                        .collect();
211                    (path.clone(), (entry.hash.clone(), syms))
212                })
213                .collect()
214        } else {
215            HashMap::new()
216        };
217
218    let walker = ignore::WalkBuilder::new(project_root)
219        .hidden(true)
220        .git_ignore(true)
221        .git_global(true)
222        .git_exclude(true)
223        .max_depth(Some(10))
224        .build();
225
226    let mut scanned = 0usize;
227    let mut reused = 0usize;
228    let max_files = 2000;
229
230    for entry in walker.filter_map(|e| e.ok()) {
231        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
232            continue;
233        }
234        let file_path = entry.path().to_string_lossy().to_string();
235        let ext = Path::new(&file_path)
236            .extension()
237            .and_then(|e| e.to_str())
238            .unwrap_or("");
239
240        if !is_indexable_ext(ext) {
241            continue;
242        }
243
244        if index.files.len() >= max_files {
245            break;
246        }
247
248        let content = match std::fs::read_to_string(&file_path) {
249            Ok(c) => c,
250            Err(_) => continue,
251        };
252
253        let hash = compute_hash(&content);
254        let rel_path = make_relative(&file_path, project_root);
255
256        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
257            if *old_hash == hash {
258                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
259                    index.files.insert(rel_path.clone(), old_entry.clone());
260                    for (key, sym) in old_syms {
261                        index.symbols.insert(key.clone(), sym.clone());
262                    }
263                    reused += 1;
264                    add_edges(&mut index, &rel_path, &content, ext);
265                    continue;
266                }
267            }
268        }
269
270        let sigs = signatures::extract_signatures(&content, ext);
271        let line_count = content.lines().count();
272        let token_count = crate::core::tokens::count_tokens(&content);
273        let summary = extract_summary(&content);
274
275        let exports: Vec<String> = sigs
276            .iter()
277            .filter(|s| s.is_exported)
278            .map(|s| s.name.clone())
279            .collect();
280
281        index.files.insert(
282            rel_path.clone(),
283            FileEntry {
284                path: rel_path.clone(),
285                hash,
286                language: ext.to_string(),
287                line_count,
288                token_count,
289                exports,
290                summary,
291            },
292        );
293
294        for sig in &sigs {
295            let (start, end) = find_symbol_range(&content, sig);
296            let key = format!("{}::{}", rel_path, sig.name);
297            index.symbols.insert(
298                key,
299                SymbolEntry {
300                    file: rel_path.clone(),
301                    name: sig.name.clone(),
302                    kind: sig.kind.to_string(),
303                    start_line: start,
304                    end_line: end,
305                    is_exported: sig.is_exported,
306                },
307            );
308        }
309
310        add_edges(&mut index, &rel_path, &content, ext);
311        scanned += 1;
312    }
313
314    if let Err(e) = index.save() {
315        eprintln!("Warning: could not save graph index: {e}");
316    }
317
318    eprintln!(
319        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
320        index.file_count(),
321        scanned,
322        reused,
323        index.symbol_count(),
324        index.edge_count()
325    );
326
327    index
328}
329
330fn add_edges(index: &mut ProjectIndex, rel_path: &str, content: &str, ext: &str) {
331    let dep_info = deps::extract_deps(content, ext);
332    for imp in &dep_info.imports {
333        index.edges.push(IndexEdge {
334            from: rel_path.to_string(),
335            to: imp.clone(),
336            kind: "import".to_string(),
337        });
338    }
339}
340
341fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
342    let lines: Vec<&str> = content.lines().collect();
343    let mut start = 0;
344
345    for (i, line) in lines.iter().enumerate() {
346        if line.contains(&sig.name) {
347            let trimmed = line.trim();
348            let is_def = trimmed.starts_with("fn ")
349                || trimmed.starts_with("pub fn ")
350                || trimmed.starts_with("pub(crate) fn ")
351                || trimmed.starts_with("async fn ")
352                || trimmed.starts_with("pub async fn ")
353                || trimmed.starts_with("struct ")
354                || trimmed.starts_with("pub struct ")
355                || trimmed.starts_with("enum ")
356                || trimmed.starts_with("pub enum ")
357                || trimmed.starts_with("trait ")
358                || trimmed.starts_with("pub trait ")
359                || trimmed.starts_with("impl ")
360                || trimmed.starts_with("class ")
361                || trimmed.starts_with("export class ")
362                || trimmed.starts_with("export function ")
363                || trimmed.starts_with("export async function ")
364                || trimmed.starts_with("function ")
365                || trimmed.starts_with("async function ")
366                || trimmed.starts_with("def ")
367                || trimmed.starts_with("async def ")
368                || trimmed.starts_with("func ")
369                || trimmed.starts_with("interface ")
370                || trimmed.starts_with("export interface ")
371                || trimmed.starts_with("type ")
372                || trimmed.starts_with("export type ")
373                || trimmed.starts_with("const ")
374                || trimmed.starts_with("export const ");
375            if is_def {
376                start = i + 1;
377                break;
378            }
379        }
380    }
381
382    if start == 0 {
383        return (1, lines.len().min(20));
384    }
385
386    let base_indent = lines
387        .get(start - 1)
388        .map(|l| l.len() - l.trim_start().len())
389        .unwrap_or(0);
390
391    let mut end = start;
392    let mut brace_depth: i32 = 0;
393    let mut found_open = false;
394
395    for (i, line) in lines.iter().enumerate().skip(start - 1) {
396        for ch in line.chars() {
397            if ch == '{' {
398                brace_depth += 1;
399                found_open = true;
400            } else if ch == '}' {
401                brace_depth -= 1;
402            }
403        }
404
405        end = i + 1;
406
407        if found_open && brace_depth <= 0 {
408            break;
409        }
410
411        if !found_open && i > start {
412            let indent = line.len() - line.trim_start().len();
413            if indent <= base_indent && !line.trim().is_empty() && i > start {
414                end = i;
415                break;
416            }
417        }
418
419        if end - start > 200 {
420            break;
421        }
422    }
423
424    (start, end)
425}
426
/// Returns a one-line summary of `content`: the first of its leading 20 lines
/// that is neither blank, a comment, nor an import/package statement,
/// trimmed and cut to at most 120 characters. Returns an empty string when
/// no such line exists.
fn extract_summary(content: &str) -> String {
    // Trimmed-line prefixes that mark comments and import-like statements.
    const SKIPPED_PREFIXES: &[&str] = &[
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|candidate| {
            !candidate.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| candidate.starts_with(*prefix))
        })
        .map(|candidate| candidate.chars().take(120).collect::<String>())
        .unwrap_or_default()
}
447
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit digest as 16 zero-padded lowercase hex digits.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
456
/// Hashes `input` with the standard library's `DefaultHasher` and returns
/// the low 32 bits of the digest as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(input, &mut state);
    let low_bits = state.finish() & u64::from(u32::MAX);
    format!("{low_bits:08x}")
}
465
/// Returns `path` relative to `root`, without a leading `/`.
///
/// The prefix is removed per path component, so a root of `/foo/bar` does
/// not match `/foo/barbaz/x.rs`, and a root of `.` does not eat the leading
/// dot of `.config/app.rs`. When `path` does not live under `root` it is
/// returned unchanged apart from having leading `/` characters trimmed.
fn make_relative(path: &str, root: &str) -> String {
    let relative = match Path::new(path).strip_prefix(root) {
        Ok(rest) => rest.to_string_lossy().into_owned(),
        Err(_) => path.to_string(),
    };
    relative.trim_start_matches('/').to_string()
}
472
/// Returns `true` when `ext` (a file extension without the dot, matched
/// case-sensitively) belongs to a source language the indexer handles.
fn is_indexable_ext(ext: &str) -> bool {
    const INDEXABLE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "rb", "cs",
        "kt", "swift", "php", "ex", "exs", "vue", "svelte",
    ];

    INDEXABLE_EXTENSIONS.iter().any(|known| *known == ext)
}
498
#[cfg(test)]
mod tests {
    use super::*;

    /// The cache-directory hash must be stable and exactly 8 characters long.
    #[test]
    fn test_short_hash_deterministic() {
        let first = short_hash("/Users/test/project");
        let second = short_hash("/Users/test/project");
        assert_eq!(first, second);
        assert_eq!(first.len(), 8);
    }

    /// Paths under the root lose the root prefix; other paths pass through.
    #[test]
    fn test_make_relative() {
        let inside = make_relative("/foo/bar/src/main.rs", "/foo/bar");
        assert_eq!(inside, "src/main.rs");

        let outside = make_relative("src/main.rs", "/foo/bar");
        assert_eq!(outside, "src/main.rs");
    }

    /// Comments, imports and blank lines are skipped when summarising.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        assert_eq!(extract_summary(content), "pub fn main() {");
    }

    /// Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_compute_hash_deterministic() {
        let baseline = compute_hash("hello world");
        assert_eq!(baseline, compute_hash("hello world"));
        assert_ne!(baseline, compute_hash("hello world!"));
    }

    /// A fresh index carries the current version, the given root and no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    /// Both direct importers of `b.rs` are reported at depth 1.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        for importer in ["a.rs", "c.rs"] {
            idx.edges.push(IndexEdge {
                from: importer.to_string(),
                to: "b.rs".to_string(),
                kind: "import".to_string(),
            });
        }

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"a.rs".to_string()));
        assert!(deps.contains(&"c.rs".to_string()));
    }
}