Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::deps;
7use crate::core::signatures;
8
/// Format version stamped into every newly created [`ProjectIndex`]
/// (stored in its `version` field and serialized with the index).
const INDEX_VERSION: u32 = 1;
10
/// Serializable snapshot of a scanned project: its files, the symbols found
/// in them, and the import edges between them.
#[derive(Debug, Serialize, Deserialize)]
pub struct ProjectIndex {
    /// Index format version; set to `INDEX_VERSION` by [`ProjectIndex::new`].
    pub version: u32,
    /// Project root exactly as passed to [`ProjectIndex::new`]; also used to
    /// derive the on-disk location of the index.
    pub project_root: String,
    /// Local timestamp (`%Y-%m-%d %H:%M:%S`) taken when the index was created.
    pub last_scan: String,
    /// Indexed files, keyed by their path relative to the project root.
    pub files: HashMap<String, FileEntry>,
    /// Dependency edges collected while scanning.
    pub edges: Vec<IndexEdge>,
    /// Symbols, keyed as `"<relative file path>::<symbol name>"`.
    pub symbols: HashMap<String, SymbolEntry>,
}
20
/// Per-file metadata recorded by `scan`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Path relative to the project root (same value as the map key).
    pub path: String,
    /// Content hash; compared on rescan to decide whether the entry can be reused.
    pub hash: String,
    /// The file extension (e.g. `rs`, `ts`), stored verbatim as the language tag.
    pub language: String,
    /// Number of lines in the file.
    pub line_count: usize,
    /// Token count as reported by `crate::core::tokens::count_tokens`.
    pub token_count: usize,
    /// Names of the extracted signatures that are marked as exported.
    pub exports: Vec<String>,
    /// First non-blank, non-comment, non-import line among the first 20 lines,
    /// truncated to 120 characters (empty if none was found).
    pub summary: String,
}
31
/// A single symbol (function, type, ...) discovered in an indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolEntry {
    /// Relative path of the file that defines the symbol.
    pub file: String,
    /// Symbol name as reported by the signature extractor.
    pub name: String,
    /// Symbol kind, stored as the string form of the signature's kind.
    pub kind: String,
    /// 1-based line where the definition was located (heuristic, see
    /// `find_symbol_range`).
    pub start_line: usize,
    /// 1-based line where the definition is estimated to end.
    pub end_line: usize,
    /// Whether the signature extractor flagged the symbol as exported.
    pub is_exported: bool,
}
41
/// A directed relationship between two nodes of the project graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexEdge {
    /// Relative path of the file the edge originates from (the importer).
    pub from: String,
    /// Edge target, stored exactly as returned by the dependency extractor.
    /// NOTE(review): this may be a raw import specifier rather than a file
    /// path — confirm against `deps::extract_deps`.
    pub to: String,
    /// Edge kind; the scanner in this file only ever emits `"import"`.
    pub kind: String,
}
48
49impl ProjectIndex {
50    pub fn new(project_root: &str) -> Self {
51        Self {
52            version: INDEX_VERSION,
53            project_root: project_root.to_string(),
54            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
55            files: HashMap::new(),
56            edges: Vec::new(),
57            symbols: HashMap::new(),
58        }
59    }
60
61    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
62        let hash = short_hash(project_root);
63        dirs::home_dir().map(|h| h.join(".lean-ctx").join("graphs").join(hash))
64    }
65
66    pub fn load(project_root: &str) -> Option<Self> {
67        let dir = Self::index_dir(project_root)?;
68        let path = dir.join("index.json");
69        let content = std::fs::read_to_string(path).ok()?;
70        serde_json::from_str(&content).ok()
71    }
72
73    pub fn save(&self) -> Result<(), String> {
74        let dir = Self::index_dir(&self.project_root)
75            .ok_or_else(|| "Cannot determine home directory".to_string())?;
76        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
77        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
78        std::fs::write(dir.join("index.json"), json).map_err(|e| e.to_string())
79    }
80
81    pub fn file_count(&self) -> usize {
82        self.files.len()
83    }
84
85    pub fn symbol_count(&self) -> usize {
86        self.symbols.len()
87    }
88
89    pub fn edge_count(&self) -> usize {
90        self.edges.len()
91    }
92
93    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
94        self.symbols.get(key)
95    }
96
97    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
98        let mut result = Vec::new();
99        let mut visited = std::collections::HashSet::new();
100        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
101
102        while let Some((current, d)) = queue.pop() {
103            if d > depth || visited.contains(&current) {
104                continue;
105            }
106            visited.insert(current.clone());
107            if current != path {
108                result.push(current.clone());
109            }
110
111            for edge in &self.edges {
112                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
113                    queue.push((edge.from.clone(), d + 1));
114                }
115            }
116        }
117        result
118    }
119
120    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
121        let mut result = Vec::new();
122        let mut visited = std::collections::HashSet::new();
123        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
124
125        while let Some((current, d)) = queue.pop() {
126            if d > depth || visited.contains(&current) {
127                continue;
128            }
129            visited.insert(current.clone());
130            if current != path {
131                result.push(current.clone());
132            }
133
134            for edge in &self.edges {
135                if edge.from == current && !visited.contains(&edge.to) {
136                    queue.push((edge.to.clone(), d + 1));
137                }
138                if edge.to == current && !visited.contains(&edge.from) {
139                    queue.push((edge.from.clone(), d + 1));
140                }
141            }
142        }
143        result
144    }
145}
146
147/// Load the best available graph index, trying multiple root path variants.
148/// If no valid index exists, automatically scans the project to build one.
149/// This is the primary entry point — ensures zero-config usage.
150pub fn load_or_build(project_root: &str) -> ProjectIndex {
151    // Try the given root first
152    if let Some(idx) = ProjectIndex::load(project_root) {
153        if !idx.files.is_empty() {
154            return idx;
155        }
156    }
157
158    // ctx_graph build typically saves with ".", try that
159    if project_root != "." {
160        if let Some(idx) = ProjectIndex::load(".") {
161            if !idx.files.is_empty() {
162                return idx;
163            }
164        }
165    }
166
167    // Try absolute cwd
168    if let Ok(cwd) = std::env::current_dir() {
169        let cwd_str = cwd.to_string_lossy().to_string();
170        if cwd_str != project_root {
171            if let Some(idx) = ProjectIndex::load(&cwd_str) {
172                if !idx.files.is_empty() {
173                    return idx;
174                }
175            }
176        }
177    }
178
179    // No existing index found anywhere — auto-build
180    scan(project_root)
181}
182
183pub fn scan(project_root: &str) -> ProjectIndex {
184    let existing = ProjectIndex::load(project_root);
185    let mut index = ProjectIndex::new(project_root);
186
187    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
188        if let Some(ref prev) = existing {
189            prev.files
190                .iter()
191                .map(|(path, entry)| {
192                    let syms: Vec<(String, SymbolEntry)> = prev
193                        .symbols
194                        .iter()
195                        .filter(|(_, s)| s.file == *path)
196                        .map(|(k, v)| (k.clone(), v.clone()))
197                        .collect();
198                    (path.clone(), (entry.hash.clone(), syms))
199                })
200                .collect()
201        } else {
202            HashMap::new()
203        };
204
205    let walker = ignore::WalkBuilder::new(project_root)
206        .hidden(true)
207        .git_ignore(true)
208        .git_global(true)
209        .git_exclude(true)
210        .max_depth(Some(10))
211        .build();
212
213    let mut scanned = 0usize;
214    let mut reused = 0usize;
215    let max_files = 2000;
216
217    for entry in walker.filter_map(|e| e.ok()) {
218        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
219            continue;
220        }
221        let file_path = entry.path().to_string_lossy().to_string();
222        let ext = Path::new(&file_path)
223            .extension()
224            .and_then(|e| e.to_str())
225            .unwrap_or("");
226
227        if !is_indexable_ext(ext) {
228            continue;
229        }
230
231        if index.files.len() >= max_files {
232            break;
233        }
234
235        let content = match std::fs::read_to_string(&file_path) {
236            Ok(c) => c,
237            Err(_) => continue,
238        };
239
240        let hash = compute_hash(&content);
241        let rel_path = make_relative(&file_path, project_root);
242
243        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
244            if *old_hash == hash {
245                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
246                    index.files.insert(rel_path.clone(), old_entry.clone());
247                    for (key, sym) in old_syms {
248                        index.symbols.insert(key.clone(), sym.clone());
249                    }
250                    reused += 1;
251                    add_edges(&mut index, &rel_path, &content, ext);
252                    continue;
253                }
254            }
255        }
256
257        let sigs = signatures::extract_signatures(&content, ext);
258        let line_count = content.lines().count();
259        let token_count = crate::core::tokens::count_tokens(&content);
260        let summary = extract_summary(&content);
261
262        let exports: Vec<String> = sigs
263            .iter()
264            .filter(|s| s.is_exported)
265            .map(|s| s.name.clone())
266            .collect();
267
268        index.files.insert(
269            rel_path.clone(),
270            FileEntry {
271                path: rel_path.clone(),
272                hash,
273                language: ext.to_string(),
274                line_count,
275                token_count,
276                exports,
277                summary,
278            },
279        );
280
281        for sig in &sigs {
282            let (start, end) = find_symbol_range(&content, sig);
283            let key = format!("{}::{}", rel_path, sig.name);
284            index.symbols.insert(
285                key,
286                SymbolEntry {
287                    file: rel_path.clone(),
288                    name: sig.name.clone(),
289                    kind: sig.kind.to_string(),
290                    start_line: start,
291                    end_line: end,
292                    is_exported: sig.is_exported,
293                },
294            );
295        }
296
297        add_edges(&mut index, &rel_path, &content, ext);
298        scanned += 1;
299    }
300
301    if let Err(e) = index.save() {
302        eprintln!("Warning: could not save graph index: {e}");
303    }
304
305    eprintln!(
306        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
307        index.file_count(),
308        scanned,
309        reused,
310        index.symbol_count(),
311        index.edge_count()
312    );
313
314    index
315}
316
317fn add_edges(index: &mut ProjectIndex, rel_path: &str, content: &str, ext: &str) {
318    let dep_info = deps::extract_deps(content, ext);
319    for imp in &dep_info.imports {
320        index.edges.push(IndexEdge {
321            from: rel_path.to_string(),
322            to: imp.clone(),
323            kind: "import".to_string(),
324        });
325    }
326}
327
328fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
329    let lines: Vec<&str> = content.lines().collect();
330    let mut start = 0;
331
332    for (i, line) in lines.iter().enumerate() {
333        if line.contains(&sig.name) {
334            let trimmed = line.trim();
335            let is_def = trimmed.starts_with("fn ")
336                || trimmed.starts_with("pub fn ")
337                || trimmed.starts_with("pub(crate) fn ")
338                || trimmed.starts_with("async fn ")
339                || trimmed.starts_with("pub async fn ")
340                || trimmed.starts_with("struct ")
341                || trimmed.starts_with("pub struct ")
342                || trimmed.starts_with("enum ")
343                || trimmed.starts_with("pub enum ")
344                || trimmed.starts_with("trait ")
345                || trimmed.starts_with("pub trait ")
346                || trimmed.starts_with("impl ")
347                || trimmed.starts_with("class ")
348                || trimmed.starts_with("export class ")
349                || trimmed.starts_with("export function ")
350                || trimmed.starts_with("export async function ")
351                || trimmed.starts_with("function ")
352                || trimmed.starts_with("async function ")
353                || trimmed.starts_with("def ")
354                || trimmed.starts_with("async def ")
355                || trimmed.starts_with("func ")
356                || trimmed.starts_with("interface ")
357                || trimmed.starts_with("export interface ")
358                || trimmed.starts_with("type ")
359                || trimmed.starts_with("export type ")
360                || trimmed.starts_with("const ")
361                || trimmed.starts_with("export const ");
362            if is_def {
363                start = i + 1;
364                break;
365            }
366        }
367    }
368
369    if start == 0 {
370        return (1, lines.len().min(20));
371    }
372
373    let base_indent = lines
374        .get(start - 1)
375        .map(|l| l.len() - l.trim_start().len())
376        .unwrap_or(0);
377
378    let mut end = start;
379    let mut brace_depth: i32 = 0;
380    let mut found_open = false;
381
382    for (i, line) in lines.iter().enumerate().skip(start - 1) {
383        for ch in line.chars() {
384            if ch == '{' {
385                brace_depth += 1;
386                found_open = true;
387            } else if ch == '}' {
388                brace_depth -= 1;
389            }
390        }
391
392        end = i + 1;
393
394        if found_open && brace_depth <= 0 {
395            break;
396        }
397
398        if !found_open && i > start {
399            let indent = line.len() - line.trim_start().len();
400            if indent <= base_indent && !line.trim().is_empty() && i > start {
401                end = i;
402                break;
403            }
404        }
405
406        if end - start > 200 {
407            break;
408        }
409    }
410
411    (start, end)
412}
413
/// Picks a one-line summary for a file: the first of its first 20 lines that
/// is not blank, a comment, a `#` line, or an import/package statement.
///
/// The line is trimmed and cut to at most 120 characters. Returns an empty
/// string when no such line exists within the first 20 lines.
fn extract_summary(content: &str) -> String {
    /// Trimmed-line prefixes that never qualify as a summary.
    const SKIPPED_PREFIXES: &[&str] = &[
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|line| {
            !line.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|&prefix| line.starts_with(prefix))
        })
        .map(|line| line.chars().take(120).collect())
        .unwrap_or_default()
}
434
/// Returns a 16-digit lowercase hex fingerprint of `content`, used to detect
/// whether a file changed between scans.
///
/// NOTE(review): the fingerprint comes from the standard library's
/// `DefaultHasher`, whose algorithm is documented as unspecified across Rust
/// releases; hashes persisted by one toolchain may not match another's.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let digest = {
        let mut state = DefaultHasher::default();
        content.hash(&mut state);
        state.finish()
    };
    format!("{digest:016x}")
}
443
/// Returns an 8-digit lowercase hex string derived from the low 32 bits of
/// the hash of `input`; used as the per-project directory name of the index.
///
/// NOTE(review): relies on the standard library's `DefaultHasher`, whose
/// algorithm is documented as unspecified across Rust releases.
fn short_hash(input: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = DefaultHasher::default();
    input.hash(&mut state);
    let truncated = state.finish() & LOW_32_BITS;
    format!("{truncated:08x}")
}
452
/// Converts `path` into a path relative to `root`.
///
/// `root` is removed only when it is a prefix of `path` that ends on a path
/// component boundary, so `/foo/barbaz/x.rs` is *not* treated as living under
/// `/foo/bar`, and `.github/x` is not treated as living under `.`. Leading
/// path separators of the result are trimmed. When `path` is not under
/// `root`, it is returned unchanged apart from that trimming.
fn make_relative(path: &str, root: &str) -> String {
    use std::path::is_separator;

    let under_root = path.strip_prefix(root).filter(|rest| {
        // Accept the match only if `root` ended exactly at a component boundary.
        rest.is_empty() || rest.starts_with(is_separator) || root.ends_with(is_separator)
    });

    under_root
        .unwrap_or(path)
        .trim_start_matches(is_separator)
        .to_string()
}
459
/// Reports whether files with extension `ext` (without the dot, matched
/// case-sensitively) are included in the index.
fn is_indexable_ext(ext: &str) -> bool {
    /// Extensions of the source files the indexer analyses.
    const INDEXABLE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "rb", "cs",
        "kt", "swift", "php", "ex", "exs", "vue", "svelte",
    ];

    INDEXABLE_EXTENSIONS.iter().any(|&known| known == ext)
}
485
#[cfg(test)]
mod tests {
    use super::*;

    /// The directory hash must be stable for equal input and 8 characters long.
    #[test]
    fn test_short_hash_deterministic() {
        let root = "/Users/test/project";
        let first = short_hash(root);
        assert_eq!(first, short_hash(root));
        assert_eq!(first.len(), 8);
    }

    /// Paths under the root lose the root prefix; other paths pass through.
    #[test]
    fn test_make_relative() {
        let root = "/foo/bar";
        assert_eq!(make_relative("/foo/bar/src/main.rs", root), "src/main.rs");
        assert_eq!(make_relative("src/main.rs", root), "src/main.rs");
    }

    /// Comments, imports and blank lines are skipped when picking a summary.
    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        assert_eq!(extract_summary(content), "pub fn main() {");
    }

    /// Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_compute_hash_deterministic() {
        let baseline = compute_hash("hello world");
        assert_eq!(baseline, compute_hash("hello world"));
        assert_ne!(baseline, compute_hash("hello world!"));
    }

    /// A fresh index carries the current version, its root, and no files.
    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    /// Both direct importers of a file are reported at depth 1.
    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges
            .extend(["a.rs", "c.rs"].into_iter().map(|importer| IndexEdge {
                from: importer.to_string(),
                to: "b.rs".to_string(),
                kind: "import".to_string(),
            }));

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        for expected in ["a.rs", "c.rs"] {
            assert!(deps.contains(&expected.to_string()));
        }
    }
}
549}