Skip to main content

lean_ctx/core/
graph_index.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::{Deserialize, Serialize};
5
6use crate::core::deps;
7use crate::core::signatures;
8
/// Version number stamped into the `version` field of every index created by
/// `ProjectIndex::new`.
const INDEX_VERSION: u32 = 1;
10
11#[derive(Debug, Serialize, Deserialize)]
12pub struct ProjectIndex {
13    pub version: u32,
14    pub project_root: String,
15    pub last_scan: String,
16    pub files: HashMap<String, FileEntry>,
17    pub edges: Vec<IndexEdge>,
18    pub symbols: HashMap<String, SymbolEntry>,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct FileEntry {
23    pub path: String,
24    pub hash: String,
25    pub language: String,
26    pub line_count: usize,
27    pub token_count: usize,
28    pub exports: Vec<String>,
29    pub summary: String,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct SymbolEntry {
34    pub file: String,
35    pub name: String,
36    pub kind: String,
37    pub start_line: usize,
38    pub end_line: usize,
39    pub is_exported: bool,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct IndexEdge {
44    pub from: String,
45    pub to: String,
46    pub kind: String,
47}
48
49impl ProjectIndex {
50    pub fn new(project_root: &str) -> Self {
51        Self {
52            version: INDEX_VERSION,
53            project_root: project_root.to_string(),
54            last_scan: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
55            files: HashMap::new(),
56            edges: Vec::new(),
57            symbols: HashMap::new(),
58        }
59    }
60
61    pub fn index_dir(project_root: &str) -> Option<std::path::PathBuf> {
62        let hash = short_hash(project_root);
63        dirs::home_dir().map(|h| h.join(".lean-ctx").join("graphs").join(hash))
64    }
65
66    pub fn load(project_root: &str) -> Option<Self> {
67        let dir = Self::index_dir(project_root)?;
68        let path = dir.join("index.json");
69        let content = std::fs::read_to_string(path).ok()?;
70        serde_json::from_str(&content).ok()
71    }
72
73    pub fn save(&self) -> Result<(), String> {
74        let dir = Self::index_dir(&self.project_root)
75            .ok_or_else(|| "Cannot determine home directory".to_string())?;
76        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
77        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
78        std::fs::write(dir.join("index.json"), json).map_err(|e| e.to_string())
79    }
80
81    pub fn file_count(&self) -> usize {
82        self.files.len()
83    }
84
85    pub fn symbol_count(&self) -> usize {
86        self.symbols.len()
87    }
88
89    pub fn edge_count(&self) -> usize {
90        self.edges.len()
91    }
92
93    pub fn get_symbol(&self, key: &str) -> Option<&SymbolEntry> {
94        self.symbols.get(key)
95    }
96
97    pub fn get_reverse_deps(&self, path: &str, depth: usize) -> Vec<String> {
98        let mut result = Vec::new();
99        let mut visited = std::collections::HashSet::new();
100        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
101
102        while let Some((current, d)) = queue.pop() {
103            if d > depth || visited.contains(&current) {
104                continue;
105            }
106            visited.insert(current.clone());
107            if current != path {
108                result.push(current.clone());
109            }
110
111            for edge in &self.edges {
112                if edge.to == current && edge.kind == "import" && !visited.contains(&edge.from) {
113                    queue.push((edge.from.clone(), d + 1));
114                }
115            }
116        }
117        result
118    }
119
120    pub fn get_related(&self, path: &str, depth: usize) -> Vec<String> {
121        let mut result = Vec::new();
122        let mut visited = std::collections::HashSet::new();
123        let mut queue: Vec<(String, usize)> = vec![(path.to_string(), 0)];
124
125        while let Some((current, d)) = queue.pop() {
126            if d > depth || visited.contains(&current) {
127                continue;
128            }
129            visited.insert(current.clone());
130            if current != path {
131                result.push(current.clone());
132            }
133
134            for edge in &self.edges {
135                if edge.from == current && !visited.contains(&edge.to) {
136                    queue.push((edge.to.clone(), d + 1));
137                }
138                if edge.to == current && !visited.contains(&edge.from) {
139                    queue.push((edge.from.clone(), d + 1));
140                }
141            }
142        }
143        result
144    }
145}
146
147pub fn scan(project_root: &str) -> ProjectIndex {
148    let existing = ProjectIndex::load(project_root);
149    let mut index = ProjectIndex::new(project_root);
150
151    let old_files: HashMap<String, (String, Vec<(String, SymbolEntry)>)> =
152        if let Some(ref prev) = existing {
153            prev.files
154                .iter()
155                .map(|(path, entry)| {
156                    let syms: Vec<(String, SymbolEntry)> = prev
157                        .symbols
158                        .iter()
159                        .filter(|(_, s)| s.file == *path)
160                        .map(|(k, v)| (k.clone(), v.clone()))
161                        .collect();
162                    (path.clone(), (entry.hash.clone(), syms))
163                })
164                .collect()
165        } else {
166            HashMap::new()
167        };
168
169    let walker = ignore::WalkBuilder::new(project_root)
170        .hidden(true)
171        .git_ignore(true)
172        .git_global(true)
173        .git_exclude(true)
174        .max_depth(Some(10))
175        .build();
176
177    let mut scanned = 0usize;
178    let mut reused = 0usize;
179    let max_files = 2000;
180
181    for entry in walker.filter_map(|e| e.ok()) {
182        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
183            continue;
184        }
185        let file_path = entry.path().to_string_lossy().to_string();
186        let ext = Path::new(&file_path)
187            .extension()
188            .and_then(|e| e.to_str())
189            .unwrap_or("");
190
191        if !is_indexable_ext(ext) {
192            continue;
193        }
194
195        if index.files.len() >= max_files {
196            break;
197        }
198
199        let content = match std::fs::read_to_string(&file_path) {
200            Ok(c) => c,
201            Err(_) => continue,
202        };
203
204        let hash = compute_hash(&content);
205        let rel_path = make_relative(&file_path, project_root);
206
207        if let Some((old_hash, old_syms)) = old_files.get(&rel_path) {
208            if *old_hash == hash {
209                if let Some(old_entry) = existing.as_ref().and_then(|p| p.files.get(&rel_path)) {
210                    index.files.insert(rel_path.clone(), old_entry.clone());
211                    for (key, sym) in old_syms {
212                        index.symbols.insert(key.clone(), sym.clone());
213                    }
214                    reused += 1;
215                    add_edges(&mut index, &rel_path, &content, ext);
216                    continue;
217                }
218            }
219        }
220
221        let sigs = signatures::extract_signatures(&content, ext);
222        let line_count = content.lines().count();
223        let token_count = crate::core::tokens::count_tokens(&content);
224        let summary = extract_summary(&content);
225
226        let exports: Vec<String> = sigs
227            .iter()
228            .filter(|s| s.is_exported)
229            .map(|s| s.name.clone())
230            .collect();
231
232        index.files.insert(
233            rel_path.clone(),
234            FileEntry {
235                path: rel_path.clone(),
236                hash,
237                language: ext.to_string(),
238                line_count,
239                token_count,
240                exports,
241                summary,
242            },
243        );
244
245        for sig in &sigs {
246            let (start, end) = find_symbol_range(&content, sig);
247            let key = format!("{}::{}", rel_path, sig.name);
248            index.symbols.insert(
249                key,
250                SymbolEntry {
251                    file: rel_path.clone(),
252                    name: sig.name.clone(),
253                    kind: sig.kind.to_string(),
254                    start_line: start,
255                    end_line: end,
256                    is_exported: sig.is_exported,
257                },
258            );
259        }
260
261        add_edges(&mut index, &rel_path, &content, ext);
262        scanned += 1;
263    }
264
265    if let Err(e) = index.save() {
266        eprintln!("Warning: could not save graph index: {e}");
267    }
268
269    eprintln!(
270        "[graph_index: {} files ({} scanned, {} reused), {} symbols, {} edges]",
271        index.file_count(),
272        scanned,
273        reused,
274        index.symbol_count(),
275        index.edge_count()
276    );
277
278    index
279}
280
281fn add_edges(index: &mut ProjectIndex, rel_path: &str, content: &str, ext: &str) {
282    let dep_info = deps::extract_deps(content, ext);
283    for imp in &dep_info.imports {
284        index.edges.push(IndexEdge {
285            from: rel_path.to_string(),
286            to: imp.clone(),
287            kind: "import".to_string(),
288        });
289    }
290}
291
292fn find_symbol_range(content: &str, sig: &signatures::Signature) -> (usize, usize) {
293    let lines: Vec<&str> = content.lines().collect();
294    let mut start = 0;
295
296    for (i, line) in lines.iter().enumerate() {
297        if line.contains(&sig.name) {
298            let trimmed = line.trim();
299            let is_def = trimmed.starts_with("fn ")
300                || trimmed.starts_with("pub fn ")
301                || trimmed.starts_with("pub(crate) fn ")
302                || trimmed.starts_with("async fn ")
303                || trimmed.starts_with("pub async fn ")
304                || trimmed.starts_with("struct ")
305                || trimmed.starts_with("pub struct ")
306                || trimmed.starts_with("enum ")
307                || trimmed.starts_with("pub enum ")
308                || trimmed.starts_with("trait ")
309                || trimmed.starts_with("pub trait ")
310                || trimmed.starts_with("impl ")
311                || trimmed.starts_with("class ")
312                || trimmed.starts_with("export class ")
313                || trimmed.starts_with("export function ")
314                || trimmed.starts_with("export async function ")
315                || trimmed.starts_with("function ")
316                || trimmed.starts_with("async function ")
317                || trimmed.starts_with("def ")
318                || trimmed.starts_with("async def ")
319                || trimmed.starts_with("func ")
320                || trimmed.starts_with("interface ")
321                || trimmed.starts_with("export interface ")
322                || trimmed.starts_with("type ")
323                || trimmed.starts_with("export type ")
324                || trimmed.starts_with("const ")
325                || trimmed.starts_with("export const ");
326            if is_def {
327                start = i + 1;
328                break;
329            }
330        }
331    }
332
333    if start == 0 {
334        return (1, lines.len().min(20));
335    }
336
337    let base_indent = lines
338        .get(start - 1)
339        .map(|l| l.len() - l.trim_start().len())
340        .unwrap_or(0);
341
342    let mut end = start;
343    let mut brace_depth: i32 = 0;
344    let mut found_open = false;
345
346    for (i, line) in lines.iter().enumerate().skip(start - 1) {
347        for ch in line.chars() {
348            if ch == '{' {
349                brace_depth += 1;
350                found_open = true;
351            } else if ch == '}' {
352                brace_depth -= 1;
353            }
354        }
355
356        end = i + 1;
357
358        if found_open && brace_depth <= 0 {
359            break;
360        }
361
362        if !found_open && i > start {
363            let indent = line.len() - line.trim_start().len();
364            if indent <= base_indent && !line.trim().is_empty() && i > start {
365                end = i;
366                break;
367            }
368        }
369
370        if end - start > 200 {
371            break;
372        }
373    }
374
375    (start, end)
376}
377
/// Returns the first "meaningful" line among the first 20 lines of `content`,
/// trimmed and cut to at most 120 characters.
///
/// Blank lines and lines that start with a comment marker or an
/// import-style keyword are skipped. An empty string is returned when no
/// line qualifies.
fn extract_summary(content: &str) -> String {
    /// Trimmed lines starting with any of these are not summary candidates.
    const SKIPPED_PREFIXES: &[&str] = &[
        "//", "#", "/*", "*", "use ", "import ", "from ", "require(", "package ",
    ];

    content
        .lines()
        .take(20)
        .map(str::trim)
        .find(|candidate| {
            !candidate.is_empty()
                && !SKIPPED_PREFIXES
                    .iter()
                    .any(|prefix| candidate.starts_with(prefix))
        })
        .map(|candidate| candidate.chars().take(120).collect())
        .unwrap_or_default()
}
398
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit result as 16 zero-padded lowercase hex digits.
///
/// NOTE(review): the std docs do not guarantee `DefaultHasher`'s algorithm
/// across Rust releases, while `scan` stores this value in the saved index.
fn compute_hash(content: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(content, &mut state);
    let digest = std::hash::Hasher::finish(&state);
    format!("{digest:016x}")
}
407
/// Hashes `input` with the standard library's `DefaultHasher` and returns the
/// low 32 bits of the result as 8 zero-padded lowercase hex digits.
fn short_hash(input: &str) -> String {
    /// Mask that keeps only the low 32 bits of the 64-bit hash.
    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(input, &mut state);
    let truncated = std::hash::Hasher::finish(&state) & LOW_32_BITS;
    format!("{truncated:08x}")
}
416
/// Returns `path` relative to `root`, with leading path separators removed.
///
/// `root` is stripped only when it matches `path` on a path-component
/// boundary: the remainder must be empty or start with a separator, or `root`
/// itself must be empty or end with one. Without that check `/foo/bar` would
/// be stripped from `/foo/barbaz/x.rs`. When `root` does not match, `path` is
/// returned with only its leading separators removed.
///
/// Both `/` and the platform's main separator count as separators, so the
/// result has no leading `\` on Windows either.
fn make_relative(path: &str, root: &str) -> String {
    fn is_sep(c: char) -> bool {
        c == '/' || c == std::path::MAIN_SEPARATOR
    }

    let rest = match path.strip_prefix(root) {
        Some(rest)
            if rest.is_empty()
                || root.is_empty()
                || root.ends_with(is_sep)
                || rest.starts_with(is_sep) =>
        {
            rest
        }
        // Not under `root` (or only a partial component match): keep the path.
        _ => path,
    };

    rest.trim_start_matches(is_sep).to_string()
}
423
/// Reports whether `ext` (a file extension without the dot, matched
/// case-sensitively) is one of the extensions accepted by the indexer.
fn is_indexable_ext(ext: &str) -> bool {
    /// Extensions accepted by the indexer.
    const INDEXABLE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "rb", "cs",
        "kt", "swift", "php", "ex", "exs", "vue", "svelte",
    ];

    INDEXABLE_EXTENSIONS.contains(&ext)
}
449
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `"import"` edge from `from` to `to`.
    fn import_edge(from: &str, to: &str) -> IndexEdge {
        IndexEdge {
            from: from.to_string(),
            to: to.to_string(),
            kind: "import".to_string(),
        }
    }

    #[test]
    fn test_short_hash_deterministic() {
        let root = "/Users/test/project";
        let first = short_hash(root);
        let second = short_hash(root);
        assert_eq!(first, second);
        assert_eq!(first.len(), 8);
    }

    #[test]
    fn test_make_relative() {
        // A path under the root loses the root prefix.
        let under_root = make_relative("/foo/bar/src/main.rs", "/foo/bar");
        assert_eq!(under_root, "src/main.rs");

        // A path that is already relative is returned unchanged.
        let already_relative = make_relative("src/main.rs", "/foo/bar");
        assert_eq!(already_relative, "src/main.rs");
    }

    #[test]
    fn test_extract_summary() {
        let content = "// comment\nuse std::io;\n\npub fn main() {\n    println!(\"hello\");\n}";
        assert_eq!(extract_summary(content), "pub fn main() {");
    }

    #[test]
    fn test_compute_hash_deterministic() {
        let first = compute_hash("hello world");
        let second = compute_hash("hello world");
        assert_eq!(first, second);
        assert_ne!(first, compute_hash("hello world!"));
    }

    #[test]
    fn test_project_index_new() {
        let idx = ProjectIndex::new("/test");
        assert_eq!(idx.version, INDEX_VERSION);
        assert_eq!(idx.project_root, "/test");
        assert!(idx.files.is_empty());
    }

    #[test]
    fn test_reverse_deps() {
        let mut idx = ProjectIndex::new("/test");
        idx.edges.push(import_edge("a.rs", "b.rs"));
        idx.edges.push(import_edge("c.rs", "b.rs"));

        let deps = idx.get_reverse_deps("b.rs", 1);
        assert_eq!(deps.len(), 2);
        for importer in ["a.rs", "c.rs"] {
            assert!(deps.contains(&importer.to_string()));
        }
    }
}
513}