Skip to main content

the_code_graph_domain/use_cases/
index.rs

1use crate::error::Result;
2use crate::model::IndexStats;
3use crate::ports::{FileSystem, GitProvider, GraphStore, ParseProvider};
4use std::path::{Path, PathBuf};
5use std::time::Instant;
6
/// File extensions (written without the leading dot) that `full_index` passes to
/// `FileSystem::list_files` when discovering the files to index.
const SUPPORTED_EXTENSIONS: &[&str] = &["ts", "tsx", "js", "jsx", "rs", "py", "go"];
8
/// Use case that builds (`full_index`) and refreshes (`incremental_*`) the index.
///
/// Generic over its four collaborators; the `impl` block bounds them as
/// `GraphStore`, `ParseProvider`, `FileSystem` and `GitProvider` respectively.
pub struct IndexUseCase<S, P, F, G> {
    /// Store that receives the parsed file data.
    store: S,
    /// Parser invoked through `parse_and_resolve` on batches of file contents.
    parser: P,
    /// File-system port used to list, read and hash files.
    fs: F,
    /// Git port asked for the modified files by `incremental_index`.
    git: G,
}
15
16impl<S: GraphStore, P: ParseProvider, F: FileSystem, G: GitProvider> IndexUseCase<S, P, F, G> {
17    pub fn new(store: S, parser: P, fs: F, git: G) -> Self {
18        Self {
19            store,
20            parser,
21            fs,
22            git,
23        }
24    }
25
26    pub fn full_index(&self, root: &Path) -> Result<IndexStats> {
27        let start = Instant::now();
28
29        let file_paths = self.fs.list_files(root, SUPPORTED_EXTENSIONS)?;
30
31        let mut files_with_content = Vec::new();
32        for path in &file_paths {
33            let abs_path = root.join(path);
34            match self.fs.read_file(&abs_path) {
35                Ok(content) => {
36                    files_with_content.push((path.clone(), content.into_bytes()));
37                }
38                Err(e) => {
39                    tracing::warn!("skipping {}: {e}", path.display());
40                }
41            }
42        }
43
44        let file_data = self.parser.parse_and_resolve(&files_with_content, root)?;
45
46        let mut files_indexed = 0;
47        let mut symbols_extracted = 0;
48        let mut edges_created = 0;
49
50        for fd in &file_data {
51            self.store
52                .store_file_data(&fd.file, &fd.symbols, &fd.edges)?;
53            files_indexed += 1;
54            symbols_extracted += fd.symbols.len();
55            edges_created += fd.edges.len();
56        }
57
58        Ok(IndexStats {
59            files_indexed,
60            symbols_extracted,
61            edges_created,
62            duration: start.elapsed(),
63        })
64    }
65
66    pub fn incremental_index(&self, root: &Path) -> Result<IndexStats> {
67        let modified = self.git.modified_files()?;
68        self.run_incremental(root, modified)
69    }
70
71    pub fn incremental_files(&self, root: &Path, files: Vec<PathBuf>) -> Result<IndexStats> {
72        self.run_incremental(root, files)
73    }
74
75    fn run_incremental(&self, root: &Path, changed_paths: Vec<PathBuf>) -> Result<IndexStats> {
76        run_incremental_pipeline(&self.store, &self.parser, &self.fs, root, changed_paths)
77    }
78}
79
80/// Core incremental pipeline: hash-check, 1-hop dependent discovery, re-parse, atomic store update.
81///
82/// Extracted as a free function so both `IndexUseCase` and `ensure_fresh` can reuse it
83/// without ownership issues.
84pub fn run_incremental_pipeline<S: GraphStore, P: ParseProvider, F: FileSystem>(
85    store: &S,
86    parser: &P,
87    fs: &F,
88    root: &Path,
89    changed_paths: Vec<PathBuf>,
90) -> Result<IndexStats> {
91    let start = Instant::now();
92    let mut reparse_set = Vec::new();
93
94    // Phase 1: Hash check — filter to actually-changed files
95    for path in &changed_paths {
96        let abs_path = root.join(path);
97        let current_hash = match fs.file_hash(&abs_path) {
98            Ok(h) => h,
99            Err(_) => {
100                // File deleted — remove from store
101                store.remove_file_data(path)?;
102                continue;
103            }
104        };
105        let stored = store.get_file(path)?;
106        if stored.as_ref().is_some_and(|f| f.hash == current_hash) {
107            continue; // unchanged
108        }
109        reparse_set.push(path.clone());
110    }
111
112    // Phase 2: Find 1-hop dependents
113    let mut dependent_set = Vec::new();
114    let path_refs: Vec<&Path> = reparse_set.iter().map(|p| p.as_path()).collect();
115    let file_symbols_all = store.symbols_for_files(&path_refs)?;
116    for path in &reparse_set {
117        let file_symbols: Vec<_> = file_symbols_all
118            .iter()
119            .filter(|s| s.location.file == *path)
120            .collect();
121        for sym in file_symbols {
122            let incoming = store.get_edges_to(&sym.qualified_name)?;
123            for edge in incoming {
124                if let Some(source_sym) = store.get_symbol(&edge.source)? {
125                    let dep_path = source_sym.location.file.clone();
126                    if !reparse_set.contains(&dep_path) && !dependent_set.contains(&dep_path) {
127                        dependent_set.push(dep_path);
128                    }
129                }
130            }
131        }
132    }
133    reparse_set.extend(dependent_set);
134    reparse_set.sort();
135    reparse_set.dedup();
136
137    if reparse_set.is_empty() {
138        return Ok(IndexStats {
139            files_indexed: 0,
140            symbols_extracted: 0,
141            edges_created: 0,
142            duration: start.elapsed(),
143        });
144    }
145
146    // Phase 3: Read + parse + store
147    let mut files_with_content = Vec::new();
148    for path in &reparse_set {
149        let abs_path = root.join(path);
150        match fs.read_file(&abs_path) {
151            Ok(content) => files_with_content.push((path.clone(), content.into_bytes())),
152            Err(e) => tracing::warn!("skipping {}: {e}", path.display()),
153        }
154    }
155
156    let file_data = parser.parse_and_resolve(&files_with_content, root)?;
157    let mut stats = IndexStats {
158        files_indexed: 0,
159        symbols_extracted: 0,
160        edges_created: 0,
161        duration: start.elapsed(),
162    };
163    for fd in &file_data {
164        store.remove_file_data(&fd.file.path)?;
165        store.store_file_data(&fd.file, &fd.symbols, &fd.edges)?;
166        stats.files_indexed += 1;
167        stats.symbols_extracted += fd.symbols.len();
168        stats.edges_created += fd.edges.len();
169    }
170    stats.duration = start.elapsed();
171    Ok(stats)
172}
173
174#[cfg(test)]
175mod tests {
176    use super::*;
177    use crate::model::*;
178    use crate::ports::FileData;
179    use crate::test_support::*;
180    use std::path::PathBuf;
181
182    fn make_file_data(path: &str, num_symbols: usize, num_edges: usize) -> FileData {
183        let symbols: Vec<SymbolNode> = (0..num_symbols)
184            .map(|i| SymbolNode {
185                name: format!("sym{i}"),
186                qualified_name: format!("{path}::sym{i}"),
187                kind: SymbolKind::Function,
188                location: Location {
189                    file: path.into(),
190                    line_start: i + 1,
191                    line_end: i + 2,
192                    col_start: 0,
193                    col_end: 10,
194                },
195                visibility: Visibility::Public,
196                is_exported: true,
197                is_async: false,
198                is_test: false,
199                decorators: vec![],
200                signature: None,
201            })
202            .collect();
203
204        let edges: Vec<Edge> = (0..num_edges)
205            .map(|i| Edge {
206                kind: EdgeKind::Contains,
207                source: path.to_string(),
208                target: format!("{path}::sym{i}"),
209                metadata: None,
210            })
211            .collect();
212
213        FileData {
214            file: FileNode {
215                path: path.into(),
216                language: Language::TypeScript,
217                hash: "abc123".into(),
218            },
219            symbols,
220            edges,
221        }
222    }
223
224    #[test]
225    fn full_index_with_two_files_returns_correct_stats() {
226        let fd1 = make_file_data("src/a.ts", 3, 3);
227        let fd2 = make_file_data("src/b.ts", 2, 1);
228        let store = InMemoryGraphStore::new();
229        let parser = MockParseProvider::new(vec![fd1, fd2]);
230        let fs = MockFileSystem::new(vec![
231            (PathBuf::from("src/a.ts"), "content a".into()),
232            (PathBuf::from("src/b.ts"), "content b".into()),
233        ]);
234        let git = MockGitProvider::new();
235        let uc = IndexUseCase::new(store, parser, fs, git);
236        let stats = uc.full_index(Path::new("/project")).unwrap();
237        assert_eq!(stats.files_indexed, 2);
238        assert_eq!(stats.symbols_extracted, 5);
239        assert_eq!(stats.edges_created, 4);
240    }
241
242    #[test]
243    fn full_index_empty_file_list_returns_zeros() {
244        let store = InMemoryGraphStore::new();
245        let parser = MockParseProvider::new(vec![]);
246        let fs = MockFileSystem::new(vec![]);
247        let git = MockGitProvider::new();
248        let uc = IndexUseCase::new(store, parser, fs, git);
249        let stats = uc.full_index(Path::new("/project")).unwrap();
250        assert_eq!(stats.files_indexed, 0);
251        assert_eq!(stats.symbols_extracted, 0);
252        assert_eq!(stats.edges_created, 0);
253    }
254
255    #[test]
256    fn full_index_duration_is_non_zero() {
257        let store = InMemoryGraphStore::new();
258        let parser = MockParseProvider::new(vec![make_file_data("src/a.ts", 1, 1)]);
259        let fs = MockFileSystem::new(vec![(PathBuf::from("src/a.ts"), "content".into())]);
260        let git = MockGitProvider::new();
261        let uc = IndexUseCase::new(store, parser, fs, git);
262        let stats = uc.full_index(Path::new("/project")).unwrap();
263        assert!(stats.duration.as_nanos() > 0);
264    }
265
266    #[test]
267    fn full_index_with_three_files_reads_all() {
268        let fds = vec![
269            make_file_data("a.rs", 1, 1),
270            make_file_data("b.rs", 1, 1),
271            make_file_data("c.rs", 1, 1),
272        ];
273        let store = InMemoryGraphStore::new();
274        let parser = MockParseProvider::new(fds);
275        let fs = MockFileSystem::new(vec![
276            (PathBuf::from("a.rs"), "fn a(){}".into()),
277            (PathBuf::from("b.rs"), "fn b(){}".into()),
278            (PathBuf::from("c.rs"), "fn c(){}".into()),
279        ]);
280        let git = MockGitProvider::new();
281        let uc = IndexUseCase::new(store, parser, fs, git);
282        let stats = uc.full_index(Path::new("/project")).unwrap();
283        assert_eq!(stats.files_indexed, 3);
284    }
285
286    // --- Incremental pipeline tests ---
287
288    #[test]
289    fn incremental_index_skips_unchanged_files() {
290        // Store has file with hash "abc123", fs returns same hash → no re-parse
291        let mut store = InMemoryGraphStore::new();
292        store.insert_file(FileNode {
293            path: "src/a.ts".into(),
294            language: Language::TypeScript,
295            hash: "abc123".into(),
296        });
297        let parser = MockParseProvider::new(vec![]);
298        let fs = MockFileSystem::new(vec![])
299            .with_hashes(vec![(PathBuf::from("/project/src/a.ts"), "abc123".into())]);
300        let git = MockGitProvider::with_modified(vec![PathBuf::from("src/a.ts")]);
301        let uc = IndexUseCase::new(store, parser, fs, git);
302        let stats = uc.incremental_index(Path::new("/project")).unwrap();
303        assert_eq!(stats.files_indexed, 0);
304    }
305
306    #[test]
307    fn incremental_index_reparses_changed_files() {
308        // Store has file with hash "old", fs returns "new" → re-parse
309        let mut store = InMemoryGraphStore::new();
310        store.insert_file(FileNode {
311            path: "src/a.ts".into(),
312            language: Language::TypeScript,
313            hash: "old_hash".into(),
314        });
315        let fd = make_file_data("src/a.ts", 2, 1);
316        let parser = MockParseProvider::new(vec![fd]);
317        let fs = MockFileSystem::new(vec![(PathBuf::from("/project/src/a.ts"), "content".into())])
318            .with_hashes(vec![(
319                PathBuf::from("/project/src/a.ts"),
320                "new_hash".into(),
321            )]);
322        let git = MockGitProvider::with_modified(vec![PathBuf::from("src/a.ts")]);
323        let uc = IndexUseCase::new(store, parser, fs, git);
324        let stats = uc.incremental_index(Path::new("/project")).unwrap();
325        assert_eq!(stats.files_indexed, 1);
326        assert_eq!(stats.symbols_extracted, 2);
327    }
328
329    #[test]
330    fn incremental_index_reparses_one_hop_dependents() {
331        // File A changed, File B has edge targeting a symbol in A → both re-parsed
332        let mut store = InMemoryGraphStore::new();
333        store.insert_file(FileNode {
334            path: "src/a.ts".into(),
335            language: Language::TypeScript,
336            hash: "old_hash".into(),
337        });
338        store.insert_file(FileNode {
339            path: "src/b.ts".into(),
340            language: Language::TypeScript,
341            hash: "b_hash".into(),
342        });
343        // A has symbol "src/a.ts::func"
344        store.insert_symbol(SymbolNode {
345            name: "func".into(),
346            qualified_name: "src/a.ts::func".into(),
347            kind: SymbolKind::Function,
348            location: Location {
349                file: "src/a.ts".into(),
350                line_start: 1,
351                line_end: 5,
352                col_start: 0,
353                col_end: 10,
354            },
355            visibility: Visibility::Public,
356            is_exported: true,
357            is_async: false,
358            is_test: false,
359            decorators: vec![],
360            signature: None,
361        });
362        // B has symbol "src/b.ts::caller" that calls A's func
363        store.insert_symbol(SymbolNode {
364            name: "caller".into(),
365            qualified_name: "src/b.ts::caller".into(),
366            kind: SymbolKind::Function,
367            location: Location {
368                file: "src/b.ts".into(),
369                line_start: 1,
370                line_end: 5,
371                col_start: 0,
372                col_end: 10,
373            },
374            visibility: Visibility::Public,
375            is_exported: true,
376            is_async: false,
377            is_test: false,
378            decorators: vec![],
379            signature: None,
380        });
381        // Edge: B::caller → A::func (B imports/calls A)
382        store.insert_edge(Edge {
383            kind: EdgeKind::Calls,
384            source: "src/b.ts::caller".into(),
385            target: "src/a.ts::func".into(),
386            metadata: None,
387        });
388        // Parser returns data for both files
389        let fd_a = make_file_data("src/a.ts", 1, 0);
390        let fd_b = make_file_data("src/b.ts", 1, 0);
391        let parser = MockParseProvider::new(vec![fd_a, fd_b]);
392        let fs = MockFileSystem::new(vec![
393            (PathBuf::from("/project/src/a.ts"), "new content".into()),
394            (PathBuf::from("/project/src/b.ts"), "b content".into()),
395        ])
396        .with_hashes(vec![
397            (PathBuf::from("/project/src/a.ts"), "new_hash".into()),
398            (PathBuf::from("/project/src/b.ts"), "b_hash".into()),
399        ]);
400        // Only A is reported as modified by git
401        let git = MockGitProvider::with_modified(vec![PathBuf::from("src/a.ts")]);
402        let uc = IndexUseCase::new(store, parser, fs, git);
403        let stats = uc.incremental_index(Path::new("/project")).unwrap();
404        // Both A (changed) and B (dependent) should be re-parsed
405        assert_eq!(stats.files_indexed, 2);
406    }
407
408    #[test]
409    fn incremental_index_no_modified_files_returns_zeros() {
410        let store = InMemoryGraphStore::new();
411        let parser = MockParseProvider::new(vec![]);
412        let fs = MockFileSystem::new(vec![]);
413        let git = MockGitProvider::new(); // empty modified list
414        let uc = IndexUseCase::new(store, parser, fs, git);
415        let stats = uc.incremental_index(Path::new("/project")).unwrap();
416        assert_eq!(stats.files_indexed, 0);
417        assert_eq!(stats.symbols_extracted, 0);
418        assert_eq!(stats.edges_created, 0);
419    }
420
421    #[test]
422    fn incremental_files_processes_explicit_list() {
423        // Explicit file list bypasses git status
424        let mut store = InMemoryGraphStore::new();
425        store.insert_file(FileNode {
426            path: "src/a.ts".into(),
427            language: Language::TypeScript,
428            hash: "old_hash".into(),
429        });
430        let fd = make_file_data("src/a.ts", 1, 1);
431        let parser = MockParseProvider::new(vec![fd]);
432        let fs = MockFileSystem::new(vec![(PathBuf::from("/project/src/a.ts"), "content".into())])
433            .with_hashes(vec![(
434                PathBuf::from("/project/src/a.ts"),
435                "new_hash".into(),
436            )]);
437        let git = MockGitProvider::new(); // no modified — git not consulted
438        let uc = IndexUseCase::new(store, parser, fs, git);
439        let stats = uc
440            .incremental_files(Path::new("/project"), vec![PathBuf::from("src/a.ts")])
441            .unwrap();
442        assert_eq!(stats.files_indexed, 1);
443    }
444
445    #[test]
446    fn incremental_files_skips_unchanged_in_list() {
447        // Explicit list but hash matches → skip
448        let mut store = InMemoryGraphStore::new();
449        store.insert_file(FileNode {
450            path: "src/a.ts".into(),
451            language: Language::TypeScript,
452            hash: "same_hash".into(),
453        });
454        let parser = MockParseProvider::new(vec![]);
455        let fs = MockFileSystem::new(vec![]).with_hashes(vec![(
456            PathBuf::from("/project/src/a.ts"),
457            "same_hash".into(),
458        )]);
459        let git = MockGitProvider::new();
460        let uc = IndexUseCase::new(store, parser, fs, git);
461        let stats = uc
462            .incremental_files(Path::new("/project"), vec![PathBuf::from("src/a.ts")])
463            .unwrap();
464        assert_eq!(stats.files_indexed, 0);
465    }
466
467    #[test]
468    fn incremental_index_removes_deleted_files() {
469        // File reported as modified but hash fails (deleted) → remove from store
470        let mut store = InMemoryGraphStore::new();
471        store.insert_file(FileNode {
472            path: "src/deleted.ts".into(),
473            language: Language::TypeScript,
474            hash: "old".into(),
475        });
476        let parser = MockParseProvider::new(vec![]);
477        // No hash for deleted file → file_hash will error
478        let fs = MockFileSystem::new(vec![]).with_hashes(vec![]);
479        let git = MockGitProvider::with_modified(vec![PathBuf::from("src/deleted.ts")]);
480        let uc = IndexUseCase::new(store, parser, fs, git);
481        let stats = uc.incremental_index(Path::new("/project")).unwrap();
482        // Deleted file is removed, not re-parsed
483        assert_eq!(stats.files_indexed, 0);
484    }
485}