// infigraph_core/lib.rs

1mod analysis;
2pub mod bench;
3pub mod bridges;
4pub mod check;
5pub mod cluster;
6pub mod diff;
7pub mod embed;
8pub mod export;
9pub mod extract;
10pub mod graph;
11pub mod lang;
12pub mod learned;
13pub mod manifest;
14pub mod model;
15pub mod multi;
16pub mod patterns;
17pub mod refactor;
18mod report;
19pub mod resolve;
20pub mod review;
21pub mod routes;
22pub mod scip;
23pub mod search;
24pub mod security;
25pub mod sequence;
26pub mod viz;
27pub mod vuln;
28pub mod watch;
29
30use std::path::{Path, PathBuf};
31
32use anyhow::{Context, Result};
33use rayon::prelude::*;
34use sha2::{Digest, Sha256};
35
36use graph::GraphStore;
37use lang::LanguageRegistry;
38use model::FileExtraction;
39
/// Escape `s` so it can be embedded in a single-quoted query string literal.
///
/// Backslashes are doubled and single quotes are prefixed with a backslash;
/// every other character is copied through unchanged.
fn escape_str(s: &str) -> String {
    // Single pass, one allocation; the output is at least as long as the input.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if matches!(ch, '\\' | '\'') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
43
44/// The main entry point for the infigraph framework.
45pub struct Infigraph {
46    root: PathBuf,
47    db_path: PathBuf,
48    registry: LanguageRegistry,
49    store: Option<GraphStore>,
50}
51
52impl Infigraph {
53    /// Open a project directory. Creates `.infigraph/` if it doesn't exist.
54    pub fn open(root: &Path, registry: LanguageRegistry) -> Result<Self> {
55        let root = root.canonicalize().context("invalid project root")?;
56        let db_path = root.join(".infigraph").join("graph");
57        Ok(Self {
58            root,
59            db_path,
60            registry,
61            store: None,
62        })
63    }
64
65    /// Initialize the graph store (creates DB on first run).
66    pub fn init(&mut self) -> Result<()> {
67        let store = GraphStore::open(&self.db_path)?;
68        self.store = Some(store);
69        Ok(())
70    }
71
72    /// Index all supported files in the project, building the graph.
73    /// Skips files whose content hash matches the stored hash (incremental).
74    pub fn index(&self) -> Result<IndexResult> {
75        let store = self.store.as_ref().context("call init() first")?;
76
77        let files = self.collect_files()?;
78        let total = files.len();
79
80        // Load existing hashes for incremental skip
81        let existing_hashes = store.get_file_hashes().unwrap_or_default();
82
83        // Parse all files in parallel; skip unchanged ones
84        let done = std::sync::atomic::AtomicUsize::new(0);
85        let extractions: Vec<FileExtraction> = files
86            .par_iter()
87            .filter_map(|path| {
88                let rel_path = path
89                    .strip_prefix(&self.root)
90                    .ok()?
91                    .to_string_lossy()
92                    .replace('\\', "/");
93                let source = std::fs::read(path).ok()?;
94                // Skip if hash unchanged
95                let hash = {
96                    let mut h = Sha256::new();
97                    h.update(&source);
98                    format!("{:x}", h.finalize())
99                };
100                let n = done.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
101                let pct = n * 100 / total;
102                let prev_pct = (n - 1) * 100 / total;
103                if (pct / 25) > (prev_pct / 25) || n == total {
104                    eprintln!("Parsing: {}/{} ({}%)", n, total, pct);
105                }
106                if existing_hashes.get(&rel_path).map(|s| s.as_str()) == Some(hash.as_str()) {
107                    return None; // unchanged
108                }
109                let pack = self.registry.for_file_with_content(&rel_path, &source)?;
110                extract::extract_file(&rel_path, &source, pack).ok()
111            })
112            .collect();
113
114        let indexed = extractions.len();
115
116        // Write all changed files — use CSV bulk load for fresh index or large batches,
117        // fall back to per-file UNWIND only for small incremental updates.
118        let use_csv = !extractions.is_empty() && (existing_hashes.is_empty() || indexed > 100);
119        if !extractions.is_empty() {
120            if use_csv {
121                if !existing_hashes.is_empty() {
122                    // Incremental bulk: delete old data for changed files before CSV load
123                    let conn = store.connection()?;
124                    conn.query("BEGIN TRANSACTION")
125                        .context("failed to begin delete transaction")?;
126                    let file_list: Vec<String> = extractions
127                        .iter()
128                        .map(|e| format!("'{}'", escape_str(&e.file)))
129                        .collect();
130                    let files_in = file_list.join(", ");
131                    let _ = conn.query(&format!(
132                        "MATCH (f:File)-[:DEFINES]->(s:Symbol)-[:HAS_STATEMENT]->(st:Statement) WHERE f.id IN [{}] DETACH DELETE st",
133                        files_in
134                    ));
135                    let _ = conn.query(&format!(
136                        "MATCH (s:Symbol) WHERE s.file IN [{}] DETACH DELETE s",
137                        files_in
138                    ));
139                    let _ = conn.query(&format!(
140                        "MATCH (m:Module) WHERE m.file IN [{}] DETACH DELETE m",
141                        files_in
142                    ));
143                    let _ = conn.query(&format!(
144                        "MATCH (f:File) WHERE f.id IN [{}] DETACH DELETE f",
145                        files_in
146                    ));
147                    conn.query("COMMIT")
148                        .context("failed to commit delete transaction")?;
149                }
150                store.upsert_all_parquet(&extractions)?;
151            } else {
152                // Small incremental: per-file UNWIND (overhead acceptable for <100 files)
153                let conn = store.connection()?;
154                conn.query("BEGIN TRANSACTION")
155                    .context("failed to begin index transaction")?;
156                let file_list: Vec<String> = extractions
157                    .iter()
158                    .map(|e| format!("'{}'", escape_str(&e.file)))
159                    .collect();
160                let files_in = file_list.join(", ");
161                let _ = conn.query(&format!(
162                    "MATCH (f:File)-[:DEFINES]->(s:Symbol)-[:HAS_STATEMENT]->(st:Statement) WHERE f.id IN [{}] DETACH DELETE st",
163                    files_in
164                ));
165                let _ = conn.query(&format!(
166                    "MATCH (s:Symbol) WHERE s.file IN [{}] DETACH DELETE s",
167                    files_in
168                ));
169                let _ = conn.query(&format!(
170                    "MATCH (m:Module) WHERE m.file IN [{}] DETACH DELETE m",
171                    files_in
172                ));
173                let _ = conn.query(&format!(
174                    "MATCH (f:File) WHERE f.id IN [{}] DETACH DELETE f",
175                    files_in
176                ));
177                for extraction in &extractions {
178                    store.upsert_file_conn_no_delete(&conn, extraction)?;
179                }
180                conn.query("COMMIT")
181                    .context("failed to commit index transaction")?;
182                // Folder upsert outside transaction — COPY FROM can't run inside explicit txn
183                let file_paths: Vec<&str> = extractions.iter().map(|e| e.file.as_str()).collect();
184                store.upsert_folders_bulk_conn(&conn, &file_paths)?;
185            }
186        }
187
188        // Bulk-write folder hierarchy for CSV path — no explicit txn wrapper
189        // because upsert_folders_bulk_conn may use COPY FROM which can't run inside explicit txn
190        if use_csv {
191            let file_paths: Vec<&str> = extractions.iter().map(|e| e.file.as_str()).collect();
192            let conn = store.connection()?;
193            store.upsert_folders_bulk_conn(&conn, &file_paths)?;
194        }
195
196        // Post-indexing: resolve cross-file call targets using full graph symbol table
197        let resolve_stats = resolve::resolve_calls_incremental(store, &extractions, None)
198            .unwrap_or_else(|e| {
199                eprintln!("warning: call resolution failed: {e}");
200                resolve::ResolveStats {
201                    total_calls: 0,
202                    resolved: 0,
203                    unresolved: 0,
204                    learned_resolved: 0,
205                    inherits_resolved: 0,
206                }
207            });
208
209        Ok(IndexResult {
210            total_files: total,
211            indexed_files: indexed,
212            extractions,
213            resolve_stats,
214        })
215    }
216
217    /// Get graph statistics.
218    pub fn stats(&self) -> Result<graph::GraphStats> {
219        let store = self.store.as_ref().context("call init() first")?;
220        store.stats()
221    }
222
223    /// Access the underlying graph store (for direct queries).
224    pub fn store(&self) -> Option<&GraphStore> {
225        self.store.as_ref()
226    }
227
228    /// Access the language registry.
229    pub fn registry(&self) -> &LanguageRegistry {
230        &self.registry
231    }
232
233    /// Get the project root path.
234    pub fn root(&self) -> &Path {
235        &self.root
236    }
237
238    /// Index (or re-index) a single file by its path on disk.
239    /// Path may be absolute or relative to project root.
240    pub fn index_file(&self, path: &Path) -> Result<()> {
241        let store = self.store.as_ref().context("call init() first")?;
242        let rel = if path.is_absolute() {
243            path.strip_prefix(&self.root)
244                .unwrap_or(path)
245                .to_string_lossy()
246                .replace('\\', "/")
247        } else {
248            path.to_string_lossy().replace('\\', "/")
249        };
250        let abs = self.root.join(&rel);
251        let source = std::fs::read(&abs).with_context(|| format!("read {}", abs.display()))?;
252        let pack = self
253            .registry
254            .for_file_with_content(&rel, &source)
255            .with_context(|| format!("no language for {rel}"))?;
256        let extraction = extract::extract_file(&rel, &source, pack)?;
257        store.upsert_file(&extraction)?;
258        Ok(())
259    }
260
261    /// Index a batch of files by path, returning an IndexResult with all extractions.
262    pub fn index_files(&self, paths: &[PathBuf]) -> Result<IndexResult> {
263        let store = self.store.as_ref().context("call init() first")?;
264
265        if paths.is_empty() {
266            return Ok(IndexResult {
267                total_files: 0,
268                indexed_files: 0,
269                extractions: Vec::new(),
270                resolve_stats: resolve::ResolveStats {
271                    total_calls: 0,
272                    resolved: 0,
273                    unresolved: 0,
274                    learned_resolved: 0,
275                    inherits_resolved: 0,
276                },
277            });
278        }
279
280        let extractions: Vec<FileExtraction> = paths
281            .par_iter()
282            .filter_map(|path| {
283                let rel = if path.is_absolute() {
284                    path.strip_prefix(&self.root)
285                        .unwrap_or(path)
286                        .to_string_lossy()
287                        .replace('\\', "/")
288                } else {
289                    path.to_string_lossy().replace('\\', "/")
290                };
291                let abs = self.root.join(&rel);
292                let source = std::fs::read(&abs).ok()?;
293                let pack = self.registry.for_file_with_content(&rel, &source)?;
294                extract::extract_file(&rel, &source, pack).ok()
295            })
296            .collect();
297
298        let extractions = {
299            let mut seen = std::collections::HashSet::new();
300            extractions
301                .into_iter()
302                .filter(|e| seen.insert(e.file.clone()))
303                .collect::<Vec<_>>()
304        };
305
306        let indexed = extractions.len();
307
308        if !extractions.is_empty() {
309            let conn = store.connection()?;
310            conn.query("BEGIN TRANSACTION")
311                .context("failed to begin batch delete transaction")?;
312            let file_list: Vec<String> = extractions
313                .iter()
314                .map(|e| format!("'{}'", escape_str(&e.file)))
315                .collect();
316            let files_in = file_list.join(", ");
317            let _ = conn.query(&format!(
318                "MATCH (f:File)-[:DEFINES]->(s:Symbol)-[:HAS_STATEMENT]->(st:Statement) WHERE f.id IN [{files_in}] DETACH DELETE st"
319            ));
320            let _ = conn.query(&format!(
321                "MATCH (s:Symbol) WHERE s.file IN [{files_in}] DETACH DELETE s"
322            ));
323            let _ = conn.query(&format!(
324                "MATCH (m:Module) WHERE m.file IN [{files_in}] DETACH DELETE m"
325            ));
326            let _ = conn.query(&format!(
327                "MATCH (f:File) WHERE f.id IN [{files_in}] DETACH DELETE f"
328            ));
329            conn.query("COMMIT")
330                .context("failed to commit batch delete transaction")?;
331
332            if indexed > 10 {
333                store.upsert_all_parquet(&extractions)?;
334            } else {
335                let conn = store.connection()?;
336                store.upsert_all_bulk(&conn, &extractions)?;
337            }
338
339            let file_paths: Vec<&str> = extractions.iter().map(|e| e.file.as_str()).collect();
340            let conn = store.connection()?;
341            store.upsert_folders_bulk_conn(&conn, &file_paths)?;
342        }
343
344        let resolve_stats = resolve::resolve_calls_incremental(store, &extractions, None)
345            .unwrap_or_else(|e| {
346                eprintln!("warning: call resolution failed: {e}");
347                resolve::ResolveStats {
348                    total_calls: 0,
349                    resolved: 0,
350                    unresolved: 0,
351                    learned_resolved: 0,
352                    inherits_resolved: 0,
353                }
354            });
355
356        Ok(IndexResult {
357            total_files: paths.len(),
358            indexed_files: indexed,
359            extractions,
360            resolve_stats,
361        })
362    }
363
364    /// Detect cross-language bridges (FFI, JNI, cgo, gRPC, P/Invoke, WASM, ctypes).
365    pub fn detect_bridges(&self) -> Result<bridges::BridgeScanResult> {
366        bridges::detect_bridges(&self.root)
367    }
368
369    /// Remove a deleted file from the graph.
370    pub fn remove_file(&self, path: &Path) -> Result<()> {
371        let store = self.store.as_ref().context("call init() first")?;
372        let rel = if path.is_absolute() {
373            path.strip_prefix(&self.root)
374                .unwrap_or(path)
375                .to_string_lossy()
376                .replace('\\', "/")
377        } else {
378            path.to_string_lossy().replace('\\', "/")
379        };
380        store.remove_file(&rel)
381    }
382
383    fn collect_files(&self) -> Result<Vec<PathBuf>> {
384        use ignore::WalkBuilder;
385
386        let mut files = Vec::new();
387        let walker = WalkBuilder::new(&self.root)
388            .hidden(true)
389            .add_custom_ignore_filename(".infigraphignore")
390            .git_ignore(true)
391            .filter_entry(|e| {
392                let name = e.file_name().to_string_lossy();
393                !matches!(
394                    name.as_ref(),
395                    ".infigraph" | "node_modules" | "__pycache__" | ".tox"
396                )
397            })
398            .build();
399
400        for result in walker {
401            let entry = result?;
402            if entry.file_type().is_some_and(|ft| ft.is_file()) {
403                let path = entry.path();
404                if self.registry.for_file(&path.to_string_lossy()).is_some() {
405                    files.push(path.to_path_buf());
406                }
407            }
408        }
409        Ok(files)
410    }
411}
412
413pub struct IndexResult {
414    pub total_files: usize,
415    pub indexed_files: usize,
416    pub extractions: Vec<FileExtraction>,
417    pub resolve_stats: resolve::ResolveStats,
418}