// infigraph_core/lib.rs

1mod analysis;
2pub mod bench;
3pub mod bridges;
4pub mod check;
5pub mod cluster;
6pub mod concerns;
7pub mod config;
8pub mod diff;
9pub mod embed;
10pub mod export;
11pub mod extract;
12pub mod graph;
13pub mod lang;
14pub mod learned;
15pub mod manifest;
16pub mod model;
17pub mod multi;
18pub mod patterns;
19pub mod refactor;
20pub mod reflection;
21mod report;
22pub mod resolve;
23pub mod review;
24pub mod routes;
25pub mod scip;
26pub mod search;
27pub mod security;
28pub mod sequence;
29pub mod structured;
30pub mod taint;
31pub mod viz;
32pub mod vuln;
33pub mod watch;
34
35use std::path::{Path, PathBuf};
36
37use anyhow::{Context, Result};
38use rayon::prelude::*;
39use sha2::{Digest, Sha256};
40
41use graph::GraphStore;
42use lang::LanguageRegistry;
43use model::FileExtraction;
44
/// Escapes `s` so it can be embedded inside a single-quoted query literal.
///
/// Each backslash becomes `\\` and each single quote becomes `\'`; every
/// other character is copied through unchanged.
pub(crate) fn escape_str(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            other => escaped.push(other),
        }
    }
    escaped
}
48
49/// The main entry point for the infigraph framework.
50pub struct Infigraph {
51    root: PathBuf,
52    db_path: PathBuf,
53    registry: LanguageRegistry,
54    store: Option<GraphStore>,
55}
56
57impl Infigraph {
58    /// Open a project directory. Creates `.infigraph/` if it doesn't exist.
59    pub fn open(root: &Path, registry: LanguageRegistry) -> Result<Self> {
60        let root = root.canonicalize().context("invalid project root")?;
61        let db_path = root.join(".infigraph").join("graph");
62        Ok(Self {
63            root,
64            db_path,
65            registry,
66            store: None,
67        })
68    }
69
70    /// Initialize the graph store (creates DB on first run).
71    pub fn init(&mut self) -> Result<()> {
72        let store = GraphStore::open(&self.db_path)?;
73        self.store = Some(store);
74        Ok(())
75    }
76
77    /// Index all supported files in the project, building the graph.
78    /// Skips files whose content hash matches the stored hash (incremental).
79    pub fn index(&self) -> Result<IndexResult> {
80        let store = self.store.as_ref().context("call init() first")?;
81
82        let files = self.collect_files()?;
83        let total = files.len();
84
85        // Load existing hashes for incremental skip
86        let existing_hashes = store.get_file_hashes().unwrap_or_default();
87
88        // Parse all files in parallel; skip unchanged ones
89        let done = std::sync::atomic::AtomicUsize::new(0);
90        let extractions: Vec<FileExtraction> = files
91            .par_iter()
92            .filter_map(|path| {
93                let rel_path = path
94                    .strip_prefix(&self.root)
95                    .ok()?
96                    .to_string_lossy()
97                    .replace('\\', "/");
98                let source = std::fs::read(path).ok()?;
99                // Skip if hash unchanged
100                let hash = {
101                    let mut h = Sha256::new();
102                    h.update(&source);
103                    format!("{:x}", h.finalize())
104                };
105                let n = done.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
106                let pct = n * 100 / total;
107                let prev_pct = (n - 1) * 100 / total;
108                if (pct / 25) > (prev_pct / 25) || n == total {
109                    eprintln!("Parsing: {}/{} ({}%)", n, total, pct);
110                }
111                if existing_hashes.get(&rel_path).map(|s| s.as_str()) == Some(hash.as_str()) {
112                    return None; // unchanged
113                }
114                let pack = self.registry.for_file_with_content(&rel_path, &source)?;
115                extract::extract_file(&rel_path, &source, pack).ok()
116            })
117            .collect();
118
119        let indexed = extractions.len();
120
121        // Write all changed files — use CSV bulk load for fresh index or large batches,
122        // fall back to per-file UNWIND only for small incremental updates.
123        let use_csv = !extractions.is_empty() && (existing_hashes.is_empty() || indexed > 100);
124        let _write_lock = if !extractions.is_empty() {
125            Some(store.write_lock()?)
126        } else {
127            None
128        };
129
130        if !extractions.is_empty() {
131            if use_csv {
132                if !existing_hashes.is_empty() {
133                    let conn = store.connection()?;
134                    conn.query("BEGIN TRANSACTION")
135                        .context("failed to begin delete transaction")?;
136                    let file_list: Vec<String> = extractions
137                        .iter()
138                        .map(|e| format!("'{}'", escape_str(&e.file)))
139                        .collect();
140                    let files_in = file_list.join(", ");
141                    let _ = conn.query(&format!(
142                        "MATCH (f:File)-[:DEFINES]->(s:Symbol)-[:HAS_STATEMENT]->(st:Statement) WHERE f.id IN [{}] DETACH DELETE st",
143                        files_in
144                    ));
145                    let _ = conn.query(&format!(
146                        "MATCH (s:Symbol) WHERE s.file IN [{}] DETACH DELETE s",
147                        files_in
148                    ));
149                    let _ = conn.query(&format!(
150                        "MATCH (m:Module) WHERE m.file IN [{}] DETACH DELETE m",
151                        files_in
152                    ));
153                    let _ = conn.query(&format!(
154                        "MATCH (f:File) WHERE f.id IN [{}] DETACH DELETE f",
155                        files_in
156                    ));
157                    conn.query("COMMIT")
158                        .context("failed to commit delete transaction")?;
159                }
160                let conn = store.connection()?;
161                store.upsert_all_parquet_conn(&conn, &extractions)?;
162            } else {
163                let conn = store.connection()?;
164                conn.query("BEGIN TRANSACTION")
165                    .context("failed to begin index transaction")?;
166                let file_list: Vec<String> = extractions
167                    .iter()
168                    .map(|e| format!("'{}'", escape_str(&e.file)))
169                    .collect();
170                let files_in = file_list.join(", ");
171                let _ = conn.query(&format!(
172                    "MATCH (f:File)-[:DEFINES]->(s:Symbol)-[:HAS_STATEMENT]->(st:Statement) WHERE f.id IN [{}] DETACH DELETE st",
173                    files_in
174                ));
175                let _ = conn.query(&format!(
176                    "MATCH (s:Symbol) WHERE s.file IN [{}] DETACH DELETE s",
177                    files_in
178                ));
179                let _ = conn.query(&format!(
180                    "MATCH (m:Module) WHERE m.file IN [{}] DETACH DELETE m",
181                    files_in
182                ));
183                let _ = conn.query(&format!(
184                    "MATCH (f:File) WHERE f.id IN [{}] DETACH DELETE f",
185                    files_in
186                ));
187                for extraction in &extractions {
188                    store.upsert_file_conn_no_delete(&conn, extraction)?;
189                }
190                conn.query("COMMIT")
191                    .context("failed to commit index transaction")?;
192                let file_paths: Vec<&str> = extractions.iter().map(|e| e.file.as_str()).collect();
193                store.upsert_folders_bulk_conn(&conn, &file_paths)?;
194            }
195        }
196
197        if use_csv {
198            let file_paths: Vec<&str> = extractions.iter().map(|e| e.file.as_str()).collect();
199            let conn = store.connection()?;
200            store.upsert_folders_bulk_conn(&conn, &file_paths)?;
201        }
202
203        // resolve runs under the same write lock (creates CALLS/INHERITS edges)
204        let resolve_stats = resolve::resolve_calls_incremental(store, &extractions, None)
205            .unwrap_or_else(|e| {
206                eprintln!("warning: call resolution failed: {e}");
207                resolve::ResolveStats {
208                    total_calls: 0,
209                    resolved: 0,
210                    unresolved: 0,
211                    learned_resolved: 0,
212                    inherits_resolved: 0,
213                }
214            });
215
216        drop(_write_lock);
217
218        Ok(IndexResult {
219            total_files: total,
220            indexed_files: indexed,
221            extractions,
222            resolve_stats,
223        })
224    }
225
226    /// Get graph statistics.
227    pub fn stats(&self) -> Result<graph::GraphStats> {
228        let store = self.store.as_ref().context("call init() first")?;
229        store.stats()
230    }
231
232    /// Access the underlying graph store (for direct queries).
233    pub fn store(&self) -> Option<&GraphStore> {
234        self.store.as_ref()
235    }
236
237    /// Access the language registry.
238    pub fn registry(&self) -> &LanguageRegistry {
239        &self.registry
240    }
241
242    /// Get the project root path.
243    pub fn root(&self) -> &Path {
244        &self.root
245    }
246
247    /// Index (or re-index) a single file by its path on disk.
248    /// Path may be absolute or relative to project root.
249    pub fn index_file(&self, path: &Path) -> Result<()> {
250        let store = self.store.as_ref().context("call init() first")?;
251        let rel = if path.is_absolute() {
252            path.strip_prefix(&self.root)
253                .unwrap_or(path)
254                .to_string_lossy()
255                .replace('\\', "/")
256        } else {
257            path.to_string_lossy().replace('\\', "/")
258        };
259        let abs = self.root.join(&rel);
260        let source = std::fs::read(&abs).with_context(|| format!("read {}", abs.display()))?;
261        let pack = self
262            .registry
263            .for_file_with_content(&rel, &source)
264            .with_context(|| format!("no language for {rel}"))?;
265        let extraction = extract::extract_file(&rel, &source, pack)?;
266        store.upsert_file(&extraction)?;
267        Ok(())
268    }
269
270    /// Index a batch of files by path, returning an IndexResult with all extractions.
271    pub fn index_files(&self, paths: &[PathBuf]) -> Result<IndexResult> {
272        let store = self.store.as_ref().context("call init() first")?;
273
274        if paths.is_empty() {
275            return Ok(IndexResult {
276                total_files: 0,
277                indexed_files: 0,
278                extractions: Vec::new(),
279                resolve_stats: resolve::ResolveStats {
280                    total_calls: 0,
281                    resolved: 0,
282                    unresolved: 0,
283                    learned_resolved: 0,
284                    inherits_resolved: 0,
285                },
286            });
287        }
288
289        let extractions: Vec<FileExtraction> = paths
290            .par_iter()
291            .filter_map(|path| {
292                let rel = if path.is_absolute() {
293                    path.strip_prefix(&self.root)
294                        .unwrap_or(path)
295                        .to_string_lossy()
296                        .replace('\\', "/")
297                } else {
298                    path.to_string_lossy().replace('\\', "/")
299                };
300                let abs = self.root.join(&rel);
301                let source = std::fs::read(&abs).ok()?;
302                let pack = self.registry.for_file_with_content(&rel, &source)?;
303                extract::extract_file(&rel, &source, pack).ok()
304            })
305            .collect();
306
307        let extractions = {
308            let mut seen = std::collections::HashSet::new();
309            extractions
310                .into_iter()
311                .filter(|e| seen.insert(e.file.clone()))
312                .collect::<Vec<_>>()
313        };
314
315        let indexed = extractions.len();
316
317        let _write_lock = if !extractions.is_empty() {
318            Some(store.write_lock()?)
319        } else {
320            None
321        };
322
323        if !extractions.is_empty() {
324            let conn = store.connection()?;
325            conn.query("BEGIN TRANSACTION")
326                .context("failed to begin batch delete transaction")?;
327            let file_list: Vec<String> = extractions
328                .iter()
329                .map(|e| format!("'{}'", escape_str(&e.file)))
330                .collect();
331            let files_in = file_list.join(", ");
332            let _ = conn.query(&format!(
333                "MATCH (f:File)-[:DEFINES]->(s:Symbol)-[:HAS_STATEMENT]->(st:Statement) WHERE f.id IN [{files_in}] DETACH DELETE st"
334            ));
335            let _ = conn.query(&format!(
336                "MATCH (s:Symbol) WHERE s.file IN [{files_in}] DETACH DELETE s"
337            ));
338            let _ = conn.query(&format!(
339                "MATCH (m:Module) WHERE m.file IN [{files_in}] DETACH DELETE m"
340            ));
341            let _ = conn.query(&format!(
342                "MATCH (f:File) WHERE f.id IN [{files_in}] DETACH DELETE f"
343            ));
344            conn.query("COMMIT")
345                .context("failed to commit batch delete transaction")?;
346
347            if indexed > 10 {
348                let conn = store.connection()?;
349                store.upsert_all_parquet_conn(&conn, &extractions)?;
350            } else {
351                let conn = store.connection()?;
352                store.upsert_all_bulk(&conn, &extractions)?;
353            }
354
355            let file_paths: Vec<&str> = extractions.iter().map(|e| e.file.as_str()).collect();
356            let conn = store.connection()?;
357            store.upsert_folders_bulk_conn(&conn, &file_paths)?;
358        }
359
360        let resolve_stats = resolve::resolve_calls_incremental(store, &extractions, None)
361            .unwrap_or_else(|e| {
362                eprintln!("warning: call resolution failed: {e}");
363                resolve::ResolveStats {
364                    total_calls: 0,
365                    resolved: 0,
366                    unresolved: 0,
367                    learned_resolved: 0,
368                    inherits_resolved: 0,
369                }
370            });
371
372        drop(_write_lock);
373
374        Ok(IndexResult {
375            total_files: paths.len(),
376            indexed_files: indexed,
377            extractions,
378            resolve_stats,
379        })
380    }
381
382    /// Detect cross-language bridges (FFI, JNI, cgo, gRPC, P/Invoke, WASM, ctypes).
383    pub fn detect_bridges(&self) -> Result<bridges::BridgeScanResult> {
384        bridges::detect_bridges(&self.root)
385    }
386
387    /// Remove a deleted file from the graph.
388    pub fn remove_file(&self, path: &Path) -> Result<()> {
389        let store = self.store.as_ref().context("call init() first")?;
390        let rel = if path.is_absolute() {
391            path.strip_prefix(&self.root)
392                .unwrap_or(path)
393                .to_string_lossy()
394                .replace('\\', "/")
395        } else {
396            path.to_string_lossy().replace('\\', "/")
397        };
398        store.remove_file(&rel)
399    }
400
401    fn collect_files(&self) -> Result<Vec<PathBuf>> {
402        use ignore::WalkBuilder;
403
404        let mut files = Vec::new();
405        let walker = WalkBuilder::new(&self.root)
406            .hidden(true)
407            .add_custom_ignore_filename(".infigraphignore")
408            .git_ignore(true)
409            .filter_entry(|e| {
410                let name = e.file_name().to_string_lossy();
411                !matches!(
412                    name.as_ref(),
413                    ".infigraph" | "node_modules" | "__pycache__" | ".tox"
414                )
415            })
416            .build();
417
418        for result in walker {
419            let entry = result?;
420            if entry.file_type().is_some_and(|ft| ft.is_file()) {
421                let path = entry.path();
422                if self.registry.for_file(&path.to_string_lossy()).is_some() {
423                    files.push(path.to_path_buf());
424                }
425            }
426        }
427        Ok(files)
428    }
429}
430
431pub struct IndexResult {
432    pub total_files: usize,
433    pub indexed_files: usize,
434    pub extractions: Vec<FileExtraction>,
435    pub resolve_stats: resolve::ResolveStats,
436}