Skip to main content

infigraph_core/graph/
store_write.rs

1use std::collections::HashMap;
2
3use anyhow::{Context, Result};
4use kuzu::Connection;
5
6use super::schema::ensure_custom_edge_table;
7use super::store::GraphStore;
8use super::store_util::escape;
9use crate::model::{FileExtraction, RelationKind};
10
11impl GraphStore {
12    /// Insert a file extraction into the graph.
13    /// Removes old data for the file first (incremental update).
14    pub fn upsert_file(&self, extraction: &FileExtraction) -> Result<()> {
15        let conn = self.connection()?;
16        self.upsert_file_conn(&conn, extraction)
17    }
18
19    pub fn upsert_file_conn(
20        &self,
21        conn: &Connection<'_>,
22        extraction: &FileExtraction,
23    ) -> Result<()> {
24        // Remove old symbols for this file
25        let _ = conn.query(&format!(
26            "MATCH (s:Symbol) WHERE s.file = '{}' DETACH DELETE s",
27            escape(&extraction.file)
28        ));
29        let _ = conn.query(&format!(
30            "MATCH (m:Module) WHERE m.file = '{}' DETACH DELETE m",
31            escape(&extraction.file)
32        ));
33        let _ = conn.query(&format!(
34            "MATCH (f:File) WHERE f.id = '{}' DETACH DELETE f",
35            escape(&extraction.file)
36        ));
37        self.upsert_file_conn_no_delete(conn, extraction)
38    }
39
40    pub fn upsert_file_conn_no_delete(
41        &self,
42        conn: &Connection<'_>,
43        extraction: &FileExtraction,
44    ) -> Result<()> {
45        // Insert module node
46        let module_id = &extraction.file;
47        let module_name = extraction
48            .file
49            .rsplit_once('/')
50            .map(|(_, f)| f)
51            .unwrap_or(&extraction.file);
52        let insert_module = format!(
53            "CREATE (m:Module {{id: '{}', name: '{}', file: '{}', language: '{}', content_hash: '{}'}})",
54            escape(module_id),
55            escape(module_name),
56            escape(&extraction.file),
57            escape(&extraction.language),
58            escape(&extraction.content_hash),
59        );
60        conn.query(&insert_module)
61            .context("failed to insert module")?;
62
63        // Insert File node
64        let file_name = extraction
65            .file
66            .rsplit_once('/')
67            .map(|(_, f)| f)
68            .unwrap_or(&extraction.file);
69        let symbol_count = extraction.symbols.len() as i32;
70        let insert_file = format!(
71            "CREATE (f:File {{id: '{}', name: '{}', path: '{}', language: '{}', symbol_count: {}}})",
72            escape(&extraction.file),
73            escape(file_name),
74            escape(&extraction.file),
75            escape(&extraction.language),
76            symbol_count,
77        );
78        conn.query(&insert_file)
79            .context("failed to insert file node")?;
80
81        // Folder hierarchy is handled in bulk by upsert_folders_bulk — skip per-file here
82
83        // Batch insert symbols via UNWIND
84        if !extraction.symbols.is_empty() {
85            let sym_rows: Vec<String> = extraction.symbols.iter().map(|sym| {
86                format!(
87                    "{{id: '{}', name: '{}', kind: '{}', file: '{}', start_line: {}, end_line: {}, signature_hash: '{}', language: '{}', visibility: '{}', parent: '{}', docstring: '{}', complexity: {}, parameters: '{}', return_type: '{}'}}",
88                    escape(&sym.id),
89                    escape(&sym.name),
90                    sym.kind.as_str(),
91                    escape(&extraction.file),
92                    sym.span.start_line,
93                    sym.span.end_line,
94                    escape(&sym.signature_hash),
95                    escape(&sym.language),
96                    escape(sym.visibility.as_deref().unwrap_or("")),
97                    escape(sym.parent.as_deref().unwrap_or("")),
98                    escape(sym.docstring.as_deref().unwrap_or("")),
99                    sym.complexity,
100                    escape(sym.parameters.as_deref().unwrap_or("")),
101                    escape(sym.return_type.as_deref().unwrap_or("")),
102                )
103            }).collect();
104            let batch_insert = format!(
105                "UNWIND [{}] AS s CREATE (:Symbol {{id: s.id, name: s.name, kind: s.kind, file: s.file, start_line: s.start_line, end_line: s.end_line, signature_hash: s.signature_hash, language: s.language, visibility: s.visibility, parent: s.parent, docstring: s.docstring, complexity: s.complexity, parameters: s.parameters, return_type: s.return_type}})",
106                sym_rows.join(", ")
107            );
108            conn.query(&batch_insert)
109                .context("failed to batch insert symbols")?;
110
111            // Batch CONTAINS edges: module -> symbols
112            let sym_ids: Vec<String> = extraction
113                .symbols
114                .iter()
115                .map(|s| format!("'{}'", escape(&s.id)))
116                .collect();
117            let contains_batch = format!(
118                "MATCH (m:Module), (s:Symbol) WHERE m.id = '{}' AND s.id IN [{}] CREATE (m)-[:CONTAINS]->(s)",
119                escape(module_id),
120                sym_ids.join(", ")
121            );
122            let _ = conn.query(&contains_batch);
123
124            // Batch DEFINES edges: file -> symbols
125            let defines_batch = format!(
126                "MATCH (f:File), (s:Symbol) WHERE f.id = '{}' AND s.id IN [{}] CREATE (f)-[:DEFINES]->(s)",
127                escape(&extraction.file),
128                sym_ids.join(", ")
129            );
130            let _ = conn.query(&defines_batch);
131        }
132
133        // Batch insert relationships grouped by type
134        let mut calls_pairs: Vec<(&str, &str)> = Vec::new();
135        let mut inherits_pairs: Vec<(&str, &str)> = Vec::new();
136        let mut tested_by_pairs: Vec<(&str, &str)> = Vec::new();
137        let mut imports_pairs: Vec<(&str, &str)> = Vec::new();
138        let mut reads_pairs: Vec<(&str, &str)> = Vec::new();
139        let mut writes_pairs: Vec<(&str, &str)> = Vec::new();
140        let mut custom_pairs: HashMap<String, Vec<(&str, &str)>> = HashMap::new();
141        for rel in &extraction.relations {
142            match &rel.kind {
143                RelationKind::Calls | RelationKind::CalledBy => {
144                    calls_pairs.push((&rel.source_id, &rel.target_id))
145                }
146                RelationKind::Inherits | RelationKind::InheritedBy => {
147                    inherits_pairs.push((&rel.source_id, &rel.target_id))
148                }
149                RelationKind::TestedBy | RelationKind::Tests => {
150                    tested_by_pairs.push((&rel.source_id, &rel.target_id))
151                }
152                RelationKind::Imports | RelationKind::ImportedBy => {
153                    imports_pairs.push((&rel.source_id, &rel.target_id))
154                }
155                RelationKind::Reads => reads_pairs.push((&rel.source_id, &rel.target_id)),
156                RelationKind::Writes => writes_pairs.push((&rel.source_id, &rel.target_id)),
157                RelationKind::Custom(name) => {
158                    custom_pairs
159                        .entry(name.clone())
160                        .or_default()
161                        .push((&rel.source_id, &rel.target_id));
162                }
163                _ => {}
164            }
165        }
166        for (pairs, rel_type) in [
167            (&calls_pairs, "CALLS"),
168            (&inherits_pairs, "INHERITS"),
169            (&tested_by_pairs, "TESTED_BY"),
170            (&reads_pairs, "READS"),
171            (&writes_pairs, "WRITES"),
172        ] {
173            if pairs.is_empty() {
174                continue;
175            }
176            let pair_list: Vec<String> = pairs
177                .iter()
178                .map(|(a, b)| format!("{{a: '{}', b: '{}'}}", escape(a), escape(b)))
179                .collect();
180            let batch_rel = format!(
181                "UNWIND [{}] AS p MATCH (a:Symbol), (b:Symbol) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:{}]->(b)",
182                pair_list.join(", "),
183                rel_type
184            );
185            let _ = conn.query(&batch_rel);
186        }
187        if !imports_pairs.is_empty() {
188            let pair_list: Vec<String> = imports_pairs
189                .iter()
190                .map(|(a, b)| format!("{{a: '{}', b: '{}'}}", escape(a), escape(b)))
191                .collect();
192            let _ = conn.query(&format!(
193                "UNWIND [{}] AS p MATCH (a:Module), (b:Module) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:IMPORTS]->(b)",
194                pair_list.join(", ")
195            ));
196        }
197        for (edge_name, pairs) in &custom_pairs {
198            if pairs.is_empty() {
199                continue;
200            }
201            let _ = ensure_custom_edge_table(conn, edge_name);
202            let pair_list: Vec<String> = pairs
203                .iter()
204                .map(|(a, b)| format!("{{a: '{}', b: '{}'}}", escape(a), escape(b)))
205                .collect();
206            let _ = conn.query(&format!(
207                "UNWIND [{}] AS p MATCH (a:Symbol), (b:Symbol) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:{}]->(b)",
208                pair_list.join(", "),
209                edge_name
210            ));
211        }
212
213        // Insert Statement nodes + HAS_STATEMENT edges
214        if !extraction.statements.is_empty() {
215            let stmt_rows: Vec<String> = extraction.statements.iter().map(|s| {
216                format!(
217                    "{{id: '{}', kind: '{}', condition: '{}', start_line: {}, end_line: {}, depth: {}, parent_symbol: '{}'}}",
218                    escape(&s.id), s.kind.as_str(), escape(&s.condition),
219                    s.start_line, s.end_line, s.depth, escape(&s.parent_symbol),
220                )
221            }).collect();
222            let _ = conn.query(&format!(
223                "UNWIND [{}] AS s CREATE (:Statement {{id: s.id, kind: s.kind, condition: s.condition, start_line: s.start_line, end_line: s.end_line, depth: s.depth, parent_symbol: s.parent_symbol}})",
224                stmt_rows.join(", ")
225            ));
226
227            let edge_rows: Vec<String> = extraction.statements.iter().map(|s| {
228                format!("{{a: '{}', b: '{}'}}", escape(&s.parent_symbol), escape(&s.id))
229            }).collect();
230            let _ = conn.query(&format!(
231                "UNWIND [{}] AS p MATCH (a:Symbol), (b:Statement) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:HAS_STATEMENT]->(b)",
232                edge_rows.join(", ")
233            ));
234        }
235
236        Ok(())
237    }
238
239    /// Create Folder nodes for each ancestor directory and wire up
240    /// CONTAINS_FOLDER (parent -> child) and CONTAINS_FILE (leaf folder -> file) edges.
241    #[allow(dead_code)]
242    fn upsert_folder_hierarchy(&self, conn: &Connection<'_>, file_path: &str) -> Result<()> {
243        // Split the file path into components: "src/graph/store.rs" -> ["src", "graph"]
244        let parts: Vec<&str> = file_path.rsplitn(2, '/').collect();
245        let dir_path = if parts.len() == 2 {
246            parts[1]
247        } else {
248            return Ok(());
249        };
250
251        // Collect all ancestor folders: "src/graph" -> ["src", "src/graph"]
252        let segments: Vec<&str> = dir_path.split('/').collect();
253        let mut folder_paths: Vec<String> = Vec::with_capacity(segments.len());
254        for i in 0..segments.len() {
255            let path = segments[..=i].join("/");
256            folder_paths.push(path);
257        }
258
259        // Create Folder nodes (MERGE-style: only create if not exists)
260        for folder_path in &folder_paths {
261            let folder_name = folder_path
262                .rsplit_once('/')
263                .map(|(_, n)| n)
264                .unwrap_or(folder_path);
265            let merge_folder = format!("MERGE (d:Folder {{id: '{}'}})", escape(folder_path),);
266            // Try MERGE first; if Kuzu doesn't support MERGE, fall back to conditional create
267            if conn.query(&merge_folder).is_err() {
268                // Check if it already exists
269                let check = format!(
270                    "MATCH (d:Folder) WHERE d.id = '{}' RETURN d.id",
271                    escape(folder_path)
272                );
273                let mut result = conn
274                    .query(&check)
275                    .map_err(|e| anyhow::anyhow!("folder check failed: {e}"))?;
276                if result.next().is_none() {
277                    let create = format!(
278                        "CREATE (d:Folder {{id: '{}', name: '{}', path: '{}'}})",
279                        escape(folder_path),
280                        escape(folder_name),
281                        escape(folder_path),
282                    );
283                    let _ = conn.query(&create);
284                }
285            } else {
286                // MERGE succeeded but may not have set name/path; update them
287                let update = format!(
288                    "MATCH (d:Folder) WHERE d.id = '{}' SET d.name = '{}', d.path = '{}'",
289                    escape(folder_path),
290                    escape(folder_name),
291                    escape(folder_path),
292                );
293                let _ = conn.query(&update);
294            }
295        }
296
297        // Create CONTAINS_FOLDER edges between consecutive folders
298        for i in 1..folder_paths.len() {
299            let parent = &folder_paths[i - 1];
300            let child = &folder_paths[i];
301            // Check if edge already exists
302            let check_edge = format!(
303                "MATCH (p:Folder)-[:CONTAINS_FOLDER]->(c:Folder) WHERE p.id = '{}' AND c.id = '{}' RETURN p.id",
304                escape(parent),
305                escape(child),
306            );
307            let mut result = conn
308                .query(&check_edge)
309                .map_err(|e| anyhow::anyhow!("edge check failed: {e}"))?;
310            if result.next().is_none() {
311                let create_edge = format!(
312                    "MATCH (p:Folder), (c:Folder) WHERE p.id = '{}' AND c.id = '{}' CREATE (p)-[:CONTAINS_FOLDER]->(c)",
313                    escape(parent),
314                    escape(child),
315                );
316                let _ = conn.query(&create_edge);
317            }
318        }
319
320        // Create CONTAINS_FILE edge from leaf folder to File node
321        if let Some(leaf_folder) = folder_paths.last() {
322            let check_edge = format!(
323                "MATCH (d:Folder)-[:CONTAINS_FILE]->(f:File) WHERE d.id = '{}' AND f.id = '{}' RETURN d.id",
324                escape(leaf_folder),
325                escape(file_path),
326            );
327            let mut result = conn
328                .query(&check_edge)
329                .map_err(|e| anyhow::anyhow!("edge check failed: {e}"))?;
330            if result.next().is_none() {
331                let create_edge = format!(
332                    "MATCH (d:Folder), (f:File) WHERE d.id = '{}' AND f.id = '{}' CREATE (d)-[:CONTAINS_FILE]->(f)",
333                    escape(leaf_folder),
334                    escape(file_path),
335                );
336                let _ = conn.query(&create_edge);
337            }
338        }
339
340        Ok(())
341    }
342}