Skip to main content

infigraph_core/graph/
store_write.rs

1use std::collections::HashMap;
2
3use anyhow::{Context, Result};
4use kuzu::Connection;
5
6use super::schema::ensure_custom_edge_table;
7use super::store::GraphStore;
8use super::store_util::escape;
9use crate::model::{FileExtraction, RelationKind};
10
11impl GraphStore {
12    /// Insert a file extraction into the graph.
13    /// Removes old data for the file first (incremental update).
14    pub fn upsert_file(&self, extraction: &FileExtraction) -> Result<()> {
15        let _lock = self.write_lock()?;
16        let conn = self.connection()?;
17        self.upsert_file_conn(&conn, extraction)
18    }
19
20    /// Caller must hold WriteLock.
21    pub fn upsert_file_conn(
22        &self,
23        conn: &Connection<'_>,
24        extraction: &FileExtraction,
25    ) -> Result<()> {
26        // Remove old symbols for this file
27        let _ = conn.query(&format!(
28            "MATCH (s:Symbol) WHERE s.file = '{}' DETACH DELETE s",
29            escape(&extraction.file)
30        ));
31        let _ = conn.query(&format!(
32            "MATCH (m:Module) WHERE m.file = '{}' DETACH DELETE m",
33            escape(&extraction.file)
34        ));
35        let _ = conn.query(&format!(
36            "MATCH (f:File) WHERE f.id = '{}' DETACH DELETE f",
37            escape(&extraction.file)
38        ));
39        self.upsert_file_conn_no_delete(conn, extraction)
40    }
41
42    /// Caller must hold WriteLock.
43    pub fn upsert_file_conn_no_delete(
44        &self,
45        conn: &Connection<'_>,
46        extraction: &FileExtraction,
47    ) -> Result<()> {
48        // Insert module node
49        let module_id = &extraction.file;
50        let module_name = extraction
51            .file
52            .rsplit_once('/')
53            .map(|(_, f)| f)
54            .unwrap_or(&extraction.file);
55        let insert_module = format!(
56            "CREATE (m:Module {{id: '{}', name: '{}', file: '{}', language: '{}', content_hash: '{}'}})",
57            escape(module_id),
58            escape(module_name),
59            escape(&extraction.file),
60            escape(&extraction.language),
61            escape(&extraction.content_hash),
62        );
63        conn.query(&insert_module)
64            .context("failed to insert module")?;
65
66        // Insert File node
67        let file_name = extraction
68            .file
69            .rsplit_once('/')
70            .map(|(_, f)| f)
71            .unwrap_or(&extraction.file);
72        let symbol_count = extraction.symbols.len() as i32;
73        let insert_file = format!(
74            "CREATE (f:File {{id: '{}', name: '{}', path: '{}', language: '{}', symbol_count: {}}})",
75            escape(&extraction.file),
76            escape(file_name),
77            escape(&extraction.file),
78            escape(&extraction.language),
79            symbol_count,
80        );
81        conn.query(&insert_file)
82            .context("failed to insert file node")?;
83
84        // Folder hierarchy is handled in bulk by upsert_folders_bulk — skip per-file here
85
86        // Batch insert symbols via UNWIND
87        if !extraction.symbols.is_empty() {
88            let sym_rows: Vec<String> = extraction.symbols.iter().map(|sym| {
89                format!(
90                    "{{id: '{}', name: '{}', kind: '{}', file: '{}', start_line: {}, end_line: {}, signature_hash: '{}', language: '{}', visibility: '{}', parent: '{}', docstring: '{}', complexity: {}, parameters: '{}', return_type: '{}'}}",
91                    escape(&sym.id),
92                    escape(&sym.name),
93                    sym.kind.as_str(),
94                    escape(&extraction.file),
95                    sym.span.start_line,
96                    sym.span.end_line,
97                    escape(&sym.signature_hash),
98                    escape(&sym.language),
99                    escape(sym.visibility.as_deref().unwrap_or("")),
100                    escape(sym.parent.as_deref().unwrap_or("")),
101                    escape(sym.docstring.as_deref().unwrap_or("")),
102                    sym.complexity,
103                    escape(sym.parameters.as_deref().unwrap_or("")),
104                    escape(sym.return_type.as_deref().unwrap_or("")),
105                )
106            }).collect();
107            let batch_insert = format!(
108                "UNWIND [{}] AS s CREATE (:Symbol {{id: s.id, name: s.name, kind: s.kind, file: s.file, start_line: s.start_line, end_line: s.end_line, signature_hash: s.signature_hash, language: s.language, visibility: s.visibility, parent: s.parent, docstring: s.docstring, complexity: s.complexity, parameters: s.parameters, return_type: s.return_type}})",
109                sym_rows.join(", ")
110            );
111            conn.query(&batch_insert)
112                .context("failed to batch insert symbols")?;
113
114            // Batch CONTAINS edges: module -> symbols
115            let sym_ids: Vec<String> = extraction
116                .symbols
117                .iter()
118                .map(|s| format!("'{}'", escape(&s.id)))
119                .collect();
120            let contains_batch = format!(
121                "MATCH (m:Module), (s:Symbol) WHERE m.id = '{}' AND s.id IN [{}] CREATE (m)-[:CONTAINS]->(s)",
122                escape(module_id),
123                sym_ids.join(", ")
124            );
125            let _ = conn.query(&contains_batch);
126
127            // Batch DEFINES edges: file -> symbols
128            let defines_batch = format!(
129                "MATCH (f:File), (s:Symbol) WHERE f.id = '{}' AND s.id IN [{}] CREATE (f)-[:DEFINES]->(s)",
130                escape(&extraction.file),
131                sym_ids.join(", ")
132            );
133            let _ = conn.query(&defines_batch);
134        }
135
136        // Batch insert relationships grouped by type
137        let mut calls_pairs: Vec<(&str, &str)> = Vec::new();
138        let mut inherits_pairs: Vec<(&str, &str)> = Vec::new();
139        let mut tested_by_pairs: Vec<(&str, &str)> = Vec::new();
140        let mut imports_pairs: Vec<(&str, &str)> = Vec::new();
141        let mut reads_pairs: Vec<(&str, &str)> = Vec::new();
142        let mut writes_pairs: Vec<(&str, &str)> = Vec::new();
143        let mut custom_pairs: HashMap<String, Vec<(&str, &str)>> = HashMap::new();
144        for rel in &extraction.relations {
145            match &rel.kind {
146                RelationKind::Calls | RelationKind::CalledBy => {
147                    calls_pairs.push((&rel.source_id, &rel.target_id))
148                }
149                RelationKind::Inherits | RelationKind::InheritedBy => {
150                    inherits_pairs.push((&rel.source_id, &rel.target_id))
151                }
152                RelationKind::TestedBy | RelationKind::Tests => {
153                    tested_by_pairs.push((&rel.source_id, &rel.target_id))
154                }
155                RelationKind::Imports | RelationKind::ImportedBy => {
156                    imports_pairs.push((&rel.source_id, &rel.target_id))
157                }
158                RelationKind::Reads => reads_pairs.push((&rel.source_id, &rel.target_id)),
159                RelationKind::Writes => writes_pairs.push((&rel.source_id, &rel.target_id)),
160                RelationKind::Custom(name) => {
161                    custom_pairs
162                        .entry(name.clone())
163                        .or_default()
164                        .push((&rel.source_id, &rel.target_id));
165                }
166                _ => {}
167            }
168        }
169        for (pairs, rel_type) in [
170            (&calls_pairs, "CALLS"),
171            (&inherits_pairs, "INHERITS"),
172            (&tested_by_pairs, "TESTED_BY"),
173            (&reads_pairs, "READS"),
174            (&writes_pairs, "WRITES"),
175        ] {
176            if pairs.is_empty() {
177                continue;
178            }
179            let pair_list: Vec<String> = pairs
180                .iter()
181                .map(|(a, b)| format!("{{a: '{}', b: '{}'}}", escape(a), escape(b)))
182                .collect();
183            let batch_rel = format!(
184                "UNWIND [{}] AS p MATCH (a:Symbol), (b:Symbol) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:{}]->(b)",
185                pair_list.join(", "),
186                rel_type
187            );
188            let _ = conn.query(&batch_rel);
189        }
190        if !imports_pairs.is_empty() {
191            let pair_list: Vec<String> = imports_pairs
192                .iter()
193                .map(|(a, b)| format!("{{a: '{}', b: '{}'}}", escape(a), escape(b)))
194                .collect();
195            let _ = conn.query(&format!(
196                "UNWIND [{}] AS p MATCH (a:Module), (b:Module) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:IMPORTS]->(b)",
197                pair_list.join(", ")
198            ));
199        }
200        for (edge_name, pairs) in &custom_pairs {
201            if pairs.is_empty() {
202                continue;
203            }
204            let _ = ensure_custom_edge_table(conn, edge_name);
205            let pair_list: Vec<String> = pairs
206                .iter()
207                .map(|(a, b)| format!("{{a: '{}', b: '{}'}}", escape(a), escape(b)))
208                .collect();
209            let _ = conn.query(&format!(
210                "UNWIND [{}] AS p MATCH (a:Symbol), (b:Symbol) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:{}]->(b)",
211                pair_list.join(", "),
212                edge_name
213            ));
214        }
215
216        // Insert Statement nodes + HAS_STATEMENT edges
217        if !extraction.statements.is_empty() {
218            let stmt_rows: Vec<String> = extraction.statements.iter().map(|s| {
219                format!(
220                    "{{id: '{}', kind: '{}', condition: '{}', start_line: {}, end_line: {}, depth: {}, parent_symbol: '{}'}}",
221                    escape(&s.id), s.kind.as_str(), escape(&s.condition),
222                    s.start_line, s.end_line, s.depth, escape(&s.parent_symbol),
223                )
224            }).collect();
225            let _ = conn.query(&format!(
226                "UNWIND [{}] AS s CREATE (:Statement {{id: s.id, kind: s.kind, condition: s.condition, start_line: s.start_line, end_line: s.end_line, depth: s.depth, parent_symbol: s.parent_symbol}})",
227                stmt_rows.join(", ")
228            ));
229
230            let edge_rows: Vec<String> = extraction
231                .statements
232                .iter()
233                .map(|s| {
234                    format!(
235                        "{{a: '{}', b: '{}'}}",
236                        escape(&s.parent_symbol),
237                        escape(&s.id)
238                    )
239                })
240                .collect();
241            let _ = conn.query(&format!(
242                "UNWIND [{}] AS p MATCH (a:Symbol), (b:Statement) WHERE a.id = p.a AND b.id = p.b CREATE (a)-[:HAS_STATEMENT]->(b)",
243                edge_rows.join(", ")
244            ));
245        }
246
247        Ok(())
248    }
249
250    /// Create Folder nodes for each ancestor directory and wire up
251    /// CONTAINS_FOLDER (parent -> child) and CONTAINS_FILE (leaf folder -> file) edges.
252    #[allow(dead_code)]
253    fn upsert_folder_hierarchy(&self, conn: &Connection<'_>, file_path: &str) -> Result<()> {
254        // Split the file path into components: "src/graph/store.rs" -> ["src", "graph"]
255        let parts: Vec<&str> = file_path.rsplitn(2, '/').collect();
256        let dir_path = if parts.len() == 2 {
257            parts[1]
258        } else {
259            return Ok(());
260        };
261
262        // Collect all ancestor folders: "src/graph" -> ["src", "src/graph"]
263        let segments: Vec<&str> = dir_path.split('/').collect();
264        let mut folder_paths: Vec<String> = Vec::with_capacity(segments.len());
265        for i in 0..segments.len() {
266            let path = segments[..=i].join("/");
267            folder_paths.push(path);
268        }
269
270        // Create Folder nodes (MERGE-style: only create if not exists)
271        for folder_path in &folder_paths {
272            let folder_name = folder_path
273                .rsplit_once('/')
274                .map(|(_, n)| n)
275                .unwrap_or(folder_path);
276            let merge_folder = format!("MERGE (d:Folder {{id: '{}'}})", escape(folder_path),);
277            // Try MERGE first; if Kuzu doesn't support MERGE, fall back to conditional create
278            if conn.query(&merge_folder).is_err() {
279                // Check if it already exists
280                let check = format!(
281                    "MATCH (d:Folder) WHERE d.id = '{}' RETURN d.id",
282                    escape(folder_path)
283                );
284                let mut result = conn
285                    .query(&check)
286                    .map_err(|e| anyhow::anyhow!("folder check failed: {e}"))?;
287                if result.next().is_none() {
288                    let create = format!(
289                        "CREATE (d:Folder {{id: '{}', name: '{}', path: '{}'}})",
290                        escape(folder_path),
291                        escape(folder_name),
292                        escape(folder_path),
293                    );
294                    let _ = conn.query(&create);
295                }
296            } else {
297                // MERGE succeeded but may not have set name/path; update them
298                let update = format!(
299                    "MATCH (d:Folder) WHERE d.id = '{}' SET d.name = '{}', d.path = '{}'",
300                    escape(folder_path),
301                    escape(folder_name),
302                    escape(folder_path),
303                );
304                let _ = conn.query(&update);
305            }
306        }
307
308        // Create CONTAINS_FOLDER edges between consecutive folders
309        for i in 1..folder_paths.len() {
310            let parent = &folder_paths[i - 1];
311            let child = &folder_paths[i];
312            // Check if edge already exists
313            let check_edge = format!(
314                "MATCH (p:Folder)-[:CONTAINS_FOLDER]->(c:Folder) WHERE p.id = '{}' AND c.id = '{}' RETURN p.id",
315                escape(parent),
316                escape(child),
317            );
318            let mut result = conn
319                .query(&check_edge)
320                .map_err(|e| anyhow::anyhow!("edge check failed: {e}"))?;
321            if result.next().is_none() {
322                let create_edge = format!(
323                    "MATCH (p:Folder), (c:Folder) WHERE p.id = '{}' AND c.id = '{}' CREATE (p)-[:CONTAINS_FOLDER]->(c)",
324                    escape(parent),
325                    escape(child),
326                );
327                let _ = conn.query(&create_edge);
328            }
329        }
330
331        // Create CONTAINS_FILE edge from leaf folder to File node
332        if let Some(leaf_folder) = folder_paths.last() {
333            let check_edge = format!(
334                "MATCH (d:Folder)-[:CONTAINS_FILE]->(f:File) WHERE d.id = '{}' AND f.id = '{}' RETURN d.id",
335                escape(leaf_folder),
336                escape(file_path),
337            );
338            let mut result = conn
339                .query(&check_edge)
340                .map_err(|e| anyhow::anyhow!("edge check failed: {e}"))?;
341            if result.next().is_none() {
342                let create_edge = format!(
343                    "MATCH (d:Folder), (f:File) WHERE d.id = '{}' AND f.id = '{}' CREATE (d)-[:CONTAINS_FILE]->(f)",
344                    escape(leaf_folder),
345                    escape(file_path),
346                );
347                let _ = conn.query(&create_edge);
348            }
349        }
350
351        Ok(())
352    }
353}