Skip to main content

gobby_code/graph/code_graph/write/
mutation.rs

1use std::collections::BTreeMap;
2use std::sync::atomic::{AtomicU64, Ordering};
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use crate::graph::typed_query::{TypedQuery, TypedValue};
6use crate::index::import_resolution::UNPARSED_IMPORT_PREFIX;
7use crate::models::{
8    CallRelation, CallTargetKind, ImportRelation, Symbol, make_external_symbol_id,
9    make_unresolved_callee_id,
10};
11
12use super::support::{sync_token_param, typed_query, usize_value};
13
14const EXTRACTED_PROVENANCE: &str = "EXTRACTED";
15const SOURCE_SYSTEM_GCODE: &str = crate::models::SOURCE_SYSTEM_GCODE;
16static SYNC_TOKEN_COUNTER: AtomicU64 = AtomicU64::new(0);
/// Batched upsert of `IMPORTS` edges.
///
/// For each row of `$imports` (`source_file`, `target_module`) the query merges
/// the `CodeFile` and `CodeModule` nodes within `$project`, merges the edge
/// between them, and overwrites the edge's provenance metadata and sync token.
const ADD_IMPORTS_CYPHER: &str = "UNWIND $imports AS import
         MERGE (f:CodeFile {path: import.source_file, project: $project})
         MERGE (m:CodeModule {name: import.target_module, project: $project})
         MERGE (f)-[r:IMPORTS]->(m)
         SET r.provenance = $provenance,
             r.confidence = $confidence,
             r.source_system = $source_system,
             r.source_file_path = import.source_file,
             r.sync_token = $sync_token";
/// Batched upsert of symbol nodes and their `DEFINES` edges for one file.
///
/// For each row of `$symbols` the query merges the `CodeFile` (keyed by
/// `$file_path`) and the `CodeSymbol` (keyed by `symbol.id`), overwrites the
/// symbol's descriptive properties, then merges the `DEFINES` edge and
/// overwrites its provenance metadata and sync token.
const ADD_DEFINITIONS_CYPHER: &str = "UNWIND $symbols AS symbol
         MERGE (f:CodeFile {path: $file_path, project: $project})
         MERGE (s:CodeSymbol {id: symbol.id, project: $project})
         SET s.name = symbol.name,
             s.qualified_name = symbol.qualified_name,
             s.kind = symbol.kind,
             s.language = symbol.language,
             s.file_path = $file_path,
             s.line_start = symbol.line_start,
             s.line_end = symbol.line_end,
             s.updated_at = timestamp(),
             s.sync_token = $sync_token
         MERGE (f)-[r:DEFINES]->(s)
         SET r.provenance = $provenance,
             r.confidence = $confidence,
             r.source_system = $source_system,
             r.source_file_path = $file_path,
             r.source_line = symbol.line_start,
             r.source_symbol_id = symbol.id,
             r.sync_token = $sync_token";
/// Batched upsert of `CALLS` edges whose callee is a `CodeSymbol`.
///
/// For each row of `$symbol_calls` the query merges caller and callee
/// `CodeSymbol` nodes (a newly created callee only receives `name` and
/// `updated_at`), merges a `CALLS` edge keyed by `file` and `line`, and
/// overwrites the edge's provenance metadata and sync token.
const ADD_SYMBOL_CALLS_CYPHER: &str = "UNWIND $symbol_calls AS call
         MERGE (caller:CodeSymbol {id: call.caller_id, project: $project})
         MERGE (callee:CodeSymbol {id: call.target_id, project: $project})
         ON CREATE SET callee.name = call.callee_name, callee.updated_at = timestamp()
         MERGE (caller)-[r:CALLS {file: call.file_path, line: call.line}]->(callee)
         SET r.provenance = $provenance,
             r.confidence = $confidence,
             r.source_system = $source_system,
             r.source_file_path = call.file_path,
             r.source_line = call.line,
             r.source_symbol_id = call.caller_id,
             r.sync_token = $sync_token";
/// Batched upsert of `CALLS` edges whose callee is an `ExternalSymbol`.
///
/// For each row of `$external_calls` the query merges the caller `CodeSymbol`
/// and the callee `ExternalSymbol` (a newly created callee receives its name,
/// both module properties, a timestamp and the sync token), merges a `CALLS`
/// edge keyed by `file` and `line`, and overwrites the edge's provenance
/// metadata and sync token.
const ADD_EXTERNAL_CALLS_CYPHER: &str = "UNWIND $external_calls AS call
         MERGE (caller:CodeSymbol {id: call.caller_id, project: $project})
         MERGE (callee:ExternalSymbol {id: call.target_id, project: $project})
         ON CREATE SET callee.name = call.callee_name,
             callee.external_module = call.callee_module,
             callee.module = call.callee_module,
             callee.updated_at = timestamp(),
             callee.sync_token = $sync_token
         MERGE (caller)-[r:CALLS {file: call.file_path, line: call.line}]->(callee)
         SET r.provenance = $provenance,
             r.confidence = $confidence,
             r.source_system = $source_system,
             r.source_file_path = call.file_path,
             r.source_line = call.line,
             r.source_symbol_id = call.caller_id,
             r.sync_token = $sync_token";
/// Batched upsert of `CALLS` edges whose callee is an `UnresolvedCallee`.
///
/// For each row of `$unresolved_calls` the query merges the caller
/// `CodeSymbol` and the callee `UnresolvedCallee` (a newly created callee
/// receives its name, a timestamp and the sync token), merges a `CALLS` edge
/// keyed by `file` and `line`, and overwrites the edge's provenance metadata
/// and sync token.
const ADD_UNRESOLVED_CALLS_CYPHER: &str = "UNWIND $unresolved_calls AS call
         MERGE (caller:CodeSymbol {id: call.caller_id, project: $project})
         MERGE (callee:UnresolvedCallee {id: call.target_id, project: $project})
         ON CREATE SET callee.name = call.callee_name,
             callee.updated_at = timestamp(),
             callee.sync_token = $sync_token
         MERGE (caller)-[r:CALLS {file: call.file_path, line: call.line}]->(callee)
         SET r.provenance = $provenance,
             r.confidence = $confidence,
             r.source_system = $source_system,
             r.source_file_path = call.file_path,
             r.source_line = call.line,
             r.source_symbol_id = call.caller_id,
             r.sync_token = $sync_token";
88
89pub(super) fn new_sync_token(file_path: &str) -> String {
90    let nanos = SystemTime::now()
91        .duration_since(UNIX_EPOCH)
92        .map(|duration| duration.as_nanos())
93        .unwrap_or_default();
94    let suffix = SYNC_TOKEN_COUNTER.fetch_add(1, Ordering::Relaxed);
95    format!("{}:{}:{nanos}:{suffix}", std::process::id(), file_path)
96}
97
98#[derive(Debug, Clone)]
99pub(in crate::graph::code_graph) struct ImportGraphItem {
100    pub(in crate::graph::code_graph) source_file: String,
101    pub(in crate::graph::code_graph) target_module: String,
102}
103
104#[derive(Debug, Clone)]
105pub(in crate::graph::code_graph) struct CallGraphItem {
106    caller_id: String,
107    target_id: String,
108    callee_name: String,
109    pub(in crate::graph::code_graph) file_path: String,
110    line: usize,
111    callee_module: Option<String>,
112}
113
114#[derive(Debug, Clone, Default)]
115pub(in crate::graph::code_graph) struct CallGraphItems {
116    pub(in crate::graph::code_graph) symbol: Vec<CallGraphItem>,
117    pub(in crate::graph::code_graph) external: Vec<CallGraphItem>,
118    pub(in crate::graph::code_graph) unresolved: Vec<CallGraphItem>,
119}
120
121fn map_value(values: impl IntoIterator<Item = (&'static str, TypedValue)>) -> TypedValue {
122    TypedValue::Map(
123        values
124            .into_iter()
125            .map(|(key, value)| (key.to_string(), value))
126            .collect::<BTreeMap<_, _>>(),
127    )
128}
129
130pub(in crate::graph::code_graph) fn import_graph_items(
131    file_path: &str,
132    imports: &[ImportRelation],
133) -> Vec<ImportGraphItem> {
134    imports
135        .iter()
136        .filter(|import| {
137            !import.module_name.is_empty()
138                && !import.module_name.starts_with(UNPARSED_IMPORT_PREFIX)
139        })
140        .map(|import| ImportGraphItem {
141            source_file: file_path.to_string(),
142            target_module: import.module_name.clone(),
143        })
144        .collect()
145}
146
147pub(super) fn definition_graph_symbols(definitions: &[Symbol]) -> Vec<&Symbol> {
148    definitions
149        .iter()
150        .filter(|symbol| !symbol.id.is_empty() && !symbol.name.is_empty())
151        .collect()
152}
153
154pub(in crate::graph::code_graph) fn partition_call_graph_items(
155    project_id: &str,
156    file_path: &str,
157    calls: &[CallRelation],
158) -> CallGraphItems {
159    let mut groups = CallGraphItems::default();
160    for call in calls {
161        if call.caller_symbol_id.is_empty() {
162            continue;
163        }
164        let Some(target) = GraphCallTarget::from_call(project_id, call) else {
165            continue;
166        };
167        let item = CallGraphItem {
168            caller_id: call.caller_symbol_id.clone(),
169            target_id: target.id().to_string(),
170            callee_name: call.callee_name.clone(),
171            file_path: file_path.to_string(),
172            line: call.line,
173            callee_module: target.module().map(str::to_string),
174        };
175        match target {
176            GraphCallTarget::Symbol { .. } => groups.symbol.push(item),
177            GraphCallTarget::External { .. } => groups.external.push(item),
178            GraphCallTarget::Unresolved { .. } => groups.unresolved.push(item),
179        }
180    }
181    groups
182}
183
184fn metadata_params(sync_token: &str) -> Vec<(&'static str, TypedValue)> {
185    vec![
186        (
187            "provenance",
188            TypedValue::String(EXTRACTED_PROVENANCE.to_string()),
189        ),
190        ("confidence", TypedValue::Float(1.0)),
191        (
192            "source_system",
193            TypedValue::String(SOURCE_SYSTEM_GCODE.to_string()),
194        ),
195        sync_token_param(sync_token),
196    ]
197}
198
199pub(super) struct SyncFileMutation<'a> {
200    pub(super) project_id: &'a str,
201    pub(super) file_path: &'a str,
202    pub(super) symbol_count: usize,
203    pub(super) imports: &'a [ImportGraphItem],
204    pub(super) symbols: &'a [&'a Symbol],
205    pub(super) calls: &'a CallGraphItems,
206    pub(super) sync_token: &'a str,
207}
208
209pub(super) fn ensure_file_node_query(
210    project_id: &str,
211    file_path: &str,
212    symbol_count: usize,
213    sync_token: &str,
214) -> anyhow::Result<TypedQuery> {
215    typed_query(
216        "MERGE (f:CodeFile {path: $file_path, project: $project})
217         SET f.updated_at = timestamp(),
218             f.symbol_count = $symbol_count,
219             f.sync_token = $sync_token",
220        [
221            ("project", TypedValue::String(project_id.to_string())),
222            ("file_path", TypedValue::String(file_path.to_string())),
223            ("symbol_count", usize_value(symbol_count)?),
224            sync_token_param(sync_token),
225        ],
226    )
227}
228
229pub(super) fn add_imports_query(
230    project_id: &str,
231    imports: &[ImportGraphItem],
232    sync_token: &str,
233) -> anyhow::Result<TypedQuery> {
234    let mut params = vec![
235        ("project", TypedValue::String(project_id.to_string())),
236        (
237            "imports",
238            TypedValue::List(
239                imports
240                    .iter()
241                    .map(|import| {
242                        map_value([
243                            (
244                                "source_file",
245                                TypedValue::String(import.source_file.clone()),
246                            ),
247                            (
248                                "target_module",
249                                TypedValue::String(import.target_module.clone()),
250                            ),
251                        ])
252                    })
253                    .collect(),
254            ),
255        ),
256    ];
257    params.extend(metadata_params(sync_token));
258    typed_query(ADD_IMPORTS_CYPHER, params)
259}
260
261pub(super) fn add_definitions_query(
262    project_id: &str,
263    file_path: &str,
264    symbols: &[&Symbol],
265    sync_token: &str,
266) -> anyhow::Result<TypedQuery> {
267    let mut params = vec![
268        ("project", TypedValue::String(project_id.to_string())),
269        ("file_path", TypedValue::String(file_path.to_string())),
270        (
271            "symbols",
272            TypedValue::List(
273                symbols
274                    .iter()
275                    .map(|symbol| {
276                        Ok(map_value([
277                            ("id", TypedValue::String(symbol.id.clone())),
278                            ("name", TypedValue::String(symbol.name.clone())),
279                            (
280                                "qualified_name",
281                                TypedValue::String(symbol.qualified_name.clone()),
282                            ),
283                            ("kind", TypedValue::String(symbol.kind.clone())),
284                            ("language", TypedValue::String(symbol.language.clone())),
285                            ("line_start", usize_value(symbol.line_start)?),
286                            ("line_end", usize_value(symbol.line_end)?),
287                        ]))
288                    })
289                    .collect::<anyhow::Result<Vec<_>>>()?,
290            ),
291        ),
292    ];
293    params.extend(metadata_params(sync_token));
294    typed_query(ADD_DEFINITIONS_CYPHER, params)
295}
296
/// The graph node a call edge points at, together with that node's id.
enum GraphCallTarget {
    /// The call relation carries a non-empty callee symbol id.
    Symbol { id: String },
    /// The call's target kind is external; `module` may be empty.
    External { id: String, module: String },
    /// A named callee that is neither resolved to a symbol nor external.
    Unresolved { id: String },
}
302
303impl GraphCallTarget {
304    fn from_call(project_id: &str, call: &CallRelation) -> Option<Self> {
305        if let Some(id) = call.callee_symbol_id.as_deref().filter(|id| !id.is_empty()) {
306            return Some(Self::Symbol { id: id.to_string() });
307        }
308        if call.callee_name.is_empty() {
309            return None;
310        }
311        if call.callee_target_kind == CallTargetKind::External {
312            let module = call.callee_external_module.clone().unwrap_or_default();
313            return Some(Self::External {
314                id: make_external_symbol_id(project_id, &call.callee_name, Some(&module)),
315                module,
316            });
317        }
318        Some(Self::Unresolved {
319            id: make_unresolved_callee_id(project_id, &call.callee_name),
320        })
321    }
322
323    fn id(&self) -> &str {
324        match self {
325            Self::Symbol { id } | Self::External { id, .. } | Self::Unresolved { id } => id,
326        }
327    }
328
329    fn module(&self) -> Option<&str> {
330        match self {
331            Self::External { module, .. } => Some(module),
332            Self::Symbol { .. } | Self::Unresolved { .. } => None,
333        }
334    }
335}
336
337pub fn call_target_id(project_id: &str, call: &CallRelation) -> Option<String> {
338    match GraphCallTarget::from_call(project_id, call)? {
339        GraphCallTarget::Symbol { id }
340        | GraphCallTarget::External { id, .. }
341        | GraphCallTarget::Unresolved { id } => Some(id),
342    }
343}
344
345fn call_rows(calls: &[CallGraphItem]) -> anyhow::Result<TypedValue> {
346    Ok(TypedValue::List(
347        calls
348            .iter()
349            .map(|call| {
350                Ok(map_value([
351                    ("caller_id", TypedValue::String(call.caller_id.clone())),
352                    ("target_id", TypedValue::String(call.target_id.clone())),
353                    ("callee_name", TypedValue::String(call.callee_name.clone())),
354                    ("file_path", TypedValue::String(call.file_path.clone())),
355                    ("line", usize_value(call.line)?),
356                    (
357                        "callee_module",
358                        TypedValue::String(call.callee_module.clone().unwrap_or_default()),
359                    ),
360                ]))
361            })
362            .collect::<anyhow::Result<Vec<_>>>()?,
363    ))
364}
365
366pub(super) fn add_symbol_calls_query(
367    project_id: &str,
368    calls: &[CallGraphItem],
369    sync_token: &str,
370) -> anyhow::Result<TypedQuery> {
371    let mut params = vec![
372        ("project", TypedValue::String(project_id.to_string())),
373        ("symbol_calls", call_rows(calls)?),
374    ];
375    params.extend(metadata_params(sync_token));
376    typed_query(ADD_SYMBOL_CALLS_CYPHER, params)
377}
378
379pub(super) fn add_external_calls_query(
380    project_id: &str,
381    calls: &[CallGraphItem],
382    sync_token: &str,
383) -> anyhow::Result<TypedQuery> {
384    let mut params = vec![
385        ("project", TypedValue::String(project_id.to_string())),
386        ("external_calls", call_rows(calls)?),
387    ];
388    params.extend(metadata_params(sync_token));
389    typed_query(ADD_EXTERNAL_CALLS_CYPHER, params)
390}
391
392pub(super) fn add_unresolved_calls_query(
393    project_id: &str,
394    calls: &[CallGraphItem],
395    sync_token: &str,
396) -> anyhow::Result<TypedQuery> {
397    let mut params = vec![
398        ("project", TypedValue::String(project_id.to_string())),
399        ("unresolved_calls", call_rows(calls)?),
400    ];
401    params.extend(metadata_params(sync_token));
402    typed_query(ADD_UNRESOLVED_CALLS_CYPHER, params)
403}
404
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::CallRelation;

    const PROJECT: &str = "proj";
    const CALLER_FILE: &str = "pkg/main.py";

    /// A call from `caller-1` to `helper` at line 3 of `pkg/main.py`, with no
    /// target attached yet.
    fn helper_call() -> CallRelation {
        CallRelation::new(
            "caller-1".to_string(),
            "helper".to_string(),
            CALLER_FILE.to_string(),
            3,
        )
    }

    #[test]
    fn unresolved_local_import_projects_as_unresolved_not_external() {
        // A `local_import` call that the post-write pass left unresolved (its
        // callee_symbol_id is empty) still lists candidate files in
        // callee_external_module. It must fall back to an UnresolvedCallee and
        // never be projected as an External node.
        let candidates = vec![
            "pkg/utils.py".to_string(),
            "pkg/utils/__init__.py".to_string(),
        ];
        let call = helper_call().with_local_import_target("helper".to_string(), candidates);

        let groups = partition_call_graph_items(PROJECT, CALLER_FILE, &[call]);

        assert_eq!(groups.symbol.len(), 0);
        assert_eq!(groups.external.len(), 0);
        assert_eq!(groups.unresolved.len(), 1);
        assert!(groups.unresolved[0].callee_module.is_none());
    }

    #[test]
    fn resolved_local_import_projects_as_symbol() {
        let call = helper_call().with_symbol_target("callee-uuid".to_string());

        let groups = partition_call_graph_items(PROJECT, CALLER_FILE, &[call]);

        assert_eq!(groups.symbol.len(), 1);
        assert_eq!(groups.symbol[0].target_id, "callee-uuid");
    }
}
451}