Skip to main content

graphify_export/
cypher.rs

1//! Neo4j Cypher export.
2
3use std::fmt::Write;
4use std::fs;
5use std::path::{Path, PathBuf};
6
7use std::collections::HashMap;
8
9use graphify_core::graph::KnowledgeGraph;
10use tracing::info;
11
12/// Export the graph as Cypher CREATE statements for Neo4j.
13pub fn export_cypher(graph: &KnowledgeGraph, output_dir: &Path) -> anyhow::Result<PathBuf> {
14    let mut cypher = String::with_capacity(4096);
15
16    let var_names = build_unique_var_names(graph);
17
18    for node in graph.nodes() {
19        let var = var_names.get(&node.id).map(|s| s.as_str()).unwrap_or("n");
20        let node_type_label = format!("{}", node.node_type);
21        write!(
22            cypher,
23            "CREATE ({}:{} {{id: '{}', label: '{}', source_file: '{}'",
24            var,
25            node_type_label,
26            cypher_escape(&node.id),
27            cypher_escape(&node.label),
28            cypher_escape(&node.source_file),
29        )?;
30        if let Some(loc) = &node.source_location {
31            write!(cypher, ", source_location: '{}'", cypher_escape(loc))?;
32        }
33        if let Some(c) = node.community {
34            write!(cypher, ", community: {c}")?;
35        }
36        writeln!(cypher, "}});")?;
37    }
38
39    writeln!(cypher)?;
40
41    for edge in graph.edges() {
42        let rel_type = edge
43            .relation
44            .to_uppercase()
45            .replace(|c: char| !c.is_ascii_alphanumeric() && c != '_', "_");
46        let src_var = var_names
47            .get(&edge.source)
48            .map(|s| s.as_str())
49            .unwrap_or("n");
50        let tgt_var = var_names
51            .get(&edge.target)
52            .map(|s| s.as_str())
53            .unwrap_or("n");
54        writeln!(
55            cypher,
56            "CREATE ({src})-[:{rel} {{relation: '{relation}', confidence: '{confidence}', confidence_score: {score:.2}, source_file: '{file}', weight: {weight:.2}}}]->({tgt});",
57            src = src_var,
58            rel = rel_type,
59            relation = cypher_escape(&edge.relation),
60            confidence = edge.confidence,
61            score = edge.confidence_score,
62            file = cypher_escape(&edge.source_file),
63            weight = edge.weight,
64            tgt = tgt_var,
65        )?;
66    }
67
68    fs::create_dir_all(output_dir)?;
69    let path = output_dir.join("graph.cypher");
70    fs::write(&path, &cypher)?;
71    info!(path = %path.display(), "exported Cypher statements");
72    Ok(path)
73}
74
/// Turn a node ID into text usable as a Cypher variable name.
///
/// Every character other than an ASCII letter, ASCII digit, or `_` is
/// replaced by `_`. If the result begins with an ASCII digit, a `_` is
/// prepended. An empty ID yields an empty string.
fn sanitize_var(id: &str) -> String {
    let cleaned: String = id
        .chars()
        .map(|ch| match ch {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => ch,
            _ => '_',
        })
        .collect();

    match cleaned.chars().next() {
        // A leading digit is shielded with an underscore.
        Some(first) if first.is_ascii_digit() => format!("_{cleaned}"),
        _ => cleaned,
    }
}
90
91fn build_unique_var_names(graph: &KnowledgeGraph) -> HashMap<String, String> {
92    let mut name_to_ids: HashMap<String, Vec<String>> = HashMap::new();
93    for node in graph.nodes() {
94        let sanitized = sanitize_var(&node.id);
95        name_to_ids
96            .entry(sanitized)
97            .or_default()
98            .push(node.id.clone());
99    }
100
101    let mut result = HashMap::new();
102    for (sanitized, mut ids) in name_to_ids {
103        if ids.len() == 1 {
104            if let Some(primary_id) = ids.pop() {
105                result.insert(primary_id, sanitized);
106            }
107        } else {
108            for (i, id) in ids.into_iter().enumerate() {
109                result.insert(id, format!("{sanitized}_{i}"));
110            }
111        }
112    }
113    result
114}
115
/// Escape `s` for embedding inside a single-quoted Cypher string literal.
///
/// Backslashes are doubled, single quotes are backslash-escaped, and line
/// feeds / carriage returns are written as the two-character sequences `\n`
/// and `\r`. All other characters pass through untouched.
fn cypher_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            other => escaped.push(other),
        }
    }
    escaped
}
122
#[cfg(test)]
mod tests {
    use super::*;
    use graphify_core::confidence::Confidence;
    use graphify_core::graph::KnowledgeGraph;
    use graphify_core::model::{GraphEdge, GraphNode, NodeType};
    use std::collections::HashMap;

    /// Build a node with no source location, no community and no extras.
    fn bare_node(id: &str, label: &str, source_file: &str, node_type: NodeType) -> GraphNode {
        GraphNode {
            id: id.into(),
            label: label.into(),
            source_file: source_file.into(),
            source_location: None,
            node_type,
            community: None,
            extra: HashMap::new(),
        }
    }

    /// Two nodes (`my_class`, `helper`) joined by a single `calls` edge.
    fn sample_graph() -> KnowledgeGraph {
        let mut kg = KnowledgeGraph::new();

        let class_node = GraphNode {
            source_location: Some("L42".into()),
            community: Some(0),
            ..bare_node("my_class", "MyClass", "src/main.rs", NodeType::Class)
        };
        kg.add_node(class_node).unwrap();
        kg.add_node(bare_node(
            "helper",
            "Helper",
            "src/util.rs",
            NodeType::Function,
        ))
        .unwrap();

        let call_edge = GraphEdge {
            source: "my_class".into(),
            target: "helper".into(),
            relation: "calls".into(),
            confidence: Confidence::Extracted,
            confidence_score: 1.0,
            source_file: "src/main.rs".into(),
            source_location: None,
            weight: 1.0,
            extra: HashMap::new(),
        };
        kg.add_edge(call_edge).unwrap();

        kg
    }

    #[test]
    fn export_cypher_creates_file() {
        let dir = tempfile::tempdir().unwrap();
        let kg = sample_graph();
        let path = export_cypher(&kg, dir.path()).unwrap();
        assert!(path.exists());

        let content = std::fs::read_to_string(&path).unwrap();
        for needle in ["CREATE (", "CALLS", "MyClass"] {
            assert!(content.contains(needle));
        }
    }

    #[test]
    fn var_name_collision_gets_suffix() {
        // Both IDs sanitise to `foo_bar`, so each must receive a suffix.
        let mut kg = KnowledgeGraph::new();
        kg.add_node(bare_node("foo.bar", "FooBar", "a.rs", NodeType::Class))
            .unwrap();
        kg.add_node(bare_node("foo_bar", "FooBar2", "b.rs", NodeType::Function))
            .unwrap();

        let dir = tempfile::tempdir().unwrap();
        let path = export_cypher(&kg, dir.path()).unwrap();
        let content = std::fs::read_to_string(&path).unwrap();

        for suffixed in ["foo_bar_0", "foo_bar_1"] {
            assert!(content.contains(suffixed));
        }
        assert!(!content.contains("foo_bar}"));
    }

    #[test]
    fn sanitize_var_removes_special_chars() {
        let cases = [("my-class.foo", "my_class_foo"), ("123abc", "_123abc")];
        for (input, expected) in cases {
            assert_eq!(sanitize_var(input), expected);
        }
    }

    #[test]
    fn cypher_escape_quotes() {
        assert_eq!(cypher_escape("it's"), "it\\'s");
    }

    #[test]
    fn cypher_escape_newlines() {
        let cases = [
            ("line1\nline2", "line1\\nline2"),
            ("line1\r\nline2", "line1\\r\\nline2"),
        ];
        for (input, expected) in cases {
            assert_eq!(cypher_escape(input), expected);
        }
    }
}