Skip to main content

infigraph_core/export/
mod.rs

1//! Export the code graph to various formats: Neo4j Cypher, GraphML, JSON.
2
3use std::io::Write;
4
5use anyhow::Result;
6
7use crate::graph::GraphQuery;
8
/// Make `s` safe to embed between single quotes in a Cypher string literal.
///
/// Backslashes are doubled and single quotes are prefixed with a backslash;
/// every other character is copied through unchanged.
fn cypher_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            other => escaped.push(other),
        }
    }
    escaped
}
13
/// Escape a string for use in XML attribute values and element text.
///
/// Replaces the five characters that have predefined XML entities
/// (`&`, `<`, `>`, `"`, `'`) with their entity references. The input is
/// scanned once, so the `&` introduced by an entity is never re-escaped.
fn xml_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            // `&` must become `&amp;`; leaving it raw yields malformed XML.
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            other => out.push(other),
        }
    }
    out
}
22
/// Escape a string for embedding inside a double-quoted JSON string.
///
/// Handles quotes, backslashes and every control character below U+0020:
/// the common ones use their short escapes (`\n`, `\r`, `\t`, `\b`, `\f`),
/// the rest are written as `\u00XX`. All other characters are copied as-is.
fn json_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0C}' => out.push_str("\\f"),
            // Remaining C0 controls are illegal raw inside a JSON string.
            c if c < '\u{20}' => out.push_str(&format!("\\u{:04x}", c as u32)),
            c => out.push(c),
        }
    }
    out
}
31
32/// Write Neo4j-compatible Cypher CREATE statements for all nodes and edges.
33///
34/// Produces output like:
35/// ```text
36/// CREATE (s:Symbol {id: '...', name: '...', kind: '...', ...});
37/// CREATE (s1)-[:CALLS]->(s2);
38/// ```
39pub fn export_cypher<W: Write>(gq: &GraphQuery, writer: &mut W) -> Result<()> {
40    writeln!(writer, "// Infigraph graph export — Neo4j Cypher")?;
41    writeln!(writer)?;
42
43    // ── Module nodes ──
44    let modules = gq.raw_query("MATCH (m:Module) RETURN m.id, m.name, m.file, m.language")?;
45    writeln!(writer, "// Modules ({} nodes)", modules.len())?;
46    for row in &modules {
47        let id = cypher_escape(&row[0]);
48        let name = cypher_escape(&row[1]);
49        let file = cypher_escape(&row[2]);
50        let language = cypher_escape(&row[3]);
51        writeln!(
52            writer,
53            "CREATE (:Module {{id: '{id}', name: '{name}', file: '{file}', language: '{language}'}});"
54        )?;
55    }
56    writeln!(writer)?;
57
58    // ── Symbol nodes ──
59    let symbols = gq.raw_query(
60        "MATCH (s:Symbol) RETURN s.id, s.name, s.kind, s.file, s.start_line, s.end_line, s.language, s.visibility, s.parent, s.docstring",
61    )?;
62    writeln!(writer, "// Symbols ({} nodes)", symbols.len())?;
63    for row in &symbols {
64        let id = cypher_escape(&row[0]);
65        let name = cypher_escape(&row[1]);
66        let kind = cypher_escape(&row[2]);
67        let file = cypher_escape(&row[3]);
68        let start_line = &row[4];
69        let end_line = &row[5];
70        let language = cypher_escape(&row[6]);
71        let visibility = cypher_escape(&row[7]);
72        let parent = cypher_escape(&row[8]);
73        let docstring = cypher_escape(&row[9]);
74        writeln!(
75            writer,
76            "CREATE (:Symbol {{id: '{id}', name: '{name}', kind: '{kind}', file: '{file}', start_line: {start_line}, end_line: {end_line}, language: '{language}', visibility: '{visibility}', parent: '{parent}', docstring: '{docstring}'}});"
77        )?;
78    }
79    writeln!(writer)?;
80
81    // ── Edges ──
82    // CONTAINS (Module -> Symbol)
83    let contains = gq.raw_query("MATCH (m:Module)-[:CONTAINS]->(s:Symbol) RETURN m.id, s.id")?;
84    writeln!(writer, "// CONTAINS edges ({} edges)", contains.len())?;
85    for row in &contains {
86        let src = cypher_escape(&row[0]);
87        let dst = cypher_escape(&row[1]);
88        writeln!(
89            writer,
90            "MATCH (a:Module {{id: '{src}'}}), (b:Symbol {{id: '{dst}'}}) CREATE (a)-[:CONTAINS]->(b);"
91        )?;
92    }
93    writeln!(writer)?;
94
95    // CALLS (Symbol -> Symbol)
96    let calls = gq.raw_query("MATCH (a:Symbol)-[:CALLS]->(b:Symbol) RETURN a.id, b.id")?;
97    writeln!(writer, "// CALLS edges ({} edges)", calls.len())?;
98    for row in &calls {
99        let src = cypher_escape(&row[0]);
100        let dst = cypher_escape(&row[1]);
101        writeln!(
102            writer,
103            "MATCH (a:Symbol {{id: '{src}'}}), (b:Symbol {{id: '{dst}'}}) CREATE (a)-[:CALLS]->(b);"
104        )?;
105    }
106    writeln!(writer)?;
107
108    // INHERITS (Symbol -> Symbol)
109    let inherits = gq.raw_query("MATCH (a:Symbol)-[:INHERITS]->(b:Symbol) RETURN a.id, b.id")?;
110    writeln!(writer, "// INHERITS edges ({} edges)", inherits.len())?;
111    for row in &inherits {
112        let src = cypher_escape(&row[0]);
113        let dst = cypher_escape(&row[1]);
114        writeln!(
115            writer,
116            "MATCH (a:Symbol {{id: '{src}'}}), (b:Symbol {{id: '{dst}'}}) CREATE (a)-[:INHERITS]->(b);"
117        )?;
118    }
119    writeln!(writer)?;
120
121    // TESTED_BY (Symbol -> Symbol)
122    let tested_by = gq.raw_query("MATCH (a:Symbol)-[:TESTED_BY]->(b:Symbol) RETURN a.id, b.id")?;
123    writeln!(writer, "// TESTED_BY edges ({} edges)", tested_by.len())?;
124    for row in &tested_by {
125        let src = cypher_escape(&row[0]);
126        let dst = cypher_escape(&row[1]);
127        writeln!(
128            writer,
129            "MATCH (a:Symbol {{id: '{src}'}}), (b:Symbol {{id: '{dst}'}}) CREATE (a)-[:TESTED_BY]->(b);"
130        )?;
131    }
132
133    Ok(())
134}
135
136/// Write GraphML XML format (compatible with Gephi/yEd).
137///
138/// Includes all node properties as `<data>` elements with declared `<key>` definitions.
139pub fn export_graphml<W: Write>(gq: &GraphQuery, writer: &mut W) -> Result<()> {
140    writeln!(writer, r#"<?xml version="1.0" encoding="UTF-8"?>"#)?;
141    writeln!(
142        writer,
143        r#"<graphml xmlns="http://graphml.graphstruct.org/graphml"
144         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
145         xsi:schemaLocation="http://graphml.graphstruct.org/graphml http://graphml.graphstruct.org/xmlns/1.0/graphml.xsd">"#
146    )?;
147
148    // Key definitions for node properties
149    writeln!(
150        writer,
151        r#"  <key id="d_name" for="node" attr.name="name" attr.type="string"/>"#
152    )?;
153    writeln!(
154        writer,
155        r#"  <key id="d_kind" for="node" attr.name="kind" attr.type="string"/>"#
156    )?;
157    writeln!(
158        writer,
159        r#"  <key id="d_file" for="node" attr.name="file" attr.type="string"/>"#
160    )?;
161    writeln!(
162        writer,
163        r#"  <key id="d_language" for="node" attr.name="language" attr.type="string"/>"#
164    )?;
165    writeln!(
166        writer,
167        r#"  <key id="d_start_line" for="node" attr.name="start_line" attr.type="int"/>"#
168    )?;
169    writeln!(
170        writer,
171        r#"  <key id="d_end_line" for="node" attr.name="end_line" attr.type="int"/>"#
172    )?;
173    writeln!(
174        writer,
175        r#"  <key id="d_visibility" for="node" attr.name="visibility" attr.type="string"/>"#
176    )?;
177    writeln!(
178        writer,
179        r#"  <key id="d_parent" for="node" attr.name="parent" attr.type="string"/>"#
180    )?;
181    writeln!(
182        writer,
183        r#"  <key id="d_docstring" for="node" attr.name="docstring" attr.type="string"/>"#
184    )?;
185    writeln!(
186        writer,
187        r#"  <key id="d_node_type" for="node" attr.name="node_type" attr.type="string"/>"#
188    )?;
189
190    // Key definition for edge label
191    writeln!(
192        writer,
193        r#"  <key id="d_label" for="edge" attr.name="label" attr.type="string"/>"#
194    )?;
195    writeln!(writer)?;
196    writeln!(writer, r#"  <graph id="infigraph" edgedefault="directed">"#)?;
197
198    // ── Module nodes ──
199    let modules = gq.raw_query("MATCH (m:Module) RETURN m.id, m.name, m.file, m.language")?;
200    for row in &modules {
201        let id = xml_escape(&row[0]);
202        let name = xml_escape(&row[1]);
203        let file = xml_escape(&row[2]);
204        let language = xml_escape(&row[3]);
205        writeln!(writer, r#"    <node id="{id}">"#)?;
206        writeln!(writer, r#"      <data key="d_node_type">Module</data>"#)?;
207        writeln!(writer, r#"      <data key="d_name">{name}</data>"#)?;
208        writeln!(writer, r#"      <data key="d_file">{file}</data>"#)?;
209        writeln!(writer, r#"      <data key="d_language">{language}</data>"#)?;
210        writeln!(writer, r#"    </node>"#)?;
211    }
212
213    // ── Symbol nodes ──
214    let symbols = gq.raw_query(
215        "MATCH (s:Symbol) RETURN s.id, s.name, s.kind, s.file, s.start_line, s.end_line, s.language, s.visibility, s.parent, s.docstring",
216    )?;
217    for row in &symbols {
218        let id = xml_escape(&row[0]);
219        let name = xml_escape(&row[1]);
220        let kind = xml_escape(&row[2]);
221        let file = xml_escape(&row[3]);
222        let start_line = &row[4];
223        let end_line = &row[5];
224        let language = xml_escape(&row[6]);
225        let visibility = xml_escape(&row[7]);
226        let parent = xml_escape(&row[8]);
227        let docstring = xml_escape(&row[9]);
228        writeln!(writer, r#"    <node id="{id}">"#)?;
229        writeln!(writer, r#"      <data key="d_node_type">Symbol</data>"#)?;
230        writeln!(writer, r#"      <data key="d_name">{name}</data>"#)?;
231        writeln!(writer, r#"      <data key="d_kind">{kind}</data>"#)?;
232        writeln!(writer, r#"      <data key="d_file">{file}</data>"#)?;
233        writeln!(
234            writer,
235            r#"      <data key="d_start_line">{start_line}</data>"#
236        )?;
237        writeln!(writer, r#"      <data key="d_end_line">{end_line}</data>"#)?;
238        writeln!(writer, r#"      <data key="d_language">{language}</data>"#)?;
239        if !visibility.is_empty() {
240            writeln!(
241                writer,
242                r#"      <data key="d_visibility">{visibility}</data>"#
243            )?;
244        }
245        if !parent.is_empty() {
246            writeln!(writer, r#"      <data key="d_parent">{parent}</data>"#)?;
247        }
248        if !docstring.is_empty() {
249            writeln!(
250                writer,
251                r#"      <data key="d_docstring">{docstring}</data>"#
252            )?;
253        }
254        writeln!(writer, r#"    </node>"#)?;
255    }
256
257    // ── Edges ──
258    let mut edge_id: u64 = 0;
259
260    let contains = gq.raw_query("MATCH (m:Module)-[:CONTAINS]->(s:Symbol) RETURN m.id, s.id")?;
261    for row in &contains {
262        let src = xml_escape(&row[0]);
263        let dst = xml_escape(&row[1]);
264        writeln!(
265            writer,
266            r#"    <edge id="e{edge_id}" source="{src}" target="{dst}"><data key="d_label">CONTAINS</data></edge>"#
267        )?;
268        edge_id += 1;
269    }
270
271    let calls = gq.raw_query("MATCH (a:Symbol)-[:CALLS]->(b:Symbol) RETURN a.id, b.id")?;
272    for row in &calls {
273        let src = xml_escape(&row[0]);
274        let dst = xml_escape(&row[1]);
275        writeln!(
276            writer,
277            r#"    <edge id="e{edge_id}" source="{src}" target="{dst}"><data key="d_label">CALLS</data></edge>"#
278        )?;
279        edge_id += 1;
280    }
281
282    let inherits = gq.raw_query("MATCH (a:Symbol)-[:INHERITS]->(b:Symbol) RETURN a.id, b.id")?;
283    for row in &inherits {
284        let src = xml_escape(&row[0]);
285        let dst = xml_escape(&row[1]);
286        writeln!(
287            writer,
288            r#"    <edge id="e{edge_id}" source="{src}" target="{dst}"><data key="d_label">INHERITS</data></edge>"#
289        )?;
290        edge_id += 1;
291    }
292
293    let tested_by = gq.raw_query("MATCH (a:Symbol)-[:TESTED_BY]->(b:Symbol) RETURN a.id, b.id")?;
294    for row in &tested_by {
295        let src = xml_escape(&row[0]);
296        let dst = xml_escape(&row[1]);
297        writeln!(
298            writer,
299            r#"    <edge id="e{edge_id}" source="{src}" target="{dst}"><data key="d_label">TESTED_BY</data></edge>"#
300        )?;
301        edge_id += 1;
302    }
303
304    writeln!(writer, r#"  </graph>"#)?;
305    writeln!(writer, r#"</graphml>"#)?;
306
307    Ok(())
308}
309
310/// Write JSON with `{"nodes": [...], "edges": [...]}` format.
311///
312/// Each node has `id`, `type` (Module/Symbol), and all relevant properties.
313/// Each edge has `source`, `target`, and `label`.
314pub fn export_json<W: Write>(gq: &GraphQuery, writer: &mut W) -> Result<()> {
315    // ── Collect nodes ──
316    let modules = gq.raw_query("MATCH (m:Module) RETURN m.id, m.name, m.file, m.language")?;
317    let symbols = gq.raw_query(
318        "MATCH (s:Symbol) RETURN s.id, s.name, s.kind, s.file, s.start_line, s.end_line, s.language, s.visibility, s.parent, s.docstring",
319    )?;
320
321    // ── Collect edges ──
322    let contains = gq.raw_query("MATCH (m:Module)-[:CONTAINS]->(s:Symbol) RETURN m.id, s.id")?;
323    let calls = gq.raw_query("MATCH (a:Symbol)-[:CALLS]->(b:Symbol) RETURN a.id, b.id")?;
324    let inherits = gq.raw_query("MATCH (a:Symbol)-[:INHERITS]->(b:Symbol) RETURN a.id, b.id")?;
325    let tested_by = gq.raw_query("MATCH (a:Symbol)-[:TESTED_BY]->(b:Symbol) RETURN a.id, b.id")?;
326
327    // Build output using manual JSON to avoid adding serde_json as a dependency
328    // (infigraph-core already has serde_json)
329    writeln!(writer, "{{")?;
330    writeln!(writer, "  \"nodes\": [")?;
331
332    let total_nodes = modules.len() + symbols.len();
333    let mut node_idx: usize = 0;
334
335    for row in &modules {
336        let comma = if node_idx + 1 < total_nodes { "," } else { "" };
337        writeln!(
338            writer,
339            "    {{\"id\": \"{}\", \"type\": \"Module\", \"name\": \"{}\", \"file\": \"{}\", \"language\": \"{}\"}}{}",
340            json_escape(&row[0]),
341            json_escape(&row[1]),
342            json_escape(&row[2]),
343            json_escape(&row[3]),
344            comma
345        )?;
346        node_idx += 1;
347    }
348
349    for row in &symbols {
350        let comma = if node_idx + 1 < total_nodes { "," } else { "" };
351        writeln!(
352            writer,
353            "    {{\"id\": \"{}\", \"type\": \"Symbol\", \"name\": \"{}\", \"kind\": \"{}\", \"file\": \"{}\", \"start_line\": {}, \"end_line\": {}, \"language\": \"{}\", \"visibility\": \"{}\", \"parent\": \"{}\", \"docstring\": \"{}\"}}{}",
354            json_escape(&row[0]),
355            json_escape(&row[1]),
356            json_escape(&row[2]),
357            json_escape(&row[3]),
358            row[4],
359            row[5],
360            json_escape(&row[6]),
361            json_escape(&row[7]),
362            json_escape(&row[8]),
363            json_escape(&row[9]),
364            comma
365        )?;
366        node_idx += 1;
367    }
368
369    writeln!(writer, "  ],")?;
370    writeln!(writer, "  \"edges\": [")?;
371
372    let total_edges = contains.len() + calls.len() + inherits.len() + tested_by.len();
373    let mut edge_idx: usize = 0;
374
375    let edge_sets: &[(&str, &Vec<Vec<String>>)] = &[
376        ("CONTAINS", &contains),
377        ("CALLS", &calls),
378        ("INHERITS", &inherits),
379        ("TESTED_BY", &tested_by),
380    ];
381
382    for (label, edges) in edge_sets {
383        for row in *edges {
384            let comma = if edge_idx + 1 < total_edges { "," } else { "" };
385            writeln!(
386                writer,
387                "    {{\"source\": \"{}\", \"target\": \"{}\", \"label\": \"{}\"}}{}",
388                json_escape(&row[0]),
389                json_escape(&row[1]),
390                label,
391                comma
392            )?;
393            edge_idx += 1;
394        }
395    }
396
397    writeln!(writer, "  ]")?;
398    writeln!(writer, "}}")?;
399
400    Ok(())
401}