Skip to main content

grafeo_engine/export/
mod.rs

1//! Graph export serializers (GEXF, GraphML).
2//!
3//! Provides streaming serializers that write graph data directly to a [`std::io::Write`] sink.
4//! No external XML library is needed: both formats are simple enough for `write!()` macros
5//! with proper escaping.
6
7pub mod gexf;
8pub mod graphml;
9
10use std::collections::BTreeMap;
11use std::io;
12
13use grafeo_common::PropertyKey;
14use grafeo_common::types::Value;
15use grafeo_core::graph::lpg::{Edge, Node};
16
17/// Errors from graph export operations.
18#[derive(Debug, thiserror::Error)]
19#[non_exhaustive]
20pub enum ExportError {
21    /// I/O error while writing output.
22    #[error("I/O error: {0}")]
23    Io(#[from] io::Error),
24}
25
/// Escapes XML special characters in text content and attribute values.
///
/// The five characters `&`, `<`, `>`, `"` and `'` are replaced by the predefined
/// entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`. Every other character is
/// copied through unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
#[must_use]
pub fn escape_xml(s: &str) -> String {
    // The output is at least as long as the input; it only grows when escaping occurs.
    let mut result = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            // A raw ampersand would start an entity reference, so it must be
            // written as an entity itself to keep the document well-formed.
            '&' => result.push_str("&amp;"),
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            '"' => result.push_str("&quot;"),
            '\'' => result.push_str("&apos;"),
            _ => result.push(ch),
        }
    }
    result
}
42
43/// Maps a grafeo [`Value`] to a GEXF attribute type string.
44#[must_use]
45pub fn value_to_gexf_type(value: &Value) -> &'static str {
46    match value {
47        Value::Int64(_) => "integer",
48        Value::Float64(_) => "float",
49        Value::Bool(_) => "boolean",
50        Value::String(_) => "string",
51        Value::Date(_) => "date",
52        _ => "string",
53    }
54}
55
56/// Maps a grafeo [`Value`] to a GraphML attribute type string.
57#[must_use]
58pub fn value_to_graphml_type(value: &Value) -> &'static str {
59    match value {
60        Value::Int64(_) => "long",
61        Value::Float64(_) => "double",
62        Value::Bool(_) => "boolean",
63        Value::String(_) => "string",
64        _ => "string",
65    }
66}
67
68/// Converts a [`Value`] to an XML-safe string representation.
69///
70/// Returns `None` for `Value::Null` (callers should omit the element).
71#[must_use]
72pub fn value_to_xml_string(value: &Value) -> Option<String> {
73    match value {
74        Value::Null => None,
75        Value::Bool(b) => Some(b.to_string()),
76        Value::Int64(i) => Some(i.to_string()),
77        Value::Float64(f) => Some(f.to_string()),
78        Value::String(s) => Some(escape_xml(s.as_str())),
79        Value::Date(d) => Some(d.to_string()),
80        Value::Time(t) => Some(t.to_string()),
81        Value::Timestamp(ts) => Some(ts.to_string()),
82        Value::Duration(d) => Some(d.to_string()),
83        Value::ZonedDatetime(zdt) => Some(zdt.to_string()),
84        Value::Bytes(b) => {
85            // Hex-encode binary data
86            use std::fmt::Write;
87            let hex = b.iter().fold(String::new(), |mut acc, byte| {
88                let _ = write!(acc, "{byte:02x}");
89                acc
90            });
91            Some(hex)
92        }
93        Value::Vector(v) => {
94            let parts: Vec<String> = v.iter().map(|f| f.to_string()).collect();
95            Some(parts.join(","))
96        }
97        Value::List(items) => {
98            let parts: Vec<String> = items.iter().filter_map(value_to_xml_string).collect();
99            Some(parts.join(","))
100        }
101        Value::Map(m) => {
102            // Serialize as key=value pairs
103            let parts: Vec<String> = m
104                .iter()
105                .map(|(k, v)| {
106                    let val_str = value_to_xml_string(v).unwrap_or_default();
107                    format!("{}={}", escape_xml(k.as_str()), val_str)
108                })
109                .collect();
110            Some(parts.join(";"))
111        }
112        Value::Path { .. } | Value::GCounter(_) | Value::OnCounter { .. } => {
113            Some(escape_xml(&value.to_string()))
114        }
115        _ => Some(escape_xml(&value.to_string())),
116    }
117}
118
/// Discovered property schema, keyed by property name.
///
/// Each entry maps a property key to `(attribute ID, type name)`, where the type
/// name is the GEXF or GraphML type string chosen during schema discovery.
pub(crate) type PropertySchema = BTreeMap<PropertyKey, (usize, &'static str)>;
121
122/// Discovers the property schema for nodes by scanning all property keys and their types.
123pub(crate) fn discover_node_schema<F>(nodes: &[Node], type_fn: F) -> PropertySchema
124where
125    F: Fn(&Value) -> &'static str,
126{
127    let mut schema: BTreeMap<PropertyKey, Option<&'static str>> = BTreeMap::new();
128    for node in nodes {
129        for (key, value) in node.properties.iter() {
130            schema
131                .entry(key.clone())
132                .and_modify(|existing| {
133                    if existing.is_none() && !value.is_null() {
134                        *existing = Some(type_fn(value));
135                    }
136                })
137                .or_insert_with(|| {
138                    if value.is_null() {
139                        None
140                    } else {
141                        Some(type_fn(value))
142                    }
143                });
144        }
145    }
146    schema
147        .into_iter()
148        .enumerate()
149        .map(|(idx, (key, type_str))| (key, (idx, type_str.unwrap_or("string"))))
150        .collect()
151}
152
153/// Discovers the property schema for edges by scanning all property keys and their types.
154pub(crate) fn discover_edge_schema<F>(edges: &[Edge], type_fn: F) -> PropertySchema
155where
156    F: Fn(&Value) -> &'static str,
157{
158    let mut schema: BTreeMap<PropertyKey, Option<&'static str>> = BTreeMap::new();
159    for edge in edges {
160        for (key, value) in edge.properties.iter() {
161            schema
162                .entry(key.clone())
163                .and_modify(|existing| {
164                    if existing.is_none() && !value.is_null() {
165                        *existing = Some(type_fn(value));
166                    }
167                })
168                .or_insert_with(|| {
169                    if value.is_null() {
170                        None
171                    } else {
172                        Some(type_fn(value))
173                    }
174                });
175        }
176    }
177    schema
178        .into_iter()
179        .enumerate()
180        .map(|(idx, (key, type_str))| (key, (idx, type_str.unwrap_or("string"))))
181        .collect()
182}
183
// Unit tests for the shared export helpers: XML escaping, value rendering,
// type-name mapping and property schema discovery.
#[cfg(test)]
mod tests {
    use super::*;

    // --- escape_xml ---

    // Each of the five special characters is replaced by its entity.
    #[test]
    fn test_escape_xml_basic() {
        assert_eq!(escape_xml("hello"), "hello");
        assert_eq!(escape_xml("a & b"), "a &amp; b");
        assert_eq!(escape_xml("<tag>"), "&lt;tag&gt;");
        assert_eq!(escape_xml("she said \"hi\""), "she said &quot;hi&quot;");
        assert_eq!(escape_xml("it's"), "it&apos;s");
    }

    // Several special characters in one input are all escaped.
    #[test]
    fn test_escape_xml_combined() {
        assert_eq!(
            escape_xml("<a href=\"x&y\">"),
            "&lt;a href=&quot;x&amp;y&quot;&gt;"
        );
    }

    // --- value_to_xml_string ---

    // Null is the only value that renders to `None`.
    #[test]
    fn test_value_to_xml_string_null() {
        assert!(value_to_xml_string(&Value::Null).is_none());
    }

    // Scalars use their `to_string()` form; strings are additionally escaped.
    #[test]
    fn test_value_to_xml_string_primitives() {
        assert_eq!(value_to_xml_string(&Value::Bool(true)).unwrap(), "true");
        assert_eq!(value_to_xml_string(&Value::Int64(42)).unwrap(), "42");
        assert_eq!(
            value_to_xml_string(&Value::Float64(3.125)).unwrap(),
            "3.125"
        );
        assert_eq!(
            value_to_xml_string(&Value::String("Alix & Gus".into())).unwrap(),
            "Alix &amp; Gus"
        );
    }

    // Vector components are joined with commas.
    #[test]
    fn test_value_to_xml_string_vector() {
        let v = Value::Vector(std::sync::Arc::from(vec![1.0f32, 2.0, 3.0].as_slice()));
        assert_eq!(value_to_xml_string(&v).unwrap(), "1,2,3");
    }

    // --- type-name mapping ---

    #[test]
    fn test_gexf_type_mapping() {
        assert_eq!(value_to_gexf_type(&Value::Int64(0)), "integer");
        assert_eq!(value_to_gexf_type(&Value::Float64(0.0)), "float");
        assert_eq!(value_to_gexf_type(&Value::Bool(true)), "boolean");
        assert_eq!(value_to_gexf_type(&Value::String("".into())), "string");
    }

    #[test]
    fn test_graphml_type_mapping() {
        assert_eq!(value_to_graphml_type(&Value::Int64(0)), "long");
        assert_eq!(value_to_graphml_type(&Value::Float64(0.0)), "double");
        assert_eq!(value_to_graphml_type(&Value::Bool(true)), "boolean");
        assert_eq!(value_to_graphml_type(&Value::String("".into())), "string");
    }

    // --- value_to_xml_string: bytes, temporal types, collections ---

    // Bytes are rendered as lowercase hex, two digits per byte.
    #[test]
    fn test_value_to_xml_string_bytes() {
        let v = Value::Bytes(std::sync::Arc::from(
            vec![0xDE, 0xAD, 0xBE, 0xEF].as_slice(),
        ));
        assert_eq!(value_to_xml_string(&v).unwrap(), "deadbeef");
    }

    // An empty byte slice renders as an empty string, not `None`.
    #[test]
    fn test_value_to_xml_string_bytes_empty() {
        let v = Value::Bytes(std::sync::Arc::from(Vec::<u8>::new().as_slice()));
        assert_eq!(value_to_xml_string(&v).unwrap(), "");
    }

    // The temporal tests below only check loose properties of the output
    // (non-empty, contains a component), not the exact format.
    #[test]
    fn test_value_to_xml_string_date() {
        use grafeo_common::types::Date;
        let date = Date::from_ymd(2025, 6, 15).unwrap();
        let v = Value::Date(date);
        let result = value_to_xml_string(&v).unwrap();
        assert!(
            result.contains("2025"),
            "date should contain the year: {result}"
        );
    }

    #[test]
    fn test_value_to_xml_string_time() {
        use grafeo_common::types::Time;
        let time = Time::from_hms(14, 30, 0).unwrap();
        let v = Value::Time(time);
        let result = value_to_xml_string(&v).unwrap();
        assert!(
            result.contains("14"),
            "time should contain the hour: {result}"
        );
    }

    #[test]
    fn test_value_to_xml_string_timestamp() {
        use grafeo_common::types::Timestamp;
        let ts = Timestamp::from_micros(1_000_000);
        let v = Value::Timestamp(ts);
        let result = value_to_xml_string(&v);
        assert!(result.is_some());
    }

    #[test]
    fn test_value_to_xml_string_duration() {
        use grafeo_common::types::Duration;
        let dur = Duration::new(2, 5, 0);
        let v = Value::Duration(dur);
        let result = value_to_xml_string(&v).unwrap();
        assert!(!result.is_empty());
    }

    #[test]
    fn test_value_to_xml_string_zoned_datetime() {
        use grafeo_common::types::{Timestamp, ZonedDatetime};
        let zdt = ZonedDatetime::from_timestamp_offset(Timestamp::from_micros(0), 3600);
        let v = Value::ZonedDatetime(zdt);
        let result = value_to_xml_string(&v).unwrap();
        assert!(!result.is_empty());
    }

    #[test]
    fn test_value_to_xml_string_list() {
        let items = vec![
            Value::Int64(1),
            Value::Int64(2),
            Value::Null,
            Value::Int64(3),
        ];
        let v = Value::List(std::sync::Arc::from(items.as_slice()));
        // Null is filtered out by value_to_xml_string returning None
        assert_eq!(value_to_xml_string(&v).unwrap(), "1,2,3");
    }

    // An empty list renders as an empty string, not `None`.
    #[test]
    fn test_value_to_xml_string_list_empty() {
        let v = Value::List(std::sync::Arc::from(Vec::<Value>::new().as_slice()));
        assert_eq!(value_to_xml_string(&v).unwrap(), "");
    }

    #[test]
    fn test_value_to_xml_string_map() {
        let mut map = BTreeMap::new();
        map.insert(PropertyKey::from("city"), Value::String("Amsterdam".into()));
        map.insert(PropertyKey::from("pop"), Value::Int64(900_000));
        let v = Value::Map(std::sync::Arc::new(map));
        let result = value_to_xml_string(&v).unwrap();
        // BTreeMap is sorted, so "city" comes before "pop"; the assertions below
        // only check that both pairs and the separator are present.
        assert!(result.contains("city=Amsterdam"));
        assert!(result.contains("pop=900000"));
        assert!(result.contains(';'));
    }

    #[test]
    fn test_value_to_xml_string_map_with_null_value() {
        let mut map = BTreeMap::new();
        map.insert(PropertyKey::from("key"), Value::Null);
        let v = Value::Map(std::sync::Arc::new(map));
        let result = value_to_xml_string(&v).unwrap();
        // Null -> unwrap_or_default -> empty string
        assert_eq!(result, "key=");
    }

    // Both map keys and string values are XML-escaped.
    #[test]
    fn test_value_to_xml_string_map_with_special_chars() {
        let mut map = BTreeMap::new();
        map.insert(PropertyKey::from("k&ey"), Value::String("<val>".into()));
        let v = Value::Map(std::sync::Arc::new(map));
        let result = value_to_xml_string(&v).unwrap();
        assert!(result.contains("k&amp;ey=&lt;val&gt;"));
    }

    // --- type-name mapping: special and fallback variants ---

    #[test]
    fn test_gexf_type_date_variant() {
        use grafeo_common::types::Date;
        let date = Date::from_ymd(2025, 1, 1).unwrap();
        assert_eq!(value_to_gexf_type(&Value::Date(date)), "date");
    }

    #[test]
    fn test_gexf_type_fallback_to_string() {
        let v = Value::Bytes(std::sync::Arc::from(vec![1u8].as_slice()));
        assert_eq!(value_to_gexf_type(&v), "string");
    }

    #[test]
    fn test_graphml_type_fallback_to_string() {
        use grafeo_common::types::Duration;
        let dur = Duration::new(0, 0, 0);
        assert_eq!(value_to_graphml_type(&Value::Duration(dur)), "string");
    }

    // --- discover_node_schema ---

    // Keys from all nodes are merged into one schema.
    #[test]
    fn test_discover_node_schema_multiple_nodes() {
        use grafeo_common::types::NodeId;
        use grafeo_core::graph::lpg::Node;

        let mut n1 = Node::new(NodeId(1));
        n1.set_property("name", Value::String("Alix".into()));
        n1.set_property("age", Value::Int64(30));

        let mut n2 = Node::new(NodeId(2));
        n2.set_property("name", Value::String("Gus".into()));
        n2.set_property("score", Value::Float64(9.5));

        let schema = discover_node_schema(&[n1, n2], value_to_gexf_type);
        assert_eq!(schema.len(), 3); // name, age, score
        assert_eq!(schema[&PropertyKey::from("name")].1, "string");
        assert_eq!(schema[&PropertyKey::from("age")].1, "integer");
        assert_eq!(schema[&PropertyKey::from("score")].1, "float");
    }

    #[test]
    fn test_discover_node_schema_null_then_typed() {
        use grafeo_common::types::NodeId;
        use grafeo_core::graph::lpg::Node;

        // First node has null for "age", second node has a typed value
        let mut n1 = Node::new(NodeId(1));
        n1.set_property("age", Value::Null);

        let mut n2 = Node::new(NodeId(2));
        n2.set_property("age", Value::Int64(25));

        let schema = discover_node_schema(&[n1, n2], value_to_gexf_type);
        // The type should be resolved from the second node
        assert_eq!(schema[&PropertyKey::from("age")].1, "integer");
    }

    #[test]
    fn test_discover_node_schema_all_null_falls_back_to_string() {
        use grafeo_common::types::NodeId;
        use grafeo_core::graph::lpg::Node;

        let mut n1 = Node::new(NodeId(1));
        n1.set_property("unknown", Value::Null);

        let schema = discover_node_schema(&[n1], value_to_gexf_type);
        // All null, should fall back to "string"
        assert_eq!(schema[&PropertyKey::from("unknown")].1, "string");
    }

    #[test]
    fn test_discover_node_schema_empty() {
        let schema = discover_node_schema(&[], value_to_gexf_type);
        assert!(schema.is_empty());
    }

    // --- discover_edge_schema ---

    #[test]
    fn test_discover_edge_schema_multiple_edges() {
        use grafeo_common::types::{EdgeId, NodeId};
        use grafeo_core::graph::lpg::Edge;

        let mut e1 = Edge::new(EdgeId(1), NodeId(1), NodeId(2), "KNOWS");
        e1.set_property("since", Value::Int64(2020));

        let mut e2 = Edge::new(EdgeId(2), NodeId(2), NodeId(3), "FOLLOWS");
        e2.set_property("weight", Value::Float64(0.8));

        let schema = discover_edge_schema(&[e1, e2], value_to_graphml_type);
        assert_eq!(schema.len(), 2);
        assert_eq!(schema[&PropertyKey::from("since")].1, "long");
        assert_eq!(schema[&PropertyKey::from("weight")].1, "double");
    }

    // A null seen first does not fix the type; the later typed value does.
    #[test]
    fn test_discover_edge_schema_null_then_typed() {
        use grafeo_common::types::{EdgeId, NodeId};
        use grafeo_core::graph::lpg::Edge;

        let mut e1 = Edge::new(EdgeId(1), NodeId(1), NodeId(2), "KNOWS");
        e1.set_property("weight", Value::Null);

        let mut e2 = Edge::new(EdgeId(2), NodeId(2), NodeId(3), "KNOWS");
        e2.set_property("weight", Value::Float64(1.5));

        let schema = discover_edge_schema(&[e1, e2], value_to_graphml_type);
        assert_eq!(schema[&PropertyKey::from("weight")].1, "double");
    }

    #[test]
    fn test_discover_edge_schema_empty() {
        let schema = discover_edge_schema(&[], value_to_graphml_type);
        assert!(schema.is_empty());
    }

    #[test]
    fn test_discover_node_schema_ids_are_sequential() {
        use grafeo_common::types::NodeId;
        use grafeo_core::graph::lpg::Node;

        let mut n1 = Node::new(NodeId(1));
        n1.set_property("a", Value::Int64(1));
        n1.set_property("b", Value::Bool(true));
        n1.set_property("c", Value::Float64(1.0));

        let schema = discover_node_schema(&[n1], value_to_gexf_type);
        // IDs should be assigned sequentially from BTreeMap iteration (alphabetical)
        let ids: Vec<usize> = schema.values().map(|(id, _)| *id).collect();
        assert_eq!(ids, vec![0, 1, 2]);
    }
}
491}