Skip to main content

datasynth_graph/exporters/
hypergraph.rs

1//! Multi-layer hypergraph exporter for RustGraph integration.
2//!
3//! Exports a built `Hypergraph` to JSONL files:
4//! - `nodes.jsonl` - All nodes with layer, entity_type_code
5//! - `edges.jsonl` - Cross-layer and intra-layer pairwise edges
6//! - `hyperedges.jsonl` - Journal entries and OCPM events as hyperedges
7//! - `metadata.json` - Schema, counts, layer stats, budget report
8
9use std::fs::{self, File};
10use std::io::{BufWriter, Write};
11use std::path::Path;
12
13use crate::models::hypergraph::{Hypergraph, HypergraphMetadata};
14
/// Options controlling how a hypergraph export is written to disk.
#[derive(Clone, Debug, Default)]
pub struct HypergraphExportConfig {
    /// When `true`, `metadata.json` is emitted pretty-printed (useful while
    /// debugging); the derived default is `false`, i.e. compact output.
    pub pretty_print: bool,
}
21
22/// Exports a `Hypergraph` to JSONL files for RustGraph import.
23pub struct HypergraphExporter {
24    config: HypergraphExportConfig,
25}
26
27impl HypergraphExporter {
28    /// Create a new exporter with the given configuration.
29    pub fn new(config: HypergraphExportConfig) -> Self {
30        Self { config }
31    }
32
33    /// Export the hypergraph to the given output directory.
34    ///
35    /// Creates:
36    /// - `nodes.jsonl` (one JSON object per line)
37    /// - `edges.jsonl` (one JSON object per line)
38    /// - `hyperedges.jsonl` (one JSON object per line)
39    /// - `metadata.json` (export metadata)
40    pub fn export(
41        &self,
42        hypergraph: &Hypergraph,
43        output_dir: &Path,
44    ) -> std::io::Result<HypergraphMetadata> {
45        fs::create_dir_all(output_dir)?;
46
47        // Export nodes
48        let nodes_path = output_dir.join("nodes.jsonl");
49        let file = File::create(&nodes_path)?;
50        let mut writer = BufWriter::new(file);
51        for node in &hypergraph.nodes {
52            serde_json::to_writer(&mut writer, node)?;
53            writeln!(writer)?;
54        }
55        writer.flush()?;
56
57        // Export edges
58        let edges_path = output_dir.join("edges.jsonl");
59        let file = File::create(&edges_path)?;
60        let mut writer = BufWriter::new(file);
61        for edge in &hypergraph.edges {
62            serde_json::to_writer(&mut writer, edge)?;
63            writeln!(writer)?;
64        }
65        writer.flush()?;
66
67        // Export hyperedges
68        let hyperedges_path = output_dir.join("hyperedges.jsonl");
69        let file = File::create(&hyperedges_path)?;
70        let mut writer = BufWriter::new(file);
71        for he in &hypergraph.hyperedges {
72            serde_json::to_writer(&mut writer, he)?;
73            writeln!(writer)?;
74        }
75        writer.flush()?;
76
77        // Export metadata
78        let metadata_path = output_dir.join("metadata.json");
79        let file = File::create(&metadata_path)?;
80        if self.config.pretty_print {
81            serde_json::to_writer_pretty(file, &hypergraph.metadata)?;
82        } else {
83            serde_json::to_writer(file, &hypergraph.metadata)?;
84        }
85
86        Ok(hypergraph.metadata.clone())
87    }
88}
89
#[cfg(test)]
// Tests unwrap freely: a panic is exactly the failure signal we want here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
    use crate::models::hypergraph::{
        CrossLayerEdge, Hyperedge, HyperedgeParticipant, HypergraphLayer, HypergraphNode,
    };
    use std::collections::HashMap;
    use tempfile::tempdir;

    /// Builds the fixture shared by most tests: only the COSO framework is
    /// added, with the P2P, O2C, vendor, customer and employee options off.
    fn coso_hypergraph() -> Hypergraph {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            include_p2p: false,
            include_o2c: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.build()
    }

    /// Exports `graph` into a fresh temporary directory using `config`.
    ///
    /// The directory handle is returned so the files stay alive for the
    /// duration of the calling test.
    fn export_with(
        graph: &Hypergraph,
        config: HypergraphExportConfig,
    ) -> (tempfile::TempDir, HypergraphMetadata) {
        let out_dir = tempdir().unwrap();
        let metadata = HypergraphExporter::new(config)
            .export(graph, out_dir.path())
            .unwrap();
        (out_dir, metadata)
    }

    #[test]
    fn test_export_creates_all_files() {
        let graph = coso_hypergraph();
        let (out_dir, metadata) = export_with(&graph, HypergraphExportConfig::default());

        assert!(out_dir.path().join("nodes.jsonl").exists());
        assert!(out_dir.path().join("edges.jsonl").exists());
        assert!(out_dir.path().join("hyperedges.jsonl").exists());
        assert!(out_dir.path().join("metadata.json").exists());

        assert_eq!(metadata.num_nodes, 22); // 5 components + 17 principles
    }

    #[test]
    fn test_nodes_jsonl_parseable() {
        let graph = coso_hypergraph();
        let (out_dir, _) = export_with(&graph, HypergraphExportConfig::default());

        // Every line must deserialize back into a node.
        let raw = std::fs::read_to_string(out_dir.path().join("nodes.jsonl")).unwrap();
        let nodes: Vec<HypergraphNode> = raw
            .lines()
            .map(|line| serde_json::from_str(line).unwrap())
            .collect();

        for node in &nodes {
            assert!(!node.id.is_empty());
            assert!(!node.entity_type.is_empty());
        }
        assert_eq!(nodes.len(), 22);
    }

    #[test]
    fn test_edges_jsonl_parseable() {
        let graph = coso_hypergraph();
        let (out_dir, _) = export_with(&graph, HypergraphExportConfig::default());

        let raw = std::fs::read_to_string(out_dir.path().join("edges.jsonl")).unwrap();
        raw.lines().for_each(|line| {
            let _edge: CrossLayerEdge = serde_json::from_str(line).unwrap();
        });
    }

    #[test]
    fn test_hyperedges_jsonl_parseable() {
        // Start from a builder with every listed include option disabled.
        let empty_config = HypergraphConfig {
            max_nodes: 1000,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_p2p: false,
            include_o2c: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            ..Default::default()
        };
        let mut hg = HypergraphBuilder::new(empty_config).build();

        // Inject one synthetic hyperedge by hand so there is something to export.
        let debit = HyperedgeParticipant {
            node_id: "acct_1000".to_string(),
            role: "debit".to_string(),
            weight: Some(100.0),
        };
        let credit = HyperedgeParticipant {
            node_id: "acct_2000".to_string(),
            role: "credit".to_string(),
            weight: Some(100.0),
        };
        hg.hyperedges.push(Hyperedge {
            id: "test_he".to_string(),
            hyperedge_type: "JournalEntry".to_string(),
            subtype: "R2R".to_string(),
            participants: vec![debit, credit],
            layer: HypergraphLayer::AccountingNetwork,
            properties: HashMap::new(),
            timestamp: None,
            is_anomaly: false,
            anomaly_type: None,
            features: vec![4.6, 2.0],
        });

        let (out_dir, _) = export_with(&hg, HypergraphExportConfig::default());

        let raw = std::fs::read_to_string(out_dir.path().join("hyperedges.jsonl")).unwrap();
        let parsed: Vec<Hyperedge> = raw
            .lines()
            .map(|line| serde_json::from_str(line).unwrap())
            .collect();

        for he in &parsed {
            assert_eq!(he.participants.len(), 2);
        }
        assert_eq!(parsed.len(), 1);
    }

    #[test]
    fn test_metadata_json_parseable() {
        let graph = coso_hypergraph();
        let (out_dir, _) = export_with(&graph, HypergraphExportConfig { pretty_print: true });

        let raw = std::fs::read_to_string(out_dir.path().join("metadata.json")).unwrap();
        let metadata: HypergraphMetadata = serde_json::from_str(&raw).unwrap();
        assert_eq!(metadata.num_nodes, 22);
        assert_eq!(metadata.source, "datasynth");
        assert!(!metadata.files.is_empty());
    }
}