Skip to main content

datasynth_graph/exporters/
hypergraph.rs

1//! Multi-layer hypergraph exporter for RustGraph integration.
2//!
3//! Exports a built `Hypergraph` to JSONL files:
4//! - `nodes.jsonl` - All nodes with layer, entity_type_code
5//! - `edges.jsonl` - Cross-layer and intra-layer pairwise edges
6//! - `hyperedges.jsonl` - Journal entries and OCPM events as hyperedges
7//! - `metadata.json` - Schema, counts, layer stats, budget report
8
9use std::fs::{self, File};
10use std::io::{BufWriter, Write};
11use std::path::Path;
12
13use crate::models::hypergraph::{Hypergraph, HypergraphMetadata};
14
/// Options controlling how [`HypergraphExporter`] writes its output.
///
/// The `Default` value disables every option (compact `metadata.json`).
#[derive(Clone, Debug, Default)]
pub struct HypergraphExportConfig {
    /// When `true`, `metadata.json` is written with indentation so it is
    /// easier to read while debugging; when `false`, it is written compactly.
    pub pretty_print: bool,
}
21
22/// Exports a `Hypergraph` to JSONL files for RustGraph import.
23pub struct HypergraphExporter {
24    config: HypergraphExportConfig,
25}
26
27impl HypergraphExporter {
28    /// Create a new exporter with the given configuration.
29    pub fn new(config: HypergraphExportConfig) -> Self {
30        Self { config }
31    }
32
33    /// Export the hypergraph to the given output directory.
34    ///
35    /// Creates:
36    /// - `nodes.jsonl` (one JSON object per line)
37    /// - `edges.jsonl` (one JSON object per line)
38    /// - `hyperedges.jsonl` (one JSON object per line)
39    /// - `metadata.json` (export metadata)
40    pub fn export(
41        &self,
42        hypergraph: &Hypergraph,
43        output_dir: &Path,
44    ) -> std::io::Result<HypergraphMetadata> {
45        fs::create_dir_all(output_dir)?;
46
47        // Export nodes
48        let nodes_path = output_dir.join("nodes.jsonl");
49        let file = File::create(&nodes_path)?;
50        let mut writer = BufWriter::new(file);
51        for node in &hypergraph.nodes {
52            serde_json::to_writer(&mut writer, node)?;
53            writeln!(writer)?;
54        }
55        writer.flush()?;
56
57        // Export edges
58        let edges_path = output_dir.join("edges.jsonl");
59        let file = File::create(&edges_path)?;
60        let mut writer = BufWriter::new(file);
61        for edge in &hypergraph.edges {
62            serde_json::to_writer(&mut writer, edge)?;
63            writeln!(writer)?;
64        }
65        writer.flush()?;
66
67        // Export hyperedges
68        let hyperedges_path = output_dir.join("hyperedges.jsonl");
69        let file = File::create(&hyperedges_path)?;
70        let mut writer = BufWriter::new(file);
71        for he in &hypergraph.hyperedges {
72            serde_json::to_writer(&mut writer, he)?;
73            writeln!(writer)?;
74        }
75        writer.flush()?;
76
77        // Export metadata
78        let metadata_path = output_dir.join("metadata.json");
79        let file = File::create(&metadata_path)?;
80        if self.config.pretty_print {
81            serde_json::to_writer_pretty(file, &hypergraph.metadata)?;
82        } else {
83            serde_json::to_writer(file, &hypergraph.metadata)?;
84        }
85
86        Ok(hypergraph.metadata.clone())
87    }
88}
89
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
    use crate::models::hypergraph::{
        CrossLayerEdge, Hyperedge, HyperedgeParticipant, HypergraphLayer, HypergraphNode,
    };
    use std::collections::HashMap;
    use tempfile::tempdir;

    /// Builder configuration shared by the tests: a 1000-node budget with the
    /// P2P, O2C, vendor, customer and employee options switched off and
    /// everything else left at its default.
    fn base_config() -> HypergraphConfig {
        HypergraphConfig {
            max_nodes: 1000,
            include_p2p: false,
            include_o2c: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            ..Default::default()
        }
    }

    /// Builds a hypergraph containing only the COSO framework, exports it into
    /// a fresh temporary directory and returns that directory together with
    /// the metadata reported by the exporter.
    fn export_coso(
        export_config: HypergraphExportConfig,
    ) -> (tempfile::TempDir, HypergraphMetadata) {
        let mut builder = HypergraphBuilder::new(base_config());
        builder.add_coso_framework();
        let hypergraph = builder.build();

        let out_dir = tempdir().unwrap();
        let metadata = HypergraphExporter::new(export_config)
            .export(&hypergraph, out_dir.path())
            .unwrap();
        (out_dir, metadata)
    }

    #[test]
    fn test_export_creates_all_files() {
        let (out_dir, metadata) = export_coso(HypergraphExportConfig::default());

        for file_name in [
            "nodes.jsonl",
            "edges.jsonl",
            "hyperedges.jsonl",
            "metadata.json",
        ] {
            assert!(out_dir.path().join(file_name).exists());
        }

        assert_eq!(metadata.num_nodes, 22); // 5 components + 17 principles
    }

    #[test]
    fn test_nodes_jsonl_parseable() {
        let (out_dir, _) = export_coso(HypergraphExportConfig::default());

        // Every line must deserialize back into a node.
        let raw = std::fs::read_to_string(out_dir.path().join("nodes.jsonl")).unwrap();
        let nodes: Vec<HypergraphNode> = raw
            .lines()
            .map(|line| serde_json::from_str::<HypergraphNode>(line).unwrap())
            .collect();

        for node in &nodes {
            assert!(!node.id.is_empty());
            assert!(!node.entity_type.is_empty());
        }
        assert_eq!(nodes.len(), 22);
    }

    #[test]
    fn test_edges_jsonl_parseable() {
        let (out_dir, _) = export_coso(HypergraphExportConfig::default());

        let raw = std::fs::read_to_string(out_dir.path().join("edges.jsonl")).unwrap();
        raw.lines().for_each(|line| {
            let _edge: CrossLayerEdge = serde_json::from_str(line).unwrap();
        });
    }

    #[test]
    fn test_hyperedges_jsonl_parseable() {
        // Start from an empty hypergraph: the framework/control/SOX options
        // are switched off in addition to the ones disabled by `base_config`.
        let config = HypergraphConfig {
            include_coso: false,
            include_controls: false,
            include_sox: false,
            ..base_config()
        };
        let builder = HypergraphBuilder::new(config);
        let mut hg = builder.build();

        // Inject a single synthetic hyperedge by hand.
        let participant = |node_id: &str, role: &str| HyperedgeParticipant {
            node_id: node_id.to_string(),
            role: role.to_string(),
            weight: Some(100.0),
        };
        hg.hyperedges.push(Hyperedge {
            id: "test_he".to_string(),
            hyperedge_type: "JournalEntry".to_string(),
            subtype: "R2R".to_string(),
            participants: vec![
                participant("acct_1000", "debit"),
                participant("acct_2000", "credit"),
            ],
            layer: HypergraphLayer::AccountingNetwork,
            properties: HashMap::new(),
            timestamp: None,
            is_anomaly: false,
            anomaly_type: None,
            features: vec![4.6, 2.0],
        });

        let out_dir = tempdir().unwrap();
        HypergraphExporter::new(HypergraphExportConfig::default())
            .export(&hg, out_dir.path())
            .unwrap();

        let raw = std::fs::read_to_string(out_dir.path().join("hyperedges.jsonl")).unwrap();
        let parsed: Vec<Hyperedge> = raw
            .lines()
            .map(|line| serde_json::from_str::<Hyperedge>(line).unwrap())
            .collect();

        for he in &parsed {
            assert_eq!(he.participants.len(), 2);
        }
        assert_eq!(parsed.len(), 1);
    }

    #[test]
    fn test_metadata_json_parseable() {
        let (out_dir, _) = export_coso(HypergraphExportConfig { pretty_print: true });

        let raw = std::fs::read_to_string(out_dir.path().join("metadata.json")).unwrap();
        let metadata: HypergraphMetadata = serde_json::from_str(&raw).unwrap();
        assert_eq!(metadata.num_nodes, 22);
        assert_eq!(metadata.source, "datasynth");
        assert!(!metadata.files.is_empty());
    }
}