1use std::collections::HashMap;
12use std::fs::{self, File};
13use std::io::{BufWriter, Write};
14use std::path::Path;
15
16use chrono::NaiveDate;
17use serde::{Deserialize, Serialize};
18use serde_json::Value;
19
20use crate::models::hypergraph::{
21 CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph, HypergraphMetadata,
22 HypergraphNode, NodeBudgetReport,
23};
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct RawUnifiedNode {
28 pub id: String,
30 pub node_type: String,
32 pub entity_type_code: u32,
34 pub layer: u8,
36 pub external_id: String,
38 pub name: String,
40 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
42 pub properties: HashMap<String, Value>,
43 #[serde(default, skip_serializing_if = "Vec::is_empty")]
45 pub features: Vec<f64>,
46 #[serde(default)]
48 pub is_anomaly: bool,
49 #[serde(skip_serializing_if = "Option::is_none")]
51 pub anomaly_type: Option<String>,
52 #[serde(default)]
54 pub is_aggregate: bool,
55 #[serde(default)]
57 pub aggregate_count: usize,
58}
59
60impl RawUnifiedNode {
61 pub fn from_hypergraph_node(node: &HypergraphNode) -> Self {
63 Self {
64 id: node.id.clone(),
65 node_type: node.entity_type.clone(),
66 entity_type_code: node.entity_type_code,
67 layer: node.layer.index(),
68 external_id: node.external_id.clone(),
69 name: node.label.clone(),
70 properties: node.properties.clone(),
71 features: node.features.clone(),
72 is_anomaly: node.is_anomaly,
73 anomaly_type: node.anomaly_type.clone(),
74 is_aggregate: node.is_aggregate,
75 aggregate_count: node.aggregate_count,
76 }
77 }
78}
79
80#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct RawUnifiedEdge {
83 pub source: String,
85 pub target: String,
87 pub source_layer: u8,
89 pub target_layer: u8,
91 pub edge_type: String,
93 pub edge_type_code: u32,
95 pub weight: f32,
97 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
99 pub properties: HashMap<String, Value>,
100}
101
102impl RawUnifiedEdge {
103 pub fn from_cross_layer_edge(edge: &CrossLayerEdge) -> Self {
105 Self {
106 source: edge.source_id.clone(),
107 target: edge.target_id.clone(),
108 source_layer: edge.source_layer.index(),
109 target_layer: edge.target_layer.index(),
110 edge_type: edge.edge_type.clone(),
111 edge_type_code: edge.edge_type_code,
112 weight: 1.0,
113 properties: edge.properties.clone(),
114 }
115 }
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RawUnifiedHyperedge {
121 pub id: String,
123 pub hyperedge_type: String,
125 pub subtype: String,
127 pub member_ids: Vec<String>,
129 pub layer: u8,
131 pub participants: Vec<HyperedgeParticipant>,
133 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
135 pub properties: HashMap<String, Value>,
136 #[serde(skip_serializing_if = "Option::is_none")]
138 pub timestamp: Option<NaiveDate>,
139 #[serde(default)]
141 pub is_anomaly: bool,
142 #[serde(skip_serializing_if = "Option::is_none")]
144 pub anomaly_type: Option<String>,
145 #[serde(default, skip_serializing_if = "Vec::is_empty")]
147 pub features: Vec<f64>,
148}
149
150impl RawUnifiedHyperedge {
151 pub fn from_hyperedge(he: &Hyperedge) -> Self {
153 Self {
154 id: he.id.clone(),
155 hyperedge_type: he.hyperedge_type.clone(),
156 subtype: he.subtype.clone(),
157 member_ids: he.participants.iter().map(|p| p.node_id.clone()).collect(),
158 layer: he.layer.index(),
159 participants: he.participants.clone(),
160 properties: he.properties.clone(),
161 timestamp: he.timestamp,
162 is_anomaly: he.is_anomaly,
163 anomaly_type: he.anomaly_type.clone(),
164 features: he.features.clone(),
165 }
166 }
167}
168
169#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct UnifiedHypergraphMetadata {
172 pub format: String,
174 pub name: String,
176 pub num_nodes: usize,
178 pub num_edges: usize,
180 pub num_hyperedges: usize,
182 pub layer_node_counts: HashMap<String, usize>,
184 pub node_type_counts: HashMap<String, usize>,
186 pub edge_type_counts: HashMap<String, usize>,
188 pub hyperedge_type_counts: HashMap<String, usize>,
190 pub anomalous_nodes: usize,
192 pub anomalous_hyperedges: usize,
194 pub source: String,
196 pub generated_at: String,
198 pub budget_report: NodeBudgetReport,
200 pub files: Vec<String>,
202}
203
204impl UnifiedHypergraphMetadata {
205 pub fn from_metadata(meta: &HypergraphMetadata) -> Self {
207 Self {
208 format: "rustgraph_unified_v1".to_string(),
209 name: meta.name.clone(),
210 num_nodes: meta.num_nodes,
211 num_edges: meta.num_edges,
212 num_hyperedges: meta.num_hyperedges,
213 layer_node_counts: meta.layer_node_counts.clone(),
214 node_type_counts: meta.node_type_counts.clone(),
215 edge_type_counts: meta.edge_type_counts.clone(),
216 hyperedge_type_counts: meta.hyperedge_type_counts.clone(),
217 anomalous_nodes: meta.anomalous_nodes,
218 anomalous_hyperedges: meta.anomalous_hyperedges,
219 source: meta.source.clone(),
220 generated_at: meta.generated_at.clone(),
221 budget_report: meta.budget_report.clone(),
222 files: meta.files.clone(),
223 }
224 }
225}
226
/// Options for [`RustGraphUnifiedExporter`].
///
/// The default configuration has `pretty_print` turned off.
#[derive(Debug, Clone, Default)]
pub struct UnifiedExportConfig {
    /// When `true`, `metadata.json` is written as indented JSON instead of compact JSON.
    /// The JSONL files are always one compact object per line.
    pub pretty_print: bool,
}
233
234pub struct RustGraphUnifiedExporter {
236 config: UnifiedExportConfig,
237}
238
239impl RustGraphUnifiedExporter {
240 pub fn new(config: UnifiedExportConfig) -> Self {
242 Self { config }
243 }
244
245 pub fn export(
253 &self,
254 hypergraph: &Hypergraph,
255 output_dir: &Path,
256 ) -> std::io::Result<UnifiedHypergraphMetadata> {
257 fs::create_dir_all(output_dir)?;
258
259 let nodes_path = output_dir.join("nodes.jsonl");
261 let file = File::create(nodes_path)?;
262 let mut writer = BufWriter::with_capacity(256 * 1024, file);
263 for node in &hypergraph.nodes {
264 let unified = RawUnifiedNode::from_hypergraph_node(node);
265 serde_json::to_writer(&mut writer, &unified)?;
266 writeln!(writer)?;
267 }
268 writer.flush()?;
269
270 let edges_path = output_dir.join("edges.jsonl");
272 let file = File::create(edges_path)?;
273 let mut writer = BufWriter::with_capacity(256 * 1024, file);
274 for edge in &hypergraph.edges {
275 let unified = RawUnifiedEdge::from_cross_layer_edge(edge);
276 serde_json::to_writer(&mut writer, &unified)?;
277 writeln!(writer)?;
278 }
279 writer.flush()?;
280
281 let hyperedges_path = output_dir.join("hyperedges.jsonl");
283 let file = File::create(hyperedges_path)?;
284 let mut writer = BufWriter::with_capacity(256 * 1024, file);
285 for he in &hypergraph.hyperedges {
286 let unified = RawUnifiedHyperedge::from_hyperedge(he);
287 serde_json::to_writer(&mut writer, &unified)?;
288 writeln!(writer)?;
289 }
290 writer.flush()?;
291
292 let mut metadata = UnifiedHypergraphMetadata::from_metadata(&hypergraph.metadata);
294 metadata.files = vec![
295 "nodes.jsonl".to_string(),
296 "edges.jsonl".to_string(),
297 "hyperedges.jsonl".to_string(),
298 "metadata.json".to_string(),
299 ];
300
301 let metadata_path = output_dir.join("metadata.json");
303 let file = File::create(metadata_path)?;
304 if self.config.pretty_print {
305 serde_json::to_writer_pretty(file, &metadata)?;
306 } else {
307 serde_json::to_writer(file, &metadata)?;
308 }
309
310 Ok(metadata)
311 }
312
313 pub fn export_to_writer<W: Write>(
319 &self,
320 hypergraph: &Hypergraph,
321 writer: &mut W,
322 ) -> std::io::Result<UnifiedHypergraphMetadata> {
323 for node in &hypergraph.nodes {
325 let unified = RawUnifiedNode::from_hypergraph_node(node);
326 let mut obj = serde_json::to_value(&unified)?;
327 obj.as_object_mut()
328 .expect("serialized struct is always a JSON object")
329 .insert("_type".to_string(), Value::String("node".to_string()));
330 serde_json::to_writer(&mut *writer, &obj)?;
331 writeln!(writer)?;
332 }
333
334 for edge in &hypergraph.edges {
336 let unified = RawUnifiedEdge::from_cross_layer_edge(edge);
337 let mut obj = serde_json::to_value(&unified)?;
338 obj.as_object_mut()
339 .expect("serialized struct is always a JSON object")
340 .insert("_type".to_string(), Value::String("edge".to_string()));
341 serde_json::to_writer(&mut *writer, &obj)?;
342 writeln!(writer)?;
343 }
344
345 for he in &hypergraph.hyperedges {
347 let unified = RawUnifiedHyperedge::from_hyperedge(he);
348 let mut obj = serde_json::to_value(&unified)?;
349 obj.as_object_mut()
350 .expect("serialized struct is always a JSON object")
351 .insert("_type".to_string(), Value::String("hyperedge".to_string()));
352 serde_json::to_writer(&mut *writer, &obj)?;
353 writeln!(writer)?;
354 }
355
356 let mut metadata = UnifiedHypergraphMetadata::from_metadata(&hypergraph.metadata);
357 metadata.files = vec![];
358
359 Ok(metadata)
360 }
361}
362
#[cfg(feature = "rustgraph")]
mod bulk_export {
    use super::*;
    use rustgraph_api_types::bulk::{BulkEdgeData, BulkNodeData};

    /// In-memory result of [`RustGraphUnifiedExporter::to_bulk_import`].
    #[derive(Debug, Clone)]
    pub struct RustGraphBulkExport {
        /// One bulk node per hypergraph node, in hypergraph order.
        pub nodes: Vec<BulkNodeData>,
        /// Bulk edges whose endpoints are the numeric ids from `id_map`.
        pub edges: Vec<BulkEdgeData>,
        /// Hyperedges in unified form; they still reference nodes by string id.
        pub hyperedges: Vec<RawUnifiedHyperedge>,
        /// String node id -> numeric id assigned during the conversion.
        pub id_map: HashMap<String, u64>,
    }

    /// Returns the node's own properties plus the bookkeeping entries added for bulk
    /// import: `entity_id`, `node_type_name`, `external_id`, the anomaly flag and type
    /// (anomalous nodes only) and `features` (when non-empty).
    fn bulk_node_properties(node: &HypergraphNode) -> HashMap<String, Value> {
        let mut props = node.properties.clone();

        let identity = [
            ("entity_id", &node.id),
            ("node_type_name", &node.entity_type),
            ("external_id", &node.external_id),
        ];
        for (key, text) in identity {
            props.insert(key.to_string(), Value::String(text.clone()));
        }

        if node.is_anomaly {
            props.insert("is_anomaly".to_string(), Value::Bool(true));
            // The anomaly type is only recorded for nodes that carry the flag.
            if let Some(kind) = node.anomaly_type.as_ref() {
                props.insert("anomaly_type".to_string(), Value::String(kind.clone()));
            }
        }

        if !node.features.is_empty() {
            let as_json: Vec<Value> = node
                .features
                .iter()
                .map(|feature| serde_json::json!(feature))
                .collect();
            props.insert("features".to_string(), Value::Array(as_json));
        }

        props
    }

    /// Returns the edge's own properties plus an `edge_type_name` entry.
    fn bulk_edge_properties(edge: &CrossLayerEdge) -> HashMap<String, Value> {
        let mut props = edge.properties.clone();
        props.insert(
            "edge_type_name".to_string(),
            Value::String(edge.edge_type.clone()),
        );
        props
    }

    impl RustGraphUnifiedExporter {
        /// Converts `hypergraph` into bulk-import structures without touching the disk.
        ///
        /// * Nodes receive consecutive numeric ids starting at 1, in hypergraph order;
        ///   the string-to-number mapping is returned in `id_map`.
        /// * Each node's label becomes its single bulk label, and its string id, type
        ///   name, external id, anomaly data and features are stored as properties.
        /// * Edges are translated to numeric endpoints with weight `1.0`. An edge whose
        ///   source or target id is not a known node is skipped.
        /// * Hyperedges are converted with [`RawUnifiedHyperedge::from_hyperedge`].
        pub fn to_bulk_import(&self, hypergraph: &Hypergraph) -> RustGraphBulkExport {
            let node_total = hypergraph.nodes.len();
            let mut id_map: HashMap<String, u64> = HashMap::with_capacity(node_total);
            let mut nodes = Vec::with_capacity(node_total);

            for (position, source_node) in hypergraph.nodes.iter().enumerate() {
                // Numeric ids are 1-based.
                let numeric_id = (position as u64) + 1;
                id_map.insert(source_node.id.clone(), numeric_id);

                let properties = bulk_node_properties(source_node);
                nodes.push(BulkNodeData {
                    id: Some(numeric_id),
                    node_type: source_node.entity_type_code,
                    layer: Some(source_node.layer.index()),
                    labels: vec![source_node.label.clone()],
                    properties,
                });
            }

            let mut edges = Vec::with_capacity(hypergraph.edges.len());
            for edge in &hypergraph.edges {
                let endpoints = (id_map.get(&edge.source_id), id_map.get(&edge.target_id));
                let (source, target) = match endpoints {
                    (Some(&source), Some(&target)) => (source, target),
                    // At least one endpoint is not an exported node: drop the edge.
                    _ => continue,
                };

                edges.push(BulkEdgeData {
                    source,
                    target,
                    edge_type: edge.edge_type_code,
                    weight: 1.0,
                    properties: bulk_edge_properties(edge),
                });
            }

            let mut hyperedges: Vec<RawUnifiedHyperedge> =
                Vec::with_capacity(hypergraph.hyperedges.len());
            for he in hypergraph.hyperedges.iter() {
                hyperedges.push(RawUnifiedHyperedge::from_hyperedge(he));
            }

            RustGraphBulkExport {
                nodes,
                edges,
                hyperedges,
                id_map,
            }
        }
    }
}
482
483#[cfg(feature = "rustgraph")]
484pub use bulk_export::RustGraphBulkExport;
485
#[cfg(test)]
// Tests are allowed to unwrap: a failed unwrap is a failed test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
    use crate::models::hypergraph::HypergraphLayer;
    use tempfile::tempdir;

    /// Number of nodes the tests expect in the hypergraph from `build_test_hypergraph`.
    const EXPECTED_NODE_COUNT: usize = 22;

    /// Builds a hypergraph that contains only what `add_coso_framework` adds, with the
    /// P2P, O2C, vendor, customer and employee sources switched off.
    fn build_test_hypergraph() -> Hypergraph {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            include_p2p: false,
            include_o2c: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.build()
    }

    /// Exporter with the default configuration.
    fn default_exporter() -> RustGraphUnifiedExporter {
        RustGraphUnifiedExporter::new(UnifiedExportConfig::default())
    }

    /// Exports the test hypergraph into a fresh temporary directory.
    ///
    /// The directory guard is returned so the files stay on disk while the caller
    /// inspects them.
    fn export_to_temp(config: UnifiedExportConfig) -> (tempfile::TempDir, UnifiedHypergraphMetadata) {
        let hypergraph = build_test_hypergraph();
        let dir = tempdir().unwrap();
        let metadata = RustGraphUnifiedExporter::new(config)
            .export(&hypergraph, dir.path())
            .unwrap();
        (dir, metadata)
    }

    #[test]
    fn test_node_conversion() {
        let source = HypergraphNode {
            id: "node_1".to_string(),
            entity_type: "Account".to_string(),
            entity_type_code: 100,
            layer: HypergraphLayer::AccountingNetwork,
            external_id: "1000".to_string(),
            label: "Cash".to_string(),
            properties: HashMap::new(),
            features: vec![1.0, 2.0],
            is_anomaly: false,
            anomaly_type: None,
            is_aggregate: false,
            aggregate_count: 0,
        };

        let converted = RawUnifiedNode::from_hypergraph_node(&source);

        assert_eq!(converted.id, "node_1");
        assert_eq!(converted.node_type, "Account");
        assert_eq!(converted.name, "Cash");
        assert_eq!(converted.layer, 3);
        assert_eq!(converted.entity_type_code, 100);
        assert_eq!(converted.external_id, "1000");
        assert_eq!(converted.features, vec![1.0, 2.0]);
    }

    #[test]
    fn test_edge_conversion() {
        let source = CrossLayerEdge {
            source_id: "ctrl_C001".to_string(),
            source_layer: HypergraphLayer::GovernanceControls,
            target_id: "acct_1000".to_string(),
            target_layer: HypergraphLayer::AccountingNetwork,
            edge_type: "ImplementsControl".to_string(),
            edge_type_code: 40,
            properties: HashMap::new(),
        };

        let converted = RawUnifiedEdge::from_cross_layer_edge(&source);

        assert_eq!(converted.source, "ctrl_C001");
        assert_eq!(converted.target, "acct_1000");
        assert_eq!(converted.source_layer, 1);
        assert_eq!(converted.target_layer, 3);
        assert_eq!(converted.edge_type, "ImplementsControl");
        assert_eq!(converted.edge_type_code, 40);
        assert_eq!(converted.weight, 1.0);
    }

    #[test]
    fn test_hyperedge_conversion() {
        let participant = |node_id: &str, role: &str| HyperedgeParticipant {
            node_id: node_id.to_string(),
            role: role.to_string(),
            weight: Some(500.0),
        };
        let source = Hyperedge {
            id: "he_1".to_string(),
            hyperedge_type: "JournalEntry".to_string(),
            subtype: "R2R".to_string(),
            participants: vec![
                participant("acct_1000", "debit"),
                participant("acct_2000", "credit"),
            ],
            layer: HypergraphLayer::AccountingNetwork,
            properties: HashMap::new(),
            timestamp: Some(NaiveDate::from_ymd_opt(2024, 6, 15).unwrap()),
            is_anomaly: true,
            anomaly_type: Some("split_transaction".to_string()),
            features: vec![6.2, 1.0],
        };

        let converted = RawUnifiedHyperedge::from_hyperedge(&source);

        assert_eq!(converted.id, "he_1");
        assert_eq!(converted.hyperedge_type, "JournalEntry");
        assert_eq!(converted.layer, 3);
        assert_eq!(converted.member_ids, vec!["acct_1000", "acct_2000"]);
        assert_eq!(converted.participants.len(), 2);
        assert!(converted.is_anomaly);
        assert_eq!(
            converted.anomaly_type,
            Some("split_transaction".to_string())
        );
    }

    #[test]
    fn test_unified_export_creates_all_files() {
        let (dir, metadata) = export_to_temp(UnifiedExportConfig::default());

        for file_name in [
            "nodes.jsonl",
            "edges.jsonl",
            "hyperedges.jsonl",
            "metadata.json",
        ] {
            assert!(dir.path().join(file_name).exists());
        }

        assert_eq!(metadata.num_nodes, EXPECTED_NODE_COUNT);
        assert_eq!(metadata.format, "rustgraph_unified_v1");
    }

    #[test]
    fn test_unified_nodes_jsonl_parseable() {
        let (dir, _) = export_to_temp(UnifiedExportConfig::default());

        let content = fs::read_to_string(dir.path().join("nodes.jsonl")).unwrap();
        let parsed: Vec<RawUnifiedNode> = content
            .lines()
            .map(|line| serde_json::from_str(line).unwrap())
            .collect();

        for node in &parsed {
            assert!(!node.id.is_empty());
            assert!(!node.node_type.is_empty());
            assert!(!node.name.is_empty());
            assert!((1..=3).contains(&node.layer));
        }
        assert_eq!(parsed.len(), EXPECTED_NODE_COUNT);
    }

    #[test]
    fn test_unified_edges_jsonl_uses_source_target() {
        let (dir, _) = export_to_temp(UnifiedExportConfig::default());

        let content = fs::read_to_string(dir.path().join("edges.jsonl")).unwrap();
        for line in content.lines() {
            let edge: RawUnifiedEdge = serde_json::from_str(line).unwrap();
            assert!(!edge.source.is_empty());
            assert!(!edge.target.is_empty());
            assert!((1..=3).contains(&edge.source_layer));
            assert!((1..=3).contains(&edge.target_layer));
            assert_eq!(edge.weight, 1.0);
        }
    }

    #[test]
    fn test_unified_metadata_has_format_field() {
        let (dir, _) = export_to_temp(UnifiedExportConfig { pretty_print: true });

        let content = fs::read_to_string(dir.path().join("metadata.json")).unwrap();
        let metadata: UnifiedHypergraphMetadata = serde_json::from_str(&content).unwrap();

        assert_eq!(metadata.format, "rustgraph_unified_v1");
        assert_eq!(metadata.source, "datasynth");
    }

    #[cfg(feature = "rustgraph")]
    #[test]
    fn test_to_bulk_import_nodes() {
        let hypergraph = build_test_hypergraph();
        let export = default_exporter().to_bulk_import(&hypergraph);

        assert_eq!(export.nodes.len(), EXPECTED_NODE_COUNT);
        assert_eq!(export.id_map.len(), EXPECTED_NODE_COUNT);

        let first = &export.nodes[0];
        assert_eq!(first.id, Some(1));
        assert!(first.node_type > 0);
        assert!(first.layer.is_some());
        assert!(!first.labels.is_empty());
        for key in ["entity_id", "node_type_name"] {
            assert!(first.properties.contains_key(key));
        }
    }

    #[cfg(feature = "rustgraph")]
    #[test]
    fn test_to_bulk_import_edges() {
        let hypergraph = build_test_hypergraph();
        let export = default_exporter().to_bulk_import(&hypergraph);

        assert!(!export.edges.is_empty());

        // Every endpoint must be one of the numeric ids handed out to nodes.
        let known_ids: std::collections::HashSet<u64> = export.id_map.values().copied().collect();
        for edge in &export.edges {
            assert!(known_ids.contains(&edge.source));
            assert!(known_ids.contains(&edge.target));
            assert!(edge.properties.contains_key("edge_type_name"));
        }
    }

    #[cfg(feature = "rustgraph")]
    #[test]
    fn test_to_bulk_import_id_mapping() {
        let hypergraph = build_test_hypergraph();
        let export = default_exporter().to_bulk_import(&hypergraph);

        // Ids run from 1 up to the number of nodes.
        let smallest = export.nodes.iter().filter_map(|n| n.id).min();
        let largest = export.nodes.iter().filter_map(|n| n.id).max();
        assert_eq!(smallest, Some(1u64));
        assert_eq!(largest, Some(export.nodes.len() as u64));

        // The string id stored on each node maps back to that node's numeric id.
        for node in &export.nodes {
            let string_id = node
                .properties
                .get("entity_id")
                .and_then(|v| v.as_str())
                .expect("entity_id should be a string");
            assert_eq!(export.id_map.get(string_id).copied(), node.id);
        }
    }

    #[test]
    fn test_export_to_writer() {
        let hypergraph = build_test_hypergraph();
        let mut buffer = Vec::new();

        let metadata = default_exporter()
            .export_to_writer(&hypergraph, &mut buffer)
            .unwrap();
        assert_eq!(metadata.num_nodes, EXPECTED_NODE_COUNT);

        let content = String::from_utf8(buffer).unwrap();
        let (mut node_count, mut edge_count) = (0usize, 0usize);
        for line in content.lines() {
            let record: Value = serde_json::from_str(line).unwrap();
            let record_type = record.get("_type").unwrap().as_str().unwrap();
            match record_type {
                "node" => node_count += 1,
                "edge" => edge_count += 1,
                "hyperedge" => {}
                _ => panic!("Unexpected _type: {}", record_type),
            }
        }

        assert_eq!(node_count, EXPECTED_NODE_COUNT);
        assert!(edge_count > 0);
    }
}