1use std::collections::HashMap;
12use std::fs::{self, File};
13use std::io::{BufWriter, Write};
14use std::path::Path;
15
16use chrono::NaiveDate;
17use serde::{Deserialize, Serialize};
18use serde_json::Value;
19
20use crate::models::hypergraph::{
21 CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph, HypergraphMetadata,
22 HypergraphNode, NodeBudgetReport,
23};
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct RawUnifiedNode {
28 pub id: String,
30 pub node_type: String,
32 pub entity_type_code: u32,
34 pub layer: u8,
36 pub external_id: String,
38 pub name: String,
40 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
42 pub properties: HashMap<String, Value>,
43 #[serde(default, skip_serializing_if = "Vec::is_empty")]
45 pub features: Vec<f64>,
46 #[serde(default)]
48 pub is_anomaly: bool,
49 #[serde(skip_serializing_if = "Option::is_none")]
51 pub anomaly_type: Option<String>,
52 #[serde(default)]
54 pub is_aggregate: bool,
55 #[serde(default)]
57 pub aggregate_count: usize,
58}
59
60impl RawUnifiedNode {
61 pub fn from_hypergraph_node(node: &HypergraphNode) -> Self {
63 Self {
64 id: node.id.clone(),
65 node_type: node.entity_type.clone(),
66 entity_type_code: node.entity_type_code,
67 layer: node.layer.index(),
68 external_id: node.external_id.clone(),
69 name: node.label.clone(),
70 properties: node.properties.clone(),
71 features: node.features.clone(),
72 is_anomaly: node.is_anomaly,
73 anomaly_type: node.anomaly_type.clone(),
74 is_aggregate: node.is_aggregate,
75 aggregate_count: node.aggregate_count,
76 }
77 }
78}
79
80#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct RawUnifiedEdge {
83 pub source: String,
85 pub target: String,
87 pub source_layer: u8,
89 pub target_layer: u8,
91 pub edge_type: String,
93 pub edge_type_code: u32,
95 pub weight: f32,
97 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
99 pub properties: HashMap<String, Value>,
100}
101
102impl RawUnifiedEdge {
103 pub fn from_cross_layer_edge(edge: &CrossLayerEdge) -> Self {
105 Self {
106 source: edge.source_id.clone(),
107 target: edge.target_id.clone(),
108 source_layer: edge.source_layer.index(),
109 target_layer: edge.target_layer.index(),
110 edge_type: edge.edge_type.clone(),
111 edge_type_code: edge.edge_type_code,
112 weight: 1.0,
113 properties: edge.properties.clone(),
114 }
115 }
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RawUnifiedHyperedge {
121 pub id: String,
123 pub hyperedge_type: String,
125 pub subtype: String,
127 pub member_ids: Vec<String>,
129 pub layer: u8,
131 pub participants: Vec<HyperedgeParticipant>,
133 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
135 pub properties: HashMap<String, Value>,
136 #[serde(skip_serializing_if = "Option::is_none")]
138 pub timestamp: Option<NaiveDate>,
139 #[serde(default)]
141 pub is_anomaly: bool,
142 #[serde(skip_serializing_if = "Option::is_none")]
144 pub anomaly_type: Option<String>,
145 #[serde(default, skip_serializing_if = "Vec::is_empty")]
147 pub features: Vec<f64>,
148}
149
150impl RawUnifiedHyperedge {
151 pub fn from_hyperedge(he: &Hyperedge) -> Self {
153 Self {
154 id: he.id.clone(),
155 hyperedge_type: he.hyperedge_type.clone(),
156 subtype: he.subtype.clone(),
157 member_ids: he.participants.iter().map(|p| p.node_id.clone()).collect(),
158 layer: he.layer.index(),
159 participants: he.participants.clone(),
160 properties: he.properties.clone(),
161 timestamp: he.timestamp,
162 is_anomaly: he.is_anomaly,
163 anomaly_type: he.anomaly_type.clone(),
164 features: he.features.clone(),
165 }
166 }
167}
168
169#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct UnifiedHypergraphMetadata {
172 pub format: String,
174 pub name: String,
176 pub num_nodes: usize,
178 pub num_edges: usize,
180 pub num_hyperedges: usize,
182 pub layer_node_counts: HashMap<String, usize>,
184 pub node_type_counts: HashMap<String, usize>,
186 pub edge_type_counts: HashMap<String, usize>,
188 pub hyperedge_type_counts: HashMap<String, usize>,
190 pub anomalous_nodes: usize,
192 pub anomalous_hyperedges: usize,
194 pub source: String,
196 pub generated_at: String,
198 pub budget_report: NodeBudgetReport,
200 pub files: Vec<String>,
202}
203
204impl UnifiedHypergraphMetadata {
205 pub fn from_metadata(meta: &HypergraphMetadata) -> Self {
207 Self {
208 format: "rustgraph_unified_v1".to_string(),
209 name: meta.name.clone(),
210 num_nodes: meta.num_nodes,
211 num_edges: meta.num_edges,
212 num_hyperedges: meta.num_hyperedges,
213 layer_node_counts: meta.layer_node_counts.clone(),
214 node_type_counts: meta.node_type_counts.clone(),
215 edge_type_counts: meta.edge_type_counts.clone(),
216 hyperedge_type_counts: meta.hyperedge_type_counts.clone(),
217 anomalous_nodes: meta.anomalous_nodes,
218 anomalous_hyperedges: meta.anomalous_hyperedges,
219 source: meta.source.clone(),
220 generated_at: meta.generated_at.clone(),
221 budget_report: meta.budget_report.clone(),
222 files: meta.files.clone(),
223 }
224 }
225}
226
/// Options for the unified exporter.
///
/// The `Default` value has `pretty_print` set to `false`.
#[derive(Clone, Debug, Default)]
pub struct UnifiedExportConfig {
    /// When `true`, `metadata.json` is written pretty-printed instead of compact.
    pub pretty_print: bool,
}
233
234pub struct RustGraphUnifiedExporter {
236 config: UnifiedExportConfig,
237}
238
239impl RustGraphUnifiedExporter {
240 pub fn new(config: UnifiedExportConfig) -> Self {
242 Self { config }
243 }
244
245 pub fn export(
253 &self,
254 hypergraph: &Hypergraph,
255 output_dir: &Path,
256 ) -> std::io::Result<UnifiedHypergraphMetadata> {
257 fs::create_dir_all(output_dir)?;
258
259 let nodes_path = output_dir.join("nodes.jsonl");
261 let file = File::create(nodes_path)?;
262 let mut writer = BufWriter::with_capacity(256 * 1024, file);
263 for node in &hypergraph.nodes {
264 let unified = RawUnifiedNode::from_hypergraph_node(node);
265 serde_json::to_writer(&mut writer, &unified)?;
266 writeln!(writer)?;
267 }
268 writer.flush()?;
269
270 let edges_path = output_dir.join("edges.jsonl");
272 let file = File::create(edges_path)?;
273 let mut writer = BufWriter::with_capacity(256 * 1024, file);
274 for edge in &hypergraph.edges {
275 let unified = RawUnifiedEdge::from_cross_layer_edge(edge);
276 serde_json::to_writer(&mut writer, &unified)?;
277 writeln!(writer)?;
278 }
279 writer.flush()?;
280
281 let hyperedges_path = output_dir.join("hyperedges.jsonl");
283 let file = File::create(hyperedges_path)?;
284 let mut writer = BufWriter::with_capacity(256 * 1024, file);
285 for he in &hypergraph.hyperedges {
286 let unified = RawUnifiedHyperedge::from_hyperedge(he);
287 serde_json::to_writer(&mut writer, &unified)?;
288 writeln!(writer)?;
289 }
290 writer.flush()?;
291
292 let mut metadata = UnifiedHypergraphMetadata::from_metadata(&hypergraph.metadata);
294 metadata.files = vec![
295 "nodes.jsonl".to_string(),
296 "edges.jsonl".to_string(),
297 "hyperedges.jsonl".to_string(),
298 "metadata.json".to_string(),
299 ];
300
301 let metadata_path = output_dir.join("metadata.json");
303 let file = File::create(metadata_path)?;
304 if self.config.pretty_print {
305 serde_json::to_writer_pretty(file, &metadata)?;
306 } else {
307 serde_json::to_writer(file, &metadata)?;
308 }
309
310 Ok(metadata)
311 }
312
313 pub fn export_to_writer<W: Write>(
319 &self,
320 hypergraph: &Hypergraph,
321 writer: &mut W,
322 ) -> std::io::Result<UnifiedHypergraphMetadata> {
323 for node in &hypergraph.nodes {
325 let unified = RawUnifiedNode::from_hypergraph_node(node);
326 let mut obj = serde_json::to_value(&unified)?;
327 obj.as_object_mut()
328 .expect("serialized struct is always a JSON object")
329 .insert("_type".to_string(), Value::String("node".to_string()));
330 serde_json::to_writer(&mut *writer, &obj)?;
331 writeln!(writer)?;
332 }
333
334 for edge in &hypergraph.edges {
336 let unified = RawUnifiedEdge::from_cross_layer_edge(edge);
337 let mut obj = serde_json::to_value(&unified)?;
338 obj.as_object_mut()
339 .expect("serialized struct is always a JSON object")
340 .insert("_type".to_string(), Value::String("edge".to_string()));
341 serde_json::to_writer(&mut *writer, &obj)?;
342 writeln!(writer)?;
343 }
344
345 for he in &hypergraph.hyperedges {
347 let unified = RawUnifiedHyperedge::from_hyperedge(he);
348 let mut obj = serde_json::to_value(&unified)?;
349 obj.as_object_mut()
350 .expect("serialized struct is always a JSON object")
351 .insert("_type".to_string(), Value::String("hyperedge".to_string()));
352 serde_json::to_writer(&mut *writer, &obj)?;
353 writeln!(writer)?;
354 }
355
356 let mut metadata = UnifiedHypergraphMetadata::from_metadata(&hypergraph.metadata);
357 metadata.files = vec![];
358
359 Ok(metadata)
360 }
361}
362
#[cfg(feature = "rustgraph")]
mod bulk_export {
    use super::*;
    use rustgraph_api_types::bulk::{BulkEdgeData, BulkNodeData};

    /// In-memory result of converting a hypergraph into bulk-import records.
    #[derive(Debug, Clone)]
    pub struct RustGraphBulkExport {
        /// One bulk node per hypergraph node, in hypergraph order.
        pub nodes: Vec<BulkNodeData>,
        /// Bulk edges whose endpoints were both resolved through `id_map`.
        pub edges: Vec<BulkEdgeData>,
        /// Hyperedges in the unified record form.
        pub hyperedges: Vec<RawUnifiedHyperedge>,
        /// Mapping from string node id to the numeric id assigned during conversion.
        pub id_map: HashMap<String, u64>,
    }

    /// Builds the property map stored on the bulk node for `node`.
    ///
    /// Starts from a clone of the node's own properties and then sets
    /// `entity_id`, `node_type_name` and `external_id` (replacing entries with
    /// the same key), plus the anomaly and feature entries where applicable.
    fn bulk_node_properties(node: &HypergraphNode) -> HashMap<String, Value> {
        let mut props = node.properties.clone();
        props.insert("entity_id".to_string(), Value::String(node.id.clone()));
        props.insert(
            "node_type_name".to_string(),
            Value::String(node.entity_type.clone()),
        );
        props.insert(
            "external_id".to_string(),
            Value::String(node.external_id.clone()),
        );

        // Anomaly information is only recorded for nodes flagged as anomalous.
        if node.is_anomaly {
            props.insert("is_anomaly".to_string(), Value::Bool(true));
            if let Some(kind) = node.anomaly_type.as_ref() {
                props.insert("anomaly_type".to_string(), Value::String(kind.clone()));
            }
        }

        if !node.features.is_empty() {
            let values: Vec<Value> = node.features.iter().copied().map(Value::from).collect();
            props.insert("features".to_string(), Value::Array(values));
        }

        props
    }

    impl RustGraphUnifiedExporter {
        /// Converts `hypergraph` into bulk-import records without touching the filesystem.
        ///
        /// Nodes receive sequential numeric ids starting at 1, in hypergraph
        /// order; the string-to-number mapping is returned in `id_map`. Edges
        /// whose source or target id is missing from that mapping are skipped
        /// silently. Every edge gets weight `1.0` and an `edge_type_name`
        /// property.
        pub fn to_bulk_import(&self, hypergraph: &Hypergraph) -> RustGraphBulkExport {
            let node_total = hypergraph.nodes.len();
            let mut id_map: HashMap<String, u64> = HashMap::with_capacity(node_total);
            let mut nodes = Vec::with_capacity(node_total);

            for (numeric_id, hg_node) in (1u64..).zip(&hypergraph.nodes) {
                id_map.insert(hg_node.id.clone(), numeric_id);
                nodes.push(BulkNodeData {
                    id: Some(numeric_id),
                    node_type: hg_node.entity_type_code,
                    layer: Some(hg_node.layer.index()),
                    labels: vec![hg_node.label.clone()],
                    properties: bulk_node_properties(hg_node),
                });
            }

            let mut edges = Vec::with_capacity(hypergraph.edges.len());
            for edge in &hypergraph.edges {
                // Both endpoints must resolve to a numeric id; otherwise the edge is dropped.
                let endpoints = (id_map.get(&edge.source_id), id_map.get(&edge.target_id));
                let (source, target) = match endpoints {
                    (Some(&source), Some(&target)) => (source, target),
                    _ => continue,
                };

                let mut properties = edge.properties.clone();
                properties.insert(
                    "edge_type_name".to_string(),
                    Value::String(edge.edge_type.clone()),
                );

                edges.push(BulkEdgeData {
                    source,
                    target,
                    edge_type: edge.edge_type_code,
                    weight: 1.0,
                    properties,
                });
            }

            RustGraphBulkExport {
                nodes,
                edges,
                hyperedges: hypergraph
                    .hyperedges
                    .iter()
                    .map(RawUnifiedHyperedge::from_hyperedge)
                    .collect(),
                id_map,
            }
        }
    }
}
482
483#[cfg(feature = "rustgraph")]
484pub use bulk_export::RustGraphBulkExport;
485
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builders::hypergraph::{HypergraphBuilder, HypergraphConfig};
    use crate::models::hypergraph::HypergraphLayer;
    use tempfile::tempdir;

    /// Node count expected from the COSO-only test hypergraph
    /// (presumably 5 components + 17 principles; verify against the builder).
    const COSO_NODE_COUNT: usize = 22;

    /// Builds a hypergraph that contains only the COSO framework.
    fn build_test_hypergraph() -> Hypergraph {
        let config = HypergraphConfig {
            max_nodes: 1000,
            include_p2p: false,
            include_o2c: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            ..Default::default()
        };
        let mut builder = HypergraphBuilder::new(config);
        builder.add_coso_framework();
        builder.build()
    }

    /// Exports the test hypergraph into a fresh temporary directory.
    ///
    /// The directory guard is returned so the files stay on disk for the
    /// duration of the calling test.
    fn export_to_tempdir(
        config: UnifiedExportConfig,
    ) -> (tempfile::TempDir, UnifiedHypergraphMetadata) {
        let hypergraph = build_test_hypergraph();
        let dir = tempdir().unwrap();
        let metadata = RustGraphUnifiedExporter::new(config)
            .export(&hypergraph, dir.path())
            .unwrap();
        (dir, metadata)
    }

    #[test]
    fn test_node_conversion() {
        let node = HypergraphNode {
            id: "node_1".to_string(),
            entity_type: "Account".to_string(),
            entity_type_code: 100,
            layer: HypergraphLayer::AccountingNetwork,
            external_id: "1000".to_string(),
            label: "Cash".to_string(),
            properties: HashMap::new(),
            features: vec![1.0, 2.0],
            is_anomaly: false,
            anomaly_type: None,
            is_aggregate: false,
            aggregate_count: 0,
        };

        let unified = RawUnifiedNode::from_hypergraph_node(&node);
        assert_eq!(unified.id, "node_1");
        assert_eq!(unified.node_type, "Account");
        assert_eq!(unified.name, "Cash");
        assert_eq!(unified.layer, 3);
        assert_eq!(unified.entity_type_code, 100);
        assert_eq!(unified.external_id, "1000");
        assert_eq!(unified.features, vec![1.0, 2.0]);
    }

    #[test]
    fn test_edge_conversion() {
        let edge = CrossLayerEdge {
            source_id: "ctrl_C001".to_string(),
            source_layer: HypergraphLayer::GovernanceControls,
            target_id: "acct_1000".to_string(),
            target_layer: HypergraphLayer::AccountingNetwork,
            edge_type: "ImplementsControl".to_string(),
            edge_type_code: 40,
            properties: HashMap::new(),
        };

        let unified = RawUnifiedEdge::from_cross_layer_edge(&edge);
        assert_eq!(unified.source, "ctrl_C001");
        assert_eq!(unified.target, "acct_1000");
        assert_eq!(unified.source_layer, 1);
        assert_eq!(unified.target_layer, 3);
        assert_eq!(unified.edge_type, "ImplementsControl");
        assert_eq!(unified.edge_type_code, 40);
        assert_eq!(unified.weight, 1.0);
    }

    #[test]
    fn test_hyperedge_conversion() {
        let he = Hyperedge {
            id: "he_1".to_string(),
            hyperedge_type: "JournalEntry".to_string(),
            subtype: "R2R".to_string(),
            participants: vec![
                HyperedgeParticipant {
                    node_id: "acct_1000".to_string(),
                    role: "debit".to_string(),
                    weight: Some(500.0),
                },
                HyperedgeParticipant {
                    node_id: "acct_2000".to_string(),
                    role: "credit".to_string(),
                    weight: Some(500.0),
                },
            ],
            layer: HypergraphLayer::AccountingNetwork,
            properties: HashMap::new(),
            timestamp: Some(NaiveDate::from_ymd_opt(2024, 6, 15).unwrap()),
            is_anomaly: true,
            anomaly_type: Some("split_transaction".to_string()),
            features: vec![6.2, 1.0],
        };

        let unified = RawUnifiedHyperedge::from_hyperedge(&he);
        assert_eq!(unified.id, "he_1");
        assert_eq!(unified.hyperedge_type, "JournalEntry");
        assert_eq!(unified.layer, 3);
        assert_eq!(unified.member_ids, vec!["acct_1000", "acct_2000"]);
        assert_eq!(unified.participants.len(), 2);
        assert!(unified.is_anomaly);
        assert_eq!(unified.anomaly_type, Some("split_transaction".to_string()));
    }

    #[test]
    fn test_unified_export_creates_all_files() {
        let (dir, metadata) = export_to_tempdir(UnifiedExportConfig::default());

        let expected_files = [
            "nodes.jsonl",
            "edges.jsonl",
            "hyperedges.jsonl",
            "metadata.json",
        ];
        for file_name in expected_files.iter() {
            assert!(
                dir.path().join(file_name).exists(),
                "missing export file: {}",
                file_name
            );
        }

        assert_eq!(metadata.num_nodes, COSO_NODE_COUNT);
        assert_eq!(metadata.format, "rustgraph_unified_v1");
    }

    #[test]
    fn test_unified_nodes_jsonl_parseable() {
        let (dir, _metadata) = export_to_tempdir(UnifiedExportConfig::default());

        let content = std::fs::read_to_string(dir.path().join("nodes.jsonl")).unwrap();
        for line in content.lines() {
            let node: RawUnifiedNode = serde_json::from_str(line).unwrap();
            assert!(!node.id.is_empty());
            assert!(!node.node_type.is_empty());
            assert!(!node.name.is_empty());
            assert!((1..=3).contains(&node.layer));
        }
        assert_eq!(content.lines().count(), COSO_NODE_COUNT);
    }

    #[test]
    fn test_unified_edges_jsonl_uses_source_target() {
        let (dir, _metadata) = export_to_tempdir(UnifiedExportConfig::default());

        let content = std::fs::read_to_string(dir.path().join("edges.jsonl")).unwrap();
        for line in content.lines() {
            // Deserializing into `RawUnifiedEdge` fails unless the `source` and
            // `target` keys are present.
            let edge: RawUnifiedEdge = serde_json::from_str(line).unwrap();
            assert!(!edge.source.is_empty());
            assert!(!edge.target.is_empty());
            assert!((1..=3).contains(&edge.source_layer));
            assert!((1..=3).contains(&edge.target_layer));
            assert_eq!(edge.weight, 1.0);
        }
    }

    #[test]
    fn test_unified_metadata_has_format_field() {
        let (dir, _metadata) = export_to_tempdir(UnifiedExportConfig { pretty_print: true });

        let content = std::fs::read_to_string(dir.path().join("metadata.json")).unwrap();
        let metadata: UnifiedHypergraphMetadata = serde_json::from_str(&content).unwrap();
        assert_eq!(metadata.format, "rustgraph_unified_v1");
        assert_eq!(metadata.source, "datasynth");
    }

    #[cfg(feature = "rustgraph")]
    #[test]
    fn test_to_bulk_import_nodes() {
        let hypergraph = build_test_hypergraph();
        let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
        let export = exporter.to_bulk_import(&hypergraph);

        assert_eq!(export.nodes.len(), COSO_NODE_COUNT);
        assert_eq!(export.id_map.len(), COSO_NODE_COUNT);

        let first = &export.nodes[0];
        assert_eq!(first.id, Some(1));
        assert!(first.node_type > 0);
        assert!(first.layer.is_some());
        assert!(!first.labels.is_empty());
        assert!(first.properties.contains_key("entity_id"));
        assert!(first.properties.contains_key("node_type_name"));
    }

    #[cfg(feature = "rustgraph")]
    #[test]
    fn test_to_bulk_import_edges() {
        let hypergraph = build_test_hypergraph();
        let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
        let export = exporter.to_bulk_import(&hypergraph);

        assert!(!export.edges.is_empty());

        // Collect the assigned ids once instead of scanning the map per edge.
        let known_ids: std::collections::HashSet<u64> = export.id_map.values().copied().collect();
        for edge in &export.edges {
            assert!(known_ids.contains(&edge.source));
            assert!(known_ids.contains(&edge.target));
            assert!(edge.properties.contains_key("edge_type_name"));
        }
    }

    #[cfg(feature = "rustgraph")]
    #[test]
    fn test_to_bulk_import_id_mapping() {
        let hypergraph = build_test_hypergraph();
        let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
        let export = exporter.to_bulk_import(&hypergraph);

        // Ids are expected to run from 1 up to the number of nodes.
        let mut ids: Vec<u64> = export.nodes.iter().filter_map(|n| n.id).collect();
        ids.sort_unstable();
        assert_eq!(ids.first(), Some(&1u64));
        assert_eq!(ids.last(), Some(&(export.nodes.len() as u64)));

        // The map must send each node's string id back to its numeric id.
        for node in &export.nodes {
            let string_id = node
                .properties
                .get("entity_id")
                .and_then(|v| v.as_str())
                .expect("entity_id should be a string");
            assert_eq!(export.id_map.get(string_id).copied(), node.id);
        }
    }

    #[test]
    fn test_export_to_writer() {
        let hypergraph = build_test_hypergraph();
        let mut buffer = Vec::new();

        let exporter = RustGraphUnifiedExporter::new(UnifiedExportConfig::default());
        let metadata = exporter.export_to_writer(&hypergraph, &mut buffer).unwrap();

        assert_eq!(metadata.num_nodes, COSO_NODE_COUNT);

        let content = String::from_utf8(buffer).unwrap();
        let mut node_count = 0usize;
        let mut edge_count = 0usize;
        for line in content.lines() {
            let obj: serde_json::Value = serde_json::from_str(line).unwrap();
            let record_type = obj.get("_type").unwrap().as_str().unwrap();
            match record_type {
                "node" => node_count += 1,
                "edge" => edge_count += 1,
                "hyperedge" => {}
                _ => panic!("Unexpected _type: {}", record_type),
            }
        }
        assert_eq!(node_count, COSO_NODE_COUNT);
        assert!(edge_count > 0);
    }
}