1use std::collections::HashMap;
12
13use chrono::Datelike;
14use serde_json::Value;
15
16use datasynth_banking::models::{BankAccount, BankTransaction, BankingCustomer};
17use datasynth_core::models::audit::{
18 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
19};
20use datasynth_core::models::sourcing::{
21 BidEvaluation, ProcurementContract, RfxEvent, SourcingProject, SupplierBid,
22 SupplierQualification,
23};
24use datasynth_core::models::ExpenseReport;
25use datasynth_core::models::{
26 BankReconciliation, ChartOfAccounts, CosoComponent, CosoPrinciple, Customer, CycleCount,
27 Employee, InternalControl, JournalEntry, PayrollRun, ProductionOrder, QualityInspection,
28 TimeEntry, Vendor,
29};
30
31use crate::models::hypergraph::{
32 AggregationStrategy, CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph,
33 HypergraphLayer, HypergraphMetadata, HypergraphNode, NodeBudget, NodeBudgetReport,
34};
35
// Temporal feature constants. Their use sites are outside this section of the
// file, so the descriptions below are inferred from the names — TODO confirm.

/// Day-of-month value (28) presumably used as the month-end cutoff.
const MONTH_END_DAY_THRESHOLD: u32 = 28;
/// Divisor (6.0) presumably used to scale a weekday index.
const WEEKDAY_NORMALIZER: f64 = 6.0;
/// Divisor (31.0) presumably used to scale a day-of-month value.
const DAY_OF_MONTH_NORMALIZER: f64 = 31.0;
/// Divisor (12.0) presumably used to scale a month number.
const MONTH_NORMALIZER: f64 = 12.0;
44
/// Numeric codes written to `entity_type_code` on nodes and to
/// `edge_type_code` on cross-layer edges.
// `dead_code` is allowed for the whole module; presumably some codes are not
// referenced yet — TODO confirm.
#[allow(dead_code)]
mod type_codes {
    // --- Accounting entities (100s) ---
    pub const ACCOUNT: u32 = 100;
    pub const JOURNAL_ENTRY: u32 = 101;

    // --- Counterparties and people (200s) ---
    pub const VENDOR: u32 = 200;
    pub const CUSTOMER: u32 = 201;
    pub const EMPLOYEE: u32 = 202;
    pub const BANKING_CUSTOMER: u32 = 203;

    // --- Purchase-side documents (300-303) ---
    pub const PURCHASE_ORDER: u32 = 300;
    pub const GOODS_RECEIPT: u32 = 301;
    pub const VENDOR_INVOICE: u32 = 302;
    pub const PAYMENT: u32 = 303;
    // --- Sales-side documents (310-312) ---
    pub const SALES_ORDER: u32 = 310;
    pub const DELIVERY: u32 = 311;
    pub const CUSTOMER_INVOICE: u32 = 312;
    // --- Sourcing documents (320-325) ---
    pub const SOURCING_PROJECT: u32 = 320;
    pub const RFX_EVENT: u32 = 321;
    pub const SUPPLIER_BID: u32 = 322;
    pub const BID_EVALUATION: u32 = 323;
    pub const PROCUREMENT_CONTRACT: u32 = 324;
    pub const SUPPLIER_QUALIFICATION: u32 = 325;
    // --- Payroll, time and expense documents (330-333) ---
    pub const PAYROLL_RUN: u32 = 330;
    pub const TIME_ENTRY: u32 = 331;
    pub const EXPENSE_REPORT: u32 = 332;
    pub const PAYROLL_LINE_ITEM: u32 = 333;
    // --- Production, quality and inventory documents (340-342) ---
    pub const PRODUCTION_ORDER: u32 = 340;
    pub const QUALITY_INSPECTION: u32 = 341;
    pub const CYCLE_COUNT: u32 = 342;
    // --- Banking records (350-352) ---
    pub const BANK_ACCOUNT: u32 = 350;
    pub const BANK_TRANSACTION: u32 = 351;
    pub const BANK_STATEMENT_LINE: u32 = 352;
    // --- Audit records (360-365) ---
    pub const AUDIT_ENGAGEMENT: u32 = 360;
    pub const WORKPAPER: u32 = 361;
    pub const AUDIT_FINDING: u32 = 362;
    pub const AUDIT_EVIDENCE: u32 = 363;
    pub const RISK_ASSESSMENT: u32 = 364;
    pub const PROFESSIONAL_JUDGMENT: u32 = 365;
    // --- Bank reconciliation records (370, 372) ---
    pub const BANK_RECONCILIATION: u32 = 370;
    pub const RECONCILING_ITEM: u32 = 372;
    // --- Process-mining event ---
    pub const OCPM_EVENT: u32 = 400;
    // Code given to aggregate pool nodes (`P2PPool`, `O2CPool`).
    pub const POOL_NODE: u32 = 399;

    // --- Governance and control entities (500s) ---
    pub const COSO_COMPONENT: u32 = 500;
    pub const COSO_PRINCIPLE: u32 = 501;
    pub const SOX_ASSERTION: u32 = 502;
    pub const INTERNAL_CONTROL: u32 = 503;
    pub const KYC_PROFILE: u32 = 504;

    // --- Edge type codes (40-55) ---
    pub const IMPLEMENTS_CONTROL: u32 = 40;
    pub const GOVERNED_BY_STANDARD: u32 = 41;
    pub const OWNS_CONTROL: u32 = 42;
    pub const OVERSEE_PROCESS: u32 = 43;
    pub const ENFORCES_ASSERTION: u32 = 44;
    pub const SUPPLIES_TO: u32 = 48;
    pub const COVERS_COSO_PRINCIPLE: u32 = 54;
    pub const CONTAINS_ACCOUNT: u32 = 55;
}
121
/// Options controlling which entities `HypergraphBuilder` adds and how large
/// the resulting graph may grow.
#[derive(Debug, Clone)]
pub struct HypergraphConfig {
    /// Node limit handed to `NodeBudget::new` when the builder is created.
    pub max_nodes: usize,
    /// Aggregation strategy; `PoolByCounterparty` enables per-counterparty
    /// pool nodes in the P2P and O2C document methods.
    pub aggregation_strategy: AggregationStrategy,
    /// Gates `add_coso_framework`.
    pub include_coso: bool,
    /// Gates `add_controls`.
    pub include_controls: bool,
    /// When adding controls, also add `SoxAssertion` nodes and
    /// `EnforcesAssertion` edges.
    pub include_sox: bool,
    /// Gates `add_vendors`.
    pub include_vendors: bool,
    /// Gates `add_customers`.
    pub include_customers: bool,
    /// Gates `add_employees`.
    pub include_employees: bool,
    /// Gates `add_p2p_documents`.
    pub include_p2p: bool,
    /// Gates `add_o2c_documents`.
    pub include_o2c: bool,
    // The next six flags are not read in this section of the file; they
    // presumably gate the source-to-contract, hire-to-retire, manufacturing,
    // banking, audit and record-to-report document families — TODO confirm.
    pub include_s2c: bool,
    pub include_h2r: bool,
    pub include_mfg: bool,
    pub include_bank: bool,
    pub include_audit: bool,
    pub include_r2r: bool,
    // Not read in this section of the file; meaning inferred from the name
    // only — TODO confirm.
    pub events_as_hyperedges: bool,
    /// A counterparty is pooled when its order-document count is strictly
    /// greater than this value (and pooling is enabled).
    pub docs_per_counterparty_threshold: usize,
    /// Gates `add_accounts`.
    pub include_accounts: bool,
    /// Gates `add_journal_entries_as_hyperedges`.
    pub je_as_hyperedges: bool,
    // Not read in this section of the file; meaning inferred from the name
    // only — TODO confirm.
    pub include_cross_layer_edges: bool,
}
154
155impl Default for HypergraphConfig {
156 fn default() -> Self {
157 Self {
158 max_nodes: 50_000,
159 aggregation_strategy: AggregationStrategy::PoolByCounterparty,
160 include_coso: true,
161 include_controls: true,
162 include_sox: true,
163 include_vendors: true,
164 include_customers: true,
165 include_employees: true,
166 include_p2p: true,
167 include_o2c: true,
168 include_s2c: true,
169 include_h2r: true,
170 include_mfg: true,
171 include_bank: true,
172 include_audit: true,
173 include_r2r: true,
174 events_as_hyperedges: true,
175 docs_per_counterparty_threshold: 20,
176 include_accounts: true,
177 je_as_hyperedges: true,
178 include_cross_layer_edges: true,
179 }
180 }
181}
182
/// Incrementally assembles hypergraph nodes, cross-layer edges and hyperedges
/// from domain records, subject to a node budget.
pub struct HypergraphBuilder {
    /// Options selecting which entity families are added.
    config: HypergraphConfig,
    /// Node budget created from `config.max_nodes`.
    budget: NodeBudget,
    /// Nodes collected so far.
    nodes: Vec<HypergraphNode>,
    /// Pairwise edges collected so far.
    edges: Vec<CrossLayerEdge>,
    /// Hyperedges collected so far.
    hyperedges: Vec<Hyperedge>,
    /// Lookup keyed by node id; the value is presumably the node's position
    /// in `nodes` — TODO confirm against `try_add_node`.
    node_index: HashMap<String, usize>,
    /// Incremented once per counterparty selected for pooling.
    aggregate_count: usize,
    /// Control id -> node id of the control node.
    control_node_ids: HashMap<String, String>,
    /// `Debug` name of a COSO component -> node id of the component node.
    coso_component_ids: HashMap<String, String>,
    /// Account number -> node id of the account node.
    account_node_ids: HashMap<String, String>,
    /// Vendor id -> node id of the vendor node.
    vendor_node_ids: HashMap<String, String>,
    /// Customer id -> node id of the customer node.
    customer_node_ids: HashMap<String, String>,
    /// Employee id -> node id of the employee node.
    employee_node_ids: HashMap<String, String>,
    /// `(document or pool node id, counterparty kind, counterparty id)`;
    /// the kind is `"vendor"` or `"customer"` in the code visible here.
    doc_counterparty_links: Vec<(String, String, String)>,
}
210
211impl HypergraphBuilder {
212 pub fn new(config: HypergraphConfig) -> Self {
214 let budget = NodeBudget::new(config.max_nodes);
215 Self {
216 config,
217 budget,
218 nodes: Vec::new(),
219 edges: Vec::new(),
220 hyperedges: Vec::new(),
221 node_index: HashMap::new(),
222 aggregate_count: 0,
223 control_node_ids: HashMap::new(),
224 coso_component_ids: HashMap::new(),
225 account_node_ids: HashMap::new(),
226 vendor_node_ids: HashMap::new(),
227 customer_node_ids: HashMap::new(),
228 employee_node_ids: HashMap::new(),
229 doc_counterparty_links: Vec::new(),
230 }
231 }
232
233 pub fn add_coso_framework(&mut self) {
235 if !self.config.include_coso {
236 return;
237 }
238
239 let components = [
240 (CosoComponent::ControlEnvironment, "Control Environment"),
241 (CosoComponent::RiskAssessment, "Risk Assessment"),
242 (CosoComponent::ControlActivities, "Control Activities"),
243 (
244 CosoComponent::InformationCommunication,
245 "Information & Communication",
246 ),
247 (CosoComponent::MonitoringActivities, "Monitoring Activities"),
248 ];
249
250 for (component, name) in &components {
251 let id = format!("coso_comp_{}", name.replace(' ', "_").replace('&', "and"));
252 if self.try_add_node(HypergraphNode {
253 id: id.clone(),
254 entity_type: "CosoComponent".to_string(),
255 entity_type_code: type_codes::COSO_COMPONENT,
256 layer: HypergraphLayer::GovernanceControls,
257 external_id: format!("{component:?}"),
258 label: name.to_string(),
259 properties: HashMap::new(),
260 features: vec![component_to_feature(component)],
261 is_anomaly: false,
262 anomaly_type: None,
263 is_aggregate: false,
264 aggregate_count: 0,
265 }) {
266 self.coso_component_ids.insert(format!("{component:?}"), id);
267 }
268 }
269
270 let principles = [
271 (
272 CosoPrinciple::IntegrityAndEthics,
273 "Integrity and Ethics",
274 CosoComponent::ControlEnvironment,
275 ),
276 (
277 CosoPrinciple::BoardOversight,
278 "Board Oversight",
279 CosoComponent::ControlEnvironment,
280 ),
281 (
282 CosoPrinciple::OrganizationalStructure,
283 "Organizational Structure",
284 CosoComponent::ControlEnvironment,
285 ),
286 (
287 CosoPrinciple::CommitmentToCompetence,
288 "Commitment to Competence",
289 CosoComponent::ControlEnvironment,
290 ),
291 (
292 CosoPrinciple::Accountability,
293 "Accountability",
294 CosoComponent::ControlEnvironment,
295 ),
296 (
297 CosoPrinciple::ClearObjectives,
298 "Clear Objectives",
299 CosoComponent::RiskAssessment,
300 ),
301 (
302 CosoPrinciple::IdentifyRisks,
303 "Identify Risks",
304 CosoComponent::RiskAssessment,
305 ),
306 (
307 CosoPrinciple::FraudRisk,
308 "Fraud Risk",
309 CosoComponent::RiskAssessment,
310 ),
311 (
312 CosoPrinciple::ChangeIdentification,
313 "Change Identification",
314 CosoComponent::RiskAssessment,
315 ),
316 (
317 CosoPrinciple::ControlActions,
318 "Control Actions",
319 CosoComponent::ControlActivities,
320 ),
321 (
322 CosoPrinciple::TechnologyControls,
323 "Technology Controls",
324 CosoComponent::ControlActivities,
325 ),
326 (
327 CosoPrinciple::PoliciesAndProcedures,
328 "Policies and Procedures",
329 CosoComponent::ControlActivities,
330 ),
331 (
332 CosoPrinciple::QualityInformation,
333 "Quality Information",
334 CosoComponent::InformationCommunication,
335 ),
336 (
337 CosoPrinciple::InternalCommunication,
338 "Internal Communication",
339 CosoComponent::InformationCommunication,
340 ),
341 (
342 CosoPrinciple::ExternalCommunication,
343 "External Communication",
344 CosoComponent::InformationCommunication,
345 ),
346 (
347 CosoPrinciple::OngoingMonitoring,
348 "Ongoing Monitoring",
349 CosoComponent::MonitoringActivities,
350 ),
351 (
352 CosoPrinciple::DeficiencyEvaluation,
353 "Deficiency Evaluation",
354 CosoComponent::MonitoringActivities,
355 ),
356 ];
357
358 for (principle, name, parent_component) in &principles {
359 let principle_id = format!("coso_prin_{}", name.replace(' ', "_").replace('&', "and"));
360 if self.try_add_node(HypergraphNode {
361 id: principle_id.clone(),
362 entity_type: "CosoPrinciple".to_string(),
363 entity_type_code: type_codes::COSO_PRINCIPLE,
364 layer: HypergraphLayer::GovernanceControls,
365 external_id: format!("{principle:?}"),
366 label: name.to_string(),
367 properties: {
368 let mut p = HashMap::new();
369 p.insert(
370 "principle_number".to_string(),
371 Value::Number(principle.principle_number().into()),
372 );
373 p
374 },
375 features: vec![principle.principle_number() as f64],
376 is_anomaly: false,
377 anomaly_type: None,
378 is_aggregate: false,
379 aggregate_count: 0,
380 }) {
381 let comp_key = format!("{parent_component:?}");
383 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
384 self.edges.push(CrossLayerEdge {
385 source_id: principle_id,
386 source_layer: HypergraphLayer::GovernanceControls,
387 target_id: comp_id.clone(),
388 target_layer: HypergraphLayer::GovernanceControls,
389 edge_type: "CoversCosoPrinciple".to_string(),
390 edge_type_code: type_codes::COVERS_COSO_PRINCIPLE,
391 properties: HashMap::new(),
392 });
393 }
394 }
395 }
396 }
397
398 pub fn add_controls(&mut self, controls: &[InternalControl]) {
400 if !self.config.include_controls {
401 return;
402 }
403
404 for control in controls {
405 let node_id = format!("ctrl_{}", control.control_id);
406 if self.try_add_node(HypergraphNode {
407 id: node_id.clone(),
408 entity_type: "InternalControl".to_string(),
409 entity_type_code: type_codes::INTERNAL_CONTROL,
410 layer: HypergraphLayer::GovernanceControls,
411 external_id: control.control_id.clone(),
412 label: control.control_name.clone(),
413 properties: {
414 let mut p = HashMap::new();
415 p.insert(
416 "control_type".to_string(),
417 Value::String(format!("{:?}", control.control_type)),
418 );
419 p.insert(
420 "controlType".to_string(),
421 Value::String(format!("{}", control.control_type).to_lowercase()),
422 );
423 p.insert(
424 "risk_level".to_string(),
425 Value::String(format!("{:?}", control.risk_level)),
426 );
427 p.insert(
428 "is_key_control".to_string(),
429 Value::Bool(control.is_key_control),
430 );
431 p.insert(
432 "isKeyControl".to_string(),
433 Value::Bool(control.is_key_control),
434 );
435 p.insert(
436 "maturity_level".to_string(),
437 Value::String(format!("{:?}", control.maturity_level)),
438 );
439 let effectiveness = match control.maturity_level.level() {
440 4 | 5 => "effective",
441 3 => "partially-effective",
442 _ => "not-tested",
443 };
444 p.insert(
445 "effectiveness".to_string(),
446 Value::String(effectiveness.to_string()),
447 );
448 p.insert(
449 "description".to_string(),
450 Value::String(control.description.clone()),
451 );
452 p.insert(
453 "objective".to_string(),
454 Value::String(control.objective.clone()),
455 );
456 p.insert(
457 "frequency".to_string(),
458 Value::String(format!("{}", control.frequency).to_lowercase()),
459 );
460 p.insert(
461 "owner".to_string(),
462 Value::String(format!("{}", control.owner_role)),
463 );
464 p.insert(
465 "controlId".to_string(),
466 Value::String(control.control_id.clone()),
467 );
468 p.insert(
469 "name".to_string(),
470 Value::String(control.control_name.clone()),
471 );
472 p.insert(
473 "category".to_string(),
474 Value::String(format!("{}", control.control_type)),
475 );
476 p.insert(
477 "automated".to_string(),
478 Value::Bool(matches!(
479 control.control_type,
480 datasynth_core::models::ControlType::Monitoring
481 )),
482 );
483 p.insert(
484 "coso_component".to_string(),
485 Value::String(format!("{:?}", control.coso_component)),
486 );
487 p.insert(
488 "sox_assertion".to_string(),
489 Value::String(format!("{:?}", control.sox_assertion)),
490 );
491 p.insert(
492 "control_scope".to_string(),
493 Value::String(format!("{:?}", control.control_scope)),
494 );
495 p
496 },
497 features: vec![
498 if control.is_key_control { 1.0 } else { 0.0 },
499 control.maturity_level.level() as f64 / 5.0,
500 ],
501 is_anomaly: false,
502 anomaly_type: None,
503 is_aggregate: false,
504 aggregate_count: 0,
505 }) {
506 self.control_node_ids
507 .insert(control.control_id.clone(), node_id.clone());
508
509 let comp_key = format!("{:?}", control.coso_component);
511 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
512 self.edges.push(CrossLayerEdge {
513 source_id: node_id.clone(),
514 source_layer: HypergraphLayer::GovernanceControls,
515 target_id: comp_id.clone(),
516 target_layer: HypergraphLayer::GovernanceControls,
517 edge_type: "ImplementsControl".to_string(),
518 edge_type_code: type_codes::IMPLEMENTS_CONTROL,
519 properties: HashMap::new(),
520 });
521 }
522
523 if self.config.include_sox {
525 let assertion_id = format!("sox_{:?}", control.sox_assertion).to_lowercase();
526 if !self.node_index.contains_key(&assertion_id) {
528 self.try_add_node(HypergraphNode {
529 id: assertion_id.clone(),
530 entity_type: "SoxAssertion".to_string(),
531 entity_type_code: type_codes::SOX_ASSERTION,
532 layer: HypergraphLayer::GovernanceControls,
533 external_id: format!("{:?}", control.sox_assertion),
534 label: format!("{:?}", control.sox_assertion),
535 properties: HashMap::new(),
536 features: vec![],
537 is_anomaly: false,
538 anomaly_type: None,
539 is_aggregate: false,
540 aggregate_count: 0,
541 });
542 }
543 self.edges.push(CrossLayerEdge {
544 source_id: node_id,
545 source_layer: HypergraphLayer::GovernanceControls,
546 target_id: assertion_id,
547 target_layer: HypergraphLayer::GovernanceControls,
548 edge_type: "EnforcesAssertion".to_string(),
549 edge_type_code: type_codes::ENFORCES_ASSERTION,
550 properties: HashMap::new(),
551 });
552 }
553 }
554 }
555 }
556
557 pub fn add_vendors(&mut self, vendors: &[Vendor]) {
559 if !self.config.include_vendors {
560 return;
561 }
562
563 for vendor in vendors {
564 let node_id = format!("vnd_{}", vendor.vendor_id);
565 if self.try_add_node(HypergraphNode {
566 id: node_id.clone(),
567 entity_type: "Vendor".to_string(),
568 entity_type_code: type_codes::VENDOR,
569 layer: HypergraphLayer::GovernanceControls,
570 external_id: vendor.vendor_id.clone(),
571 label: vendor.name.clone(),
572 properties: {
573 let mut p = HashMap::new();
574 p.insert(
575 "vendor_type".to_string(),
576 Value::String(format!("{:?}", vendor.vendor_type)),
577 );
578 p.insert("country".to_string(), Value::String(vendor.country.clone()));
579 p.insert("is_active".to_string(), Value::Bool(vendor.is_active));
580 p
581 },
582 features: vec![if vendor.is_active { 1.0 } else { 0.0 }],
583 is_anomaly: false,
584 anomaly_type: None,
585 is_aggregate: false,
586 aggregate_count: 0,
587 }) {
588 self.vendor_node_ids
589 .insert(vendor.vendor_id.clone(), node_id);
590 }
591 }
592 }
593
594 pub fn add_customers(&mut self, customers: &[Customer]) {
596 if !self.config.include_customers {
597 return;
598 }
599
600 for customer in customers {
601 let node_id = format!("cust_{}", customer.customer_id);
602 if self.try_add_node(HypergraphNode {
603 id: node_id.clone(),
604 entity_type: "Customer".to_string(),
605 entity_type_code: type_codes::CUSTOMER,
606 layer: HypergraphLayer::GovernanceControls,
607 external_id: customer.customer_id.clone(),
608 label: customer.name.clone(),
609 properties: {
610 let mut p = HashMap::new();
611 p.insert(
612 "customer_type".to_string(),
613 Value::String(format!("{:?}", customer.customer_type)),
614 );
615 p.insert(
616 "country".to_string(),
617 Value::String(customer.country.clone()),
618 );
619 p.insert(
620 "credit_rating".to_string(),
621 Value::String(format!("{:?}", customer.credit_rating)),
622 );
623 p
624 },
625 features: vec![if customer.is_active { 1.0 } else { 0.0 }],
626 is_anomaly: false,
627 anomaly_type: None,
628 is_aggregate: false,
629 aggregate_count: 0,
630 }) {
631 self.customer_node_ids
632 .insert(customer.customer_id.clone(), node_id);
633 }
634 }
635 }
636
637 pub fn add_employees(&mut self, employees: &[Employee]) {
639 if !self.config.include_employees {
640 return;
641 }
642
643 for employee in employees {
644 let node_id = format!("emp_{}", employee.employee_id);
645 if self.try_add_node(HypergraphNode {
646 id: node_id.clone(),
647 entity_type: "Employee".to_string(),
648 entity_type_code: type_codes::EMPLOYEE,
649 layer: HypergraphLayer::GovernanceControls,
650 external_id: employee.employee_id.clone(),
651 label: employee.display_name.clone(),
652 properties: {
653 let mut p = HashMap::new();
654 p.insert(
655 "persona".to_string(),
656 Value::String(employee.persona.to_string()),
657 );
658 p.insert(
659 "job_level".to_string(),
660 Value::String(format!("{:?}", employee.job_level)),
661 );
662 p.insert(
663 "company_code".to_string(),
664 Value::String(employee.company_code.clone()),
665 );
666 p.insert(
667 "fullName".to_string(),
668 Value::String(employee.display_name.clone()),
669 );
670 p.insert("email".to_string(), Value::String(employee.email.clone()));
671 p.insert(
672 "department".to_string(),
673 Value::String(employee.department_id.clone().unwrap_or_default()),
674 );
675 p.insert(
676 "job_title".to_string(),
677 Value::String(employee.job_title.clone()),
678 );
679 p.insert(
680 "status".to_string(),
681 Value::String(format!("{:?}", employee.status)),
682 );
683 p
684 },
685 features: vec![employee
686 .approval_limit
687 .to_string()
688 .parse::<f64>()
689 .unwrap_or(0.0)
690 .ln_1p()],
691 is_anomaly: false,
692 anomaly_type: None,
693 is_aggregate: false,
694 aggregate_count: 0,
695 }) {
696 self.employee_node_ids
697 .insert(employee.employee_id.clone(), node_id);
698 }
699 }
700 }
701
702 pub fn add_accounts(&mut self, coa: &ChartOfAccounts) {
704 if !self.config.include_accounts {
705 return;
706 }
707
708 for account in &coa.accounts {
709 let node_id = format!("acct_{}", account.account_number);
710 if self.try_add_node(HypergraphNode {
711 id: node_id.clone(),
712 entity_type: "Account".to_string(),
713 entity_type_code: type_codes::ACCOUNT,
714 layer: HypergraphLayer::AccountingNetwork,
715 external_id: account.account_number.clone(),
716 label: account.short_description.clone(),
717 properties: {
718 let mut p = HashMap::new();
719 p.insert(
720 "account_type".to_string(),
721 Value::String(format!("{:?}", account.account_type)),
722 );
723 p.insert(
724 "is_control_account".to_string(),
725 Value::Bool(account.is_control_account),
726 );
727 p.insert("is_postable".to_string(), Value::Bool(account.is_postable));
728 p
729 },
730 features: vec![
731 account_type_feature(&account.account_type),
732 if account.is_control_account { 1.0 } else { 0.0 },
733 if account.normal_debit_balance {
734 1.0
735 } else {
736 0.0
737 },
738 ],
739 is_anomaly: false,
740 anomaly_type: None,
741 is_aggregate: false,
742 aggregate_count: 0,
743 }) {
744 self.account_node_ids
745 .insert(account.account_number.clone(), node_id);
746 }
747 }
748 }
749
750 pub fn add_journal_entries_as_hyperedges(&mut self, entries: &[JournalEntry]) {
754 if !self.config.je_as_hyperedges {
755 return;
756 }
757
758 for entry in entries {
759 let mut participants = Vec::new();
760
761 for line in &entry.lines {
762 let account_id = format!("acct_{}", line.gl_account);
763
764 if !self.node_index.contains_key(&account_id) {
766 self.try_add_node(HypergraphNode {
767 id: account_id.clone(),
768 entity_type: "Account".to_string(),
769 entity_type_code: type_codes::ACCOUNT,
770 layer: HypergraphLayer::AccountingNetwork,
771 external_id: line.gl_account.clone(),
772 label: line
773 .account_description
774 .clone()
775 .unwrap_or_else(|| line.gl_account.clone()),
776 properties: HashMap::new(),
777 features: vec![],
778 is_anomaly: false,
779 anomaly_type: None,
780 is_aggregate: false,
781 aggregate_count: 0,
782 });
783 self.account_node_ids
784 .insert(line.gl_account.clone(), account_id.clone());
785 }
786
787 let amount: f64 = if !line.debit_amount.is_zero() {
788 line.debit_amount.to_string().parse().unwrap_or(0.0)
789 } else {
790 line.credit_amount.to_string().parse().unwrap_or(0.0)
791 };
792
793 let role = if !line.debit_amount.is_zero() {
794 "debit"
795 } else {
796 "credit"
797 };
798
799 participants.push(HyperedgeParticipant {
800 node_id: account_id,
801 role: role.to_string(),
802 weight: Some(amount),
803 });
804 }
805
806 if participants.is_empty() {
807 continue;
808 }
809
810 let doc_id = entry.header.document_id.to_string();
811 let subtype = entry
812 .header
813 .business_process
814 .as_ref()
815 .map(|bp| format!("{bp:?}"))
816 .unwrap_or_else(|| "General".to_string());
817
818 self.hyperedges.push(Hyperedge {
819 id: format!("je_{doc_id}"),
820 hyperedge_type: "JournalEntry".to_string(),
821 subtype,
822 participants,
823 layer: HypergraphLayer::AccountingNetwork,
824 properties: {
825 let mut p = HashMap::new();
826 p.insert("document_id".to_string(), Value::String(doc_id));
827 p.insert(
828 "company_code".to_string(),
829 Value::String(entry.header.company_code.clone()),
830 );
831 p.insert(
832 "document_type".to_string(),
833 Value::String(entry.header.document_type.clone()),
834 );
835 p.insert(
836 "created_by".to_string(),
837 Value::String(entry.header.created_by.clone()),
838 );
839 p
840 },
841 timestamp: Some(entry.header.posting_date),
842 is_anomaly: entry.header.is_anomaly || entry.header.is_fraud,
843 anomaly_type: entry
844 .header
845 .anomaly_type
846 .clone()
847 .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}"))),
848 features: compute_je_features(entry),
849 });
850 }
851 }
852
853 pub fn add_journal_entry_nodes(&mut self, entries: &[JournalEntry]) {
859 for entry in entries {
860 let node_id = format!("je_{}", entry.header.document_id);
861 let total_amount: f64 = entry
862 .lines
863 .iter()
864 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
865 .sum();
866
867 let is_anomaly = entry.header.is_anomaly || entry.header.is_fraud;
868 let anomaly_type = entry
869 .header
870 .anomaly_type
871 .clone()
872 .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}")));
873
874 self.try_add_node(HypergraphNode {
875 id: node_id,
876 entity_type: "JournalEntry".to_string(),
877 entity_type_code: type_codes::JOURNAL_ENTRY,
878 layer: HypergraphLayer::AccountingNetwork,
879 external_id: entry.header.document_id.to_string(),
880 label: format!("JE-{}", entry.header.document_id),
881 properties: {
882 let mut p = HashMap::new();
883 p.insert(
884 "amount".into(),
885 Value::Number(
886 serde_json::Number::from_f64(total_amount)
887 .unwrap_or_else(|| serde_json::Number::from(0)),
888 ),
889 );
890 p.insert(
891 "date".into(),
892 Value::String(entry.header.posting_date.to_string()),
893 );
894 p.insert(
895 "company_code".into(),
896 Value::String(entry.header.company_code.clone()),
897 );
898 p.insert(
899 "line_count".into(),
900 Value::Number((entry.lines.len() as u64).into()),
901 );
902 p.insert("is_anomaly".into(), Value::Bool(is_anomaly));
903 if let Some(ref at) = anomaly_type {
904 p.insert("anomaly_type".into(), Value::String(at.clone()));
905 }
906 p
907 },
908 features: vec![total_amount / 100_000.0],
909 is_anomaly,
910 anomaly_type,
911 is_aggregate: false,
912 aggregate_count: 0,
913 });
914 }
915 }
916
917 pub fn add_p2p_documents(
921 &mut self,
922 purchase_orders: &[datasynth_core::models::documents::PurchaseOrder],
923 goods_receipts: &[datasynth_core::models::documents::GoodsReceipt],
924 vendor_invoices: &[datasynth_core::models::documents::VendorInvoice],
925 payments: &[datasynth_core::models::documents::Payment],
926 ) {
927 if !self.config.include_p2p {
928 return;
929 }
930
931 let mut vendor_doc_counts: HashMap<String, usize> = HashMap::new();
933 for po in purchase_orders {
934 *vendor_doc_counts.entry(po.vendor_id.clone()).or_insert(0) += 1;
935 }
936
937 let threshold = self.config.docs_per_counterparty_threshold;
938 let should_aggregate = matches!(
939 self.config.aggregation_strategy,
940 AggregationStrategy::PoolByCounterparty
941 );
942
943 let vendors_needing_pools: Vec<String> = if should_aggregate {
945 vendor_doc_counts
946 .iter()
947 .filter(|(_, count)| **count > threshold)
948 .map(|(vid, _)| vid.clone())
949 .collect()
950 } else {
951 Vec::new()
952 };
953
954 for vendor_id in &vendors_needing_pools {
956 let count = vendor_doc_counts[vendor_id];
957 let pool_id = format!("pool_p2p_{vendor_id}");
958 if self.try_add_node(HypergraphNode {
959 id: pool_id.clone(),
960 entity_type: "P2PPool".to_string(),
961 entity_type_code: type_codes::POOL_NODE,
962 layer: HypergraphLayer::ProcessEvents,
963 external_id: format!("pool_p2p_{vendor_id}"),
964 label: format!("P2P Pool ({vendor_id}): {count} docs"),
965 properties: {
966 let mut p = HashMap::new();
967 p.insert("vendor_id".to_string(), Value::String(vendor_id.clone()));
968 p.insert("document_count".to_string(), Value::Number(count.into()));
969 p
970 },
971 features: vec![count as f64],
972 is_anomaly: false,
973 anomaly_type: None,
974 is_aggregate: true,
975 aggregate_count: count,
976 }) {
977 self.doc_counterparty_links.push((
978 pool_id,
979 "vendor".to_string(),
980 vendor_id.clone(),
981 ));
982 }
983 self.aggregate_count += 1;
984 }
985
986 for po in purchase_orders {
988 if should_aggregate && vendors_needing_pools.contains(&po.vendor_id) {
989 continue; }
991
992 let doc_id = &po.header.document_id;
993 let node_id = format!("po_{doc_id}");
994 if self.try_add_node(HypergraphNode {
995 id: node_id.clone(),
996 entity_type: "PurchaseOrder".to_string(),
997 entity_type_code: type_codes::PURCHASE_ORDER,
998 layer: HypergraphLayer::ProcessEvents,
999 external_id: doc_id.clone(),
1000 label: format!("PO {doc_id}"),
1001 properties: {
1002 let mut p = HashMap::new();
1003 p.insert("vendor_id".to_string(), Value::String(po.vendor_id.clone()));
1004 p.insert(
1005 "company_code".to_string(),
1006 Value::String(po.header.company_code.clone()),
1007 );
1008 p
1009 },
1010 features: vec![po
1011 .total_net_amount
1012 .to_string()
1013 .parse::<f64>()
1014 .unwrap_or(0.0)
1015 .ln_1p()],
1016 is_anomaly: false,
1017 anomaly_type: None,
1018 is_aggregate: false,
1019 aggregate_count: 0,
1020 }) {
1021 self.doc_counterparty_links.push((
1022 node_id,
1023 "vendor".to_string(),
1024 po.vendor_id.clone(),
1025 ));
1026 }
1027 }
1028
1029 for gr in goods_receipts {
1031 let vendor_id = gr.vendor_id.as_deref().unwrap_or("UNKNOWN");
1032 if should_aggregate && vendors_needing_pools.contains(&vendor_id.to_string()) {
1033 continue;
1034 }
1035 let doc_id = &gr.header.document_id;
1036 let node_id = format!("gr_{doc_id}");
1037 self.try_add_node(HypergraphNode {
1038 id: node_id,
1039 entity_type: "GoodsReceipt".to_string(),
1040 entity_type_code: type_codes::GOODS_RECEIPT,
1041 layer: HypergraphLayer::ProcessEvents,
1042 external_id: doc_id.clone(),
1043 label: format!("GR {doc_id}"),
1044 properties: {
1045 let mut p = HashMap::new();
1046 p.insert(
1047 "vendor_id".to_string(),
1048 Value::String(vendor_id.to_string()),
1049 );
1050 p
1051 },
1052 features: vec![gr
1053 .total_value
1054 .to_string()
1055 .parse::<f64>()
1056 .unwrap_or(0.0)
1057 .ln_1p()],
1058 is_anomaly: false,
1059 anomaly_type: None,
1060 is_aggregate: false,
1061 aggregate_count: 0,
1062 });
1063 }
1064
1065 for inv in vendor_invoices {
1067 if should_aggregate && vendors_needing_pools.contains(&inv.vendor_id) {
1068 continue;
1069 }
1070 let doc_id = &inv.header.document_id;
1071 let node_id = format!("vinv_{doc_id}");
1072 self.try_add_node(HypergraphNode {
1073 id: node_id,
1074 entity_type: "VendorInvoice".to_string(),
1075 entity_type_code: type_codes::VENDOR_INVOICE,
1076 layer: HypergraphLayer::ProcessEvents,
1077 external_id: doc_id.clone(),
1078 label: format!("VI {doc_id}"),
1079 properties: {
1080 let mut p = HashMap::new();
1081 p.insert(
1082 "vendor_id".to_string(),
1083 Value::String(inv.vendor_id.clone()),
1084 );
1085 p
1086 },
1087 features: vec![inv
1088 .payable_amount
1089 .to_string()
1090 .parse::<f64>()
1091 .unwrap_or(0.0)
1092 .ln_1p()],
1093 is_anomaly: false,
1094 anomaly_type: None,
1095 is_aggregate: false,
1096 aggregate_count: 0,
1097 });
1098 }
1099
1100 for pmt in payments {
1102 let doc_id = &pmt.header.document_id;
1103 let node_id = format!("pmt_{doc_id}");
1104 self.try_add_node(HypergraphNode {
1105 id: node_id,
1106 entity_type: "Payment".to_string(),
1107 entity_type_code: type_codes::PAYMENT,
1108 layer: HypergraphLayer::ProcessEvents,
1109 external_id: doc_id.clone(),
1110 label: format!("PMT {doc_id}"),
1111 properties: HashMap::new(),
1112 features: vec![pmt.amount.to_string().parse::<f64>().unwrap_or(0.0).ln_1p()],
1113 is_anomaly: false,
1114 anomaly_type: None,
1115 is_aggregate: false,
1116 aggregate_count: 0,
1117 });
1118 }
1119 }
1120
1121 pub fn add_o2c_documents(
1123 &mut self,
1124 sales_orders: &[datasynth_core::models::documents::SalesOrder],
1125 deliveries: &[datasynth_core::models::documents::Delivery],
1126 customer_invoices: &[datasynth_core::models::documents::CustomerInvoice],
1127 ) {
1128 if !self.config.include_o2c {
1129 return;
1130 }
1131
1132 let mut customer_doc_counts: HashMap<String, usize> = HashMap::new();
1134 for so in sales_orders {
1135 *customer_doc_counts
1136 .entry(so.customer_id.clone())
1137 .or_insert(0) += 1;
1138 }
1139
1140 let threshold = self.config.docs_per_counterparty_threshold;
1141 let should_aggregate = matches!(
1142 self.config.aggregation_strategy,
1143 AggregationStrategy::PoolByCounterparty
1144 );
1145
1146 let customers_needing_pools: Vec<String> = if should_aggregate {
1147 customer_doc_counts
1148 .iter()
1149 .filter(|(_, count)| **count > threshold)
1150 .map(|(cid, _)| cid.clone())
1151 .collect()
1152 } else {
1153 Vec::new()
1154 };
1155
1156 for customer_id in &customers_needing_pools {
1158 let count = customer_doc_counts[customer_id];
1159 let pool_id = format!("pool_o2c_{customer_id}");
1160 if self.try_add_node(HypergraphNode {
1161 id: pool_id.clone(),
1162 entity_type: "O2CPool".to_string(),
1163 entity_type_code: type_codes::POOL_NODE,
1164 layer: HypergraphLayer::ProcessEvents,
1165 external_id: format!("pool_o2c_{customer_id}"),
1166 label: format!("O2C Pool ({customer_id}): {count} docs"),
1167 properties: {
1168 let mut p = HashMap::new();
1169 p.insert(
1170 "customer_id".to_string(),
1171 Value::String(customer_id.clone()),
1172 );
1173 p.insert("document_count".to_string(), Value::Number(count.into()));
1174 p
1175 },
1176 features: vec![count as f64],
1177 is_anomaly: false,
1178 anomaly_type: None,
1179 is_aggregate: true,
1180 aggregate_count: count,
1181 }) {
1182 self.doc_counterparty_links.push((
1183 pool_id,
1184 "customer".to_string(),
1185 customer_id.clone(),
1186 ));
1187 }
1188 self.aggregate_count += 1;
1189 }
1190
1191 for so in sales_orders {
1192 if should_aggregate && customers_needing_pools.contains(&so.customer_id) {
1193 continue;
1194 }
1195 let doc_id = &so.header.document_id;
1196 let node_id = format!("so_{doc_id}");
1197 if self.try_add_node(HypergraphNode {
1198 id: node_id.clone(),
1199 entity_type: "SalesOrder".to_string(),
1200 entity_type_code: type_codes::SALES_ORDER,
1201 layer: HypergraphLayer::ProcessEvents,
1202 external_id: doc_id.clone(),
1203 label: format!("SO {doc_id}"),
1204 properties: {
1205 let mut p = HashMap::new();
1206 p.insert(
1207 "customer_id".to_string(),
1208 Value::String(so.customer_id.clone()),
1209 );
1210 p
1211 },
1212 features: vec![so
1213 .total_net_amount
1214 .to_string()
1215 .parse::<f64>()
1216 .unwrap_or(0.0)
1217 .ln_1p()],
1218 is_anomaly: false,
1219 anomaly_type: None,
1220 is_aggregate: false,
1221 aggregate_count: 0,
1222 }) {
1223 self.doc_counterparty_links.push((
1224 node_id,
1225 "customer".to_string(),
1226 so.customer_id.clone(),
1227 ));
1228 }
1229 }
1230
1231 for del in deliveries {
1232 if should_aggregate && customers_needing_pools.contains(&del.customer_id) {
1233 continue;
1234 }
1235 let doc_id = &del.header.document_id;
1236 let node_id = format!("del_{doc_id}");
1237 self.try_add_node(HypergraphNode {
1238 id: node_id,
1239 entity_type: "Delivery".to_string(),
1240 entity_type_code: type_codes::DELIVERY,
1241 layer: HypergraphLayer::ProcessEvents,
1242 external_id: doc_id.clone(),
1243 label: format!("DEL {doc_id}"),
1244 properties: HashMap::new(),
1245 features: vec![],
1246 is_anomaly: false,
1247 anomaly_type: None,
1248 is_aggregate: false,
1249 aggregate_count: 0,
1250 });
1251 }
1252
1253 for inv in customer_invoices {
1254 if should_aggregate && customers_needing_pools.contains(&inv.customer_id) {
1255 continue;
1256 }
1257 let doc_id = &inv.header.document_id;
1258 let node_id = format!("cinv_{doc_id}");
1259 self.try_add_node(HypergraphNode {
1260 id: node_id,
1261 entity_type: "CustomerInvoice".to_string(),
1262 entity_type_code: type_codes::CUSTOMER_INVOICE,
1263 layer: HypergraphLayer::ProcessEvents,
1264 external_id: doc_id.clone(),
1265 label: format!("CI {doc_id}"),
1266 properties: HashMap::new(),
1267 features: vec![inv
1268 .total_gross_amount
1269 .to_string()
1270 .parse::<f64>()
1271 .unwrap_or(0.0)
1272 .ln_1p()],
1273 is_anomaly: false,
1274 anomaly_type: None,
1275 is_aggregate: false,
1276 aggregate_count: 0,
1277 });
1278 }
1279 }
1280
1281 pub fn add_s2c_documents(
1283 &mut self,
1284 projects: &[SourcingProject],
1285 qualifications: &[SupplierQualification],
1286 rfx_events: &[RfxEvent],
1287 bids: &[SupplierBid],
1288 evaluations: &[BidEvaluation],
1289 contracts: &[ProcurementContract],
1290 ) {
1291 if !self.config.include_s2c {
1292 return;
1293 }
1294 for p in projects {
1295 let node_id = format!("s2c_proj_{}", p.project_id);
1296 self.try_add_node(HypergraphNode {
1297 id: node_id,
1298 entity_type: "SourcingProject".into(),
1299 entity_type_code: type_codes::SOURCING_PROJECT,
1300 layer: HypergraphLayer::ProcessEvents,
1301 external_id: p.project_id.clone(),
1302 label: format!("SPRJ {}", p.project_id),
1303 properties: HashMap::new(),
1304 features: vec![p
1305 .estimated_annual_spend
1306 .to_string()
1307 .parse::<f64>()
1308 .unwrap_or(0.0)
1309 .ln_1p()],
1310 is_anomaly: false,
1311 anomaly_type: None,
1312 is_aggregate: false,
1313 aggregate_count: 0,
1314 });
1315 }
1316 for q in qualifications {
1317 let node_id = format!("s2c_qual_{}", q.qualification_id);
1318 self.try_add_node(HypergraphNode {
1319 id: node_id,
1320 entity_type: "SupplierQualification".into(),
1321 entity_type_code: type_codes::SUPPLIER_QUALIFICATION,
1322 layer: HypergraphLayer::ProcessEvents,
1323 external_id: q.qualification_id.clone(),
1324 label: format!("SQUAL {}", q.qualification_id),
1325 properties: HashMap::new(),
1326 features: vec![],
1327 is_anomaly: false,
1328 anomaly_type: None,
1329 is_aggregate: false,
1330 aggregate_count: 0,
1331 });
1332 }
1333 for r in rfx_events {
1334 let node_id = format!("s2c_rfx_{}", r.rfx_id);
1335 self.try_add_node(HypergraphNode {
1336 id: node_id,
1337 entity_type: "RfxEvent".into(),
1338 entity_type_code: type_codes::RFX_EVENT,
1339 layer: HypergraphLayer::ProcessEvents,
1340 external_id: r.rfx_id.clone(),
1341 label: format!("RFX {}", r.rfx_id),
1342 properties: HashMap::new(),
1343 features: vec![],
1344 is_anomaly: false,
1345 anomaly_type: None,
1346 is_aggregate: false,
1347 aggregate_count: 0,
1348 });
1349 }
1350 for b in bids {
1351 let node_id = format!("s2c_bid_{}", b.bid_id);
1352 self.try_add_node(HypergraphNode {
1353 id: node_id,
1354 entity_type: "SupplierBid".into(),
1355 entity_type_code: type_codes::SUPPLIER_BID,
1356 layer: HypergraphLayer::ProcessEvents,
1357 external_id: b.bid_id.clone(),
1358 label: format!("BID {}", b.bid_id),
1359 properties: HashMap::new(),
1360 features: vec![b
1361 .total_amount
1362 .to_string()
1363 .parse::<f64>()
1364 .unwrap_or(0.0)
1365 .ln_1p()],
1366 is_anomaly: false,
1367 anomaly_type: None,
1368 is_aggregate: false,
1369 aggregate_count: 0,
1370 });
1371 }
1372 for e in evaluations {
1373 let node_id = format!("s2c_eval_{}", e.evaluation_id);
1374 self.try_add_node(HypergraphNode {
1375 id: node_id,
1376 entity_type: "BidEvaluation".into(),
1377 entity_type_code: type_codes::BID_EVALUATION,
1378 layer: HypergraphLayer::ProcessEvents,
1379 external_id: e.evaluation_id.clone(),
1380 label: format!("BEVAL {}", e.evaluation_id),
1381 properties: HashMap::new(),
1382 features: vec![],
1383 is_anomaly: false,
1384 anomaly_type: None,
1385 is_aggregate: false,
1386 aggregate_count: 0,
1387 });
1388 }
1389 for c in contracts {
1390 let node_id = format!("s2c_ctr_{}", c.contract_id);
1391 self.try_add_node(HypergraphNode {
1392 id: node_id,
1393 entity_type: "ProcurementContract".into(),
1394 entity_type_code: type_codes::PROCUREMENT_CONTRACT,
1395 layer: HypergraphLayer::ProcessEvents,
1396 external_id: c.contract_id.clone(),
1397 label: format!("CTR {}", c.contract_id),
1398 properties: HashMap::new(),
1399 features: vec![c
1400 .total_value
1401 .to_string()
1402 .parse::<f64>()
1403 .unwrap_or(0.0)
1404 .ln_1p()],
1405 is_anomaly: false,
1406 anomaly_type: None,
1407 is_aggregate: false,
1408 aggregate_count: 0,
1409 });
1410 self.doc_counterparty_links.push((
1412 format!("s2c_ctr_{}", c.contract_id),
1413 "vendor".into(),
1414 c.vendor_id.clone(),
1415 ));
1416 }
1417 }
1418
1419 pub fn add_h2r_documents(
1421 &mut self,
1422 payroll_runs: &[PayrollRun],
1423 time_entries: &[TimeEntry],
1424 expense_reports: &[ExpenseReport],
1425 ) {
1426 if !self.config.include_h2r {
1427 return;
1428 }
1429 for pr in payroll_runs {
1430 let node_id = format!("h2r_pay_{}", pr.payroll_id);
1431 self.try_add_node(HypergraphNode {
1432 id: node_id,
1433 entity_type: "PayrollRun".into(),
1434 entity_type_code: type_codes::PAYROLL_RUN,
1435 layer: HypergraphLayer::ProcessEvents,
1436 external_id: pr.payroll_id.clone(),
1437 label: format!("PAY {}", pr.payroll_id),
1438 properties: HashMap::new(),
1439 features: vec![pr
1440 .total_gross
1441 .to_string()
1442 .parse::<f64>()
1443 .unwrap_or(0.0)
1444 .ln_1p()],
1445 is_anomaly: false,
1446 anomaly_type: None,
1447 is_aggregate: false,
1448 aggregate_count: 0,
1449 });
1450 }
1451 for te in time_entries {
1452 let node_id = format!("h2r_time_{}", te.entry_id);
1453 self.try_add_node(HypergraphNode {
1454 id: node_id,
1455 entity_type: "TimeEntry".into(),
1456 entity_type_code: type_codes::TIME_ENTRY,
1457 layer: HypergraphLayer::ProcessEvents,
1458 external_id: te.entry_id.clone(),
1459 label: format!("TIME {}", te.entry_id),
1460 properties: HashMap::new(),
1461 features: vec![te.hours_regular + te.hours_overtime],
1462 is_anomaly: false,
1463 anomaly_type: None,
1464 is_aggregate: false,
1465 aggregate_count: 0,
1466 });
1467 }
1468 for er in expense_reports {
1469 let node_id = format!("h2r_exp_{}", er.report_id);
1470 self.try_add_node(HypergraphNode {
1471 id: node_id,
1472 entity_type: "ExpenseReport".into(),
1473 entity_type_code: type_codes::EXPENSE_REPORT,
1474 layer: HypergraphLayer::ProcessEvents,
1475 external_id: er.report_id.clone(),
1476 label: format!("EXP {}", er.report_id),
1477 properties: HashMap::new(),
1478 features: vec![er
1479 .total_amount
1480 .to_string()
1481 .parse::<f64>()
1482 .unwrap_or(0.0)
1483 .ln_1p()],
1484 is_anomaly: false,
1485 anomaly_type: None,
1486 is_aggregate: false,
1487 aggregate_count: 0,
1488 });
1489 }
1490 }
1491
1492 pub fn add_mfg_documents(
1494 &mut self,
1495 production_orders: &[ProductionOrder],
1496 quality_inspections: &[QualityInspection],
1497 cycle_counts: &[CycleCount],
1498 ) {
1499 if !self.config.include_mfg {
1500 return;
1501 }
1502 for po in production_orders {
1503 let node_id = format!("mfg_po_{}", po.order_id);
1504 self.try_add_node(HypergraphNode {
1505 id: node_id,
1506 entity_type: "ProductionOrder".into(),
1507 entity_type_code: type_codes::PRODUCTION_ORDER,
1508 layer: HypergraphLayer::ProcessEvents,
1509 external_id: po.order_id.clone(),
1510 label: format!("PROD {}", po.order_id),
1511 properties: HashMap::new(),
1512 features: vec![po
1513 .planned_quantity
1514 .to_string()
1515 .parse::<f64>()
1516 .unwrap_or(0.0)
1517 .ln_1p()],
1518 is_anomaly: false,
1519 anomaly_type: None,
1520 is_aggregate: false,
1521 aggregate_count: 0,
1522 });
1523 }
1524 for qi in quality_inspections {
1525 let node_id = format!("mfg_qi_{}", qi.inspection_id);
1526 self.try_add_node(HypergraphNode {
1527 id: node_id,
1528 entity_type: "QualityInspection".into(),
1529 entity_type_code: type_codes::QUALITY_INSPECTION,
1530 layer: HypergraphLayer::ProcessEvents,
1531 external_id: qi.inspection_id.clone(),
1532 label: format!("QI {}", qi.inspection_id),
1533 properties: HashMap::new(),
1534 features: vec![qi.defect_rate],
1535 is_anomaly: false,
1536 anomaly_type: None,
1537 is_aggregate: false,
1538 aggregate_count: 0,
1539 });
1540 }
1541 for cc in cycle_counts {
1542 let node_id = format!("mfg_cc_{}", cc.count_id);
1543 self.try_add_node(HypergraphNode {
1544 id: node_id,
1545 entity_type: "CycleCount".into(),
1546 entity_type_code: type_codes::CYCLE_COUNT,
1547 layer: HypergraphLayer::ProcessEvents,
1548 external_id: cc.count_id.clone(),
1549 label: format!("CC {}", cc.count_id),
1550 properties: HashMap::new(),
1551 features: vec![cc.variance_rate],
1552 is_anomaly: false,
1553 anomaly_type: None,
1554 is_aggregate: false,
1555 aggregate_count: 0,
1556 });
1557 }
1558 }
1559
1560 pub fn add_bank_documents(
1562 &mut self,
1563 customers: &[BankingCustomer],
1564 accounts: &[BankAccount],
1565 transactions: &[BankTransaction],
1566 ) {
1567 if !self.config.include_bank {
1568 return;
1569 }
1570 for cust in customers {
1571 let cid = cust.customer_id.to_string();
1572 let node_id = format!("bank_cust_{cid}");
1573 self.try_add_node(HypergraphNode {
1574 id: node_id,
1575 entity_type: "BankingCustomer".into(),
1576 entity_type_code: type_codes::BANKING_CUSTOMER,
1577 layer: HypergraphLayer::ProcessEvents,
1578 external_id: cid,
1579 label: format!("BCUST {}", cust.customer_id),
1580 properties: {
1581 let mut p = HashMap::new();
1582 p.insert(
1583 "customer_type".into(),
1584 Value::String(format!("{:?}", cust.customer_type)),
1585 );
1586 p.insert("name".into(), Value::String(cust.name.legal_name.clone()));
1587 p.insert(
1588 "residence_country".into(),
1589 Value::String(cust.residence_country.clone()),
1590 );
1591 p.insert(
1592 "risk_tier".into(),
1593 Value::String(format!("{:?}", cust.risk_tier)),
1594 );
1595 p.insert("is_pep".into(), Value::Bool(cust.is_pep));
1596 p
1597 },
1598 features: vec![],
1599 is_anomaly: cust.is_mule,
1600 anomaly_type: if cust.is_mule {
1601 Some("mule_account".into())
1602 } else {
1603 None
1604 },
1605 is_aggregate: false,
1606 aggregate_count: 0,
1607 });
1608 }
1609 for acct in accounts {
1610 let aid = acct.account_id.to_string();
1611 let node_id = format!("bank_acct_{aid}");
1612 self.try_add_node(HypergraphNode {
1613 id: node_id,
1614 entity_type: "BankAccount".into(),
1615 entity_type_code: type_codes::BANK_ACCOUNT,
1616 layer: HypergraphLayer::ProcessEvents,
1617 external_id: aid,
1618 label: format!("BACCT {}", acct.account_number),
1619 properties: {
1620 let mut p = HashMap::new();
1621 p.insert(
1622 "account_type".into(),
1623 Value::String(format!("{:?}", acct.account_type)),
1624 );
1625 p.insert("status".into(), Value::String(format!("{:?}", acct.status)));
1626 p.insert("currency".into(), Value::String(acct.currency.clone()));
1627 let balance: f64 = acct.current_balance.to_string().parse().unwrap_or(0.0);
1628 p.insert("balance".into(), serde_json::json!(balance));
1629 p.insert(
1630 "account_number".into(),
1631 Value::String(acct.account_number.clone()),
1632 );
1633 p
1634 },
1635 features: vec![acct
1636 .current_balance
1637 .to_string()
1638 .parse::<f64>()
1639 .unwrap_or(0.0)
1640 .ln_1p()],
1641 is_anomaly: acct.is_mule_account,
1642 anomaly_type: if acct.is_mule_account {
1643 Some("mule_account".into())
1644 } else {
1645 None
1646 },
1647 is_aggregate: false,
1648 aggregate_count: 0,
1649 });
1650 }
1651 for txn in transactions {
1652 let tid = txn.transaction_id.to_string();
1653 let node_id = format!("bank_txn_{tid}");
1654 self.try_add_node(HypergraphNode {
1655 id: node_id,
1656 entity_type: "BankTransaction".into(),
1657 entity_type_code: type_codes::BANK_TRANSACTION,
1658 layer: HypergraphLayer::ProcessEvents,
1659 external_id: tid,
1660 label: format!("BTXN {}", txn.reference),
1661 properties: {
1662 let mut p = HashMap::new();
1663 let amount: f64 = txn.amount.to_string().parse().unwrap_or(0.0);
1664 p.insert("amount".into(), serde_json::json!(amount));
1665 p.insert("currency".into(), Value::String(txn.currency.clone()));
1666 p.insert("reference".into(), Value::String(txn.reference.clone()));
1667 p.insert(
1668 "direction".into(),
1669 Value::String(format!("{:?}", txn.direction)),
1670 );
1671 p.insert(
1672 "channel".into(),
1673 Value::String(format!("{:?}", txn.channel)),
1674 );
1675 p.insert(
1676 "category".into(),
1677 Value::String(format!("{:?}", txn.category)),
1678 );
1679 p.insert(
1680 "transaction_type".into(),
1681 Value::String(txn.transaction_type.clone()),
1682 );
1683 p.insert("status".into(), Value::String(format!("{:?}", txn.status)));
1684 if txn.is_suspicious {
1685 p.insert("isAnomalous".into(), Value::Bool(true));
1686 p.insert("is_suspicious".into(), Value::Bool(true));
1687 if let Some(ref reason) = txn.suspicion_reason {
1688 p.insert(
1689 "suspicion_reason".into(),
1690 Value::String(format!("{reason:?}")),
1691 );
1692 }
1693 if let Some(ref stage) = txn.laundering_stage {
1694 p.insert(
1695 "laundering_stage".into(),
1696 Value::String(format!("{stage:?}")),
1697 );
1698 }
1699 }
1700 p
1701 },
1702 features: vec![txn
1703 .amount
1704 .to_string()
1705 .parse::<f64>()
1706 .unwrap_or(0.0)
1707 .abs()
1708 .ln_1p()],
1709 is_anomaly: txn.is_suspicious,
1710 anomaly_type: txn.suspicion_reason.as_ref().map(|r| format!("{r:?}")),
1711 is_aggregate: false,
1712 aggregate_count: 0,
1713 });
1714 }
1715 }
1716
1717 #[allow(clippy::too_many_arguments)]
1719 pub fn add_audit_documents(
1720 &mut self,
1721 engagements: &[AuditEngagement],
1722 workpapers: &[Workpaper],
1723 findings: &[AuditFinding],
1724 evidence: &[AuditEvidence],
1725 risks: &[RiskAssessment],
1726 judgments: &[ProfessionalJudgment],
1727 ) {
1728 if !self.config.include_audit {
1729 return;
1730 }
1731 for eng in engagements {
1732 let eid = eng.engagement_id.to_string();
1733 let node_id = format!("audit_eng_{eid}");
1734 self.try_add_node(HypergraphNode {
1735 id: node_id,
1736 entity_type: "AuditEngagement".into(),
1737 entity_type_code: type_codes::AUDIT_ENGAGEMENT,
1738 layer: HypergraphLayer::ProcessEvents,
1739 external_id: eid,
1740 label: format!("AENG {}", eng.engagement_ref),
1741 properties: {
1742 let mut p = HashMap::new();
1743 p.insert(
1744 "engagement_ref".into(),
1745 Value::String(eng.engagement_ref.clone()),
1746 );
1747 p.insert("status".into(), Value::String(format!("{:?}", eng.status)));
1748 p.insert(
1749 "engagement_type".into(),
1750 Value::String(format!("{:?}", eng.engagement_type)),
1751 );
1752 p.insert("client_name".into(), Value::String(eng.client_name.clone()));
1753 p.insert("fiscal_year".into(), serde_json::json!(eng.fiscal_year));
1754 let mat: f64 = eng.materiality.to_string().parse().unwrap_or(0.0);
1755 p.insert("materiality".into(), serde_json::json!(mat));
1756 p.insert(
1757 "fieldwork_start".into(),
1758 Value::String(eng.fieldwork_start.to_string()),
1759 );
1760 p.insert(
1761 "fieldwork_end".into(),
1762 Value::String(eng.fieldwork_end.to_string()),
1763 );
1764 p
1765 },
1766 features: vec![eng
1767 .materiality
1768 .to_string()
1769 .parse::<f64>()
1770 .unwrap_or(0.0)
1771 .ln_1p()],
1772 is_anomaly: false,
1773 anomaly_type: None,
1774 is_aggregate: false,
1775 aggregate_count: 0,
1776 });
1777 }
1778 for wp in workpapers {
1779 let wid = wp.workpaper_id.to_string();
1780 let node_id = format!("audit_wp_{wid}");
1781 self.try_add_node(HypergraphNode {
1782 id: node_id,
1783 entity_type: "Workpaper".into(),
1784 entity_type_code: type_codes::WORKPAPER,
1785 layer: HypergraphLayer::ProcessEvents,
1786 external_id: wid,
1787 label: format!("WP {}", wp.workpaper_ref),
1788 properties: {
1789 let mut p = HashMap::new();
1790 p.insert(
1791 "workpaper_ref".into(),
1792 Value::String(wp.workpaper_ref.clone()),
1793 );
1794 p.insert("title".into(), Value::String(wp.title.clone()));
1795 p.insert("status".into(), Value::String(format!("{:?}", wp.status)));
1796 p.insert("section".into(), Value::String(format!("{:?}", wp.section)));
1797 p
1798 },
1799 features: vec![],
1800 is_anomaly: false,
1801 anomaly_type: None,
1802 is_aggregate: false,
1803 aggregate_count: 0,
1804 });
1805 }
1806 for f in findings {
1807 let fid = f.finding_id.to_string();
1808 let node_id = format!("audit_find_{fid}");
1809 self.try_add_node(HypergraphNode {
1810 id: node_id,
1811 entity_type: "AuditFinding".into(),
1812 entity_type_code: type_codes::AUDIT_FINDING,
1813 layer: HypergraphLayer::ProcessEvents,
1814 external_id: fid,
1815 label: format!("AFIND {}", f.finding_ref),
1816 properties: {
1817 let mut p = HashMap::new();
1818 p.insert("finding_ref".into(), Value::String(f.finding_ref.clone()));
1819 p.insert("title".into(), Value::String(f.title.clone()));
1820 p.insert("description".into(), Value::String(f.condition.clone()));
1821 p.insert(
1822 "severity".into(),
1823 Value::String(format!("{:?}", f.severity)),
1824 );
1825 p.insert("status".into(), Value::String(format!("{:?}", f.status)));
1826 p.insert(
1827 "finding_type".into(),
1828 Value::String(format!("{:?}", f.finding_type)),
1829 );
1830 p
1831 },
1832 features: vec![f.severity.score() as f64 / 5.0],
1833 is_anomaly: false,
1834 anomaly_type: None,
1835 is_aggregate: false,
1836 aggregate_count: 0,
1837 });
1838 }
1839 for ev in evidence {
1840 let evid = ev.evidence_id.to_string();
1841 let node_id = format!("audit_ev_{evid}");
1842 self.try_add_node(HypergraphNode {
1843 id: node_id,
1844 entity_type: "AuditEvidence".into(),
1845 entity_type_code: type_codes::AUDIT_EVIDENCE,
1846 layer: HypergraphLayer::ProcessEvents,
1847 external_id: evid,
1848 label: format!("AEV {}", ev.evidence_id),
1849 properties: {
1850 let mut p = HashMap::new();
1851 p.insert(
1852 "evidence_type".into(),
1853 Value::String(format!("{:?}", ev.evidence_type)),
1854 );
1855 p.insert("description".into(), Value::String(ev.description.clone()));
1856 p.insert(
1857 "source_type".into(),
1858 Value::String(format!("{:?}", ev.source_type)),
1859 );
1860 p.insert(
1861 "reliability".into(),
1862 Value::String(format!(
1863 "{:?}",
1864 ev.reliability_assessment.overall_reliability
1865 )),
1866 );
1867 p
1868 },
1869 features: vec![ev.reliability_assessment.overall_reliability.score() as f64 / 3.0],
1870 is_anomaly: false,
1871 anomaly_type: None,
1872 is_aggregate: false,
1873 aggregate_count: 0,
1874 });
1875 }
1876 for r in risks {
1877 let rid = r.risk_id.to_string();
1878 let node_id = format!("audit_risk_{rid}");
1879 self.try_add_node(HypergraphNode {
1880 id: node_id,
1881 entity_type: "RiskAssessment".into(),
1882 entity_type_code: type_codes::RISK_ASSESSMENT,
1883 layer: HypergraphLayer::ProcessEvents,
1884 external_id: rid,
1885 label: format!("ARISK {}", r.risk_ref),
1886 properties: {
1887 let mut p = HashMap::new();
1888 p.insert("status".into(), Value::String("active".into()));
1889 p.insert("risk_ref".into(), Value::String(r.risk_ref.clone()));
1890 p.insert("name".into(), Value::String(r.risk_ref.clone()));
1891 p.insert("description".into(), Value::String(r.description.clone()));
1892 p.insert(
1893 "category".into(),
1894 Value::String(format!("{:?}", r.risk_category)),
1895 );
1896 p.insert(
1897 "account_or_process".into(),
1898 Value::String(r.account_or_process.clone()),
1899 );
1900 let inherent = match r.inherent_risk {
1902 datasynth_core::models::audit::RiskLevel::Low => "low",
1903 datasynth_core::models::audit::RiskLevel::Medium => "medium",
1904 datasynth_core::models::audit::RiskLevel::High => "high",
1905 datasynth_core::models::audit::RiskLevel::Significant => "critical",
1906 };
1907 let control = match r.control_risk {
1908 datasynth_core::models::audit::RiskLevel::Low => "low",
1909 datasynth_core::models::audit::RiskLevel::Medium => "medium",
1910 datasynth_core::models::audit::RiskLevel::High => "high",
1911 datasynth_core::models::audit::RiskLevel::Significant => "critical",
1912 };
1913 p.insert("inherentImpact".into(), Value::String(inherent.into()));
1914 p.insert("inherentLikelihood".into(), Value::String(inherent.into()));
1915 p.insert("residualImpact".into(), Value::String(control.into()));
1916 p.insert("residualLikelihood".into(), Value::String(control.into()));
1917 p.insert(
1918 "riskScore".into(),
1919 serde_json::json!(r.inherent_risk.score() as f64 * 25.0),
1920 );
1921 p.insert("owner".into(), Value::String(r.assessed_by.clone()));
1922 p.insert("isSignificant".into(), Value::Bool(r.is_significant_risk));
1923 p.insert(
1924 "is_significant_risk".into(),
1925 Value::Bool(r.is_significant_risk),
1926 );
1927 p.insert(
1928 "response_nature".into(),
1929 Value::String(format!("{:?}", r.response_nature)),
1930 );
1931 p
1932 },
1933 features: vec![
1934 r.inherent_risk.score() as f64 / 4.0,
1935 r.control_risk.score() as f64 / 4.0,
1936 if r.is_significant_risk { 1.0 } else { 0.0 },
1937 ],
1938 is_anomaly: false,
1939 anomaly_type: None,
1940 is_aggregate: false,
1941 aggregate_count: 0,
1942 });
1943 }
1944 for j in judgments {
1945 let jid = j.judgment_id.to_string();
1946 let node_id = format!("audit_judg_{jid}");
1947 self.try_add_node(HypergraphNode {
1948 id: node_id,
1949 entity_type: "ProfessionalJudgment".into(),
1950 entity_type_code: type_codes::PROFESSIONAL_JUDGMENT,
1951 layer: HypergraphLayer::ProcessEvents,
1952 external_id: jid,
1953 label: format!("AJUDG {}", j.judgment_id),
1954 properties: {
1955 let mut p = HashMap::new();
1956 p.insert("judgment_ref".into(), Value::String(j.judgment_ref.clone()));
1957 p.insert("subject".into(), Value::String(j.subject.clone()));
1958 p.insert(
1959 "description".into(),
1960 Value::String(j.issue_description.clone()),
1961 );
1962 p.insert("conclusion".into(), Value::String(j.conclusion.clone()));
1963 p.insert(
1964 "judgment_type".into(),
1965 Value::String(format!("{:?}", j.judgment_type)),
1966 );
1967 p
1968 },
1969 features: vec![],
1970 is_anomaly: false,
1971 anomaly_type: None,
1972 is_aggregate: false,
1973 aggregate_count: 0,
1974 });
1975 }
1976 }
1977
1978 pub fn add_bank_recon_documents(&mut self, reconciliations: &[BankReconciliation]) {
1980 if !self.config.include_r2r {
1981 return;
1982 }
1983 for recon in reconciliations {
1984 let node_id = format!("recon_{}", recon.reconciliation_id);
1985 self.try_add_node(HypergraphNode {
1986 id: node_id,
1987 entity_type: "BankReconciliation".into(),
1988 entity_type_code: type_codes::BANK_RECONCILIATION,
1989 layer: HypergraphLayer::ProcessEvents,
1990 external_id: recon.reconciliation_id.clone(),
1991 label: format!("RECON {}", recon.reconciliation_id),
1992 properties: HashMap::new(),
1993 features: vec![recon
1994 .bank_ending_balance
1995 .to_string()
1996 .parse::<f64>()
1997 .unwrap_or(0.0)
1998 .ln_1p()],
1999 is_anomaly: false,
2000 anomaly_type: None,
2001 is_aggregate: false,
2002 aggregate_count: 0,
2003 });
2004 for line in &recon.statement_lines {
2005 let node_id = format!("recon_line_{}", line.line_id);
2006 self.try_add_node(HypergraphNode {
2007 id: node_id,
2008 entity_type: "BankStatementLine".into(),
2009 entity_type_code: type_codes::BANK_STATEMENT_LINE,
2010 layer: HypergraphLayer::ProcessEvents,
2011 external_id: line.line_id.clone(),
2012 label: format!("BSL {}", line.line_id),
2013 properties: HashMap::new(),
2014 features: vec![line
2015 .amount
2016 .to_string()
2017 .parse::<f64>()
2018 .unwrap_or(0.0)
2019 .abs()
2020 .ln_1p()],
2021 is_anomaly: false,
2022 anomaly_type: None,
2023 is_aggregate: false,
2024 aggregate_count: 0,
2025 });
2026 }
2027 for item in &recon.reconciling_items {
2028 let node_id = format!("recon_item_{}", item.item_id);
2029 self.try_add_node(HypergraphNode {
2030 id: node_id,
2031 entity_type: "ReconcilingItem".into(),
2032 entity_type_code: type_codes::RECONCILING_ITEM,
2033 layer: HypergraphLayer::ProcessEvents,
2034 external_id: item.item_id.clone(),
2035 label: format!("RITEM {}", item.item_id),
2036 properties: HashMap::new(),
2037 features: vec![item
2038 .amount
2039 .to_string()
2040 .parse::<f64>()
2041 .unwrap_or(0.0)
2042 .abs()
2043 .ln_1p()],
2044 is_anomaly: false,
2045 anomaly_type: None,
2046 is_aggregate: false,
2047 aggregate_count: 0,
2048 });
2049 }
2050 }
2051 }
2052
2053 pub fn add_ocpm_events(&mut self, event_log: &datasynth_ocpm::OcpmEventLog) {
2055 if !self.config.events_as_hyperedges {
2056 return;
2057 }
2058 for event in &event_log.events {
2059 let participants: Vec<HyperedgeParticipant> = event
2060 .object_refs
2061 .iter()
2062 .map(|obj_ref| {
2063 let node_id = format!("ocpm_obj_{}", obj_ref.object_id);
2064 self.try_add_node(HypergraphNode {
2066 id: node_id.clone(),
2067 entity_type: "OcpmObject".into(),
2068 entity_type_code: type_codes::OCPM_EVENT,
2069 layer: HypergraphLayer::ProcessEvents,
2070 external_id: obj_ref.object_id.to_string(),
2071 label: format!("OBJ {}", obj_ref.object_type_id),
2072 properties: HashMap::new(),
2073 features: vec![],
2074 is_anomaly: false,
2075 anomaly_type: None,
2076 is_aggregate: false,
2077 aggregate_count: 0,
2078 });
2079 HyperedgeParticipant {
2080 node_id,
2081 role: format!("{:?}", obj_ref.qualifier),
2082 weight: None,
2083 }
2084 })
2085 .collect();
2086
2087 if !participants.is_empty() {
2088 let mut props = HashMap::new();
2089 props.insert(
2090 "activity_id".into(),
2091 Value::String(event.activity_id.clone()),
2092 );
2093 props.insert(
2094 "timestamp".into(),
2095 Value::String(event.timestamp.to_rfc3339()),
2096 );
2097 if !event.resource_id.is_empty() {
2098 props.insert("resource".into(), Value::String(event.resource_id.clone()));
2099 }
2100
2101 self.hyperedges.push(Hyperedge {
2102 id: format!("ocpm_evt_{}", event.event_id),
2103 hyperedge_type: "OcpmEvent".into(),
2104 subtype: event.activity_id.clone(),
2105 participants,
2106 layer: HypergraphLayer::ProcessEvents,
2107 properties: props,
2108 timestamp: Some(event.timestamp.date_naive()),
2109 is_anomaly: false,
2110 anomaly_type: None,
2111 features: vec![],
2112 });
2113 }
2114 }
2115 }
2116
2117 pub fn build_cross_layer_edges(&mut self) {
2119 if !self.config.include_cross_layer_edges {
2120 return;
2121 }
2122
2123 let links = std::mem::take(&mut self.doc_counterparty_links);
2125 for (doc_node_id, counterparty_type, counterparty_id) in &links {
2126 let source_node_id = match counterparty_type.as_str() {
2127 "vendor" => self.vendor_node_ids.get(counterparty_id),
2128 "customer" => self.customer_node_ids.get(counterparty_id),
2129 _ => None,
2130 };
2131 if let Some(source_id) = source_node_id {
2132 self.edges.push(CrossLayerEdge {
2133 source_id: source_id.clone(),
2134 source_layer: HypergraphLayer::GovernanceControls,
2135 target_id: doc_node_id.clone(),
2136 target_layer: HypergraphLayer::ProcessEvents,
2137 edge_type: "SuppliesTo".to_string(),
2138 edge_type_code: type_codes::SUPPLIES_TO,
2139 properties: HashMap::new(),
2140 });
2141 }
2142 }
2143 self.doc_counterparty_links = links;
2144 }
2145
2146 pub fn build(mut self) -> Hypergraph {
2148 self.build_cross_layer_edges();
2150
2151 let mut layer_node_counts: HashMap<String, usize> = HashMap::new();
2153 let mut node_type_counts: HashMap<String, usize> = HashMap::new();
2154 let mut anomalous_nodes = 0;
2155
2156 for node in &self.nodes {
2157 *layer_node_counts
2158 .entry(node.layer.name().to_string())
2159 .or_insert(0) += 1;
2160 *node_type_counts
2161 .entry(node.entity_type.clone())
2162 .or_insert(0) += 1;
2163 if node.is_anomaly {
2164 anomalous_nodes += 1;
2165 }
2166 }
2167
2168 let mut edge_type_counts: HashMap<String, usize> = HashMap::new();
2169 for edge in &self.edges {
2170 *edge_type_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
2171 }
2172
2173 let mut hyperedge_type_counts: HashMap<String, usize> = HashMap::new();
2174 let mut anomalous_hyperedges = 0;
2175 for he in &self.hyperedges {
2176 *hyperedge_type_counts
2177 .entry(he.hyperedge_type.clone())
2178 .or_insert(0) += 1;
2179 if he.is_anomaly {
2180 anomalous_hyperedges += 1;
2181 }
2182 }
2183
2184 let budget_report = NodeBudgetReport {
2185 total_budget: self.budget.total_max(),
2186 total_used: self.budget.total_count(),
2187 layer1_budget: self.budget.layer1_max,
2188 layer1_used: self.budget.layer1_count,
2189 layer2_budget: self.budget.layer2_max,
2190 layer2_used: self.budget.layer2_count,
2191 layer3_budget: self.budget.layer3_max,
2192 layer3_used: self.budget.layer3_count,
2193 aggregate_nodes_created: self.aggregate_count,
2194 aggregation_triggered: self.aggregate_count > 0,
2195 };
2196
2197 let metadata = HypergraphMetadata {
2198 name: "multi_layer_hypergraph".to_string(),
2199 num_nodes: self.nodes.len(),
2200 num_edges: self.edges.len(),
2201 num_hyperedges: self.hyperedges.len(),
2202 layer_node_counts,
2203 node_type_counts,
2204 edge_type_counts,
2205 hyperedge_type_counts,
2206 anomalous_nodes,
2207 anomalous_hyperedges,
2208 source: "datasynth".to_string(),
2209 generated_at: chrono::Utc::now().to_rfc3339(),
2210 budget_report: budget_report.clone(),
2211 files: vec![
2212 "nodes.jsonl".to_string(),
2213 "edges.jsonl".to_string(),
2214 "hyperedges.jsonl".to_string(),
2215 "metadata.json".to_string(),
2216 ],
2217 };
2218
2219 Hypergraph {
2220 nodes: self.nodes,
2221 edges: self.edges,
2222 hyperedges: self.hyperedges,
2223 metadata,
2224 budget_report,
2225 }
2226 }
2227
2228 fn try_add_node(&mut self, node: HypergraphNode) -> bool {
2230 if self.node_index.contains_key(&node.id) {
2231 return false; }
2233
2234 if !self.budget.can_add(node.layer) {
2235 return false; }
2237
2238 let id = node.id.clone();
2239 let layer = node.layer;
2240 self.nodes.push(node);
2241 let idx = self.nodes.len() - 1;
2242 self.node_index.insert(id, idx);
2243 self.budget.record_add(layer);
2244 true
2245 }
2246}
2247
2248fn component_to_feature(component: &CosoComponent) -> f64 {
2250 match component {
2251 CosoComponent::ControlEnvironment => 1.0,
2252 CosoComponent::RiskAssessment => 2.0,
2253 CosoComponent::ControlActivities => 3.0,
2254 CosoComponent::InformationCommunication => 4.0,
2255 CosoComponent::MonitoringActivities => 5.0,
2256 }
2257}
2258
2259fn account_type_feature(account_type: &datasynth_core::models::AccountType) -> f64 {
2261 use datasynth_core::models::AccountType;
2262 match account_type {
2263 AccountType::Asset => 1.0,
2264 AccountType::Liability => 2.0,
2265 AccountType::Equity => 3.0,
2266 AccountType::Revenue => 4.0,
2267 AccountType::Expense => 5.0,
2268 AccountType::Statistical => 6.0,
2269 }
2270}
2271
2272fn compute_je_features(entry: &JournalEntry) -> Vec<f64> {
2274 let total_debit: f64 = entry
2275 .lines
2276 .iter()
2277 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
2278 .sum();
2279
2280 let line_count = entry.lines.len() as f64;
2281 let posting_date = entry.header.posting_date;
2282 let weekday = posting_date.weekday().num_days_from_monday() as f64 / WEEKDAY_NORMALIZER;
2283 let day = posting_date.day() as f64 / DAY_OF_MONTH_NORMALIZER;
2284 let month = posting_date.month() as f64 / MONTH_NORMALIZER;
2285 let is_month_end = if posting_date.day() >= MONTH_END_DAY_THRESHOLD {
2286 1.0
2287 } else {
2288 0.0
2289 };
2290
2291 vec![
2292 (total_debit.abs() + 1.0).ln(), line_count, weekday, day, month, is_month_end, ]
2299}
2300
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        AccountSubType, AccountType, ChartOfAccounts, CoAComplexity, ControlFrequency, ControlType,
        CosoComponent, CosoMaturityLevel, GLAccount, InternalControl, RiskLevel, SoxAssertion,
        UserPersona,
    };

    /// Returns a small chart of accounts holding exactly two accounts:
    /// "1000" (Cash, an asset) and "2000" (AP, a liability).
    fn make_test_coa() -> ChartOfAccounts {
        let mut chart = ChartOfAccounts::new(
            "TEST_COA".to_string(),
            "Test Chart".to_string(),
            "US".to_string(),
            datasynth_core::models::IndustrySector::Manufacturing,
            CoAComplexity::Small,
        );

        // Accounts are created and added one at a time, in this order.
        let fixtures = [
            ("1000", "Cash", AccountType::Asset, AccountSubType::Cash),
            ("2000", "AP", AccountType::Liability, AccountSubType::AccountsPayable),
        ];
        for (number, name, account_type, sub_type) in fixtures {
            chart.add_account(GLAccount::new(
                number.to_string(),
                name.to_string(),
                account_type,
                sub_type,
            ));
        }

        chart
    }

    /// Returns a single key control ("C001", "Three-Way Match") attached to
    /// the `ControlActivities` COSO component and the `Existence` assertion.
    fn make_test_control() -> InternalControl {
        InternalControl {
            // Identity and free-text fields.
            control_id: "C001".to_string(),
            control_name: "Three-Way Match".to_string(),
            objective: "Ensure proper matching".to_string(),
            description: "Test control".to_string(),
            // Classification.
            control_type: ControlType::Preventive,
            frequency: ControlFrequency::Transactional,
            owner_role: UserPersona::Controller,
            risk_level: RiskLevel::High,
            is_key_control: true,
            sox_assertion: SoxAssertion::Existence,
            // COSO mapping.
            coso_component: CosoComponent::ControlActivities,
            coso_principles: vec![CosoPrinciple::ControlActions],
            control_scope: datasynth_core::models::ControlScope::TransactionLevel,
            maturity_level: CosoMaturityLevel::Managed,
        }
    }

    /// Adding only the COSO framework yields 22 governance-layer nodes and
    /// 17 `CoversCosoPrinciple` edges.
    #[test]
    fn test_builder_coso_framework() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();

        let graph = builder.build();

        // Presumably 5 components + 17 principles; confirm against
        // `add_coso_framework`.
        let node_total = graph.nodes.len();
        assert_eq!(node_total, 22);

        for node in graph.nodes.iter() {
            assert!(node.layer == HypergraphLayer::GovernanceControls);
        }

        let principle_edges = graph
            .edges
            .iter()
            .filter(|edge| edge.edge_type == "CoversCosoPrinciple")
            .count();
        assert_eq!(principle_edges, 17);
    }

    /// Adding one control on top of the COSO framework yields 24 nodes,
    /// including an `InternalControl` node and a `SoxAssertion` node.
    #[test]
    fn test_builder_controls() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);

        let graph = builder.build();
        let has_entity = |kind: &str| graph.nodes.iter().any(|node| node.entity_type == kind);

        assert_eq!(graph.nodes.len(), 24);
        assert!(has_entity("InternalControl"));
        assert!(has_entity("SoxAssertion"));
    }

    /// Adding the two-account test chart yields two accounting-layer nodes.
    #[test]
    fn test_builder_accounts() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_accounts(&make_test_coa());

        let graph = builder.build();
        assert_eq!(graph.nodes.len(), 2);

        for node in graph.nodes.iter() {
            assert!(node.layer == HypergraphLayer::AccountingNetwork);
        }
    }

    /// With a total budget of 10 nodes, at most one of the two offered
    /// accounts may be admitted.
    #[test]
    fn test_budget_enforcement() {
        // Every optional section exercised here is switched off so only the
        // accounts compete for the budget.
        let tight_config = HypergraphConfig {
            max_nodes: 10,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            include_p2p: false,
            include_o2c: false,
            ..Default::default()
        };
        let mut builder = HypergraphBuilder::new(tight_config);
        builder.add_accounts(&make_test_coa());

        let graph = builder.build();

        // NOTE(review): presumably the accounting layer only receives a share
        // of the total budget; confirm against the budget allocation logic.
        let admitted = graph.nodes.len();
        assert!(admitted <= 1);
    }

    /// A build with framework, one control and the test chart produces
    /// nodes and edges, and the metadata counts match the collections.
    #[test]
    fn test_full_build() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 10000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        builder.add_accounts(&make_test_coa());

        let graph = builder.build();
        let meta = &graph.metadata;

        assert!(!graph.nodes.is_empty());
        assert!(!graph.edges.is_empty());
        assert_eq!(meta.num_nodes, graph.nodes.len());
        assert_eq!(meta.num_edges, graph.edges.len());
    }
}