1use std::collections::HashMap;
12
13use chrono::Datelike;
14use serde_json::Value;
15
16use datasynth_banking::models::{BankAccount, BankTransaction, BankingCustomer};
17use datasynth_core::models::audit::{
18 AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
19};
20use datasynth_core::models::compliance::{ComplianceFinding, ComplianceStandard, RegulatoryFiling};
21use datasynth_core::models::sourcing::{
22 BidEvaluation, ProcurementContract, RfxEvent, SourcingProject, SupplierBid,
23 SupplierQualification,
24};
25use datasynth_core::models::ExpenseReport;
26use datasynth_core::models::{
27 BankReconciliation, ChartOfAccounts, CosoComponent, CosoPrinciple, Customer, CycleCount,
28 Employee, InternalControl, JournalEntry, PayrollRun, ProductionOrder, QualityInspection,
29 TimeEntry, Vendor,
30};
31
32use crate::models::hypergraph::{
33 AggregationStrategy, CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph,
34 HypergraphLayer, HypergraphMetadata, HypergraphNode, NodeBudget, NodeBudgetReport,
35};
36
/// Day-of-month cutoff (28). Not referenced in the visible part of this file;
/// presumably days at or past it count as month-end — TODO confirm at the use site.
const MONTH_END_DAY_THRESHOLD: u32 = 28;
/// Divisor 6.0; by its name presumably scales a weekday index — TODO confirm at the use site.
const WEEKDAY_NORMALIZER: f64 = 6.0;
/// Divisor 31.0; by its name presumably scales a day-of-month value — TODO confirm at the use site.
const DAY_OF_MONTH_NORMALIZER: f64 = 31.0;
/// Divisor 12.0; by its name presumably scales a month number — TODO confirm at the use site.
const MONTH_NORMALIZER: f64 = 12.0;
45
// Not every code below is referenced in the visible part of this file, hence the
// blanket `dead_code` allowance on the module.
#[allow(dead_code)]
/// Numeric codes stored in `HypergraphNode::entity_type_code` and
/// `CrossLayerEdge::edge_type_code`.
///
/// The values are grouped in bands: 1xx accounting, 2xx master-data entities,
/// 3xx process documents (plus `POOL_NODE`), 4xx events, 5xx governance, and
/// two-digit values for edge types.
mod type_codes {
    // --- 1xx: accounting nodes ---
    pub const ACCOUNT: u32 = 100;
    pub const JOURNAL_ENTRY: u32 = 101;

    // --- 2xx: master-data entities ---
    pub const VENDOR: u32 = 200;
    pub const CUSTOMER: u32 = 201;
    pub const EMPLOYEE: u32 = 202;
    pub const BANKING_CUSTOMER: u32 = 203;

    // --- 30x: procure-to-pay documents ---
    pub const PURCHASE_ORDER: u32 = 300;
    pub const GOODS_RECEIPT: u32 = 301;
    pub const VENDOR_INVOICE: u32 = 302;
    pub const PAYMENT: u32 = 303;
    // --- 31x: order-to-cash documents ---
    pub const SALES_ORDER: u32 = 310;
    pub const DELIVERY: u32 = 311;
    pub const CUSTOMER_INVOICE: u32 = 312;
    // --- 32x: sourcing documents ---
    pub const SOURCING_PROJECT: u32 = 320;
    pub const RFX_EVENT: u32 = 321;
    pub const SUPPLIER_BID: u32 = 322;
    pub const BID_EVALUATION: u32 = 323;
    pub const PROCUREMENT_CONTRACT: u32 = 324;
    pub const SUPPLIER_QUALIFICATION: u32 = 325;
    // --- 33x: payroll / time / expense documents ---
    pub const PAYROLL_RUN: u32 = 330;
    pub const TIME_ENTRY: u32 = 331;
    pub const EXPENSE_REPORT: u32 = 332;
    pub const PAYROLL_LINE_ITEM: u32 = 333;
    // --- 34x: manufacturing documents ---
    pub const PRODUCTION_ORDER: u32 = 340;
    pub const QUALITY_INSPECTION: u32 = 341;
    pub const CYCLE_COUNT: u32 = 342;
    // --- 35x: banking ---
    pub const BANK_ACCOUNT: u32 = 350;
    pub const BANK_TRANSACTION: u32 = 351;
    pub const BANK_STATEMENT_LINE: u32 = 352;
    // --- 36x: audit ---
    pub const AUDIT_ENGAGEMENT: u32 = 360;
    pub const WORKPAPER: u32 = 361;
    pub const AUDIT_FINDING: u32 = 362;
    pub const AUDIT_EVIDENCE: u32 = 363;
    pub const RISK_ASSESSMENT: u32 = 364;
    pub const PROFESSIONAL_JUDGMENT: u32 = 365;
    // --- 37x: bank reconciliation (371 is not defined in this module) ---
    pub const BANK_RECONCILIATION: u32 = 370;
    pub const RECONCILING_ITEM: u32 = 372;
    // --- events ---
    pub const OCPM_EVENT: u32 = 400;
    // Code given to aggregate pool nodes (`is_aggregate: true`) that stand in for
    // a counterparty's individual documents.
    pub const POOL_NODE: u32 = 399;

    // --- 5xx: governance / compliance nodes ---
    pub const COSO_COMPONENT: u32 = 500;
    pub const COSO_PRINCIPLE: u32 = 501;
    pub const SOX_ASSERTION: u32 = 502;
    pub const INTERNAL_CONTROL: u32 = 503;
    pub const KYC_PROFILE: u32 = 504;
    pub const COMPLIANCE_STANDARD: u32 = 505;
    pub const JURISDICTION: u32 = 506;
    pub const REGULATORY_FILING: u32 = 507;
    pub const COMPLIANCE_FINDING: u32 = 508;

    // --- two-digit values: edge type codes (`CrossLayerEdge::edge_type_code`) ---
    pub const IMPLEMENTS_CONTROL: u32 = 40;
    pub const GOVERNED_BY_STANDARD: u32 = 41;
    pub const OWNS_CONTROL: u32 = 42;
    pub const OVERSEE_PROCESS: u32 = 43;
    pub const ENFORCES_ASSERTION: u32 = 44;
    pub const STANDARD_TO_CONTROL: u32 = 45;
    pub const FINDING_ON_CONTROL: u32 = 46;
    pub const STANDARD_TO_ACCOUNT: u32 = 47;
    pub const SUPPLIES_TO: u32 = 48;
    pub const FILED_BY_COMPANY: u32 = 49;
    pub const COVERS_COSO_PRINCIPLE: u32 = 54;
    pub const CONTAINS_ACCOUNT: u32 = 55;
}
131
/// Options controlling what `HypergraphBuilder` puts into the graph.
///
/// Most flags gate one `add_*` method: when the flag is `false` the method
/// returns immediately without touching the graph.
#[derive(Debug, Clone)]
pub struct HypergraphConfig {
    /// Node limit handed to `NodeBudget::new` when the builder is created.
    pub max_nodes: usize,
    /// Aggregation mode; document pooling happens only for
    /// `AggregationStrategy::PoolByCounterparty`.
    pub aggregation_strategy: AggregationStrategy,
    /// Gates `add_coso_framework` (COSO component and principle nodes).
    pub include_coso: bool,
    /// Gates `add_controls` (internal control nodes).
    pub include_controls: bool,
    /// When set, `add_controls` also creates `SoxAssertion` nodes and
    /// `EnforcesAssertion` edges.
    pub include_sox: bool,
    /// Gates `add_vendors`.
    pub include_vendors: bool,
    /// Gates `add_customers`.
    pub include_customers: bool,
    /// Gates `add_employees`.
    pub include_employees: bool,
    /// Gates `add_p2p_documents` (purchase orders, goods receipts, vendor invoices, payments).
    pub include_p2p: bool,
    /// Gates `add_o2c_documents` (sales orders, deliveries, customer invoices).
    pub include_o2c: bool,
    /// Not read in the visible part of this file; presumably gates sourcing documents — TODO confirm.
    pub include_s2c: bool,
    /// Not read in the visible part of this file; presumably gates payroll/time/expense documents — TODO confirm.
    pub include_h2r: bool,
    /// Not read in the visible part of this file; presumably gates manufacturing documents — TODO confirm.
    pub include_mfg: bool,
    /// Not read in the visible part of this file; presumably gates banking data — TODO confirm.
    pub include_bank: bool,
    /// Not read in the visible part of this file; presumably gates audit data — TODO confirm.
    pub include_audit: bool,
    /// Not read in the visible part of this file; presumably gates compliance data — TODO confirm.
    pub include_compliance: bool,
    /// Not read in the visible part of this file; presumably gates reconciliation data — TODO confirm.
    pub include_r2r: bool,
    /// Not read in the visible part of this file — TODO document at the use site.
    pub events_as_hyperedges: bool,
    /// A counterparty with strictly more documents than this is collapsed into a
    /// single pool node (only under `PoolByCounterparty`).
    pub docs_per_counterparty_threshold: usize,
    /// Gates `add_accounts` (chart-of-accounts nodes).
    pub include_accounts: bool,
    /// Gates `add_journal_entries_as_hyperedges`.
    pub je_as_hyperedges: bool,
    /// Not read in the visible part of this file — TODO document at the use site.
    pub include_cross_layer_edges: bool,
}
165
166impl Default for HypergraphConfig {
167 fn default() -> Self {
168 Self {
169 max_nodes: 50_000,
170 aggregation_strategy: AggregationStrategy::PoolByCounterparty,
171 include_coso: true,
172 include_controls: true,
173 include_sox: true,
174 include_vendors: true,
175 include_customers: true,
176 include_employees: true,
177 include_p2p: true,
178 include_o2c: true,
179 include_s2c: true,
180 include_h2r: true,
181 include_mfg: true,
182 include_bank: true,
183 include_audit: true,
184 include_compliance: true,
185 include_r2r: true,
186 events_as_hyperedges: true,
187 docs_per_counterparty_threshold: 20,
188 include_accounts: true,
189 je_as_hyperedges: true,
190 include_cross_layer_edges: true,
191 }
192 }
193}
194
/// Incrementally assembles a hypergraph (nodes, pairwise edges and hyperedges)
/// from domain objects, subject to a node budget.
pub struct HypergraphBuilder {
    /// Options that gate and tune the `add_*` methods.
    config: HypergraphConfig,
    /// Node budget created from `config.max_nodes`.
    budget: NodeBudget,
    /// Nodes collected so far.
    nodes: Vec<HypergraphNode>,
    /// Pairwise edges collected so far.
    edges: Vec<CrossLayerEdge>,
    /// Hyperedges collected so far (e.g. one per journal entry).
    hyperedges: Vec<Hyperedge>,
    /// Lookup keyed by node id; used to test whether a node already exists.
    /// The value is presumably the node's position in `nodes` — TODO confirm in `try_add_node`.
    node_index: HashMap<String, usize>,
    /// Incremented once per pool candidate processed by the document methods.
    aggregate_count: usize,
    /// Control id → node id, for controls whose node was added.
    control_node_ids: HashMap<String, String>,
    /// `Debug` name of a COSO component → node id.
    coso_component_ids: HashMap<String, String>,
    /// GL account number → node id.
    account_node_ids: HashMap<String, String>,
    /// Vendor id → node id.
    vendor_node_ids: HashMap<String, String>,
    /// Customer id → node id.
    customer_node_ids: HashMap<String, String>,
    /// Employee id → node id.
    employee_node_ids: HashMap<String, String>,
    /// `(document or pool node id, counterparty kind, counterparty id)`; the kind
    /// is `"vendor"` or `"customer"` in the visible code.
    doc_counterparty_links: Vec<(String, String, String)>,
    /// Not written in the visible part of this file; presumably compliance
    /// standard id → node id — TODO confirm.
    standard_node_ids: HashMap<String, String>,
    /// Not written in the visible part of this file; presumably
    /// (finding node id, control id) pairs — TODO confirm.
    compliance_finding_control_links: Vec<(String, String)>,
    /// Not written in the visible part of this file; presumably
    /// (standard node id, account code) pairs — TODO confirm.
    #[allow(dead_code)]
    standard_account_links: Vec<(String, String)>,
}
229
230impl HypergraphBuilder {
231 pub fn new(config: HypergraphConfig) -> Self {
233 let budget = NodeBudget::new(config.max_nodes);
234 Self {
235 config,
236 budget,
237 nodes: Vec::new(),
238 edges: Vec::new(),
239 hyperedges: Vec::new(),
240 node_index: HashMap::new(),
241 aggregate_count: 0,
242 control_node_ids: HashMap::new(),
243 coso_component_ids: HashMap::new(),
244 account_node_ids: HashMap::new(),
245 vendor_node_ids: HashMap::new(),
246 customer_node_ids: HashMap::new(),
247 employee_node_ids: HashMap::new(),
248 doc_counterparty_links: Vec::new(),
249 standard_node_ids: HashMap::new(),
250 compliance_finding_control_links: Vec::new(),
251 standard_account_links: Vec::new(),
252 }
253 }
254
255 pub fn add_coso_framework(&mut self) {
257 if !self.config.include_coso {
258 return;
259 }
260
261 let components = [
262 (CosoComponent::ControlEnvironment, "Control Environment"),
263 (CosoComponent::RiskAssessment, "Risk Assessment"),
264 (CosoComponent::ControlActivities, "Control Activities"),
265 (
266 CosoComponent::InformationCommunication,
267 "Information & Communication",
268 ),
269 (CosoComponent::MonitoringActivities, "Monitoring Activities"),
270 ];
271
272 for (component, name) in &components {
273 let id = format!("coso_comp_{}", name.replace(' ', "_").replace('&', "and"));
274 if self.try_add_node(HypergraphNode {
275 id: id.clone(),
276 entity_type: "CosoComponent".to_string(),
277 entity_type_code: type_codes::COSO_COMPONENT,
278 layer: HypergraphLayer::GovernanceControls,
279 external_id: format!("{component:?}"),
280 label: name.to_string(),
281 properties: HashMap::new(),
282 features: vec![component_to_feature(component)],
283 is_anomaly: false,
284 anomaly_type: None,
285 is_aggregate: false,
286 aggregate_count: 0,
287 }) {
288 self.coso_component_ids.insert(format!("{component:?}"), id);
289 }
290 }
291
292 let principles = [
293 (
294 CosoPrinciple::IntegrityAndEthics,
295 "Integrity and Ethics",
296 CosoComponent::ControlEnvironment,
297 ),
298 (
299 CosoPrinciple::BoardOversight,
300 "Board Oversight",
301 CosoComponent::ControlEnvironment,
302 ),
303 (
304 CosoPrinciple::OrganizationalStructure,
305 "Organizational Structure",
306 CosoComponent::ControlEnvironment,
307 ),
308 (
309 CosoPrinciple::CommitmentToCompetence,
310 "Commitment to Competence",
311 CosoComponent::ControlEnvironment,
312 ),
313 (
314 CosoPrinciple::Accountability,
315 "Accountability",
316 CosoComponent::ControlEnvironment,
317 ),
318 (
319 CosoPrinciple::ClearObjectives,
320 "Clear Objectives",
321 CosoComponent::RiskAssessment,
322 ),
323 (
324 CosoPrinciple::IdentifyRisks,
325 "Identify Risks",
326 CosoComponent::RiskAssessment,
327 ),
328 (
329 CosoPrinciple::FraudRisk,
330 "Fraud Risk",
331 CosoComponent::RiskAssessment,
332 ),
333 (
334 CosoPrinciple::ChangeIdentification,
335 "Change Identification",
336 CosoComponent::RiskAssessment,
337 ),
338 (
339 CosoPrinciple::ControlActions,
340 "Control Actions",
341 CosoComponent::ControlActivities,
342 ),
343 (
344 CosoPrinciple::TechnologyControls,
345 "Technology Controls",
346 CosoComponent::ControlActivities,
347 ),
348 (
349 CosoPrinciple::PoliciesAndProcedures,
350 "Policies and Procedures",
351 CosoComponent::ControlActivities,
352 ),
353 (
354 CosoPrinciple::QualityInformation,
355 "Quality Information",
356 CosoComponent::InformationCommunication,
357 ),
358 (
359 CosoPrinciple::InternalCommunication,
360 "Internal Communication",
361 CosoComponent::InformationCommunication,
362 ),
363 (
364 CosoPrinciple::ExternalCommunication,
365 "External Communication",
366 CosoComponent::InformationCommunication,
367 ),
368 (
369 CosoPrinciple::OngoingMonitoring,
370 "Ongoing Monitoring",
371 CosoComponent::MonitoringActivities,
372 ),
373 (
374 CosoPrinciple::DeficiencyEvaluation,
375 "Deficiency Evaluation",
376 CosoComponent::MonitoringActivities,
377 ),
378 ];
379
380 for (principle, name, parent_component) in &principles {
381 let principle_id = format!("coso_prin_{}", name.replace(' ', "_").replace('&', "and"));
382 if self.try_add_node(HypergraphNode {
383 id: principle_id.clone(),
384 entity_type: "CosoPrinciple".to_string(),
385 entity_type_code: type_codes::COSO_PRINCIPLE,
386 layer: HypergraphLayer::GovernanceControls,
387 external_id: format!("{principle:?}"),
388 label: name.to_string(),
389 properties: {
390 let mut p = HashMap::new();
391 p.insert(
392 "principle_number".to_string(),
393 Value::Number(principle.principle_number().into()),
394 );
395 p
396 },
397 features: vec![principle.principle_number() as f64],
398 is_anomaly: false,
399 anomaly_type: None,
400 is_aggregate: false,
401 aggregate_count: 0,
402 }) {
403 let comp_key = format!("{parent_component:?}");
405 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
406 self.edges.push(CrossLayerEdge {
407 source_id: principle_id,
408 source_layer: HypergraphLayer::GovernanceControls,
409 target_id: comp_id.clone(),
410 target_layer: HypergraphLayer::GovernanceControls,
411 edge_type: "CoversCosoPrinciple".to_string(),
412 edge_type_code: type_codes::COVERS_COSO_PRINCIPLE,
413 properties: HashMap::new(),
414 });
415 }
416 }
417 }
418 }
419
420 pub fn add_controls(&mut self, controls: &[InternalControl]) {
422 if !self.config.include_controls {
423 return;
424 }
425
426 for control in controls {
427 let node_id = format!("ctrl_{}", control.control_id);
428 if self.try_add_node(HypergraphNode {
429 id: node_id.clone(),
430 entity_type: "InternalControl".to_string(),
431 entity_type_code: type_codes::INTERNAL_CONTROL,
432 layer: HypergraphLayer::GovernanceControls,
433 external_id: control.control_id.clone(),
434 label: control.control_name.clone(),
435 properties: {
436 let mut p = HashMap::new();
437 p.insert(
438 "control_type".to_string(),
439 Value::String(format!("{:?}", control.control_type)),
440 );
441 p.insert(
442 "controlType".to_string(),
443 Value::String(format!("{}", control.control_type).to_lowercase()),
444 );
445 p.insert(
446 "risk_level".to_string(),
447 Value::String(format!("{:?}", control.risk_level)),
448 );
449 p.insert(
450 "is_key_control".to_string(),
451 Value::Bool(control.is_key_control),
452 );
453 p.insert(
454 "isKeyControl".to_string(),
455 Value::Bool(control.is_key_control),
456 );
457 p.insert(
458 "maturity_level".to_string(),
459 Value::String(format!("{:?}", control.maturity_level)),
460 );
461 let effectiveness = match control.maturity_level.level() {
462 4 | 5 => "effective",
463 3 => "partially-effective",
464 _ => "not-tested",
465 };
466 p.insert(
467 "effectiveness".to_string(),
468 Value::String(effectiveness.to_string()),
469 );
470 p.insert(
471 "description".to_string(),
472 Value::String(control.description.clone()),
473 );
474 p.insert(
475 "objective".to_string(),
476 Value::String(control.objective.clone()),
477 );
478 p.insert(
479 "frequency".to_string(),
480 Value::String(format!("{}", control.frequency).to_lowercase()),
481 );
482 p.insert(
483 "owner".to_string(),
484 Value::String(format!("{}", control.owner_role)),
485 );
486 p.insert(
487 "controlId".to_string(),
488 Value::String(control.control_id.clone()),
489 );
490 p.insert(
491 "name".to_string(),
492 Value::String(control.control_name.clone()),
493 );
494 p.insert(
495 "category".to_string(),
496 Value::String(format!("{}", control.control_type)),
497 );
498 p.insert(
499 "automated".to_string(),
500 Value::Bool(matches!(
501 control.control_type,
502 datasynth_core::models::ControlType::Monitoring
503 )),
504 );
505 p.insert(
506 "coso_component".to_string(),
507 Value::String(format!("{:?}", control.coso_component)),
508 );
509 p.insert(
510 "sox_assertion".to_string(),
511 Value::String(format!("{:?}", control.sox_assertion)),
512 );
513 p.insert(
514 "control_scope".to_string(),
515 Value::String(format!("{:?}", control.control_scope)),
516 );
517 p
518 },
519 features: vec![
520 if control.is_key_control { 1.0 } else { 0.0 },
521 control.maturity_level.level() as f64 / 5.0,
522 ],
523 is_anomaly: false,
524 anomaly_type: None,
525 is_aggregate: false,
526 aggregate_count: 0,
527 }) {
528 self.control_node_ids
529 .insert(control.control_id.clone(), node_id.clone());
530
531 let comp_key = format!("{:?}", control.coso_component);
533 if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
534 self.edges.push(CrossLayerEdge {
535 source_id: node_id.clone(),
536 source_layer: HypergraphLayer::GovernanceControls,
537 target_id: comp_id.clone(),
538 target_layer: HypergraphLayer::GovernanceControls,
539 edge_type: "ImplementsControl".to_string(),
540 edge_type_code: type_codes::IMPLEMENTS_CONTROL,
541 properties: HashMap::new(),
542 });
543 }
544
545 if self.config.include_sox {
547 let assertion_id = format!("sox_{:?}", control.sox_assertion).to_lowercase();
548 if !self.node_index.contains_key(&assertion_id) {
550 self.try_add_node(HypergraphNode {
551 id: assertion_id.clone(),
552 entity_type: "SoxAssertion".to_string(),
553 entity_type_code: type_codes::SOX_ASSERTION,
554 layer: HypergraphLayer::GovernanceControls,
555 external_id: format!("{:?}", control.sox_assertion),
556 label: format!("{:?}", control.sox_assertion),
557 properties: HashMap::new(),
558 features: vec![],
559 is_anomaly: false,
560 anomaly_type: None,
561 is_aggregate: false,
562 aggregate_count: 0,
563 });
564 }
565 self.edges.push(CrossLayerEdge {
566 source_id: node_id,
567 source_layer: HypergraphLayer::GovernanceControls,
568 target_id: assertion_id,
569 target_layer: HypergraphLayer::GovernanceControls,
570 edge_type: "EnforcesAssertion".to_string(),
571 edge_type_code: type_codes::ENFORCES_ASSERTION,
572 properties: HashMap::new(),
573 });
574 }
575 }
576 }
577 }
578
579 pub fn add_vendors(&mut self, vendors: &[Vendor]) {
581 if !self.config.include_vendors {
582 return;
583 }
584
585 for vendor in vendors {
586 let node_id = format!("vnd_{}", vendor.vendor_id);
587 if self.try_add_node(HypergraphNode {
588 id: node_id.clone(),
589 entity_type: "Vendor".to_string(),
590 entity_type_code: type_codes::VENDOR,
591 layer: HypergraphLayer::GovernanceControls,
592 external_id: vendor.vendor_id.clone(),
593 label: vendor.name.clone(),
594 properties: {
595 let mut p = HashMap::new();
596 p.insert(
597 "vendor_type".to_string(),
598 Value::String(format!("{:?}", vendor.vendor_type)),
599 );
600 p.insert("country".to_string(), Value::String(vendor.country.clone()));
601 p.insert("is_active".to_string(), Value::Bool(vendor.is_active));
602 p
603 },
604 features: vec![if vendor.is_active { 1.0 } else { 0.0 }],
605 is_anomaly: false,
606 anomaly_type: None,
607 is_aggregate: false,
608 aggregate_count: 0,
609 }) {
610 self.vendor_node_ids
611 .insert(vendor.vendor_id.clone(), node_id);
612 }
613 }
614 }
615
616 pub fn add_customers(&mut self, customers: &[Customer]) {
618 if !self.config.include_customers {
619 return;
620 }
621
622 for customer in customers {
623 let node_id = format!("cust_{}", customer.customer_id);
624 if self.try_add_node(HypergraphNode {
625 id: node_id.clone(),
626 entity_type: "Customer".to_string(),
627 entity_type_code: type_codes::CUSTOMER,
628 layer: HypergraphLayer::GovernanceControls,
629 external_id: customer.customer_id.clone(),
630 label: customer.name.clone(),
631 properties: {
632 let mut p = HashMap::new();
633 p.insert(
634 "customer_type".to_string(),
635 Value::String(format!("{:?}", customer.customer_type)),
636 );
637 p.insert(
638 "country".to_string(),
639 Value::String(customer.country.clone()),
640 );
641 p.insert(
642 "credit_rating".to_string(),
643 Value::String(format!("{:?}", customer.credit_rating)),
644 );
645 p
646 },
647 features: vec![if customer.is_active { 1.0 } else { 0.0 }],
648 is_anomaly: false,
649 anomaly_type: None,
650 is_aggregate: false,
651 aggregate_count: 0,
652 }) {
653 self.customer_node_ids
654 .insert(customer.customer_id.clone(), node_id);
655 }
656 }
657 }
658
659 pub fn add_employees(&mut self, employees: &[Employee]) {
661 if !self.config.include_employees {
662 return;
663 }
664
665 for employee in employees {
666 let node_id = format!("emp_{}", employee.employee_id);
667 if self.try_add_node(HypergraphNode {
668 id: node_id.clone(),
669 entity_type: "Employee".to_string(),
670 entity_type_code: type_codes::EMPLOYEE,
671 layer: HypergraphLayer::GovernanceControls,
672 external_id: employee.employee_id.clone(),
673 label: employee.display_name.clone(),
674 properties: {
675 let mut p = HashMap::new();
676 p.insert(
677 "persona".to_string(),
678 Value::String(employee.persona.to_string()),
679 );
680 p.insert(
681 "job_level".to_string(),
682 Value::String(format!("{:?}", employee.job_level)),
683 );
684 p.insert(
685 "company_code".to_string(),
686 Value::String(employee.company_code.clone()),
687 );
688 p.insert(
689 "fullName".to_string(),
690 Value::String(employee.display_name.clone()),
691 );
692 p.insert("email".to_string(), Value::String(employee.email.clone()));
693 p.insert(
694 "department".to_string(),
695 Value::String(employee.department_id.clone().unwrap_or_default()),
696 );
697 p.insert(
698 "job_title".to_string(),
699 Value::String(employee.job_title.clone()),
700 );
701 p.insert(
702 "status".to_string(),
703 Value::String(format!("{:?}", employee.status)),
704 );
705 p
706 },
707 features: vec![employee
708 .approval_limit
709 .to_string()
710 .parse::<f64>()
711 .unwrap_or(0.0)
712 .ln_1p()],
713 is_anomaly: false,
714 anomaly_type: None,
715 is_aggregate: false,
716 aggregate_count: 0,
717 }) {
718 self.employee_node_ids
719 .insert(employee.employee_id.clone(), node_id);
720 }
721 }
722 }
723
724 pub fn add_accounts(&mut self, coa: &ChartOfAccounts) {
726 if !self.config.include_accounts {
727 return;
728 }
729
730 for account in &coa.accounts {
731 let node_id = format!("acct_{}", account.account_number);
732 if self.try_add_node(HypergraphNode {
733 id: node_id.clone(),
734 entity_type: "Account".to_string(),
735 entity_type_code: type_codes::ACCOUNT,
736 layer: HypergraphLayer::AccountingNetwork,
737 external_id: account.account_number.clone(),
738 label: account.short_description.clone(),
739 properties: {
740 let mut p = HashMap::new();
741 p.insert(
742 "account_type".to_string(),
743 Value::String(format!("{:?}", account.account_type)),
744 );
745 p.insert(
746 "is_control_account".to_string(),
747 Value::Bool(account.is_control_account),
748 );
749 p.insert("is_postable".to_string(), Value::Bool(account.is_postable));
750 p
751 },
752 features: vec![
753 account_type_feature(&account.account_type),
754 if account.is_control_account { 1.0 } else { 0.0 },
755 if account.normal_debit_balance {
756 1.0
757 } else {
758 0.0
759 },
760 ],
761 is_anomaly: false,
762 anomaly_type: None,
763 is_aggregate: false,
764 aggregate_count: 0,
765 }) {
766 self.account_node_ids
767 .insert(account.account_number.clone(), node_id);
768 }
769 }
770 }
771
772 pub fn add_journal_entries_as_hyperedges(&mut self, entries: &[JournalEntry]) {
776 if !self.config.je_as_hyperedges {
777 return;
778 }
779
780 for entry in entries {
781 let mut participants = Vec::new();
782
783 for line in &entry.lines {
784 let account_id = format!("acct_{}", line.gl_account);
785
786 if !self.node_index.contains_key(&account_id) {
788 self.try_add_node(HypergraphNode {
789 id: account_id.clone(),
790 entity_type: "Account".to_string(),
791 entity_type_code: type_codes::ACCOUNT,
792 layer: HypergraphLayer::AccountingNetwork,
793 external_id: line.gl_account.clone(),
794 label: line
795 .account_description
796 .clone()
797 .unwrap_or_else(|| line.gl_account.clone()),
798 properties: HashMap::new(),
799 features: vec![],
800 is_anomaly: false,
801 anomaly_type: None,
802 is_aggregate: false,
803 aggregate_count: 0,
804 });
805 self.account_node_ids
806 .insert(line.gl_account.clone(), account_id.clone());
807 }
808
809 let amount: f64 = if !line.debit_amount.is_zero() {
810 line.debit_amount.to_string().parse().unwrap_or(0.0)
811 } else {
812 line.credit_amount.to_string().parse().unwrap_or(0.0)
813 };
814
815 let role = if !line.debit_amount.is_zero() {
816 "debit"
817 } else {
818 "credit"
819 };
820
821 participants.push(HyperedgeParticipant {
822 node_id: account_id,
823 role: role.to_string(),
824 weight: Some(amount),
825 });
826 }
827
828 if participants.is_empty() {
829 continue;
830 }
831
832 let doc_id = entry.header.document_id.to_string();
833 let subtype = entry
834 .header
835 .business_process
836 .as_ref()
837 .map(|bp| format!("{bp:?}"))
838 .unwrap_or_else(|| "General".to_string());
839
840 self.hyperedges.push(Hyperedge {
841 id: format!("je_{doc_id}"),
842 hyperedge_type: "JournalEntry".to_string(),
843 subtype,
844 participants,
845 layer: HypergraphLayer::AccountingNetwork,
846 properties: {
847 let mut p = HashMap::new();
848 p.insert("document_id".to_string(), Value::String(doc_id));
849 p.insert(
850 "company_code".to_string(),
851 Value::String(entry.header.company_code.clone()),
852 );
853 p.insert(
854 "document_type".to_string(),
855 Value::String(entry.header.document_type.clone()),
856 );
857 p.insert(
858 "created_by".to_string(),
859 Value::String(entry.header.created_by.clone()),
860 );
861 p
862 },
863 timestamp: Some(entry.header.posting_date),
864 is_anomaly: entry.header.is_anomaly || entry.header.is_fraud,
865 anomaly_type: entry
866 .header
867 .anomaly_type
868 .clone()
869 .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}"))),
870 features: compute_je_features(entry),
871 });
872 }
873 }
874
875 pub fn add_journal_entry_nodes(&mut self, entries: &[JournalEntry]) {
881 for entry in entries {
882 let node_id = format!("je_{}", entry.header.document_id);
883 let total_amount: f64 = entry
884 .lines
885 .iter()
886 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
887 .sum();
888
889 let is_anomaly = entry.header.is_anomaly || entry.header.is_fraud;
890 let anomaly_type = entry
891 .header
892 .anomaly_type
893 .clone()
894 .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}")));
895
896 self.try_add_node(HypergraphNode {
897 id: node_id,
898 entity_type: "JournalEntry".to_string(),
899 entity_type_code: type_codes::JOURNAL_ENTRY,
900 layer: HypergraphLayer::AccountingNetwork,
901 external_id: entry.header.document_id.to_string(),
902 label: format!("JE-{}", entry.header.document_id),
903 properties: {
904 let mut p = HashMap::new();
905 p.insert(
906 "amount".into(),
907 Value::Number(
908 serde_json::Number::from_f64(total_amount)
909 .unwrap_or_else(|| serde_json::Number::from(0)),
910 ),
911 );
912 p.insert(
913 "date".into(),
914 Value::String(entry.header.posting_date.to_string()),
915 );
916 p.insert(
917 "company_code".into(),
918 Value::String(entry.header.company_code.clone()),
919 );
920 p.insert(
921 "line_count".into(),
922 Value::Number((entry.lines.len() as u64).into()),
923 );
924 p.insert("is_anomaly".into(), Value::Bool(is_anomaly));
925 if let Some(ref at) = anomaly_type {
926 p.insert("anomaly_type".into(), Value::String(at.clone()));
927 }
928 p
929 },
930 features: vec![total_amount / 100_000.0],
931 is_anomaly,
932 anomaly_type,
933 is_aggregate: false,
934 aggregate_count: 0,
935 });
936 }
937 }
938
939 pub fn add_p2p_documents(
943 &mut self,
944 purchase_orders: &[datasynth_core::models::documents::PurchaseOrder],
945 goods_receipts: &[datasynth_core::models::documents::GoodsReceipt],
946 vendor_invoices: &[datasynth_core::models::documents::VendorInvoice],
947 payments: &[datasynth_core::models::documents::Payment],
948 ) {
949 if !self.config.include_p2p {
950 return;
951 }
952
953 let mut vendor_doc_counts: HashMap<String, usize> = HashMap::new();
955 for po in purchase_orders {
956 *vendor_doc_counts.entry(po.vendor_id.clone()).or_insert(0) += 1;
957 }
958
959 let threshold = self.config.docs_per_counterparty_threshold;
960 let should_aggregate = matches!(
961 self.config.aggregation_strategy,
962 AggregationStrategy::PoolByCounterparty
963 );
964
965 let vendors_needing_pools: Vec<String> = if should_aggregate {
967 vendor_doc_counts
968 .iter()
969 .filter(|(_, count)| **count > threshold)
970 .map(|(vid, _)| vid.clone())
971 .collect()
972 } else {
973 Vec::new()
974 };
975
976 for vendor_id in &vendors_needing_pools {
978 let count = vendor_doc_counts[vendor_id];
979 let pool_id = format!("pool_p2p_{vendor_id}");
980 if self.try_add_node(HypergraphNode {
981 id: pool_id.clone(),
982 entity_type: "P2PPool".to_string(),
983 entity_type_code: type_codes::POOL_NODE,
984 layer: HypergraphLayer::ProcessEvents,
985 external_id: format!("pool_p2p_{vendor_id}"),
986 label: format!("P2P Pool ({vendor_id}): {count} docs"),
987 properties: {
988 let mut p = HashMap::new();
989 p.insert("vendor_id".to_string(), Value::String(vendor_id.clone()));
990 p.insert("document_count".to_string(), Value::Number(count.into()));
991 p
992 },
993 features: vec![count as f64],
994 is_anomaly: false,
995 anomaly_type: None,
996 is_aggregate: true,
997 aggregate_count: count,
998 }) {
999 self.doc_counterparty_links.push((
1000 pool_id,
1001 "vendor".to_string(),
1002 vendor_id.clone(),
1003 ));
1004 }
1005 self.aggregate_count += 1;
1006 }
1007
1008 for po in purchase_orders {
1010 if should_aggregate && vendors_needing_pools.contains(&po.vendor_id) {
1011 continue; }
1013
1014 let doc_id = &po.header.document_id;
1015 let node_id = format!("po_{doc_id}");
1016 if self.try_add_node(HypergraphNode {
1017 id: node_id.clone(),
1018 entity_type: "PurchaseOrder".to_string(),
1019 entity_type_code: type_codes::PURCHASE_ORDER,
1020 layer: HypergraphLayer::ProcessEvents,
1021 external_id: doc_id.clone(),
1022 label: format!("PO {doc_id}"),
1023 properties: {
1024 let mut p = HashMap::new();
1025 p.insert("vendor_id".to_string(), Value::String(po.vendor_id.clone()));
1026 p.insert(
1027 "company_code".to_string(),
1028 Value::String(po.header.company_code.clone()),
1029 );
1030 p
1031 },
1032 features: vec![po
1033 .total_net_amount
1034 .to_string()
1035 .parse::<f64>()
1036 .unwrap_or(0.0)
1037 .ln_1p()],
1038 is_anomaly: false,
1039 anomaly_type: None,
1040 is_aggregate: false,
1041 aggregate_count: 0,
1042 }) {
1043 self.doc_counterparty_links.push((
1044 node_id,
1045 "vendor".to_string(),
1046 po.vendor_id.clone(),
1047 ));
1048 }
1049 }
1050
1051 for gr in goods_receipts {
1053 let vendor_id = gr.vendor_id.as_deref().unwrap_or("UNKNOWN");
1054 if should_aggregate && vendors_needing_pools.contains(&vendor_id.to_string()) {
1055 continue;
1056 }
1057 let doc_id = &gr.header.document_id;
1058 let node_id = format!("gr_{doc_id}");
1059 self.try_add_node(HypergraphNode {
1060 id: node_id,
1061 entity_type: "GoodsReceipt".to_string(),
1062 entity_type_code: type_codes::GOODS_RECEIPT,
1063 layer: HypergraphLayer::ProcessEvents,
1064 external_id: doc_id.clone(),
1065 label: format!("GR {doc_id}"),
1066 properties: {
1067 let mut p = HashMap::new();
1068 p.insert(
1069 "vendor_id".to_string(),
1070 Value::String(vendor_id.to_string()),
1071 );
1072 p
1073 },
1074 features: vec![gr
1075 .total_value
1076 .to_string()
1077 .parse::<f64>()
1078 .unwrap_or(0.0)
1079 .ln_1p()],
1080 is_anomaly: false,
1081 anomaly_type: None,
1082 is_aggregate: false,
1083 aggregate_count: 0,
1084 });
1085 }
1086
1087 for inv in vendor_invoices {
1089 if should_aggregate && vendors_needing_pools.contains(&inv.vendor_id) {
1090 continue;
1091 }
1092 let doc_id = &inv.header.document_id;
1093 let node_id = format!("vinv_{doc_id}");
1094 self.try_add_node(HypergraphNode {
1095 id: node_id,
1096 entity_type: "VendorInvoice".to_string(),
1097 entity_type_code: type_codes::VENDOR_INVOICE,
1098 layer: HypergraphLayer::ProcessEvents,
1099 external_id: doc_id.clone(),
1100 label: format!("VI {doc_id}"),
1101 properties: {
1102 let mut p = HashMap::new();
1103 p.insert(
1104 "vendor_id".to_string(),
1105 Value::String(inv.vendor_id.clone()),
1106 );
1107 p
1108 },
1109 features: vec![inv
1110 .payable_amount
1111 .to_string()
1112 .parse::<f64>()
1113 .unwrap_or(0.0)
1114 .ln_1p()],
1115 is_anomaly: false,
1116 anomaly_type: None,
1117 is_aggregate: false,
1118 aggregate_count: 0,
1119 });
1120 }
1121
1122 for pmt in payments {
1124 let doc_id = &pmt.header.document_id;
1125 let node_id = format!("pmt_{doc_id}");
1126 self.try_add_node(HypergraphNode {
1127 id: node_id,
1128 entity_type: "Payment".to_string(),
1129 entity_type_code: type_codes::PAYMENT,
1130 layer: HypergraphLayer::ProcessEvents,
1131 external_id: doc_id.clone(),
1132 label: format!("PMT {doc_id}"),
1133 properties: HashMap::new(),
1134 features: vec![pmt.amount.to_string().parse::<f64>().unwrap_or(0.0).ln_1p()],
1135 is_anomaly: false,
1136 anomaly_type: None,
1137 is_aggregate: false,
1138 aggregate_count: 0,
1139 });
1140 }
1141 }
1142
1143 pub fn add_o2c_documents(
1145 &mut self,
1146 sales_orders: &[datasynth_core::models::documents::SalesOrder],
1147 deliveries: &[datasynth_core::models::documents::Delivery],
1148 customer_invoices: &[datasynth_core::models::documents::CustomerInvoice],
1149 ) {
1150 if !self.config.include_o2c {
1151 return;
1152 }
1153
1154 let mut customer_doc_counts: HashMap<String, usize> = HashMap::new();
1156 for so in sales_orders {
1157 *customer_doc_counts
1158 .entry(so.customer_id.clone())
1159 .or_insert(0) += 1;
1160 }
1161
1162 let threshold = self.config.docs_per_counterparty_threshold;
1163 let should_aggregate = matches!(
1164 self.config.aggregation_strategy,
1165 AggregationStrategy::PoolByCounterparty
1166 );
1167
1168 let customers_needing_pools: Vec<String> = if should_aggregate {
1169 customer_doc_counts
1170 .iter()
1171 .filter(|(_, count)| **count > threshold)
1172 .map(|(cid, _)| cid.clone())
1173 .collect()
1174 } else {
1175 Vec::new()
1176 };
1177
1178 for customer_id in &customers_needing_pools {
1180 let count = customer_doc_counts[customer_id];
1181 let pool_id = format!("pool_o2c_{customer_id}");
1182 if self.try_add_node(HypergraphNode {
1183 id: pool_id.clone(),
1184 entity_type: "O2CPool".to_string(),
1185 entity_type_code: type_codes::POOL_NODE,
1186 layer: HypergraphLayer::ProcessEvents,
1187 external_id: format!("pool_o2c_{customer_id}"),
1188 label: format!("O2C Pool ({customer_id}): {count} docs"),
1189 properties: {
1190 let mut p = HashMap::new();
1191 p.insert(
1192 "customer_id".to_string(),
1193 Value::String(customer_id.clone()),
1194 );
1195 p.insert("document_count".to_string(), Value::Number(count.into()));
1196 p
1197 },
1198 features: vec![count as f64],
1199 is_anomaly: false,
1200 anomaly_type: None,
1201 is_aggregate: true,
1202 aggregate_count: count,
1203 }) {
1204 self.doc_counterparty_links.push((
1205 pool_id,
1206 "customer".to_string(),
1207 customer_id.clone(),
1208 ));
1209 }
1210 self.aggregate_count += 1;
1211 }
1212
1213 for so in sales_orders {
1214 if should_aggregate && customers_needing_pools.contains(&so.customer_id) {
1215 continue;
1216 }
1217 let doc_id = &so.header.document_id;
1218 let node_id = format!("so_{doc_id}");
1219 if self.try_add_node(HypergraphNode {
1220 id: node_id.clone(),
1221 entity_type: "SalesOrder".to_string(),
1222 entity_type_code: type_codes::SALES_ORDER,
1223 layer: HypergraphLayer::ProcessEvents,
1224 external_id: doc_id.clone(),
1225 label: format!("SO {doc_id}"),
1226 properties: {
1227 let mut p = HashMap::new();
1228 p.insert(
1229 "customer_id".to_string(),
1230 Value::String(so.customer_id.clone()),
1231 );
1232 p
1233 },
1234 features: vec![so
1235 .total_net_amount
1236 .to_string()
1237 .parse::<f64>()
1238 .unwrap_or(0.0)
1239 .ln_1p()],
1240 is_anomaly: false,
1241 anomaly_type: None,
1242 is_aggregate: false,
1243 aggregate_count: 0,
1244 }) {
1245 self.doc_counterparty_links.push((
1246 node_id,
1247 "customer".to_string(),
1248 so.customer_id.clone(),
1249 ));
1250 }
1251 }
1252
1253 for del in deliveries {
1254 if should_aggregate && customers_needing_pools.contains(&del.customer_id) {
1255 continue;
1256 }
1257 let doc_id = &del.header.document_id;
1258 let node_id = format!("del_{doc_id}");
1259 self.try_add_node(HypergraphNode {
1260 id: node_id,
1261 entity_type: "Delivery".to_string(),
1262 entity_type_code: type_codes::DELIVERY,
1263 layer: HypergraphLayer::ProcessEvents,
1264 external_id: doc_id.clone(),
1265 label: format!("DEL {doc_id}"),
1266 properties: HashMap::new(),
1267 features: vec![],
1268 is_anomaly: false,
1269 anomaly_type: None,
1270 is_aggregate: false,
1271 aggregate_count: 0,
1272 });
1273 }
1274
1275 for inv in customer_invoices {
1276 if should_aggregate && customers_needing_pools.contains(&inv.customer_id) {
1277 continue;
1278 }
1279 let doc_id = &inv.header.document_id;
1280 let node_id = format!("cinv_{doc_id}");
1281 self.try_add_node(HypergraphNode {
1282 id: node_id,
1283 entity_type: "CustomerInvoice".to_string(),
1284 entity_type_code: type_codes::CUSTOMER_INVOICE,
1285 layer: HypergraphLayer::ProcessEvents,
1286 external_id: doc_id.clone(),
1287 label: format!("CI {doc_id}"),
1288 properties: HashMap::new(),
1289 features: vec![inv
1290 .total_gross_amount
1291 .to_string()
1292 .parse::<f64>()
1293 .unwrap_or(0.0)
1294 .ln_1p()],
1295 is_anomaly: false,
1296 anomaly_type: None,
1297 is_aggregate: false,
1298 aggregate_count: 0,
1299 });
1300 }
1301 }
1302
1303 pub fn add_s2c_documents(
1305 &mut self,
1306 projects: &[SourcingProject],
1307 qualifications: &[SupplierQualification],
1308 rfx_events: &[RfxEvent],
1309 bids: &[SupplierBid],
1310 evaluations: &[BidEvaluation],
1311 contracts: &[ProcurementContract],
1312 ) {
1313 if !self.config.include_s2c {
1314 return;
1315 }
1316 for p in projects {
1317 let node_id = format!("s2c_proj_{}", p.project_id);
1318 self.try_add_node(HypergraphNode {
1319 id: node_id,
1320 entity_type: "SourcingProject".into(),
1321 entity_type_code: type_codes::SOURCING_PROJECT,
1322 layer: HypergraphLayer::ProcessEvents,
1323 external_id: p.project_id.clone(),
1324 label: format!("SPRJ {}", p.project_id),
1325 properties: HashMap::new(),
1326 features: vec![p
1327 .estimated_annual_spend
1328 .to_string()
1329 .parse::<f64>()
1330 .unwrap_or(0.0)
1331 .ln_1p()],
1332 is_anomaly: false,
1333 anomaly_type: None,
1334 is_aggregate: false,
1335 aggregate_count: 0,
1336 });
1337 }
1338 for q in qualifications {
1339 let node_id = format!("s2c_qual_{}", q.qualification_id);
1340 self.try_add_node(HypergraphNode {
1341 id: node_id,
1342 entity_type: "SupplierQualification".into(),
1343 entity_type_code: type_codes::SUPPLIER_QUALIFICATION,
1344 layer: HypergraphLayer::ProcessEvents,
1345 external_id: q.qualification_id.clone(),
1346 label: format!("SQUAL {}", q.qualification_id),
1347 properties: HashMap::new(),
1348 features: vec![],
1349 is_anomaly: false,
1350 anomaly_type: None,
1351 is_aggregate: false,
1352 aggregate_count: 0,
1353 });
1354 }
1355 for r in rfx_events {
1356 let node_id = format!("s2c_rfx_{}", r.rfx_id);
1357 self.try_add_node(HypergraphNode {
1358 id: node_id,
1359 entity_type: "RfxEvent".into(),
1360 entity_type_code: type_codes::RFX_EVENT,
1361 layer: HypergraphLayer::ProcessEvents,
1362 external_id: r.rfx_id.clone(),
1363 label: format!("RFX {}", r.rfx_id),
1364 properties: HashMap::new(),
1365 features: vec![],
1366 is_anomaly: false,
1367 anomaly_type: None,
1368 is_aggregate: false,
1369 aggregate_count: 0,
1370 });
1371 }
1372 for b in bids {
1373 let node_id = format!("s2c_bid_{}", b.bid_id);
1374 self.try_add_node(HypergraphNode {
1375 id: node_id,
1376 entity_type: "SupplierBid".into(),
1377 entity_type_code: type_codes::SUPPLIER_BID,
1378 layer: HypergraphLayer::ProcessEvents,
1379 external_id: b.bid_id.clone(),
1380 label: format!("BID {}", b.bid_id),
1381 properties: HashMap::new(),
1382 features: vec![b
1383 .total_amount
1384 .to_string()
1385 .parse::<f64>()
1386 .unwrap_or(0.0)
1387 .ln_1p()],
1388 is_anomaly: false,
1389 anomaly_type: None,
1390 is_aggregate: false,
1391 aggregate_count: 0,
1392 });
1393 }
1394 for e in evaluations {
1395 let node_id = format!("s2c_eval_{}", e.evaluation_id);
1396 self.try_add_node(HypergraphNode {
1397 id: node_id,
1398 entity_type: "BidEvaluation".into(),
1399 entity_type_code: type_codes::BID_EVALUATION,
1400 layer: HypergraphLayer::ProcessEvents,
1401 external_id: e.evaluation_id.clone(),
1402 label: format!("BEVAL {}", e.evaluation_id),
1403 properties: HashMap::new(),
1404 features: vec![],
1405 is_anomaly: false,
1406 anomaly_type: None,
1407 is_aggregate: false,
1408 aggregate_count: 0,
1409 });
1410 }
1411 for c in contracts {
1412 let node_id = format!("s2c_ctr_{}", c.contract_id);
1413 self.try_add_node(HypergraphNode {
1414 id: node_id,
1415 entity_type: "ProcurementContract".into(),
1416 entity_type_code: type_codes::PROCUREMENT_CONTRACT,
1417 layer: HypergraphLayer::ProcessEvents,
1418 external_id: c.contract_id.clone(),
1419 label: format!("CTR {}", c.contract_id),
1420 properties: HashMap::new(),
1421 features: vec![c
1422 .total_value
1423 .to_string()
1424 .parse::<f64>()
1425 .unwrap_or(0.0)
1426 .ln_1p()],
1427 is_anomaly: false,
1428 anomaly_type: None,
1429 is_aggregate: false,
1430 aggregate_count: 0,
1431 });
1432 self.doc_counterparty_links.push((
1434 format!("s2c_ctr_{}", c.contract_id),
1435 "vendor".into(),
1436 c.vendor_id.clone(),
1437 ));
1438 }
1439 }
1440
1441 pub fn add_h2r_documents(
1443 &mut self,
1444 payroll_runs: &[PayrollRun],
1445 time_entries: &[TimeEntry],
1446 expense_reports: &[ExpenseReport],
1447 ) {
1448 if !self.config.include_h2r {
1449 return;
1450 }
1451 for pr in payroll_runs {
1452 let node_id = format!("h2r_pay_{}", pr.payroll_id);
1453 self.try_add_node(HypergraphNode {
1454 id: node_id,
1455 entity_type: "PayrollRun".into(),
1456 entity_type_code: type_codes::PAYROLL_RUN,
1457 layer: HypergraphLayer::ProcessEvents,
1458 external_id: pr.payroll_id.clone(),
1459 label: format!("PAY {}", pr.payroll_id),
1460 properties: HashMap::new(),
1461 features: vec![pr
1462 .total_gross
1463 .to_string()
1464 .parse::<f64>()
1465 .unwrap_or(0.0)
1466 .ln_1p()],
1467 is_anomaly: false,
1468 anomaly_type: None,
1469 is_aggregate: false,
1470 aggregate_count: 0,
1471 });
1472 }
1473 for te in time_entries {
1474 let node_id = format!("h2r_time_{}", te.entry_id);
1475 self.try_add_node(HypergraphNode {
1476 id: node_id,
1477 entity_type: "TimeEntry".into(),
1478 entity_type_code: type_codes::TIME_ENTRY,
1479 layer: HypergraphLayer::ProcessEvents,
1480 external_id: te.entry_id.clone(),
1481 label: format!("TIME {}", te.entry_id),
1482 properties: HashMap::new(),
1483 features: vec![te.hours_regular + te.hours_overtime],
1484 is_anomaly: false,
1485 anomaly_type: None,
1486 is_aggregate: false,
1487 aggregate_count: 0,
1488 });
1489 }
1490 for er in expense_reports {
1491 let node_id = format!("h2r_exp_{}", er.report_id);
1492 self.try_add_node(HypergraphNode {
1493 id: node_id,
1494 entity_type: "ExpenseReport".into(),
1495 entity_type_code: type_codes::EXPENSE_REPORT,
1496 layer: HypergraphLayer::ProcessEvents,
1497 external_id: er.report_id.clone(),
1498 label: format!("EXP {}", er.report_id),
1499 properties: HashMap::new(),
1500 features: vec![er
1501 .total_amount
1502 .to_string()
1503 .parse::<f64>()
1504 .unwrap_or(0.0)
1505 .ln_1p()],
1506 is_anomaly: false,
1507 anomaly_type: None,
1508 is_aggregate: false,
1509 aggregate_count: 0,
1510 });
1511 }
1512 }
1513
1514 pub fn add_mfg_documents(
1516 &mut self,
1517 production_orders: &[ProductionOrder],
1518 quality_inspections: &[QualityInspection],
1519 cycle_counts: &[CycleCount],
1520 ) {
1521 if !self.config.include_mfg {
1522 return;
1523 }
1524 for po in production_orders {
1525 let node_id = format!("mfg_po_{}", po.order_id);
1526 self.try_add_node(HypergraphNode {
1527 id: node_id,
1528 entity_type: "ProductionOrder".into(),
1529 entity_type_code: type_codes::PRODUCTION_ORDER,
1530 layer: HypergraphLayer::ProcessEvents,
1531 external_id: po.order_id.clone(),
1532 label: format!("PROD {}", po.order_id),
1533 properties: HashMap::new(),
1534 features: vec![po
1535 .planned_quantity
1536 .to_string()
1537 .parse::<f64>()
1538 .unwrap_or(0.0)
1539 .ln_1p()],
1540 is_anomaly: false,
1541 anomaly_type: None,
1542 is_aggregate: false,
1543 aggregate_count: 0,
1544 });
1545 }
1546 for qi in quality_inspections {
1547 let node_id = format!("mfg_qi_{}", qi.inspection_id);
1548 self.try_add_node(HypergraphNode {
1549 id: node_id,
1550 entity_type: "QualityInspection".into(),
1551 entity_type_code: type_codes::QUALITY_INSPECTION,
1552 layer: HypergraphLayer::ProcessEvents,
1553 external_id: qi.inspection_id.clone(),
1554 label: format!("QI {}", qi.inspection_id),
1555 properties: HashMap::new(),
1556 features: vec![qi.defect_rate],
1557 is_anomaly: false,
1558 anomaly_type: None,
1559 is_aggregate: false,
1560 aggregate_count: 0,
1561 });
1562 }
1563 for cc in cycle_counts {
1564 let node_id = format!("mfg_cc_{}", cc.count_id);
1565 self.try_add_node(HypergraphNode {
1566 id: node_id,
1567 entity_type: "CycleCount".into(),
1568 entity_type_code: type_codes::CYCLE_COUNT,
1569 layer: HypergraphLayer::ProcessEvents,
1570 external_id: cc.count_id.clone(),
1571 label: format!("CC {}", cc.count_id),
1572 properties: HashMap::new(),
1573 features: vec![cc.variance_rate],
1574 is_anomaly: false,
1575 anomaly_type: None,
1576 is_aggregate: false,
1577 aggregate_count: 0,
1578 });
1579 }
1580 }
1581
1582 pub fn add_bank_documents(
1584 &mut self,
1585 customers: &[BankingCustomer],
1586 accounts: &[BankAccount],
1587 transactions: &[BankTransaction],
1588 ) {
1589 if !self.config.include_bank {
1590 return;
1591 }
1592 for cust in customers {
1593 let cid = cust.customer_id.to_string();
1594 let node_id = format!("bank_cust_{cid}");
1595 self.try_add_node(HypergraphNode {
1596 id: node_id,
1597 entity_type: "BankingCustomer".into(),
1598 entity_type_code: type_codes::BANKING_CUSTOMER,
1599 layer: HypergraphLayer::ProcessEvents,
1600 external_id: cid,
1601 label: format!("BCUST {}", cust.customer_id),
1602 properties: {
1603 let mut p = HashMap::new();
1604 p.insert(
1605 "customer_type".into(),
1606 Value::String(format!("{:?}", cust.customer_type)),
1607 );
1608 p.insert("name".into(), Value::String(cust.name.legal_name.clone()));
1609 p.insert(
1610 "residence_country".into(),
1611 Value::String(cust.residence_country.clone()),
1612 );
1613 p.insert(
1614 "risk_tier".into(),
1615 Value::String(format!("{:?}", cust.risk_tier)),
1616 );
1617 p.insert("is_pep".into(), Value::Bool(cust.is_pep));
1618 p
1619 },
1620 features: vec![],
1621 is_anomaly: cust.is_mule,
1622 anomaly_type: if cust.is_mule {
1623 Some("mule_account".into())
1624 } else {
1625 None
1626 },
1627 is_aggregate: false,
1628 aggregate_count: 0,
1629 });
1630 }
1631 for acct in accounts {
1632 let aid = acct.account_id.to_string();
1633 let node_id = format!("bank_acct_{aid}");
1634 self.try_add_node(HypergraphNode {
1635 id: node_id,
1636 entity_type: "BankAccount".into(),
1637 entity_type_code: type_codes::BANK_ACCOUNT,
1638 layer: HypergraphLayer::ProcessEvents,
1639 external_id: aid,
1640 label: format!("BACCT {}", acct.account_number),
1641 properties: {
1642 let mut p = HashMap::new();
1643 p.insert(
1644 "account_type".into(),
1645 Value::String(format!("{:?}", acct.account_type)),
1646 );
1647 p.insert("status".into(), Value::String(format!("{:?}", acct.status)));
1648 p.insert("currency".into(), Value::String(acct.currency.clone()));
1649 let balance: f64 = acct.current_balance.to_string().parse().unwrap_or(0.0);
1650 p.insert("balance".into(), serde_json::json!(balance));
1651 p.insert(
1652 "account_number".into(),
1653 Value::String(acct.account_number.clone()),
1654 );
1655 p
1656 },
1657 features: vec![acct
1658 .current_balance
1659 .to_string()
1660 .parse::<f64>()
1661 .unwrap_or(0.0)
1662 .ln_1p()],
1663 is_anomaly: acct.is_mule_account,
1664 anomaly_type: if acct.is_mule_account {
1665 Some("mule_account".into())
1666 } else {
1667 None
1668 },
1669 is_aggregate: false,
1670 aggregate_count: 0,
1671 });
1672 }
1673 for txn in transactions {
1674 let tid = txn.transaction_id.to_string();
1675 let node_id = format!("bank_txn_{tid}");
1676 self.try_add_node(HypergraphNode {
1677 id: node_id,
1678 entity_type: "BankTransaction".into(),
1679 entity_type_code: type_codes::BANK_TRANSACTION,
1680 layer: HypergraphLayer::ProcessEvents,
1681 external_id: tid,
1682 label: format!("BTXN {}", txn.reference),
1683 properties: {
1684 let mut p = HashMap::new();
1685 let amount: f64 = txn.amount.to_string().parse().unwrap_or(0.0);
1686 p.insert("amount".into(), serde_json::json!(amount));
1687 p.insert("currency".into(), Value::String(txn.currency.clone()));
1688 p.insert("reference".into(), Value::String(txn.reference.clone()));
1689 p.insert(
1690 "direction".into(),
1691 Value::String(format!("{:?}", txn.direction)),
1692 );
1693 p.insert(
1694 "channel".into(),
1695 Value::String(format!("{:?}", txn.channel)),
1696 );
1697 p.insert(
1698 "category".into(),
1699 Value::String(format!("{:?}", txn.category)),
1700 );
1701 p.insert(
1702 "transaction_type".into(),
1703 Value::String(txn.transaction_type.clone()),
1704 );
1705 p.insert("status".into(), Value::String(format!("{:?}", txn.status)));
1706 if txn.is_suspicious {
1707 p.insert("isAnomalous".into(), Value::Bool(true));
1708 p.insert("is_suspicious".into(), Value::Bool(true));
1709 if let Some(ref reason) = txn.suspicion_reason {
1710 p.insert(
1711 "suspicion_reason".into(),
1712 Value::String(format!("{reason:?}")),
1713 );
1714 }
1715 if let Some(ref stage) = txn.laundering_stage {
1716 p.insert(
1717 "laundering_stage".into(),
1718 Value::String(format!("{stage:?}")),
1719 );
1720 }
1721 }
1722 p
1723 },
1724 features: vec![txn
1725 .amount
1726 .to_string()
1727 .parse::<f64>()
1728 .unwrap_or(0.0)
1729 .abs()
1730 .ln_1p()],
1731 is_anomaly: txn.is_suspicious,
1732 anomaly_type: txn.suspicion_reason.as_ref().map(|r| format!("{r:?}")),
1733 is_aggregate: false,
1734 aggregate_count: 0,
1735 });
1736 }
1737 }
1738
1739 #[allow(clippy::too_many_arguments)]
1741 pub fn add_audit_documents(
1742 &mut self,
1743 engagements: &[AuditEngagement],
1744 workpapers: &[Workpaper],
1745 findings: &[AuditFinding],
1746 evidence: &[AuditEvidence],
1747 risks: &[RiskAssessment],
1748 judgments: &[ProfessionalJudgment],
1749 ) {
1750 if !self.config.include_audit {
1751 return;
1752 }
1753 for eng in engagements {
1754 let eid = eng.engagement_id.to_string();
1755 let node_id = format!("audit_eng_{eid}");
1756 self.try_add_node(HypergraphNode {
1757 id: node_id,
1758 entity_type: "AuditEngagement".into(),
1759 entity_type_code: type_codes::AUDIT_ENGAGEMENT,
1760 layer: HypergraphLayer::ProcessEvents,
1761 external_id: eid,
1762 label: format!("AENG {}", eng.engagement_ref),
1763 properties: {
1764 let mut p = HashMap::new();
1765 p.insert(
1766 "engagement_ref".into(),
1767 Value::String(eng.engagement_ref.clone()),
1768 );
1769 p.insert("status".into(), Value::String(format!("{:?}", eng.status)));
1770 p.insert(
1771 "engagement_type".into(),
1772 Value::String(format!("{:?}", eng.engagement_type)),
1773 );
1774 p.insert("client_name".into(), Value::String(eng.client_name.clone()));
1775 p.insert("fiscal_year".into(), serde_json::json!(eng.fiscal_year));
1776 let mat: f64 = eng.materiality.to_string().parse().unwrap_or(0.0);
1777 p.insert("materiality".into(), serde_json::json!(mat));
1778 p.insert(
1779 "fieldwork_start".into(),
1780 Value::String(eng.fieldwork_start.to_string()),
1781 );
1782 p.insert(
1783 "fieldwork_end".into(),
1784 Value::String(eng.fieldwork_end.to_string()),
1785 );
1786 p
1787 },
1788 features: vec![eng
1789 .materiality
1790 .to_string()
1791 .parse::<f64>()
1792 .unwrap_or(0.0)
1793 .ln_1p()],
1794 is_anomaly: false,
1795 anomaly_type: None,
1796 is_aggregate: false,
1797 aggregate_count: 0,
1798 });
1799 }
1800 for wp in workpapers {
1801 let wid = wp.workpaper_id.to_string();
1802 let node_id = format!("audit_wp_{wid}");
1803 self.try_add_node(HypergraphNode {
1804 id: node_id,
1805 entity_type: "Workpaper".into(),
1806 entity_type_code: type_codes::WORKPAPER,
1807 layer: HypergraphLayer::ProcessEvents,
1808 external_id: wid,
1809 label: format!("WP {}", wp.workpaper_ref),
1810 properties: {
1811 let mut p = HashMap::new();
1812 p.insert(
1813 "workpaper_ref".into(),
1814 Value::String(wp.workpaper_ref.clone()),
1815 );
1816 p.insert("title".into(), Value::String(wp.title.clone()));
1817 p.insert("status".into(), Value::String(format!("{:?}", wp.status)));
1818 p.insert("section".into(), Value::String(format!("{:?}", wp.section)));
1819 p
1820 },
1821 features: vec![],
1822 is_anomaly: false,
1823 anomaly_type: None,
1824 is_aggregate: false,
1825 aggregate_count: 0,
1826 });
1827 }
1828 for f in findings {
1829 let fid = f.finding_id.to_string();
1830 let node_id = format!("audit_find_{fid}");
1831 self.try_add_node(HypergraphNode {
1832 id: node_id,
1833 entity_type: "AuditFinding".into(),
1834 entity_type_code: type_codes::AUDIT_FINDING,
1835 layer: HypergraphLayer::ProcessEvents,
1836 external_id: fid,
1837 label: format!("AFIND {}", f.finding_ref),
1838 properties: {
1839 let mut p = HashMap::new();
1840 p.insert("finding_ref".into(), Value::String(f.finding_ref.clone()));
1841 p.insert("title".into(), Value::String(f.title.clone()));
1842 p.insert("description".into(), Value::String(f.condition.clone()));
1843 p.insert(
1844 "severity".into(),
1845 Value::String(format!("{:?}", f.severity)),
1846 );
1847 p.insert("status".into(), Value::String(format!("{:?}", f.status)));
1848 p.insert(
1849 "finding_type".into(),
1850 Value::String(format!("{:?}", f.finding_type)),
1851 );
1852 p
1853 },
1854 features: vec![f.severity.score() as f64 / 5.0],
1855 is_anomaly: false,
1856 anomaly_type: None,
1857 is_aggregate: false,
1858 aggregate_count: 0,
1859 });
1860 }
1861 for ev in evidence {
1862 let evid = ev.evidence_id.to_string();
1863 let node_id = format!("audit_ev_{evid}");
1864 self.try_add_node(HypergraphNode {
1865 id: node_id,
1866 entity_type: "AuditEvidence".into(),
1867 entity_type_code: type_codes::AUDIT_EVIDENCE,
1868 layer: HypergraphLayer::ProcessEvents,
1869 external_id: evid,
1870 label: format!("AEV {}", ev.evidence_id),
1871 properties: {
1872 let mut p = HashMap::new();
1873 p.insert(
1874 "evidence_type".into(),
1875 Value::String(format!("{:?}", ev.evidence_type)),
1876 );
1877 p.insert("description".into(), Value::String(ev.description.clone()));
1878 p.insert(
1879 "source_type".into(),
1880 Value::String(format!("{:?}", ev.source_type)),
1881 );
1882 p.insert(
1883 "reliability".into(),
1884 Value::String(format!(
1885 "{:?}",
1886 ev.reliability_assessment.overall_reliability
1887 )),
1888 );
1889 p
1890 },
1891 features: vec![ev.reliability_assessment.overall_reliability.score() as f64 / 3.0],
1892 is_anomaly: false,
1893 anomaly_type: None,
1894 is_aggregate: false,
1895 aggregate_count: 0,
1896 });
1897 }
1898 for r in risks {
1899 let rid = r.risk_id.to_string();
1900 let node_id = format!("audit_risk_{rid}");
1901 self.try_add_node(HypergraphNode {
1902 id: node_id,
1903 entity_type: "RiskAssessment".into(),
1904 entity_type_code: type_codes::RISK_ASSESSMENT,
1905 layer: HypergraphLayer::ProcessEvents,
1906 external_id: rid,
1907 label: format!("ARISK {}", r.risk_ref),
1908 properties: {
1909 let mut p = HashMap::new();
1910 p.insert("status".into(), Value::String("active".into()));
1911 p.insert("risk_ref".into(), Value::String(r.risk_ref.clone()));
1912 p.insert("name".into(), Value::String(r.risk_ref.clone()));
1913 p.insert("description".into(), Value::String(r.description.clone()));
1914 p.insert(
1915 "category".into(),
1916 Value::String(format!("{:?}", r.risk_category)),
1917 );
1918 p.insert(
1919 "account_or_process".into(),
1920 Value::String(r.account_or_process.clone()),
1921 );
1922 let inherent = match r.inherent_risk {
1924 datasynth_core::models::audit::RiskLevel::Low => "low",
1925 datasynth_core::models::audit::RiskLevel::Medium => "medium",
1926 datasynth_core::models::audit::RiskLevel::High => "high",
1927 datasynth_core::models::audit::RiskLevel::Significant => "critical",
1928 };
1929 let control = match r.control_risk {
1930 datasynth_core::models::audit::RiskLevel::Low => "low",
1931 datasynth_core::models::audit::RiskLevel::Medium => "medium",
1932 datasynth_core::models::audit::RiskLevel::High => "high",
1933 datasynth_core::models::audit::RiskLevel::Significant => "critical",
1934 };
1935 p.insert("inherentImpact".into(), Value::String(inherent.into()));
1936 p.insert("inherentLikelihood".into(), Value::String(inherent.into()));
1937 p.insert("residualImpact".into(), Value::String(control.into()));
1938 p.insert("residualLikelihood".into(), Value::String(control.into()));
1939 p.insert(
1940 "riskScore".into(),
1941 serde_json::json!(r.inherent_risk.score() as f64 * 25.0),
1942 );
1943 p.insert("owner".into(), Value::String(r.assessed_by.clone()));
1944 p.insert("isSignificant".into(), Value::Bool(r.is_significant_risk));
1945 p.insert(
1946 "is_significant_risk".into(),
1947 Value::Bool(r.is_significant_risk),
1948 );
1949 p.insert(
1950 "response_nature".into(),
1951 Value::String(format!("{:?}", r.response_nature)),
1952 );
1953 p
1954 },
1955 features: vec![
1956 r.inherent_risk.score() as f64 / 4.0,
1957 r.control_risk.score() as f64 / 4.0,
1958 if r.is_significant_risk { 1.0 } else { 0.0 },
1959 ],
1960 is_anomaly: false,
1961 anomaly_type: None,
1962 is_aggregate: false,
1963 aggregate_count: 0,
1964 });
1965 }
1966 for j in judgments {
1967 let jid = j.judgment_id.to_string();
1968 let node_id = format!("audit_judg_{jid}");
1969 self.try_add_node(HypergraphNode {
1970 id: node_id,
1971 entity_type: "ProfessionalJudgment".into(),
1972 entity_type_code: type_codes::PROFESSIONAL_JUDGMENT,
1973 layer: HypergraphLayer::ProcessEvents,
1974 external_id: jid,
1975 label: format!("AJUDG {}", j.judgment_id),
1976 properties: {
1977 let mut p = HashMap::new();
1978 p.insert("judgment_ref".into(), Value::String(j.judgment_ref.clone()));
1979 p.insert("subject".into(), Value::String(j.subject.clone()));
1980 p.insert(
1981 "description".into(),
1982 Value::String(j.issue_description.clone()),
1983 );
1984 p.insert("conclusion".into(), Value::String(j.conclusion.clone()));
1985 p.insert(
1986 "judgment_type".into(),
1987 Value::String(format!("{:?}", j.judgment_type)),
1988 );
1989 p
1990 },
1991 features: vec![],
1992 is_anomaly: false,
1993 anomaly_type: None,
1994 is_aggregate: false,
1995 aggregate_count: 0,
1996 });
1997 }
1998 }
1999
2000 pub fn add_bank_recon_documents(&mut self, reconciliations: &[BankReconciliation]) {
2002 if !self.config.include_r2r {
2003 return;
2004 }
2005 for recon in reconciliations {
2006 let node_id = format!("recon_{}", recon.reconciliation_id);
2007 self.try_add_node(HypergraphNode {
2008 id: node_id,
2009 entity_type: "BankReconciliation".into(),
2010 entity_type_code: type_codes::BANK_RECONCILIATION,
2011 layer: HypergraphLayer::ProcessEvents,
2012 external_id: recon.reconciliation_id.clone(),
2013 label: format!("RECON {}", recon.reconciliation_id),
2014 properties: HashMap::new(),
2015 features: vec![recon
2016 .bank_ending_balance
2017 .to_string()
2018 .parse::<f64>()
2019 .unwrap_or(0.0)
2020 .ln_1p()],
2021 is_anomaly: false,
2022 anomaly_type: None,
2023 is_aggregate: false,
2024 aggregate_count: 0,
2025 });
2026 for line in &recon.statement_lines {
2027 let node_id = format!("recon_line_{}", line.line_id);
2028 self.try_add_node(HypergraphNode {
2029 id: node_id,
2030 entity_type: "BankStatementLine".into(),
2031 entity_type_code: type_codes::BANK_STATEMENT_LINE,
2032 layer: HypergraphLayer::ProcessEvents,
2033 external_id: line.line_id.clone(),
2034 label: format!("BSL {}", line.line_id),
2035 properties: HashMap::new(),
2036 features: vec![line
2037 .amount
2038 .to_string()
2039 .parse::<f64>()
2040 .unwrap_or(0.0)
2041 .abs()
2042 .ln_1p()],
2043 is_anomaly: false,
2044 anomaly_type: None,
2045 is_aggregate: false,
2046 aggregate_count: 0,
2047 });
2048 }
2049 for item in &recon.reconciling_items {
2050 let node_id = format!("recon_item_{}", item.item_id);
2051 self.try_add_node(HypergraphNode {
2052 id: node_id,
2053 entity_type: "ReconcilingItem".into(),
2054 entity_type_code: type_codes::RECONCILING_ITEM,
2055 layer: HypergraphLayer::ProcessEvents,
2056 external_id: item.item_id.clone(),
2057 label: format!("RITEM {}", item.item_id),
2058 properties: HashMap::new(),
2059 features: vec![item
2060 .amount
2061 .to_string()
2062 .parse::<f64>()
2063 .unwrap_or(0.0)
2064 .abs()
2065 .ln_1p()],
2066 is_anomaly: false,
2067 anomaly_type: None,
2068 is_aggregate: false,
2069 aggregate_count: 0,
2070 });
2071 }
2072 }
2073 }
2074
2075 pub fn add_ocpm_events(&mut self, event_log: &datasynth_ocpm::OcpmEventLog) {
2077 if !self.config.events_as_hyperedges {
2078 return;
2079 }
2080 for event in &event_log.events {
2081 let participants: Vec<HyperedgeParticipant> = event
2082 .object_refs
2083 .iter()
2084 .map(|obj_ref| {
2085 let node_id = format!("ocpm_obj_{}", obj_ref.object_id);
2086 self.try_add_node(HypergraphNode {
2088 id: node_id.clone(),
2089 entity_type: "OcpmObject".into(),
2090 entity_type_code: type_codes::OCPM_EVENT,
2091 layer: HypergraphLayer::ProcessEvents,
2092 external_id: obj_ref.object_id.to_string(),
2093 label: format!("OBJ {}", obj_ref.object_type_id),
2094 properties: HashMap::new(),
2095 features: vec![],
2096 is_anomaly: false,
2097 anomaly_type: None,
2098 is_aggregate: false,
2099 aggregate_count: 0,
2100 });
2101 HyperedgeParticipant {
2102 node_id,
2103 role: format!("{:?}", obj_ref.qualifier),
2104 weight: None,
2105 }
2106 })
2107 .collect();
2108
2109 if !participants.is_empty() {
2110 let mut props = HashMap::new();
2111 props.insert(
2112 "activity_id".into(),
2113 Value::String(event.activity_id.clone()),
2114 );
2115 props.insert(
2116 "timestamp".into(),
2117 Value::String(event.timestamp.to_rfc3339()),
2118 );
2119 if !event.resource_id.is_empty() {
2120 props.insert("resource".into(), Value::String(event.resource_id.clone()));
2121 }
2122
2123 self.hyperedges.push(Hyperedge {
2124 id: format!("ocpm_evt_{}", event.event_id),
2125 hyperedge_type: "OcpmEvent".into(),
2126 subtype: event.activity_id.clone(),
2127 participants,
2128 layer: HypergraphLayer::ProcessEvents,
2129 properties: props,
2130 timestamp: Some(event.timestamp.date_naive()),
2131 is_anomaly: false,
2132 anomaly_type: None,
2133 features: vec![],
2134 });
2135 }
2136 }
2137 }
2138
2139 pub fn add_compliance_regulations(
2146 &mut self,
2147 standards: &[ComplianceStandard],
2148 findings: &[ComplianceFinding],
2149 filings: &[RegulatoryFiling],
2150 ) {
2151 if !self.config.include_compliance {
2152 return;
2153 }
2154
2155 for std in standards {
2157 if std.is_superseded() {
2158 continue;
2159 }
2160 let sid = std.id.as_str().to_string();
2161 let node_id = format!("cr_std_{sid}");
2162 if self.try_add_node(HypergraphNode {
2163 id: node_id.clone(),
2164 entity_type: "ComplianceStandard".into(),
2165 entity_type_code: type_codes::COMPLIANCE_STANDARD,
2166 layer: HypergraphLayer::GovernanceControls,
2167 external_id: sid.clone(),
2168 label: format!("{}: {}", sid, std.title),
2169 properties: {
2170 let mut p = HashMap::new();
2171 p.insert("title".into(), Value::String(std.title.clone()));
2172 p.insert("category".into(), Value::String(std.category.to_string()));
2173 p.insert("domain".into(), Value::String(std.domain.to_string()));
2174 p.insert(
2175 "issuingBody".into(),
2176 Value::String(std.issuing_body.to_string()),
2177 );
2178 if !std.applicable_account_types.is_empty() {
2179 p.insert(
2180 "applicableAccountTypes".into(),
2181 Value::Array(
2182 std.applicable_account_types
2183 .iter()
2184 .map(|s| Value::String(s.clone()))
2185 .collect(),
2186 ),
2187 );
2188 }
2189 if !std.applicable_processes.is_empty() {
2190 p.insert(
2191 "applicableProcesses".into(),
2192 Value::Array(
2193 std.applicable_processes
2194 .iter()
2195 .map(|s| Value::String(s.clone()))
2196 .collect(),
2197 ),
2198 );
2199 }
2200 p
2201 },
2202 features: vec![
2203 std.versions.len() as f64,
2204 std.requirements.len() as f64,
2205 std.mandatory_jurisdictions.len() as f64,
2206 ],
2207 is_anomaly: false,
2208 anomaly_type: None,
2209 is_aggregate: false,
2210 aggregate_count: 0,
2211 }) {
2212 self.standard_node_ids.insert(sid.clone(), node_id.clone());
2213
2214 for _acct_type in &std.applicable_account_types {
2216 }
2219 }
2220 }
2221
2222 for finding in findings {
2224 let fid = finding.finding_id.to_string();
2225 let node_id = format!("cr_find_{fid}");
2226 if self.try_add_node(HypergraphNode {
2227 id: node_id.clone(),
2228 entity_type: "ComplianceFinding".into(),
2229 entity_type_code: type_codes::COMPLIANCE_FINDING,
2230 layer: HypergraphLayer::ProcessEvents,
2231 external_id: fid,
2232 label: format!("CF {} [{}]", finding.deficiency_level, finding.company_code),
2233 properties: {
2234 let mut p = HashMap::new();
2235 p.insert("title".into(), Value::String(finding.title.clone()));
2236 p.insert(
2237 "severity".into(),
2238 Value::String(finding.severity.to_string()),
2239 );
2240 p.insert(
2241 "deficiencyLevel".into(),
2242 Value::String(finding.deficiency_level.to_string()),
2243 );
2244 p.insert(
2245 "companyCode".into(),
2246 Value::String(finding.company_code.clone()),
2247 );
2248 p.insert(
2249 "remediationStatus".into(),
2250 Value::String(finding.remediation_status.to_string()),
2251 );
2252 p.insert("isRepeat".into(), Value::Bool(finding.is_repeat));
2253 p.insert(
2254 "identifiedDate".into(),
2255 Value::String(finding.identified_date.to_string()),
2256 );
2257 p
2258 },
2259 features: vec![
2260 finding.severity.score(),
2261 finding.deficiency_level.severity_score(),
2262 if finding.is_repeat { 1.0 } else { 0.0 },
2263 ],
2264 is_anomaly: false,
2265 anomaly_type: None,
2266 is_aggregate: false,
2267 aggregate_count: 0,
2268 }) {
2269 for std_id in &finding.related_standards {
2271 let sid = std_id.as_str().to_string();
2272 if let Some(std_node) = self.standard_node_ids.get(&sid) {
2273 self.edges.push(CrossLayerEdge {
2274 source_id: node_id.clone(),
2275 source_layer: HypergraphLayer::ProcessEvents,
2276 target_id: std_node.clone(),
2277 target_layer: HypergraphLayer::GovernanceControls,
2278 edge_type: "FindingOnStandard".to_string(),
2279 edge_type_code: type_codes::GOVERNED_BY_STANDARD,
2280 properties: HashMap::new(),
2281 });
2282 }
2283 }
2284
2285 if let Some(ref ctrl_id) = finding.control_id {
2287 self.compliance_finding_control_links
2288 .push((node_id, ctrl_id.clone()));
2289 }
2290 }
2291 }
2292
2293 for filing in filings {
2295 let filing_key = format!(
2296 "{}_{}_{}_{}",
2297 filing.filing_type, filing.company_code, filing.jurisdiction, filing.period_end
2298 );
2299 let node_id = format!("cr_filing_{filing_key}");
2300 self.try_add_node(HypergraphNode {
2301 id: node_id,
2302 entity_type: "RegulatoryFiling".into(),
2303 entity_type_code: type_codes::REGULATORY_FILING,
2304 layer: HypergraphLayer::ProcessEvents,
2305 external_id: filing_key,
2306 label: format!("{} [{}]", filing.filing_type, filing.company_code),
2307 properties: {
2308 let mut p = HashMap::new();
2309 p.insert(
2310 "filingType".into(),
2311 Value::String(filing.filing_type.to_string()),
2312 );
2313 p.insert(
2314 "companyCode".into(),
2315 Value::String(filing.company_code.clone()),
2316 );
2317 p.insert(
2318 "jurisdiction".into(),
2319 Value::String(filing.jurisdiction.clone()),
2320 );
2321 p.insert(
2322 "status".into(),
2323 Value::String(format!("{:?}", filing.status)),
2324 );
2325 p.insert(
2326 "periodEnd".into(),
2327 Value::String(filing.period_end.to_string()),
2328 );
2329 p.insert(
2330 "deadline".into(),
2331 Value::String(filing.deadline.to_string()),
2332 );
2333 p
2334 },
2335 features: vec![],
2336 is_anomaly: false,
2337 anomaly_type: None,
2338 is_aggregate: false,
2339 aggregate_count: 0,
2340 });
2341 }
2342 }
2343
2344 pub fn build_cross_layer_edges(&mut self) {
2346 if !self.config.include_cross_layer_edges {
2347 return;
2348 }
2349
2350 let links = std::mem::take(&mut self.doc_counterparty_links);
2352 for (doc_node_id, counterparty_type, counterparty_id) in &links {
2353 let source_node_id = match counterparty_type.as_str() {
2354 "vendor" => self.vendor_node_ids.get(counterparty_id),
2355 "customer" => self.customer_node_ids.get(counterparty_id),
2356 _ => None,
2357 };
2358 if let Some(source_id) = source_node_id {
2359 self.edges.push(CrossLayerEdge {
2360 source_id: source_id.clone(),
2361 source_layer: HypergraphLayer::GovernanceControls,
2362 target_id: doc_node_id.clone(),
2363 target_layer: HypergraphLayer::ProcessEvents,
2364 edge_type: "SuppliesTo".to_string(),
2365 edge_type_code: type_codes::SUPPLIES_TO,
2366 properties: HashMap::new(),
2367 });
2368 }
2369 }
2370 self.doc_counterparty_links = links;
2371
2372 let finding_ctrl_links = std::mem::take(&mut self.compliance_finding_control_links);
2374 for (finding_node_id, ctrl_id) in &finding_ctrl_links {
2375 if let Some(ctrl_node_id) = self.control_node_ids.get(ctrl_id) {
2376 self.edges.push(CrossLayerEdge {
2377 source_id: finding_node_id.clone(),
2378 source_layer: HypergraphLayer::ProcessEvents,
2379 target_id: ctrl_node_id.clone(),
2380 target_layer: HypergraphLayer::GovernanceControls,
2381 edge_type: "FindingOnControl".to_string(),
2382 edge_type_code: type_codes::FINDING_ON_CONTROL,
2383 properties: HashMap::new(),
2384 });
2385 }
2386 }
2387 self.compliance_finding_control_links = finding_ctrl_links;
2388
2389 let std_ids: Vec<(String, String)> = self
2391 .standard_node_ids
2392 .iter()
2393 .map(|(k, v)| (k.clone(), v.clone()))
2394 .collect();
2395 for (std_id, std_node_id) in &std_ids {
2396 if let Some(&node_idx) = self.node_index.get(std_node_id) {
2398 if let Some(node) = self.nodes.get(node_idx) {
2399 if let Some(Value::Array(acct_types)) =
2400 node.properties.get("applicableAccountTypes")
2401 {
2402 let type_strings: Vec<String> = acct_types
2403 .iter()
2404 .filter_map(|v| v.as_str().map(|s| s.to_lowercase()))
2405 .collect();
2406
2407 for (acct_code, acct_node_id) in &self.account_node_ids {
2409 if let Some(&acct_idx) = self.node_index.get(acct_node_id) {
2411 if let Some(acct_node) = self.nodes.get(acct_idx) {
2412 let label_lower = acct_node.label.to_lowercase();
2413 let matches = type_strings.iter().any(|t| {
2414 label_lower.contains(t)
2415 || acct_code.to_lowercase().contains(t)
2416 });
2417 if matches {
2418 self.edges.push(CrossLayerEdge {
2419 source_id: std_node_id.clone(),
2420 source_layer: HypergraphLayer::GovernanceControls,
2421 target_id: acct_node_id.clone(),
2422 target_layer: HypergraphLayer::AccountingNetwork,
2423 edge_type: format!("GovernedByStandard:{}", std_id),
2424 edge_type_code: type_codes::STANDARD_TO_ACCOUNT,
2425 properties: HashMap::new(),
2426 });
2427 }
2428 }
2429 }
2430 }
2431 }
2432 }
2433 }
2434 }
2435
2436 for (_std_id, std_node_id) in &std_ids {
2438 if let Some(&node_idx) = self.node_index.get(std_node_id) {
2439 if let Some(node) = self.nodes.get(node_idx) {
2440 if let Some(Value::Array(processes)) =
2441 node.properties.get("applicableProcesses")
2442 {
2443 let proc_strings: Vec<String> = processes
2444 .iter()
2445 .filter_map(|v| v.as_str().map(|s| s.to_string()))
2446 .collect();
2447
2448 let is_universal = proc_strings.len() >= 5;
2450 if is_universal {
2451 for ctrl_node_id in self.control_node_ids.values() {
2453 self.edges.push(CrossLayerEdge {
2454 source_id: std_node_id.clone(),
2455 source_layer: HypergraphLayer::GovernanceControls,
2456 target_id: ctrl_node_id.clone(),
2457 target_layer: HypergraphLayer::GovernanceControls,
2458 edge_type: "StandardToControl".to_string(),
2459 edge_type_code: type_codes::STANDARD_TO_CONTROL,
2460 properties: HashMap::new(),
2461 });
2462 }
2463 }
2464 }
2465 }
2466 }
2467 }
2468 }
2469
2470 pub fn build(mut self) -> Hypergraph {
2472 self.build_cross_layer_edges();
2474
2475 let mut layer_node_counts: HashMap<String, usize> = HashMap::new();
2477 let mut node_type_counts: HashMap<String, usize> = HashMap::new();
2478 let mut anomalous_nodes = 0;
2479
2480 for node in &self.nodes {
2481 *layer_node_counts
2482 .entry(node.layer.name().to_string())
2483 .or_insert(0) += 1;
2484 *node_type_counts
2485 .entry(node.entity_type.clone())
2486 .or_insert(0) += 1;
2487 if node.is_anomaly {
2488 anomalous_nodes += 1;
2489 }
2490 }
2491
2492 let mut edge_type_counts: HashMap<String, usize> = HashMap::new();
2493 for edge in &self.edges {
2494 *edge_type_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
2495 }
2496
2497 let mut hyperedge_type_counts: HashMap<String, usize> = HashMap::new();
2498 let mut anomalous_hyperedges = 0;
2499 for he in &self.hyperedges {
2500 *hyperedge_type_counts
2501 .entry(he.hyperedge_type.clone())
2502 .or_insert(0) += 1;
2503 if he.is_anomaly {
2504 anomalous_hyperedges += 1;
2505 }
2506 }
2507
2508 let budget_report = NodeBudgetReport {
2509 total_budget: self.budget.total_max(),
2510 total_used: self.budget.total_count(),
2511 layer1_budget: self.budget.layer1_max,
2512 layer1_used: self.budget.layer1_count,
2513 layer2_budget: self.budget.layer2_max,
2514 layer2_used: self.budget.layer2_count,
2515 layer3_budget: self.budget.layer3_max,
2516 layer3_used: self.budget.layer3_count,
2517 aggregate_nodes_created: self.aggregate_count,
2518 aggregation_triggered: self.aggregate_count > 0,
2519 };
2520
2521 let metadata = HypergraphMetadata {
2522 name: "multi_layer_hypergraph".to_string(),
2523 num_nodes: self.nodes.len(),
2524 num_edges: self.edges.len(),
2525 num_hyperedges: self.hyperedges.len(),
2526 layer_node_counts,
2527 node_type_counts,
2528 edge_type_counts,
2529 hyperedge_type_counts,
2530 anomalous_nodes,
2531 anomalous_hyperedges,
2532 source: "datasynth".to_string(),
2533 generated_at: chrono::Utc::now().to_rfc3339(),
2534 budget_report: budget_report.clone(),
2535 files: vec![
2536 "nodes.jsonl".to_string(),
2537 "edges.jsonl".to_string(),
2538 "hyperedges.jsonl".to_string(),
2539 "metadata.json".to_string(),
2540 ],
2541 };
2542
2543 Hypergraph {
2544 nodes: self.nodes,
2545 edges: self.edges,
2546 hyperedges: self.hyperedges,
2547 metadata,
2548 budget_report,
2549 }
2550 }
2551
2552 fn try_add_node(&mut self, node: HypergraphNode) -> bool {
2554 if self.node_index.contains_key(&node.id) {
2555 return false; }
2557
2558 if !self.budget.can_add(node.layer) {
2559 return false; }
2561
2562 let id = node.id.clone();
2563 let layer = node.layer;
2564 self.nodes.push(node);
2565 let idx = self.nodes.len() - 1;
2566 self.node_index.insert(id, idx);
2567 self.budget.record_add(layer);
2568 true
2569 }
2570}
2571
2572fn component_to_feature(component: &CosoComponent) -> f64 {
2574 match component {
2575 CosoComponent::ControlEnvironment => 1.0,
2576 CosoComponent::RiskAssessment => 2.0,
2577 CosoComponent::ControlActivities => 3.0,
2578 CosoComponent::InformationCommunication => 4.0,
2579 CosoComponent::MonitoringActivities => 5.0,
2580 }
2581}
2582
2583fn account_type_feature(account_type: &datasynth_core::models::AccountType) -> f64 {
2585 use datasynth_core::models::AccountType;
2586 match account_type {
2587 AccountType::Asset => 1.0,
2588 AccountType::Liability => 2.0,
2589 AccountType::Equity => 3.0,
2590 AccountType::Revenue => 4.0,
2591 AccountType::Expense => 5.0,
2592 AccountType::Statistical => 6.0,
2593 }
2594}
2595
2596fn compute_je_features(entry: &JournalEntry) -> Vec<f64> {
2598 let total_debit: f64 = entry
2599 .lines
2600 .iter()
2601 .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
2602 .sum();
2603
2604 let line_count = entry.lines.len() as f64;
2605 let posting_date = entry.header.posting_date;
2606 let weekday = posting_date.weekday().num_days_from_monday() as f64 / WEEKDAY_NORMALIZER;
2607 let day = posting_date.day() as f64 / DAY_OF_MONTH_NORMALIZER;
2608 let month = posting_date.month() as f64 / MONTH_NORMALIZER;
2609 let is_month_end = if posting_date.day() >= MONTH_END_DAY_THRESHOLD {
2610 1.0
2611 } else {
2612 0.0
2613 };
2614
2615 vec![
2616 (total_debit.abs() + 1.0).ln(), line_count, weekday, day, month, is_month_end, ]
2623}
2624
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        AccountSubType, AccountType, ChartOfAccounts, CoAComplexity, ControlFrequency, ControlType,
        CosoComponent, CosoMaturityLevel, GLAccount, InternalControl, RiskLevel, SoxAssertion,
        UserPersona,
    };

    /// Minimal chart of accounts: a cash asset (`1000`) and an accounts-payable
    /// liability (`2000`).
    fn make_test_coa() -> ChartOfAccounts {
        let mut chart = ChartOfAccounts::new(
            "TEST_COA".to_string(),
            "Test Chart".to_string(),
            "US".to_string(),
            datasynth_core::models::IndustrySector::Manufacturing,
            CoAComplexity::Small,
        );

        let accounts = [
            ("1000", "Cash", AccountType::Asset, AccountSubType::Cash),
            (
                "2000",
                "AP",
                AccountType::Liability,
                AccountSubType::AccountsPayable,
            ),
        ];
        for (number, name, account_type, sub_type) in accounts {
            chart.add_account(GLAccount::new(
                number.to_string(),
                name.to_string(),
                account_type,
                sub_type,
            ));
        }

        chart
    }

    /// A single key, preventive, transaction-level control (`C001`).
    fn make_test_control() -> InternalControl {
        InternalControl {
            control_id: String::from("C001"),
            control_name: String::from("Three-Way Match"),
            control_type: ControlType::Preventive,
            objective: String::from("Ensure proper matching"),
            frequency: ControlFrequency::Transactional,
            owner_role: UserPersona::Controller,
            risk_level: RiskLevel::High,
            description: String::from("Test control"),
            is_key_control: true,
            sox_assertion: SoxAssertion::Existence,
            coso_component: CosoComponent::ControlActivities,
            coso_principles: vec![CosoPrinciple::ControlActions],
            control_scope: datasynth_core::models::ControlScope::TransactionLevel,
            maturity_level: CosoMaturityLevel::Managed,
        }
    }

    /// The bare COSO framework yields 22 governance-layer nodes and 17
    /// `CoversCosoPrinciple` edges.
    #[test]
    fn test_builder_coso_framework() {
        let graph = {
            let mut b = HypergraphBuilder::new(HypergraphConfig {
                max_nodes: 1000,
                ..Default::default()
            });
            b.add_coso_framework();
            b.build()
        };

        assert_eq!(graph.nodes.len(), 22);
        for node in &graph.nodes {
            assert!(node.layer == HypergraphLayer::GovernanceControls);
        }

        let principle_edges = graph
            .edges
            .iter()
            .filter(|edge| edge.edge_type == "CoversCosoPrinciple")
            .count();
        assert_eq!(principle_edges, 17);
    }

    /// Adding one control on top of the framework brings the node count to 24 and
    /// introduces `InternalControl` and `SoxAssertion` nodes.
    #[test]
    fn test_builder_controls() {
        let graph = {
            let mut b = HypergraphBuilder::new(HypergraphConfig {
                max_nodes: 1000,
                ..Default::default()
            });
            b.add_coso_framework();
            b.add_controls(&[make_test_control()]);
            b.build()
        };

        assert_eq!(graph.nodes.len(), 24);
        let has_entity = |kind: &str| graph.nodes.iter().any(|n| n.entity_type == kind);
        assert!(has_entity("InternalControl"));
        assert!(has_entity("SoxAssertion"));
    }

    /// Both test accounts become nodes in the accounting-network layer.
    #[test]
    fn test_builder_accounts() {
        let graph = {
            let mut b = HypergraphBuilder::new(HypergraphConfig {
                max_nodes: 1000,
                ..Default::default()
            });
            b.add_accounts(&make_test_coa());
            b.build()
        };

        assert_eq!(graph.nodes.len(), 2);
        for node in &graph.nodes {
            assert!(node.layer == HypergraphLayer::AccountingNetwork);
        }
    }

    /// With a total budget of 10 nodes, at most one of the two accounts may be
    /// admitted.
    #[test]
    fn test_budget_enforcement() {
        let tight_config = HypergraphConfig {
            max_nodes: 10,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            include_p2p: false,
            include_o2c: false,
            ..Default::default()
        };

        let graph = {
            let mut b = HypergraphBuilder::new(tight_config);
            b.add_accounts(&make_test_coa());
            b.build()
        };

        assert!(graph.nodes.len() <= 1);
    }

    /// A combined build produces nodes and edges, and the metadata counts agree
    /// with the actual collections.
    #[test]
    fn test_full_build() {
        let graph = {
            let mut b = HypergraphBuilder::new(HypergraphConfig {
                max_nodes: 10000,
                ..Default::default()
            });
            b.add_coso_framework();
            b.add_controls(&[make_test_control()]);
            b.add_accounts(&make_test_coa());
            b.build()
        };

        assert!(!graph.nodes.is_empty());
        assert!(!graph.edges.is_empty());
        assert_eq!(graph.metadata.num_nodes, graph.nodes.len());
        assert_eq!(graph.metadata.num_edges, graph.edges.len());
    }
}