Skip to main content

datasynth_graph/builders/
hypergraph.rs

1//! Multi-layer hypergraph builder for RustGraph integration.
2//!
3//! Constructs a 3-layer hypergraph from accounting data:
4//! - Layer 1: Governance & Controls (COSO, internal controls, master data)
5//! - Layer 2: Process Events (P2P/O2C documents, OCPM events)
6//! - Layer 3: Accounting Network (GL accounts, journal entries as hyperedges)
7//!
8//! Includes a node budget system that allocates capacity across layers and
9//! aggregates overflow nodes into pool nodes when budget is exceeded.
10
11use std::collections::HashMap;
12
13use chrono::Datelike;
14use serde_json::Value;
15
16use datasynth_banking::models::{BankAccount, BankTransaction, BankingCustomer};
17use datasynth_core::models::audit::{
18    AuditEngagement, AuditEvidence, AuditFinding, ProfessionalJudgment, RiskAssessment, Workpaper,
19};
20use datasynth_core::models::sourcing::{
21    BidEvaluation, ProcurementContract, RfxEvent, SourcingProject, SupplierBid,
22    SupplierQualification,
23};
24use datasynth_core::models::ExpenseReport;
25use datasynth_core::models::{
26    BankReconciliation, ChartOfAccounts, CosoComponent, CosoPrinciple, Customer, CycleCount,
27    Employee, InternalControl, JournalEntry, PayrollRun, ProductionOrder, QualityInspection,
28    TimeEntry, Vendor,
29};
30
31use crate::models::hypergraph::{
32    AggregationStrategy, CrossLayerEdge, Hyperedge, HyperedgeParticipant, Hypergraph,
33    HypergraphLayer, HypergraphMetadata, HypergraphNode, NodeBudget, NodeBudgetReport,
34};
35
/// Day-of-month cut-off used when a feature flags a date as "month-end".
const MONTH_END_DAY_THRESHOLD: u32 = 28;
/// Divisor for the weekday feature, where weekdays are numbered 0 (Monday) to 6 (Sunday).
const WEEKDAY_NORMALIZER: f64 = 6.0;
/// Divisor for the day-of-month feature.
const DAY_OF_MONTH_NORMALIZER: f64 = 31.0;
/// Divisor for the month feature.
const MONTH_NORMALIZER: f64 = 12.0;
44
/// Canonical RustGraph entity and edge type codes, mirroring AssureTwin's
/// `entity_registry.rs`.
///
/// The complete upstream set is kept here, including codes that no builder
/// consumes yet, so that new layer builders can reference them immediately.
// Unused codes are intentional (parity with the upstream registry).
#[allow(dead_code)]
mod type_codes {
    // ---- Layer 3: accounting / master data ----
    pub const ACCOUNT: u32 = 100;
    pub const JOURNAL_ENTRY: u32 = 101;

    // ---- People and organizations ----
    pub const VENDOR: u32 = 200;
    pub const CUSTOMER: u32 = 201;
    pub const EMPLOYEE: u32 = 202;
    pub const BANKING_CUSTOMER: u32 = 203;

    // ---- Layer 2: P2P documents ----
    pub const PURCHASE_ORDER: u32 = 300;
    pub const GOODS_RECEIPT: u32 = 301;
    pub const VENDOR_INVOICE: u32 = 302;
    pub const PAYMENT: u32 = 303;

    // ---- Layer 2: O2C documents ----
    pub const SALES_ORDER: u32 = 310;
    pub const DELIVERY: u32 = 311;
    pub const CUSTOMER_INVOICE: u32 = 312;

    // ---- Layer 2: S2C ----
    pub const SOURCING_PROJECT: u32 = 320;
    pub const RFX_EVENT: u32 = 321;
    pub const SUPPLIER_BID: u32 = 322;
    pub const BID_EVALUATION: u32 = 323;
    pub const PROCUREMENT_CONTRACT: u32 = 324;
    pub const SUPPLIER_QUALIFICATION: u32 = 325;

    // ---- Layer 2: H2R ----
    pub const PAYROLL_RUN: u32 = 330;
    pub const TIME_ENTRY: u32 = 331;
    pub const EXPENSE_REPORT: u32 = 332;
    pub const PAYROLL_LINE_ITEM: u32 = 333;

    // ---- Layer 2: MFG ----
    pub const PRODUCTION_ORDER: u32 = 340;
    pub const QUALITY_INSPECTION: u32 = 341;
    pub const CYCLE_COUNT: u32 = 342;

    // ---- Layer 2: BANK ----
    pub const BANK_ACCOUNT: u32 = 350;
    pub const BANK_TRANSACTION: u32 = 351;
    pub const BANK_STATEMENT_LINE: u32 = 352;

    // ---- Layer 2: AUDIT ----
    pub const AUDIT_ENGAGEMENT: u32 = 360;
    pub const WORKPAPER: u32 = 361;
    pub const AUDIT_FINDING: u32 = 362;
    pub const AUDIT_EVIDENCE: u32 = 363;
    pub const RISK_ASSESSMENT: u32 = 364;
    pub const PROFESSIONAL_JUDGMENT: u32 = 365;

    // ---- Layer 2: bank reconciliation (R2R subfamily) ----
    pub const BANK_RECONCILIATION: u32 = 370;
    pub const RECONCILING_ITEM: u32 = 372;

    // ---- Layer 2: OCPM events ----
    pub const OCPM_EVENT: u32 = 400;

    // ---- Pool / aggregate nodes ----
    pub const POOL_NODE: u32 = 399;

    // ---- Layer 1: governance ----
    pub const COSO_COMPONENT: u32 = 500;
    pub const COSO_PRINCIPLE: u32 = 501;
    pub const SOX_ASSERTION: u32 = 502;
    pub const INTERNAL_CONTROL: u32 = 503;
    pub const KYC_PROFILE: u32 = 504;

    // ---- Edge type codes ----
    pub const IMPLEMENTS_CONTROL: u32 = 40;
    pub const GOVERNED_BY_STANDARD: u32 = 41;
    pub const OWNS_CONTROL: u32 = 42;
    pub const OVERSEE_PROCESS: u32 = 43;
    pub const ENFORCES_ASSERTION: u32 = 44;
    pub const SUPPLIES_TO: u32 = 48;
    pub const COVERS_COSO_PRINCIPLE: u32 = 54;
    pub const CONTAINS_ACCOUNT: u32 = 55;
}
121
122/// Configuration for the hypergraph builder.
123#[derive(Debug, Clone)]
124pub struct HypergraphConfig {
125    /// Maximum total nodes across all layers.
126    pub max_nodes: usize,
127    /// Aggregation strategy when budget is exceeded.
128    pub aggregation_strategy: AggregationStrategy,
129    // Layer 1 toggles
130    pub include_coso: bool,
131    pub include_controls: bool,
132    pub include_sox: bool,
133    pub include_vendors: bool,
134    pub include_customers: bool,
135    pub include_employees: bool,
136    // Layer 2 toggles
137    pub include_p2p: bool,
138    pub include_o2c: bool,
139    pub include_s2c: bool,
140    pub include_h2r: bool,
141    pub include_mfg: bool,
142    pub include_bank: bool,
143    pub include_audit: bool,
144    pub include_r2r: bool,
145    pub events_as_hyperedges: bool,
146    /// Documents per counterparty above which aggregation is triggered.
147    pub docs_per_counterparty_threshold: usize,
148    // Layer 3 toggles
149    pub include_accounts: bool,
150    pub je_as_hyperedges: bool,
151    // Cross-layer
152    pub include_cross_layer_edges: bool,
153}
154
155impl Default for HypergraphConfig {
156    fn default() -> Self {
157        Self {
158            max_nodes: 50_000,
159            aggregation_strategy: AggregationStrategy::PoolByCounterparty,
160            include_coso: true,
161            include_controls: true,
162            include_sox: true,
163            include_vendors: true,
164            include_customers: true,
165            include_employees: true,
166            include_p2p: true,
167            include_o2c: true,
168            include_s2c: true,
169            include_h2r: true,
170            include_mfg: true,
171            include_bank: true,
172            include_audit: true,
173            include_r2r: true,
174            events_as_hyperedges: true,
175            docs_per_counterparty_threshold: 20,
176            include_accounts: true,
177            je_as_hyperedges: true,
178            include_cross_layer_edges: true,
179        }
180    }
181}
182
183/// Builder for constructing a multi-layer hypergraph.
184pub struct HypergraphBuilder {
185    config: HypergraphConfig,
186    budget: NodeBudget,
187    nodes: Vec<HypergraphNode>,
188    edges: Vec<CrossLayerEdge>,
189    hyperedges: Vec<Hyperedge>,
190    /// Track node IDs to avoid duplicates: external_id → index in nodes vec.
191    node_index: HashMap<String, usize>,
192    /// Track aggregate node count.
193    aggregate_count: usize,
194    /// Control ID → node ID mapping for cross-layer edges.
195    control_node_ids: HashMap<String, String>,
196    /// COSO component → node ID mapping.
197    coso_component_ids: HashMap<String, String>,
198    /// Account code → node ID mapping.
199    account_node_ids: HashMap<String, String>,
200    /// Vendor ID → node ID mapping.
201    vendor_node_ids: HashMap<String, String>,
202    /// Customer ID → node ID mapping.
203    customer_node_ids: HashMap<String, String>,
204    /// Employee ID → node ID mapping.
205    employee_node_ids: HashMap<String, String>,
206    /// Process document node IDs to their counterparty type and ID.
207    /// (node_id, entity_type) → counterparty_id
208    doc_counterparty_links: Vec<(String, String, String)>, // (doc_node_id, counterparty_type, counterparty_id)
209}
210
211impl HypergraphBuilder {
212    /// Create a new builder with the given configuration.
213    pub fn new(config: HypergraphConfig) -> Self {
214        let budget = NodeBudget::new(config.max_nodes);
215        Self {
216            config,
217            budget,
218            nodes: Vec::new(),
219            edges: Vec::new(),
220            hyperedges: Vec::new(),
221            node_index: HashMap::new(),
222            aggregate_count: 0,
223            control_node_ids: HashMap::new(),
224            coso_component_ids: HashMap::new(),
225            account_node_ids: HashMap::new(),
226            vendor_node_ids: HashMap::new(),
227            customer_node_ids: HashMap::new(),
228            employee_node_ids: HashMap::new(),
229            doc_counterparty_links: Vec::new(),
230        }
231    }
232
233    /// Add COSO framework as Layer 1 nodes (5 components + 17 principles).
234    pub fn add_coso_framework(&mut self) {
235        if !self.config.include_coso {
236            return;
237        }
238
239        let components = [
240            (CosoComponent::ControlEnvironment, "Control Environment"),
241            (CosoComponent::RiskAssessment, "Risk Assessment"),
242            (CosoComponent::ControlActivities, "Control Activities"),
243            (
244                CosoComponent::InformationCommunication,
245                "Information & Communication",
246            ),
247            (CosoComponent::MonitoringActivities, "Monitoring Activities"),
248        ];
249
250        for (component, name) in &components {
251            let id = format!("coso_comp_{}", name.replace(' ', "_").replace('&', "and"));
252            if self.try_add_node(HypergraphNode {
253                id: id.clone(),
254                entity_type: "CosoComponent".to_string(),
255                entity_type_code: type_codes::COSO_COMPONENT,
256                layer: HypergraphLayer::GovernanceControls,
257                external_id: format!("{component:?}"),
258                label: name.to_string(),
259                properties: HashMap::new(),
260                features: vec![component_to_feature(component)],
261                is_anomaly: false,
262                anomaly_type: None,
263                is_aggregate: false,
264                aggregate_count: 0,
265            }) {
266                self.coso_component_ids.insert(format!("{component:?}"), id);
267            }
268        }
269
270        let principles = [
271            (
272                CosoPrinciple::IntegrityAndEthics,
273                "Integrity and Ethics",
274                CosoComponent::ControlEnvironment,
275            ),
276            (
277                CosoPrinciple::BoardOversight,
278                "Board Oversight",
279                CosoComponent::ControlEnvironment,
280            ),
281            (
282                CosoPrinciple::OrganizationalStructure,
283                "Organizational Structure",
284                CosoComponent::ControlEnvironment,
285            ),
286            (
287                CosoPrinciple::CommitmentToCompetence,
288                "Commitment to Competence",
289                CosoComponent::ControlEnvironment,
290            ),
291            (
292                CosoPrinciple::Accountability,
293                "Accountability",
294                CosoComponent::ControlEnvironment,
295            ),
296            (
297                CosoPrinciple::ClearObjectives,
298                "Clear Objectives",
299                CosoComponent::RiskAssessment,
300            ),
301            (
302                CosoPrinciple::IdentifyRisks,
303                "Identify Risks",
304                CosoComponent::RiskAssessment,
305            ),
306            (
307                CosoPrinciple::FraudRisk,
308                "Fraud Risk",
309                CosoComponent::RiskAssessment,
310            ),
311            (
312                CosoPrinciple::ChangeIdentification,
313                "Change Identification",
314                CosoComponent::RiskAssessment,
315            ),
316            (
317                CosoPrinciple::ControlActions,
318                "Control Actions",
319                CosoComponent::ControlActivities,
320            ),
321            (
322                CosoPrinciple::TechnologyControls,
323                "Technology Controls",
324                CosoComponent::ControlActivities,
325            ),
326            (
327                CosoPrinciple::PoliciesAndProcedures,
328                "Policies and Procedures",
329                CosoComponent::ControlActivities,
330            ),
331            (
332                CosoPrinciple::QualityInformation,
333                "Quality Information",
334                CosoComponent::InformationCommunication,
335            ),
336            (
337                CosoPrinciple::InternalCommunication,
338                "Internal Communication",
339                CosoComponent::InformationCommunication,
340            ),
341            (
342                CosoPrinciple::ExternalCommunication,
343                "External Communication",
344                CosoComponent::InformationCommunication,
345            ),
346            (
347                CosoPrinciple::OngoingMonitoring,
348                "Ongoing Monitoring",
349                CosoComponent::MonitoringActivities,
350            ),
351            (
352                CosoPrinciple::DeficiencyEvaluation,
353                "Deficiency Evaluation",
354                CosoComponent::MonitoringActivities,
355            ),
356        ];
357
358        for (principle, name, parent_component) in &principles {
359            let principle_id = format!("coso_prin_{}", name.replace(' ', "_").replace('&', "and"));
360            if self.try_add_node(HypergraphNode {
361                id: principle_id.clone(),
362                entity_type: "CosoPrinciple".to_string(),
363                entity_type_code: type_codes::COSO_PRINCIPLE,
364                layer: HypergraphLayer::GovernanceControls,
365                external_id: format!("{principle:?}"),
366                label: name.to_string(),
367                properties: {
368                    let mut p = HashMap::new();
369                    p.insert(
370                        "principle_number".to_string(),
371                        Value::Number(principle.principle_number().into()),
372                    );
373                    p
374                },
375                features: vec![principle.principle_number() as f64],
376                is_anomaly: false,
377                anomaly_type: None,
378                is_aggregate: false,
379                aggregate_count: 0,
380            }) {
381                // Link principle to its parent component
382                let comp_key = format!("{parent_component:?}");
383                if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
384                    self.edges.push(CrossLayerEdge {
385                        source_id: principle_id,
386                        source_layer: HypergraphLayer::GovernanceControls,
387                        target_id: comp_id.clone(),
388                        target_layer: HypergraphLayer::GovernanceControls,
389                        edge_type: "CoversCosoPrinciple".to_string(),
390                        edge_type_code: type_codes::COVERS_COSO_PRINCIPLE,
391                        properties: HashMap::new(),
392                    });
393                }
394            }
395        }
396    }
397
398    /// Add internal controls as Layer 1 nodes with edges to COSO components.
399    pub fn add_controls(&mut self, controls: &[InternalControl]) {
400        if !self.config.include_controls {
401            return;
402        }
403
404        for control in controls {
405            let node_id = format!("ctrl_{}", control.control_id);
406            if self.try_add_node(HypergraphNode {
407                id: node_id.clone(),
408                entity_type: "InternalControl".to_string(),
409                entity_type_code: type_codes::INTERNAL_CONTROL,
410                layer: HypergraphLayer::GovernanceControls,
411                external_id: control.control_id.clone(),
412                label: control.control_name.clone(),
413                properties: {
414                    let mut p = HashMap::new();
415                    p.insert(
416                        "control_type".to_string(),
417                        Value::String(format!("{:?}", control.control_type)),
418                    );
419                    p.insert(
420                        "controlType".to_string(),
421                        Value::String(format!("{}", control.control_type).to_lowercase()),
422                    );
423                    p.insert(
424                        "risk_level".to_string(),
425                        Value::String(format!("{:?}", control.risk_level)),
426                    );
427                    p.insert(
428                        "is_key_control".to_string(),
429                        Value::Bool(control.is_key_control),
430                    );
431                    p.insert(
432                        "isKeyControl".to_string(),
433                        Value::Bool(control.is_key_control),
434                    );
435                    p.insert(
436                        "maturity_level".to_string(),
437                        Value::String(format!("{:?}", control.maturity_level)),
438                    );
439                    let effectiveness = match control.maturity_level.level() {
440                        4 | 5 => "effective",
441                        3 => "partially-effective",
442                        _ => "not-tested",
443                    };
444                    p.insert(
445                        "effectiveness".to_string(),
446                        Value::String(effectiveness.to_string()),
447                    );
448                    p.insert(
449                        "description".to_string(),
450                        Value::String(control.description.clone()),
451                    );
452                    p.insert(
453                        "objective".to_string(),
454                        Value::String(control.objective.clone()),
455                    );
456                    p.insert(
457                        "frequency".to_string(),
458                        Value::String(format!("{}", control.frequency).to_lowercase()),
459                    );
460                    p.insert(
461                        "owner".to_string(),
462                        Value::String(format!("{}", control.owner_role)),
463                    );
464                    p.insert(
465                        "controlId".to_string(),
466                        Value::String(control.control_id.clone()),
467                    );
468                    p.insert(
469                        "name".to_string(),
470                        Value::String(control.control_name.clone()),
471                    );
472                    p.insert(
473                        "category".to_string(),
474                        Value::String(format!("{}", control.control_type)),
475                    );
476                    p.insert(
477                        "automated".to_string(),
478                        Value::Bool(matches!(
479                            control.control_type,
480                            datasynth_core::models::ControlType::Monitoring
481                        )),
482                    );
483                    p.insert(
484                        "coso_component".to_string(),
485                        Value::String(format!("{:?}", control.coso_component)),
486                    );
487                    p.insert(
488                        "sox_assertion".to_string(),
489                        Value::String(format!("{:?}", control.sox_assertion)),
490                    );
491                    p.insert(
492                        "control_scope".to_string(),
493                        Value::String(format!("{:?}", control.control_scope)),
494                    );
495                    p
496                },
497                features: vec![
498                    if control.is_key_control { 1.0 } else { 0.0 },
499                    control.maturity_level.level() as f64 / 5.0,
500                ],
501                is_anomaly: false,
502                anomaly_type: None,
503                is_aggregate: false,
504                aggregate_count: 0,
505            }) {
506                self.control_node_ids
507                    .insert(control.control_id.clone(), node_id.clone());
508
509                // Edge: Control → COSO Component
510                let comp_key = format!("{:?}", control.coso_component);
511                if let Some(comp_id) = self.coso_component_ids.get(&comp_key) {
512                    self.edges.push(CrossLayerEdge {
513                        source_id: node_id.clone(),
514                        source_layer: HypergraphLayer::GovernanceControls,
515                        target_id: comp_id.clone(),
516                        target_layer: HypergraphLayer::GovernanceControls,
517                        edge_type: "ImplementsControl".to_string(),
518                        edge_type_code: type_codes::IMPLEMENTS_CONTROL,
519                        properties: HashMap::new(),
520                    });
521                }
522
523                // Edge: Control → SOX Assertion
524                if self.config.include_sox {
525                    let assertion_id = format!("sox_{:?}", control.sox_assertion).to_lowercase();
526                    // Ensure SOX assertion node exists
527                    if !self.node_index.contains_key(&assertion_id) {
528                        self.try_add_node(HypergraphNode {
529                            id: assertion_id.clone(),
530                            entity_type: "SoxAssertion".to_string(),
531                            entity_type_code: type_codes::SOX_ASSERTION,
532                            layer: HypergraphLayer::GovernanceControls,
533                            external_id: format!("{:?}", control.sox_assertion),
534                            label: format!("{:?}", control.sox_assertion),
535                            properties: HashMap::new(),
536                            features: vec![],
537                            is_anomaly: false,
538                            anomaly_type: None,
539                            is_aggregate: false,
540                            aggregate_count: 0,
541                        });
542                    }
543                    self.edges.push(CrossLayerEdge {
544                        source_id: node_id,
545                        source_layer: HypergraphLayer::GovernanceControls,
546                        target_id: assertion_id,
547                        target_layer: HypergraphLayer::GovernanceControls,
548                        edge_type: "EnforcesAssertion".to_string(),
549                        edge_type_code: type_codes::ENFORCES_ASSERTION,
550                        properties: HashMap::new(),
551                    });
552                }
553            }
554        }
555    }
556
557    /// Add vendor master data as Layer 1 nodes.
558    pub fn add_vendors(&mut self, vendors: &[Vendor]) {
559        if !self.config.include_vendors {
560            return;
561        }
562
563        for vendor in vendors {
564            let node_id = format!("vnd_{}", vendor.vendor_id);
565            if self.try_add_node(HypergraphNode {
566                id: node_id.clone(),
567                entity_type: "Vendor".to_string(),
568                entity_type_code: type_codes::VENDOR,
569                layer: HypergraphLayer::GovernanceControls,
570                external_id: vendor.vendor_id.clone(),
571                label: vendor.name.clone(),
572                properties: {
573                    let mut p = HashMap::new();
574                    p.insert(
575                        "vendor_type".to_string(),
576                        Value::String(format!("{:?}", vendor.vendor_type)),
577                    );
578                    p.insert("country".to_string(), Value::String(vendor.country.clone()));
579                    p.insert("is_active".to_string(), Value::Bool(vendor.is_active));
580                    p
581                },
582                features: vec![if vendor.is_active { 1.0 } else { 0.0 }],
583                is_anomaly: false,
584                anomaly_type: None,
585                is_aggregate: false,
586                aggregate_count: 0,
587            }) {
588                self.vendor_node_ids
589                    .insert(vendor.vendor_id.clone(), node_id);
590            }
591        }
592    }
593
594    /// Add customer master data as Layer 1 nodes.
595    pub fn add_customers(&mut self, customers: &[Customer]) {
596        if !self.config.include_customers {
597            return;
598        }
599
600        for customer in customers {
601            let node_id = format!("cust_{}", customer.customer_id);
602            if self.try_add_node(HypergraphNode {
603                id: node_id.clone(),
604                entity_type: "Customer".to_string(),
605                entity_type_code: type_codes::CUSTOMER,
606                layer: HypergraphLayer::GovernanceControls,
607                external_id: customer.customer_id.clone(),
608                label: customer.name.clone(),
609                properties: {
610                    let mut p = HashMap::new();
611                    p.insert(
612                        "customer_type".to_string(),
613                        Value::String(format!("{:?}", customer.customer_type)),
614                    );
615                    p.insert(
616                        "country".to_string(),
617                        Value::String(customer.country.clone()),
618                    );
619                    p.insert(
620                        "credit_rating".to_string(),
621                        Value::String(format!("{:?}", customer.credit_rating)),
622                    );
623                    p
624                },
625                features: vec![if customer.is_active { 1.0 } else { 0.0 }],
626                is_anomaly: false,
627                anomaly_type: None,
628                is_aggregate: false,
629                aggregate_count: 0,
630            }) {
631                self.customer_node_ids
632                    .insert(customer.customer_id.clone(), node_id);
633            }
634        }
635    }
636
637    /// Add employee/organizational nodes as Layer 1 nodes.
638    pub fn add_employees(&mut self, employees: &[Employee]) {
639        if !self.config.include_employees {
640            return;
641        }
642
643        for employee in employees {
644            let node_id = format!("emp_{}", employee.employee_id);
645            if self.try_add_node(HypergraphNode {
646                id: node_id.clone(),
647                entity_type: "Employee".to_string(),
648                entity_type_code: type_codes::EMPLOYEE,
649                layer: HypergraphLayer::GovernanceControls,
650                external_id: employee.employee_id.clone(),
651                label: employee.display_name.clone(),
652                properties: {
653                    let mut p = HashMap::new();
654                    p.insert(
655                        "persona".to_string(),
656                        Value::String(employee.persona.to_string()),
657                    );
658                    p.insert(
659                        "job_level".to_string(),
660                        Value::String(format!("{:?}", employee.job_level)),
661                    );
662                    p.insert(
663                        "company_code".to_string(),
664                        Value::String(employee.company_code.clone()),
665                    );
666                    p.insert(
667                        "fullName".to_string(),
668                        Value::String(employee.display_name.clone()),
669                    );
670                    p.insert("email".to_string(), Value::String(employee.email.clone()));
671                    p.insert(
672                        "department".to_string(),
673                        Value::String(employee.department_id.clone().unwrap_or_default()),
674                    );
675                    p.insert(
676                        "job_title".to_string(),
677                        Value::String(employee.job_title.clone()),
678                    );
679                    p.insert(
680                        "status".to_string(),
681                        Value::String(format!("{:?}", employee.status)),
682                    );
683                    p
684                },
685                features: vec![employee
686                    .approval_limit
687                    .to_string()
688                    .parse::<f64>()
689                    .unwrap_or(0.0)
690                    .ln_1p()],
691                is_anomaly: false,
692                anomaly_type: None,
693                is_aggregate: false,
694                aggregate_count: 0,
695            }) {
696                self.employee_node_ids
697                    .insert(employee.employee_id.clone(), node_id);
698            }
699        }
700    }
701
702    /// Add GL accounts as Layer 3 nodes.
703    pub fn add_accounts(&mut self, coa: &ChartOfAccounts) {
704        if !self.config.include_accounts {
705            return;
706        }
707
708        for account in &coa.accounts {
709            let node_id = format!("acct_{}", account.account_number);
710            if self.try_add_node(HypergraphNode {
711                id: node_id.clone(),
712                entity_type: "Account".to_string(),
713                entity_type_code: type_codes::ACCOUNT,
714                layer: HypergraphLayer::AccountingNetwork,
715                external_id: account.account_number.clone(),
716                label: account.short_description.clone(),
717                properties: {
718                    let mut p = HashMap::new();
719                    p.insert(
720                        "account_type".to_string(),
721                        Value::String(format!("{:?}", account.account_type)),
722                    );
723                    p.insert(
724                        "is_control_account".to_string(),
725                        Value::Bool(account.is_control_account),
726                    );
727                    p.insert("is_postable".to_string(), Value::Bool(account.is_postable));
728                    p
729                },
730                features: vec![
731                    account_type_feature(&account.account_type),
732                    if account.is_control_account { 1.0 } else { 0.0 },
733                    if account.normal_debit_balance {
734                        1.0
735                    } else {
736                        0.0
737                    },
738                ],
739                is_anomaly: false,
740                anomaly_type: None,
741                is_aggregate: false,
742                aggregate_count: 0,
743            }) {
744                self.account_node_ids
745                    .insert(account.account_number.clone(), node_id);
746            }
747        }
748    }
749
750    /// Add journal entries as Layer 3 hyperedges.
751    ///
752    /// Each journal entry becomes a hyperedge connecting its debit and credit accounts.
753    pub fn add_journal_entries_as_hyperedges(&mut self, entries: &[JournalEntry]) {
754        if !self.config.je_as_hyperedges {
755            return;
756        }
757
758        for entry in entries {
759            let mut participants = Vec::new();
760
761            for line in &entry.lines {
762                let account_id = format!("acct_{}", line.gl_account);
763
764                // Ensure account node exists (might not if CoA was incomplete)
765                if !self.node_index.contains_key(&account_id) {
766                    self.try_add_node(HypergraphNode {
767                        id: account_id.clone(),
768                        entity_type: "Account".to_string(),
769                        entity_type_code: type_codes::ACCOUNT,
770                        layer: HypergraphLayer::AccountingNetwork,
771                        external_id: line.gl_account.clone(),
772                        label: line
773                            .account_description
774                            .clone()
775                            .unwrap_or_else(|| line.gl_account.clone()),
776                        properties: HashMap::new(),
777                        features: vec![],
778                        is_anomaly: false,
779                        anomaly_type: None,
780                        is_aggregate: false,
781                        aggregate_count: 0,
782                    });
783                    self.account_node_ids
784                        .insert(line.gl_account.clone(), account_id.clone());
785                }
786
787                let amount: f64 = if !line.debit_amount.is_zero() {
788                    line.debit_amount.to_string().parse().unwrap_or(0.0)
789                } else {
790                    line.credit_amount.to_string().parse().unwrap_or(0.0)
791                };
792
793                let role = if !line.debit_amount.is_zero() {
794                    "debit"
795                } else {
796                    "credit"
797                };
798
799                participants.push(HyperedgeParticipant {
800                    node_id: account_id,
801                    role: role.to_string(),
802                    weight: Some(amount),
803                });
804            }
805
806            if participants.is_empty() {
807                continue;
808            }
809
810            let doc_id = entry.header.document_id.to_string();
811            let subtype = entry
812                .header
813                .business_process
814                .as_ref()
815                .map(|bp| format!("{bp:?}"))
816                .unwrap_or_else(|| "General".to_string());
817
818            self.hyperedges.push(Hyperedge {
819                id: format!("je_{doc_id}"),
820                hyperedge_type: "JournalEntry".to_string(),
821                subtype,
822                participants,
823                layer: HypergraphLayer::AccountingNetwork,
824                properties: {
825                    let mut p = HashMap::new();
826                    p.insert("document_id".to_string(), Value::String(doc_id));
827                    p.insert(
828                        "company_code".to_string(),
829                        Value::String(entry.header.company_code.clone()),
830                    );
831                    p.insert(
832                        "document_type".to_string(),
833                        Value::String(entry.header.document_type.clone()),
834                    );
835                    p.insert(
836                        "created_by".to_string(),
837                        Value::String(entry.header.created_by.clone()),
838                    );
839                    p
840                },
841                timestamp: Some(entry.header.posting_date),
842                is_anomaly: entry.header.is_anomaly || entry.header.is_fraud,
843                anomaly_type: entry
844                    .header
845                    .anomaly_type
846                    .clone()
847                    .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}"))),
848                features: compute_je_features(entry),
849            });
850        }
851    }
852
853    /// Add journal entries as standalone Layer 3 nodes.
854    ///
855    /// Creates a node per JE with amount, date, anomaly info, and line count.
856    /// Use alongside `add_journal_entries_as_hyperedges` so the dashboard can
857    /// count JE nodes while the accounting network still has proper hyperedges.
858    pub fn add_journal_entry_nodes(&mut self, entries: &[JournalEntry]) {
859        for entry in entries {
860            let node_id = format!("je_{}", entry.header.document_id);
861            let total_amount: f64 = entry
862                .lines
863                .iter()
864                .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
865                .sum();
866
867            let is_anomaly = entry.header.is_anomaly || entry.header.is_fraud;
868            let anomaly_type = entry
869                .header
870                .anomaly_type
871                .clone()
872                .or_else(|| entry.header.fraud_type.as_ref().map(|ft| format!("{ft:?}")));
873
874            self.try_add_node(HypergraphNode {
875                id: node_id,
876                entity_type: "JournalEntry".to_string(),
877                entity_type_code: type_codes::JOURNAL_ENTRY,
878                layer: HypergraphLayer::AccountingNetwork,
879                external_id: entry.header.document_id.to_string(),
880                label: format!("JE-{}", entry.header.document_id),
881                properties: {
882                    let mut p = HashMap::new();
883                    p.insert(
884                        "amount".into(),
885                        Value::Number(
886                            serde_json::Number::from_f64(total_amount)
887                                .unwrap_or_else(|| serde_json::Number::from(0)),
888                        ),
889                    );
890                    p.insert(
891                        "date".into(),
892                        Value::String(entry.header.posting_date.to_string()),
893                    );
894                    p.insert(
895                        "company_code".into(),
896                        Value::String(entry.header.company_code.clone()),
897                    );
898                    p.insert(
899                        "line_count".into(),
900                        Value::Number((entry.lines.len() as u64).into()),
901                    );
902                    p.insert("is_anomaly".into(), Value::Bool(is_anomaly));
903                    if let Some(ref at) = anomaly_type {
904                        p.insert("anomaly_type".into(), Value::String(at.clone()));
905                    }
906                    p
907                },
908                features: vec![total_amount / 100_000.0],
909                is_anomaly,
910                anomaly_type,
911                is_aggregate: false,
912                aggregate_count: 0,
913            });
914        }
915    }
916
917    /// Add P2P document chains as Layer 2 nodes.
918    ///
919    /// If a vendor has more documents than the threshold, they're aggregated into pool nodes.
920    pub fn add_p2p_documents(
921        &mut self,
922        purchase_orders: &[datasynth_core::models::documents::PurchaseOrder],
923        goods_receipts: &[datasynth_core::models::documents::GoodsReceipt],
924        vendor_invoices: &[datasynth_core::models::documents::VendorInvoice],
925        payments: &[datasynth_core::models::documents::Payment],
926    ) {
927        if !self.config.include_p2p {
928            return;
929        }
930
931        // Count documents per vendor for aggregation decisions
932        let mut vendor_doc_counts: HashMap<String, usize> = HashMap::new();
933        for po in purchase_orders {
934            *vendor_doc_counts.entry(po.vendor_id.clone()).or_insert(0) += 1;
935        }
936
937        let threshold = self.config.docs_per_counterparty_threshold;
938        let should_aggregate = matches!(
939            self.config.aggregation_strategy,
940            AggregationStrategy::PoolByCounterparty
941        );
942
943        // Track which vendors need pool nodes
944        let vendors_needing_pools: Vec<String> = if should_aggregate {
945            vendor_doc_counts
946                .iter()
947                .filter(|(_, count)| **count > threshold)
948                .map(|(vid, _)| vid.clone())
949                .collect()
950        } else {
951            Vec::new()
952        };
953
954        // Create pool nodes for high-volume vendors
955        for vendor_id in &vendors_needing_pools {
956            let count = vendor_doc_counts[vendor_id];
957            let pool_id = format!("pool_p2p_{vendor_id}");
958            if self.try_add_node(HypergraphNode {
959                id: pool_id.clone(),
960                entity_type: "P2PPool".to_string(),
961                entity_type_code: type_codes::POOL_NODE,
962                layer: HypergraphLayer::ProcessEvents,
963                external_id: format!("pool_p2p_{vendor_id}"),
964                label: format!("P2P Pool ({vendor_id}): {count} docs"),
965                properties: {
966                    let mut p = HashMap::new();
967                    p.insert("vendor_id".to_string(), Value::String(vendor_id.clone()));
968                    p.insert("document_count".to_string(), Value::Number(count.into()));
969                    p
970                },
971                features: vec![count as f64],
972                is_anomaly: false,
973                anomaly_type: None,
974                is_aggregate: true,
975                aggregate_count: count,
976            }) {
977                self.doc_counterparty_links.push((
978                    pool_id,
979                    "vendor".to_string(),
980                    vendor_id.clone(),
981                ));
982            }
983            self.aggregate_count += 1;
984        }
985
986        // Add individual PO nodes (if not pooled)
987        for po in purchase_orders {
988            if should_aggregate && vendors_needing_pools.contains(&po.vendor_id) {
989                continue; // Pooled
990            }
991
992            let doc_id = &po.header.document_id;
993            let node_id = format!("po_{doc_id}");
994            if self.try_add_node(HypergraphNode {
995                id: node_id.clone(),
996                entity_type: "PurchaseOrder".to_string(),
997                entity_type_code: type_codes::PURCHASE_ORDER,
998                layer: HypergraphLayer::ProcessEvents,
999                external_id: doc_id.clone(),
1000                label: format!("PO {doc_id}"),
1001                properties: {
1002                    let mut p = HashMap::new();
1003                    p.insert("vendor_id".to_string(), Value::String(po.vendor_id.clone()));
1004                    p.insert(
1005                        "company_code".to_string(),
1006                        Value::String(po.header.company_code.clone()),
1007                    );
1008                    p
1009                },
1010                features: vec![po
1011                    .total_net_amount
1012                    .to_string()
1013                    .parse::<f64>()
1014                    .unwrap_or(0.0)
1015                    .ln_1p()],
1016                is_anomaly: false,
1017                anomaly_type: None,
1018                is_aggregate: false,
1019                aggregate_count: 0,
1020            }) {
1021                self.doc_counterparty_links.push((
1022                    node_id,
1023                    "vendor".to_string(),
1024                    po.vendor_id.clone(),
1025                ));
1026            }
1027        }
1028
1029        // Add GR nodes
1030        for gr in goods_receipts {
1031            let vendor_id = gr.vendor_id.as_deref().unwrap_or("UNKNOWN");
1032            if should_aggregate && vendors_needing_pools.contains(&vendor_id.to_string()) {
1033                continue;
1034            }
1035            let doc_id = &gr.header.document_id;
1036            let node_id = format!("gr_{doc_id}");
1037            self.try_add_node(HypergraphNode {
1038                id: node_id,
1039                entity_type: "GoodsReceipt".to_string(),
1040                entity_type_code: type_codes::GOODS_RECEIPT,
1041                layer: HypergraphLayer::ProcessEvents,
1042                external_id: doc_id.clone(),
1043                label: format!("GR {doc_id}"),
1044                properties: {
1045                    let mut p = HashMap::new();
1046                    p.insert(
1047                        "vendor_id".to_string(),
1048                        Value::String(vendor_id.to_string()),
1049                    );
1050                    p
1051                },
1052                features: vec![gr
1053                    .total_value
1054                    .to_string()
1055                    .parse::<f64>()
1056                    .unwrap_or(0.0)
1057                    .ln_1p()],
1058                is_anomaly: false,
1059                anomaly_type: None,
1060                is_aggregate: false,
1061                aggregate_count: 0,
1062            });
1063        }
1064
1065        // Add vendor invoice nodes
1066        for inv in vendor_invoices {
1067            if should_aggregate && vendors_needing_pools.contains(&inv.vendor_id) {
1068                continue;
1069            }
1070            let doc_id = &inv.header.document_id;
1071            let node_id = format!("vinv_{doc_id}");
1072            self.try_add_node(HypergraphNode {
1073                id: node_id,
1074                entity_type: "VendorInvoice".to_string(),
1075                entity_type_code: type_codes::VENDOR_INVOICE,
1076                layer: HypergraphLayer::ProcessEvents,
1077                external_id: doc_id.clone(),
1078                label: format!("VI {doc_id}"),
1079                properties: {
1080                    let mut p = HashMap::new();
1081                    p.insert(
1082                        "vendor_id".to_string(),
1083                        Value::String(inv.vendor_id.clone()),
1084                    );
1085                    p
1086                },
1087                features: vec![inv
1088                    .payable_amount
1089                    .to_string()
1090                    .parse::<f64>()
1091                    .unwrap_or(0.0)
1092                    .ln_1p()],
1093                is_anomaly: false,
1094                anomaly_type: None,
1095                is_aggregate: false,
1096                aggregate_count: 0,
1097            });
1098        }
1099
1100        // Add payment nodes
1101        for pmt in payments {
1102            let doc_id = &pmt.header.document_id;
1103            let node_id = format!("pmt_{doc_id}");
1104            self.try_add_node(HypergraphNode {
1105                id: node_id,
1106                entity_type: "Payment".to_string(),
1107                entity_type_code: type_codes::PAYMENT,
1108                layer: HypergraphLayer::ProcessEvents,
1109                external_id: doc_id.clone(),
1110                label: format!("PMT {doc_id}"),
1111                properties: HashMap::new(),
1112                features: vec![pmt.amount.to_string().parse::<f64>().unwrap_or(0.0).ln_1p()],
1113                is_anomaly: false,
1114                anomaly_type: None,
1115                is_aggregate: false,
1116                aggregate_count: 0,
1117            });
1118        }
1119    }
1120
1121    /// Add O2C document chains as Layer 2 nodes.
1122    pub fn add_o2c_documents(
1123        &mut self,
1124        sales_orders: &[datasynth_core::models::documents::SalesOrder],
1125        deliveries: &[datasynth_core::models::documents::Delivery],
1126        customer_invoices: &[datasynth_core::models::documents::CustomerInvoice],
1127    ) {
1128        if !self.config.include_o2c {
1129            return;
1130        }
1131
1132        // Count docs per customer for aggregation
1133        let mut customer_doc_counts: HashMap<String, usize> = HashMap::new();
1134        for so in sales_orders {
1135            *customer_doc_counts
1136                .entry(so.customer_id.clone())
1137                .or_insert(0) += 1;
1138        }
1139
1140        let threshold = self.config.docs_per_counterparty_threshold;
1141        let should_aggregate = matches!(
1142            self.config.aggregation_strategy,
1143            AggregationStrategy::PoolByCounterparty
1144        );
1145
1146        let customers_needing_pools: Vec<String> = if should_aggregate {
1147            customer_doc_counts
1148                .iter()
1149                .filter(|(_, count)| **count > threshold)
1150                .map(|(cid, _)| cid.clone())
1151                .collect()
1152        } else {
1153            Vec::new()
1154        };
1155
1156        // Create pool nodes
1157        for customer_id in &customers_needing_pools {
1158            let count = customer_doc_counts[customer_id];
1159            let pool_id = format!("pool_o2c_{customer_id}");
1160            if self.try_add_node(HypergraphNode {
1161                id: pool_id.clone(),
1162                entity_type: "O2CPool".to_string(),
1163                entity_type_code: type_codes::POOL_NODE,
1164                layer: HypergraphLayer::ProcessEvents,
1165                external_id: format!("pool_o2c_{customer_id}"),
1166                label: format!("O2C Pool ({customer_id}): {count} docs"),
1167                properties: {
1168                    let mut p = HashMap::new();
1169                    p.insert(
1170                        "customer_id".to_string(),
1171                        Value::String(customer_id.clone()),
1172                    );
1173                    p.insert("document_count".to_string(), Value::Number(count.into()));
1174                    p
1175                },
1176                features: vec![count as f64],
1177                is_anomaly: false,
1178                anomaly_type: None,
1179                is_aggregate: true,
1180                aggregate_count: count,
1181            }) {
1182                self.doc_counterparty_links.push((
1183                    pool_id,
1184                    "customer".to_string(),
1185                    customer_id.clone(),
1186                ));
1187            }
1188            self.aggregate_count += 1;
1189        }
1190
1191        for so in sales_orders {
1192            if should_aggregate && customers_needing_pools.contains(&so.customer_id) {
1193                continue;
1194            }
1195            let doc_id = &so.header.document_id;
1196            let node_id = format!("so_{doc_id}");
1197            if self.try_add_node(HypergraphNode {
1198                id: node_id.clone(),
1199                entity_type: "SalesOrder".to_string(),
1200                entity_type_code: type_codes::SALES_ORDER,
1201                layer: HypergraphLayer::ProcessEvents,
1202                external_id: doc_id.clone(),
1203                label: format!("SO {doc_id}"),
1204                properties: {
1205                    let mut p = HashMap::new();
1206                    p.insert(
1207                        "customer_id".to_string(),
1208                        Value::String(so.customer_id.clone()),
1209                    );
1210                    p
1211                },
1212                features: vec![so
1213                    .total_net_amount
1214                    .to_string()
1215                    .parse::<f64>()
1216                    .unwrap_or(0.0)
1217                    .ln_1p()],
1218                is_anomaly: false,
1219                anomaly_type: None,
1220                is_aggregate: false,
1221                aggregate_count: 0,
1222            }) {
1223                self.doc_counterparty_links.push((
1224                    node_id,
1225                    "customer".to_string(),
1226                    so.customer_id.clone(),
1227                ));
1228            }
1229        }
1230
1231        for del in deliveries {
1232            if should_aggregate && customers_needing_pools.contains(&del.customer_id) {
1233                continue;
1234            }
1235            let doc_id = &del.header.document_id;
1236            let node_id = format!("del_{doc_id}");
1237            self.try_add_node(HypergraphNode {
1238                id: node_id,
1239                entity_type: "Delivery".to_string(),
1240                entity_type_code: type_codes::DELIVERY,
1241                layer: HypergraphLayer::ProcessEvents,
1242                external_id: doc_id.clone(),
1243                label: format!("DEL {doc_id}"),
1244                properties: HashMap::new(),
1245                features: vec![],
1246                is_anomaly: false,
1247                anomaly_type: None,
1248                is_aggregate: false,
1249                aggregate_count: 0,
1250            });
1251        }
1252
1253        for inv in customer_invoices {
1254            if should_aggregate && customers_needing_pools.contains(&inv.customer_id) {
1255                continue;
1256            }
1257            let doc_id = &inv.header.document_id;
1258            let node_id = format!("cinv_{doc_id}");
1259            self.try_add_node(HypergraphNode {
1260                id: node_id,
1261                entity_type: "CustomerInvoice".to_string(),
1262                entity_type_code: type_codes::CUSTOMER_INVOICE,
1263                layer: HypergraphLayer::ProcessEvents,
1264                external_id: doc_id.clone(),
1265                label: format!("CI {doc_id}"),
1266                properties: HashMap::new(),
1267                features: vec![inv
1268                    .total_gross_amount
1269                    .to_string()
1270                    .parse::<f64>()
1271                    .unwrap_or(0.0)
1272                    .ln_1p()],
1273                is_anomaly: false,
1274                anomaly_type: None,
1275                is_aggregate: false,
1276                aggregate_count: 0,
1277            });
1278        }
1279    }
1280
1281    /// Add S2C (Source-to-Contract) documents as Layer 2 nodes.
1282    pub fn add_s2c_documents(
1283        &mut self,
1284        projects: &[SourcingProject],
1285        qualifications: &[SupplierQualification],
1286        rfx_events: &[RfxEvent],
1287        bids: &[SupplierBid],
1288        evaluations: &[BidEvaluation],
1289        contracts: &[ProcurementContract],
1290    ) {
1291        if !self.config.include_s2c {
1292            return;
1293        }
1294        for p in projects {
1295            let node_id = format!("s2c_proj_{}", p.project_id);
1296            self.try_add_node(HypergraphNode {
1297                id: node_id,
1298                entity_type: "SourcingProject".into(),
1299                entity_type_code: type_codes::SOURCING_PROJECT,
1300                layer: HypergraphLayer::ProcessEvents,
1301                external_id: p.project_id.clone(),
1302                label: format!("SPRJ {}", p.project_id),
1303                properties: HashMap::new(),
1304                features: vec![p
1305                    .estimated_annual_spend
1306                    .to_string()
1307                    .parse::<f64>()
1308                    .unwrap_or(0.0)
1309                    .ln_1p()],
1310                is_anomaly: false,
1311                anomaly_type: None,
1312                is_aggregate: false,
1313                aggregate_count: 0,
1314            });
1315        }
1316        for q in qualifications {
1317            let node_id = format!("s2c_qual_{}", q.qualification_id);
1318            self.try_add_node(HypergraphNode {
1319                id: node_id,
1320                entity_type: "SupplierQualification".into(),
1321                entity_type_code: type_codes::SUPPLIER_QUALIFICATION,
1322                layer: HypergraphLayer::ProcessEvents,
1323                external_id: q.qualification_id.clone(),
1324                label: format!("SQUAL {}", q.qualification_id),
1325                properties: HashMap::new(),
1326                features: vec![],
1327                is_anomaly: false,
1328                anomaly_type: None,
1329                is_aggregate: false,
1330                aggregate_count: 0,
1331            });
1332        }
1333        for r in rfx_events {
1334            let node_id = format!("s2c_rfx_{}", r.rfx_id);
1335            self.try_add_node(HypergraphNode {
1336                id: node_id,
1337                entity_type: "RfxEvent".into(),
1338                entity_type_code: type_codes::RFX_EVENT,
1339                layer: HypergraphLayer::ProcessEvents,
1340                external_id: r.rfx_id.clone(),
1341                label: format!("RFX {}", r.rfx_id),
1342                properties: HashMap::new(),
1343                features: vec![],
1344                is_anomaly: false,
1345                anomaly_type: None,
1346                is_aggregate: false,
1347                aggregate_count: 0,
1348            });
1349        }
1350        for b in bids {
1351            let node_id = format!("s2c_bid_{}", b.bid_id);
1352            self.try_add_node(HypergraphNode {
1353                id: node_id,
1354                entity_type: "SupplierBid".into(),
1355                entity_type_code: type_codes::SUPPLIER_BID,
1356                layer: HypergraphLayer::ProcessEvents,
1357                external_id: b.bid_id.clone(),
1358                label: format!("BID {}", b.bid_id),
1359                properties: HashMap::new(),
1360                features: vec![b
1361                    .total_amount
1362                    .to_string()
1363                    .parse::<f64>()
1364                    .unwrap_or(0.0)
1365                    .ln_1p()],
1366                is_anomaly: false,
1367                anomaly_type: None,
1368                is_aggregate: false,
1369                aggregate_count: 0,
1370            });
1371        }
1372        for e in evaluations {
1373            let node_id = format!("s2c_eval_{}", e.evaluation_id);
1374            self.try_add_node(HypergraphNode {
1375                id: node_id,
1376                entity_type: "BidEvaluation".into(),
1377                entity_type_code: type_codes::BID_EVALUATION,
1378                layer: HypergraphLayer::ProcessEvents,
1379                external_id: e.evaluation_id.clone(),
1380                label: format!("BEVAL {}", e.evaluation_id),
1381                properties: HashMap::new(),
1382                features: vec![],
1383                is_anomaly: false,
1384                anomaly_type: None,
1385                is_aggregate: false,
1386                aggregate_count: 0,
1387            });
1388        }
1389        for c in contracts {
1390            let node_id = format!("s2c_ctr_{}", c.contract_id);
1391            self.try_add_node(HypergraphNode {
1392                id: node_id,
1393                entity_type: "ProcurementContract".into(),
1394                entity_type_code: type_codes::PROCUREMENT_CONTRACT,
1395                layer: HypergraphLayer::ProcessEvents,
1396                external_id: c.contract_id.clone(),
1397                label: format!("CTR {}", c.contract_id),
1398                properties: HashMap::new(),
1399                features: vec![c
1400                    .total_value
1401                    .to_string()
1402                    .parse::<f64>()
1403                    .unwrap_or(0.0)
1404                    .ln_1p()],
1405                is_anomaly: false,
1406                anomaly_type: None,
1407                is_aggregate: false,
1408                aggregate_count: 0,
1409            });
1410            // Track vendor for cross-layer edges
1411            self.doc_counterparty_links.push((
1412                format!("s2c_ctr_{}", c.contract_id),
1413                "vendor".into(),
1414                c.vendor_id.clone(),
1415            ));
1416        }
1417    }
1418
1419    /// Add H2R (Hire-to-Retire) documents as Layer 2 nodes.
1420    pub fn add_h2r_documents(
1421        &mut self,
1422        payroll_runs: &[PayrollRun],
1423        time_entries: &[TimeEntry],
1424        expense_reports: &[ExpenseReport],
1425    ) {
1426        if !self.config.include_h2r {
1427            return;
1428        }
1429        for pr in payroll_runs {
1430            let node_id = format!("h2r_pay_{}", pr.payroll_id);
1431            self.try_add_node(HypergraphNode {
1432                id: node_id,
1433                entity_type: "PayrollRun".into(),
1434                entity_type_code: type_codes::PAYROLL_RUN,
1435                layer: HypergraphLayer::ProcessEvents,
1436                external_id: pr.payroll_id.clone(),
1437                label: format!("PAY {}", pr.payroll_id),
1438                properties: HashMap::new(),
1439                features: vec![pr
1440                    .total_gross
1441                    .to_string()
1442                    .parse::<f64>()
1443                    .unwrap_or(0.0)
1444                    .ln_1p()],
1445                is_anomaly: false,
1446                anomaly_type: None,
1447                is_aggregate: false,
1448                aggregate_count: 0,
1449            });
1450        }
1451        for te in time_entries {
1452            let node_id = format!("h2r_time_{}", te.entry_id);
1453            self.try_add_node(HypergraphNode {
1454                id: node_id,
1455                entity_type: "TimeEntry".into(),
1456                entity_type_code: type_codes::TIME_ENTRY,
1457                layer: HypergraphLayer::ProcessEvents,
1458                external_id: te.entry_id.clone(),
1459                label: format!("TIME {}", te.entry_id),
1460                properties: HashMap::new(),
1461                features: vec![te.hours_regular + te.hours_overtime],
1462                is_anomaly: false,
1463                anomaly_type: None,
1464                is_aggregate: false,
1465                aggregate_count: 0,
1466            });
1467        }
1468        for er in expense_reports {
1469            let node_id = format!("h2r_exp_{}", er.report_id);
1470            self.try_add_node(HypergraphNode {
1471                id: node_id,
1472                entity_type: "ExpenseReport".into(),
1473                entity_type_code: type_codes::EXPENSE_REPORT,
1474                layer: HypergraphLayer::ProcessEvents,
1475                external_id: er.report_id.clone(),
1476                label: format!("EXP {}", er.report_id),
1477                properties: HashMap::new(),
1478                features: vec![er
1479                    .total_amount
1480                    .to_string()
1481                    .parse::<f64>()
1482                    .unwrap_or(0.0)
1483                    .ln_1p()],
1484                is_anomaly: false,
1485                anomaly_type: None,
1486                is_aggregate: false,
1487                aggregate_count: 0,
1488            });
1489        }
1490    }
1491
1492    /// Add MFG (Manufacturing) documents as Layer 2 nodes.
1493    pub fn add_mfg_documents(
1494        &mut self,
1495        production_orders: &[ProductionOrder],
1496        quality_inspections: &[QualityInspection],
1497        cycle_counts: &[CycleCount],
1498    ) {
1499        if !self.config.include_mfg {
1500            return;
1501        }
1502        for po in production_orders {
1503            let node_id = format!("mfg_po_{}", po.order_id);
1504            self.try_add_node(HypergraphNode {
1505                id: node_id,
1506                entity_type: "ProductionOrder".into(),
1507                entity_type_code: type_codes::PRODUCTION_ORDER,
1508                layer: HypergraphLayer::ProcessEvents,
1509                external_id: po.order_id.clone(),
1510                label: format!("PROD {}", po.order_id),
1511                properties: HashMap::new(),
1512                features: vec![po
1513                    .planned_quantity
1514                    .to_string()
1515                    .parse::<f64>()
1516                    .unwrap_or(0.0)
1517                    .ln_1p()],
1518                is_anomaly: false,
1519                anomaly_type: None,
1520                is_aggregate: false,
1521                aggregate_count: 0,
1522            });
1523        }
1524        for qi in quality_inspections {
1525            let node_id = format!("mfg_qi_{}", qi.inspection_id);
1526            self.try_add_node(HypergraphNode {
1527                id: node_id,
1528                entity_type: "QualityInspection".into(),
1529                entity_type_code: type_codes::QUALITY_INSPECTION,
1530                layer: HypergraphLayer::ProcessEvents,
1531                external_id: qi.inspection_id.clone(),
1532                label: format!("QI {}", qi.inspection_id),
1533                properties: HashMap::new(),
1534                features: vec![qi.defect_rate],
1535                is_anomaly: false,
1536                anomaly_type: None,
1537                is_aggregate: false,
1538                aggregate_count: 0,
1539            });
1540        }
1541        for cc in cycle_counts {
1542            let node_id = format!("mfg_cc_{}", cc.count_id);
1543            self.try_add_node(HypergraphNode {
1544                id: node_id,
1545                entity_type: "CycleCount".into(),
1546                entity_type_code: type_codes::CYCLE_COUNT,
1547                layer: HypergraphLayer::ProcessEvents,
1548                external_id: cc.count_id.clone(),
1549                label: format!("CC {}", cc.count_id),
1550                properties: HashMap::new(),
1551                features: vec![cc.variance_rate],
1552                is_anomaly: false,
1553                anomaly_type: None,
1554                is_aggregate: false,
1555                aggregate_count: 0,
1556            });
1557        }
1558    }
1559
1560    /// Add Banking documents as Layer 2 nodes.
1561    pub fn add_bank_documents(
1562        &mut self,
1563        customers: &[BankingCustomer],
1564        accounts: &[BankAccount],
1565        transactions: &[BankTransaction],
1566    ) {
1567        if !self.config.include_bank {
1568            return;
1569        }
1570        for cust in customers {
1571            let cid = cust.customer_id.to_string();
1572            let node_id = format!("bank_cust_{cid}");
1573            self.try_add_node(HypergraphNode {
1574                id: node_id,
1575                entity_type: "BankingCustomer".into(),
1576                entity_type_code: type_codes::BANKING_CUSTOMER,
1577                layer: HypergraphLayer::ProcessEvents,
1578                external_id: cid,
1579                label: format!("BCUST {}", cust.customer_id),
1580                properties: {
1581                    let mut p = HashMap::new();
1582                    p.insert(
1583                        "customer_type".into(),
1584                        Value::String(format!("{:?}", cust.customer_type)),
1585                    );
1586                    p.insert("name".into(), Value::String(cust.name.legal_name.clone()));
1587                    p.insert(
1588                        "residence_country".into(),
1589                        Value::String(cust.residence_country.clone()),
1590                    );
1591                    p.insert(
1592                        "risk_tier".into(),
1593                        Value::String(format!("{:?}", cust.risk_tier)),
1594                    );
1595                    p.insert("is_pep".into(), Value::Bool(cust.is_pep));
1596                    p
1597                },
1598                features: vec![],
1599                is_anomaly: cust.is_mule,
1600                anomaly_type: if cust.is_mule {
1601                    Some("mule_account".into())
1602                } else {
1603                    None
1604                },
1605                is_aggregate: false,
1606                aggregate_count: 0,
1607            });
1608        }
1609        for acct in accounts {
1610            let aid = acct.account_id.to_string();
1611            let node_id = format!("bank_acct_{aid}");
1612            self.try_add_node(HypergraphNode {
1613                id: node_id,
1614                entity_type: "BankAccount".into(),
1615                entity_type_code: type_codes::BANK_ACCOUNT,
1616                layer: HypergraphLayer::ProcessEvents,
1617                external_id: aid,
1618                label: format!("BACCT {}", acct.account_number),
1619                properties: {
1620                    let mut p = HashMap::new();
1621                    p.insert(
1622                        "account_type".into(),
1623                        Value::String(format!("{:?}", acct.account_type)),
1624                    );
1625                    p.insert("status".into(), Value::String(format!("{:?}", acct.status)));
1626                    p.insert("currency".into(), Value::String(acct.currency.clone()));
1627                    let balance: f64 = acct.current_balance.to_string().parse().unwrap_or(0.0);
1628                    p.insert("balance".into(), serde_json::json!(balance));
1629                    p.insert(
1630                        "account_number".into(),
1631                        Value::String(acct.account_number.clone()),
1632                    );
1633                    p
1634                },
1635                features: vec![acct
1636                    .current_balance
1637                    .to_string()
1638                    .parse::<f64>()
1639                    .unwrap_or(0.0)
1640                    .ln_1p()],
1641                is_anomaly: acct.is_mule_account,
1642                anomaly_type: if acct.is_mule_account {
1643                    Some("mule_account".into())
1644                } else {
1645                    None
1646                },
1647                is_aggregate: false,
1648                aggregate_count: 0,
1649            });
1650        }
1651        for txn in transactions {
1652            let tid = txn.transaction_id.to_string();
1653            let node_id = format!("bank_txn_{tid}");
1654            self.try_add_node(HypergraphNode {
1655                id: node_id,
1656                entity_type: "BankTransaction".into(),
1657                entity_type_code: type_codes::BANK_TRANSACTION,
1658                layer: HypergraphLayer::ProcessEvents,
1659                external_id: tid,
1660                label: format!("BTXN {}", txn.reference),
1661                properties: {
1662                    let mut p = HashMap::new();
1663                    let amount: f64 = txn.amount.to_string().parse().unwrap_or(0.0);
1664                    p.insert("amount".into(), serde_json::json!(amount));
1665                    p.insert("currency".into(), Value::String(txn.currency.clone()));
1666                    p.insert("reference".into(), Value::String(txn.reference.clone()));
1667                    p.insert(
1668                        "direction".into(),
1669                        Value::String(format!("{:?}", txn.direction)),
1670                    );
1671                    p.insert(
1672                        "channel".into(),
1673                        Value::String(format!("{:?}", txn.channel)),
1674                    );
1675                    p.insert(
1676                        "category".into(),
1677                        Value::String(format!("{:?}", txn.category)),
1678                    );
1679                    p.insert(
1680                        "transaction_type".into(),
1681                        Value::String(txn.transaction_type.clone()),
1682                    );
1683                    p.insert("status".into(), Value::String(format!("{:?}", txn.status)));
1684                    if txn.is_suspicious {
1685                        p.insert("isAnomalous".into(), Value::Bool(true));
1686                        p.insert("is_suspicious".into(), Value::Bool(true));
1687                        if let Some(ref reason) = txn.suspicion_reason {
1688                            p.insert(
1689                                "suspicion_reason".into(),
1690                                Value::String(format!("{reason:?}")),
1691                            );
1692                        }
1693                        if let Some(ref stage) = txn.laundering_stage {
1694                            p.insert(
1695                                "laundering_stage".into(),
1696                                Value::String(format!("{stage:?}")),
1697                            );
1698                        }
1699                    }
1700                    p
1701                },
1702                features: vec![txn
1703                    .amount
1704                    .to_string()
1705                    .parse::<f64>()
1706                    .unwrap_or(0.0)
1707                    .abs()
1708                    .ln_1p()],
1709                is_anomaly: txn.is_suspicious,
1710                anomaly_type: txn.suspicion_reason.as_ref().map(|r| format!("{r:?}")),
1711                is_aggregate: false,
1712                aggregate_count: 0,
1713            });
1714        }
1715    }
1716
1717    /// Add Audit documents as Layer 2 nodes.
1718    #[allow(clippy::too_many_arguments)]
1719    pub fn add_audit_documents(
1720        &mut self,
1721        engagements: &[AuditEngagement],
1722        workpapers: &[Workpaper],
1723        findings: &[AuditFinding],
1724        evidence: &[AuditEvidence],
1725        risks: &[RiskAssessment],
1726        judgments: &[ProfessionalJudgment],
1727    ) {
1728        if !self.config.include_audit {
1729            return;
1730        }
1731        for eng in engagements {
1732            let eid = eng.engagement_id.to_string();
1733            let node_id = format!("audit_eng_{eid}");
1734            self.try_add_node(HypergraphNode {
1735                id: node_id,
1736                entity_type: "AuditEngagement".into(),
1737                entity_type_code: type_codes::AUDIT_ENGAGEMENT,
1738                layer: HypergraphLayer::ProcessEvents,
1739                external_id: eid,
1740                label: format!("AENG {}", eng.engagement_ref),
1741                properties: {
1742                    let mut p = HashMap::new();
1743                    p.insert(
1744                        "engagement_ref".into(),
1745                        Value::String(eng.engagement_ref.clone()),
1746                    );
1747                    p.insert("status".into(), Value::String(format!("{:?}", eng.status)));
1748                    p.insert(
1749                        "engagement_type".into(),
1750                        Value::String(format!("{:?}", eng.engagement_type)),
1751                    );
1752                    p.insert("client_name".into(), Value::String(eng.client_name.clone()));
1753                    p.insert("fiscal_year".into(), serde_json::json!(eng.fiscal_year));
1754                    let mat: f64 = eng.materiality.to_string().parse().unwrap_or(0.0);
1755                    p.insert("materiality".into(), serde_json::json!(mat));
1756                    p.insert(
1757                        "fieldwork_start".into(),
1758                        Value::String(eng.fieldwork_start.to_string()),
1759                    );
1760                    p.insert(
1761                        "fieldwork_end".into(),
1762                        Value::String(eng.fieldwork_end.to_string()),
1763                    );
1764                    p
1765                },
1766                features: vec![eng
1767                    .materiality
1768                    .to_string()
1769                    .parse::<f64>()
1770                    .unwrap_or(0.0)
1771                    .ln_1p()],
1772                is_anomaly: false,
1773                anomaly_type: None,
1774                is_aggregate: false,
1775                aggregate_count: 0,
1776            });
1777        }
1778        for wp in workpapers {
1779            let wid = wp.workpaper_id.to_string();
1780            let node_id = format!("audit_wp_{wid}");
1781            self.try_add_node(HypergraphNode {
1782                id: node_id,
1783                entity_type: "Workpaper".into(),
1784                entity_type_code: type_codes::WORKPAPER,
1785                layer: HypergraphLayer::ProcessEvents,
1786                external_id: wid,
1787                label: format!("WP {}", wp.workpaper_ref),
1788                properties: {
1789                    let mut p = HashMap::new();
1790                    p.insert(
1791                        "workpaper_ref".into(),
1792                        Value::String(wp.workpaper_ref.clone()),
1793                    );
1794                    p.insert("title".into(), Value::String(wp.title.clone()));
1795                    p.insert("status".into(), Value::String(format!("{:?}", wp.status)));
1796                    p.insert("section".into(), Value::String(format!("{:?}", wp.section)));
1797                    p
1798                },
1799                features: vec![],
1800                is_anomaly: false,
1801                anomaly_type: None,
1802                is_aggregate: false,
1803                aggregate_count: 0,
1804            });
1805        }
1806        for f in findings {
1807            let fid = f.finding_id.to_string();
1808            let node_id = format!("audit_find_{fid}");
1809            self.try_add_node(HypergraphNode {
1810                id: node_id,
1811                entity_type: "AuditFinding".into(),
1812                entity_type_code: type_codes::AUDIT_FINDING,
1813                layer: HypergraphLayer::ProcessEvents,
1814                external_id: fid,
1815                label: format!("AFIND {}", f.finding_ref),
1816                properties: {
1817                    let mut p = HashMap::new();
1818                    p.insert("finding_ref".into(), Value::String(f.finding_ref.clone()));
1819                    p.insert("title".into(), Value::String(f.title.clone()));
1820                    p.insert("description".into(), Value::String(f.condition.clone()));
1821                    p.insert(
1822                        "severity".into(),
1823                        Value::String(format!("{:?}", f.severity)),
1824                    );
1825                    p.insert("status".into(), Value::String(format!("{:?}", f.status)));
1826                    p.insert(
1827                        "finding_type".into(),
1828                        Value::String(format!("{:?}", f.finding_type)),
1829                    );
1830                    p
1831                },
1832                features: vec![f.severity.score() as f64 / 5.0],
1833                is_anomaly: false,
1834                anomaly_type: None,
1835                is_aggregate: false,
1836                aggregate_count: 0,
1837            });
1838        }
1839        for ev in evidence {
1840            let evid = ev.evidence_id.to_string();
1841            let node_id = format!("audit_ev_{evid}");
1842            self.try_add_node(HypergraphNode {
1843                id: node_id,
1844                entity_type: "AuditEvidence".into(),
1845                entity_type_code: type_codes::AUDIT_EVIDENCE,
1846                layer: HypergraphLayer::ProcessEvents,
1847                external_id: evid,
1848                label: format!("AEV {}", ev.evidence_id),
1849                properties: {
1850                    let mut p = HashMap::new();
1851                    p.insert(
1852                        "evidence_type".into(),
1853                        Value::String(format!("{:?}", ev.evidence_type)),
1854                    );
1855                    p.insert("description".into(), Value::String(ev.description.clone()));
1856                    p.insert(
1857                        "source_type".into(),
1858                        Value::String(format!("{:?}", ev.source_type)),
1859                    );
1860                    p.insert(
1861                        "reliability".into(),
1862                        Value::String(format!(
1863                            "{:?}",
1864                            ev.reliability_assessment.overall_reliability
1865                        )),
1866                    );
1867                    p
1868                },
1869                features: vec![ev.reliability_assessment.overall_reliability.score() as f64 / 3.0],
1870                is_anomaly: false,
1871                anomaly_type: None,
1872                is_aggregate: false,
1873                aggregate_count: 0,
1874            });
1875        }
1876        for r in risks {
1877            let rid = r.risk_id.to_string();
1878            let node_id = format!("audit_risk_{rid}");
1879            self.try_add_node(HypergraphNode {
1880                id: node_id,
1881                entity_type: "RiskAssessment".into(),
1882                entity_type_code: type_codes::RISK_ASSESSMENT,
1883                layer: HypergraphLayer::ProcessEvents,
1884                external_id: rid,
1885                label: format!("ARISK {}", r.risk_ref),
1886                properties: {
1887                    let mut p = HashMap::new();
1888                    p.insert("status".into(), Value::String("active".into()));
1889                    p.insert("risk_ref".into(), Value::String(r.risk_ref.clone()));
1890                    p.insert("name".into(), Value::String(r.risk_ref.clone()));
1891                    p.insert("description".into(), Value::String(r.description.clone()));
1892                    p.insert(
1893                        "category".into(),
1894                        Value::String(format!("{:?}", r.risk_category)),
1895                    );
1896                    p.insert(
1897                        "account_or_process".into(),
1898                        Value::String(r.account_or_process.clone()),
1899                    );
1900                    // Risk levels as lowercase strings for dashboard consumption
1901                    let inherent = match r.inherent_risk {
1902                        datasynth_core::models::audit::RiskLevel::Low => "low",
1903                        datasynth_core::models::audit::RiskLevel::Medium => "medium",
1904                        datasynth_core::models::audit::RiskLevel::High => "high",
1905                        datasynth_core::models::audit::RiskLevel::Significant => "critical",
1906                    };
1907                    let control = match r.control_risk {
1908                        datasynth_core::models::audit::RiskLevel::Low => "low",
1909                        datasynth_core::models::audit::RiskLevel::Medium => "medium",
1910                        datasynth_core::models::audit::RiskLevel::High => "high",
1911                        datasynth_core::models::audit::RiskLevel::Significant => "critical",
1912                    };
1913                    p.insert("inherentImpact".into(), Value::String(inherent.into()));
1914                    p.insert("inherentLikelihood".into(), Value::String(inherent.into()));
1915                    p.insert("residualImpact".into(), Value::String(control.into()));
1916                    p.insert("residualLikelihood".into(), Value::String(control.into()));
1917                    p.insert(
1918                        "riskScore".into(),
1919                        serde_json::json!(r.inherent_risk.score() as f64 * 25.0),
1920                    );
1921                    p.insert("owner".into(), Value::String(r.assessed_by.clone()));
1922                    p.insert("isSignificant".into(), Value::Bool(r.is_significant_risk));
1923                    p.insert(
1924                        "is_significant_risk".into(),
1925                        Value::Bool(r.is_significant_risk),
1926                    );
1927                    p.insert(
1928                        "response_nature".into(),
1929                        Value::String(format!("{:?}", r.response_nature)),
1930                    );
1931                    p
1932                },
1933                features: vec![
1934                    r.inherent_risk.score() as f64 / 4.0,
1935                    r.control_risk.score() as f64 / 4.0,
1936                    if r.is_significant_risk { 1.0 } else { 0.0 },
1937                ],
1938                is_anomaly: false,
1939                anomaly_type: None,
1940                is_aggregate: false,
1941                aggregate_count: 0,
1942            });
1943        }
1944        for j in judgments {
1945            let jid = j.judgment_id.to_string();
1946            let node_id = format!("audit_judg_{jid}");
1947            self.try_add_node(HypergraphNode {
1948                id: node_id,
1949                entity_type: "ProfessionalJudgment".into(),
1950                entity_type_code: type_codes::PROFESSIONAL_JUDGMENT,
1951                layer: HypergraphLayer::ProcessEvents,
1952                external_id: jid,
1953                label: format!("AJUDG {}", j.judgment_id),
1954                properties: {
1955                    let mut p = HashMap::new();
1956                    p.insert("judgment_ref".into(), Value::String(j.judgment_ref.clone()));
1957                    p.insert("subject".into(), Value::String(j.subject.clone()));
1958                    p.insert(
1959                        "description".into(),
1960                        Value::String(j.issue_description.clone()),
1961                    );
1962                    p.insert("conclusion".into(), Value::String(j.conclusion.clone()));
1963                    p.insert(
1964                        "judgment_type".into(),
1965                        Value::String(format!("{:?}", j.judgment_type)),
1966                    );
1967                    p
1968                },
1969                features: vec![],
1970                is_anomaly: false,
1971                anomaly_type: None,
1972                is_aggregate: false,
1973                aggregate_count: 0,
1974            });
1975        }
1976    }
1977
1978    /// Add Bank Reconciliation documents as Layer 2 nodes.
1979    pub fn add_bank_recon_documents(&mut self, reconciliations: &[BankReconciliation]) {
1980        if !self.config.include_r2r {
1981            return;
1982        }
1983        for recon in reconciliations {
1984            let node_id = format!("recon_{}", recon.reconciliation_id);
1985            self.try_add_node(HypergraphNode {
1986                id: node_id,
1987                entity_type: "BankReconciliation".into(),
1988                entity_type_code: type_codes::BANK_RECONCILIATION,
1989                layer: HypergraphLayer::ProcessEvents,
1990                external_id: recon.reconciliation_id.clone(),
1991                label: format!("RECON {}", recon.reconciliation_id),
1992                properties: HashMap::new(),
1993                features: vec![recon
1994                    .bank_ending_balance
1995                    .to_string()
1996                    .parse::<f64>()
1997                    .unwrap_or(0.0)
1998                    .ln_1p()],
1999                is_anomaly: false,
2000                anomaly_type: None,
2001                is_aggregate: false,
2002                aggregate_count: 0,
2003            });
2004            for line in &recon.statement_lines {
2005                let node_id = format!("recon_line_{}", line.line_id);
2006                self.try_add_node(HypergraphNode {
2007                    id: node_id,
2008                    entity_type: "BankStatementLine".into(),
2009                    entity_type_code: type_codes::BANK_STATEMENT_LINE,
2010                    layer: HypergraphLayer::ProcessEvents,
2011                    external_id: line.line_id.clone(),
2012                    label: format!("BSL {}", line.line_id),
2013                    properties: HashMap::new(),
2014                    features: vec![line
2015                        .amount
2016                        .to_string()
2017                        .parse::<f64>()
2018                        .unwrap_or(0.0)
2019                        .abs()
2020                        .ln_1p()],
2021                    is_anomaly: false,
2022                    anomaly_type: None,
2023                    is_aggregate: false,
2024                    aggregate_count: 0,
2025                });
2026            }
2027            for item in &recon.reconciling_items {
2028                let node_id = format!("recon_item_{}", item.item_id);
2029                self.try_add_node(HypergraphNode {
2030                    id: node_id,
2031                    entity_type: "ReconcilingItem".into(),
2032                    entity_type_code: type_codes::RECONCILING_ITEM,
2033                    layer: HypergraphLayer::ProcessEvents,
2034                    external_id: item.item_id.clone(),
2035                    label: format!("RITEM {}", item.item_id),
2036                    properties: HashMap::new(),
2037                    features: vec![item
2038                        .amount
2039                        .to_string()
2040                        .parse::<f64>()
2041                        .unwrap_or(0.0)
2042                        .abs()
2043                        .ln_1p()],
2044                    is_anomaly: false,
2045                    anomaly_type: None,
2046                    is_aggregate: false,
2047                    aggregate_count: 0,
2048                });
2049            }
2050        }
2051    }
2052
2053    /// Add OCPM events as hyperedges connecting their participating objects.
2054    pub fn add_ocpm_events(&mut self, event_log: &datasynth_ocpm::OcpmEventLog) {
2055        if !self.config.events_as_hyperedges {
2056            return;
2057        }
2058        for event in &event_log.events {
2059            let participants: Vec<HyperedgeParticipant> = event
2060                .object_refs
2061                .iter()
2062                .map(|obj_ref| {
2063                    let node_id = format!("ocpm_obj_{}", obj_ref.object_id);
2064                    // Ensure the object node exists
2065                    self.try_add_node(HypergraphNode {
2066                        id: node_id.clone(),
2067                        entity_type: "OcpmObject".into(),
2068                        entity_type_code: type_codes::OCPM_EVENT,
2069                        layer: HypergraphLayer::ProcessEvents,
2070                        external_id: obj_ref.object_id.to_string(),
2071                        label: format!("OBJ {}", obj_ref.object_type_id),
2072                        properties: HashMap::new(),
2073                        features: vec![],
2074                        is_anomaly: false,
2075                        anomaly_type: None,
2076                        is_aggregate: false,
2077                        aggregate_count: 0,
2078                    });
2079                    HyperedgeParticipant {
2080                        node_id,
2081                        role: format!("{:?}", obj_ref.qualifier),
2082                        weight: None,
2083                    }
2084                })
2085                .collect();
2086
2087            if !participants.is_empty() {
2088                let mut props = HashMap::new();
2089                props.insert(
2090                    "activity_id".into(),
2091                    Value::String(event.activity_id.clone()),
2092                );
2093                props.insert(
2094                    "timestamp".into(),
2095                    Value::String(event.timestamp.to_rfc3339()),
2096                );
2097                if !event.resource_id.is_empty() {
2098                    props.insert("resource".into(), Value::String(event.resource_id.clone()));
2099                }
2100
2101                self.hyperedges.push(Hyperedge {
2102                    id: format!("ocpm_evt_{}", event.event_id),
2103                    hyperedge_type: "OcpmEvent".into(),
2104                    subtype: event.activity_id.clone(),
2105                    participants,
2106                    layer: HypergraphLayer::ProcessEvents,
2107                    properties: props,
2108                    timestamp: Some(event.timestamp.date_naive()),
2109                    is_anomaly: false,
2110                    anomaly_type: None,
2111                    features: vec![],
2112                });
2113            }
2114        }
2115    }
2116
2117    /// Build cross-layer edges linking governance to accounting and process layers.
2118    pub fn build_cross_layer_edges(&mut self) {
2119        if !self.config.include_cross_layer_edges {
2120            return;
2121        }
2122
2123        // Use pre-collected counterparty links instead of iterating all nodes
2124        let links = std::mem::take(&mut self.doc_counterparty_links);
2125        for (doc_node_id, counterparty_type, counterparty_id) in &links {
2126            let source_node_id = match counterparty_type.as_str() {
2127                "vendor" => self.vendor_node_ids.get(counterparty_id),
2128                "customer" => self.customer_node_ids.get(counterparty_id),
2129                _ => None,
2130            };
2131            if let Some(source_id) = source_node_id {
2132                self.edges.push(CrossLayerEdge {
2133                    source_id: source_id.clone(),
2134                    source_layer: HypergraphLayer::GovernanceControls,
2135                    target_id: doc_node_id.clone(),
2136                    target_layer: HypergraphLayer::ProcessEvents,
2137                    edge_type: "SuppliesTo".to_string(),
2138                    edge_type_code: type_codes::SUPPLIES_TO,
2139                    properties: HashMap::new(),
2140                });
2141            }
2142        }
2143        self.doc_counterparty_links = links;
2144    }
2145
2146    /// Finalize and build the Hypergraph.
2147    pub fn build(mut self) -> Hypergraph {
2148        // Build cross-layer edges last (they reference all nodes)
2149        self.build_cross_layer_edges();
2150
2151        // Compute metadata
2152        let mut layer_node_counts: HashMap<String, usize> = HashMap::new();
2153        let mut node_type_counts: HashMap<String, usize> = HashMap::new();
2154        let mut anomalous_nodes = 0;
2155
2156        for node in &self.nodes {
2157            *layer_node_counts
2158                .entry(node.layer.name().to_string())
2159                .or_insert(0) += 1;
2160            *node_type_counts
2161                .entry(node.entity_type.clone())
2162                .or_insert(0) += 1;
2163            if node.is_anomaly {
2164                anomalous_nodes += 1;
2165            }
2166        }
2167
2168        let mut edge_type_counts: HashMap<String, usize> = HashMap::new();
2169        for edge in &self.edges {
2170            *edge_type_counts.entry(edge.edge_type.clone()).or_insert(0) += 1;
2171        }
2172
2173        let mut hyperedge_type_counts: HashMap<String, usize> = HashMap::new();
2174        let mut anomalous_hyperedges = 0;
2175        for he in &self.hyperedges {
2176            *hyperedge_type_counts
2177                .entry(he.hyperedge_type.clone())
2178                .or_insert(0) += 1;
2179            if he.is_anomaly {
2180                anomalous_hyperedges += 1;
2181            }
2182        }
2183
2184        let budget_report = NodeBudgetReport {
2185            total_budget: self.budget.total_max(),
2186            total_used: self.budget.total_count(),
2187            layer1_budget: self.budget.layer1_max,
2188            layer1_used: self.budget.layer1_count,
2189            layer2_budget: self.budget.layer2_max,
2190            layer2_used: self.budget.layer2_count,
2191            layer3_budget: self.budget.layer3_max,
2192            layer3_used: self.budget.layer3_count,
2193            aggregate_nodes_created: self.aggregate_count,
2194            aggregation_triggered: self.aggregate_count > 0,
2195        };
2196
2197        let metadata = HypergraphMetadata {
2198            name: "multi_layer_hypergraph".to_string(),
2199            num_nodes: self.nodes.len(),
2200            num_edges: self.edges.len(),
2201            num_hyperedges: self.hyperedges.len(),
2202            layer_node_counts,
2203            node_type_counts,
2204            edge_type_counts,
2205            hyperedge_type_counts,
2206            anomalous_nodes,
2207            anomalous_hyperedges,
2208            source: "datasynth".to_string(),
2209            generated_at: chrono::Utc::now().to_rfc3339(),
2210            budget_report: budget_report.clone(),
2211            files: vec![
2212                "nodes.jsonl".to_string(),
2213                "edges.jsonl".to_string(),
2214                "hyperedges.jsonl".to_string(),
2215                "metadata.json".to_string(),
2216            ],
2217        };
2218
2219        Hypergraph {
2220            nodes: self.nodes,
2221            edges: self.edges,
2222            hyperedges: self.hyperedges,
2223            metadata,
2224            budget_report,
2225        }
2226    }
2227
2228    /// Try to add a node, respecting the budget. Returns true if added.
2229    fn try_add_node(&mut self, node: HypergraphNode) -> bool {
2230        if self.node_index.contains_key(&node.id) {
2231            return false; // Already exists
2232        }
2233
2234        if !self.budget.can_add(node.layer) {
2235            return false; // Budget exceeded
2236        }
2237
2238        let id = node.id.clone();
2239        let layer = node.layer;
2240        self.nodes.push(node);
2241        let idx = self.nodes.len() - 1;
2242        self.node_index.insert(id, idx);
2243        self.budget.record_add(layer);
2244        true
2245    }
2246}
2247
2248/// Map COSO component to a numeric feature.
2249fn component_to_feature(component: &CosoComponent) -> f64 {
2250    match component {
2251        CosoComponent::ControlEnvironment => 1.0,
2252        CosoComponent::RiskAssessment => 2.0,
2253        CosoComponent::ControlActivities => 3.0,
2254        CosoComponent::InformationCommunication => 4.0,
2255        CosoComponent::MonitoringActivities => 5.0,
2256    }
2257}
2258
2259/// Map account type to a numeric feature.
2260fn account_type_feature(account_type: &datasynth_core::models::AccountType) -> f64 {
2261    use datasynth_core::models::AccountType;
2262    match account_type {
2263        AccountType::Asset => 1.0,
2264        AccountType::Liability => 2.0,
2265        AccountType::Equity => 3.0,
2266        AccountType::Revenue => 4.0,
2267        AccountType::Expense => 5.0,
2268        AccountType::Statistical => 6.0,
2269    }
2270}
2271
2272/// Compute features for a journal entry hyperedge.
2273fn compute_je_features(entry: &JournalEntry) -> Vec<f64> {
2274    let total_debit: f64 = entry
2275        .lines
2276        .iter()
2277        .map(|l| l.debit_amount.to_string().parse::<f64>().unwrap_or(0.0))
2278        .sum();
2279
2280    let line_count = entry.lines.len() as f64;
2281    let posting_date = entry.header.posting_date;
2282    let weekday = posting_date.weekday().num_days_from_monday() as f64 / WEEKDAY_NORMALIZER;
2283    let day = posting_date.day() as f64 / DAY_OF_MONTH_NORMALIZER;
2284    let month = posting_date.month() as f64 / MONTH_NORMALIZER;
2285    let is_month_end = if posting_date.day() >= MONTH_END_DAY_THRESHOLD {
2286        1.0
2287    } else {
2288        0.0
2289    };
2290
2291    vec![
2292        (total_debit.abs() + 1.0).ln(), // log amount
2293        line_count,                     // number of lines
2294        weekday,                        // weekday normalized
2295        day,                            // day of month normalized
2296        month,                          // month normalized
2297        is_month_end,                   // month-end flag
2298    ]
2299}
2300
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::{
        AccountSubType, AccountType, ChartOfAccounts, CoAComplexity, ControlFrequency, ControlType,
        CosoComponent, CosoMaturityLevel, GLAccount, InternalControl, RiskLevel, SoxAssertion,
        UserPersona,
    };

    /// Fixture: a small chart of accounts holding one asset account and one
    /// liability account.
    fn make_test_coa() -> ChartOfAccounts {
        let mut chart = ChartOfAccounts::new(
            "TEST_COA".to_string(),
            "Test Chart".to_string(),
            "US".to_string(),
            datasynth_core::models::IndustrySector::Manufacturing,
            CoAComplexity::Small,
        );

        let accounts = [
            ("1000", "Cash", AccountType::Asset, AccountSubType::Cash),
            (
                "2000",
                "AP",
                AccountType::Liability,
                AccountSubType::AccountsPayable,
            ),
        ];
        for (number, name, account_type, sub_type) in accounts {
            chart.add_account(GLAccount::new(
                number.to_string(),
                name.to_string(),
                account_type,
                sub_type,
            ));
        }

        chart
    }

    /// Fixture: a single key preventive control tied to the
    /// `ControlActivities` COSO component and the `Existence` SOX assertion.
    fn make_test_control() -> InternalControl {
        InternalControl {
            control_id: "C001".to_string(),
            control_name: "Three-Way Match".to_string(),
            control_type: ControlType::Preventive,
            objective: "Ensure proper matching".to_string(),
            frequency: ControlFrequency::Transactional,
            owner_role: UserPersona::Controller,
            risk_level: RiskLevel::High,
            description: "Test control".to_string(),
            is_key_control: true,
            sox_assertion: SoxAssertion::Existence,
            coso_component: CosoComponent::ControlActivities,
            coso_principles: vec![CosoPrinciple::ControlActions],
            control_scope: datasynth_core::models::ControlScope::TransactionLevel,
            maturity_level: CosoMaturityLevel::Managed,
        }
    }

    #[test]
    fn test_builder_coso_framework() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        let graph = builder.build();

        // Expect 5 component nodes plus 17 principle nodes.
        assert_eq!(graph.nodes.len(), 22);

        // Every node must belong to the governance layer.
        let outside_governance = graph
            .nodes
            .iter()
            .any(|n| n.layer != HypergraphLayer::GovernanceControls);
        assert!(!outside_governance);

        // One edge per principle linking it to its component.
        let principle_edges = graph
            .edges
            .iter()
            .filter(|e| e.edge_type == "CoversCosoPrinciple")
            .count();
        assert_eq!(principle_edges, 17);
    }

    #[test]
    fn test_builder_controls() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        let graph = builder.build();

        // 22 COSO nodes + 1 control + 1 SOX assertion.
        assert_eq!(graph.nodes.len(), 24);

        let has_entity = |wanted: &str| graph.nodes.iter().any(|n| n.entity_type == wanted);
        assert!(has_entity("InternalControl"));
        assert!(has_entity("SoxAssertion"));
    }

    #[test]
    fn test_builder_accounts() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 1000,
            ..Default::default()
        });
        builder.add_accounts(&make_test_coa());
        let graph = builder.build();

        assert_eq!(graph.nodes.len(), 2);

        // Both accounts must land in the accounting layer.
        let outside_accounting = graph
            .nodes
            .iter()
            .any(|n| n.layer != HypergraphLayer::AccountingNetwork);
        assert!(!outside_accounting);
    }

    #[test]
    fn test_budget_enforcement() {
        // Deliberately tiny budget with every optional entity family disabled.
        let tiny = HypergraphConfig {
            max_nodes: 10,
            include_coso: false,
            include_controls: false,
            include_sox: false,
            include_vendors: false,
            include_customers: false,
            include_employees: false,
            include_p2p: false,
            include_o2c: false,
            ..Default::default()
        };
        let mut builder = HypergraphBuilder::new(tiny);
        builder.add_accounts(&make_test_coa());
        let graph = builder.build();

        // Budget for L3 is 10% of 10 = 1, so at most one of the two accounts fits.
        assert!(graph.nodes.len() < 2);
    }

    #[test]
    fn test_full_build() {
        let mut builder = HypergraphBuilder::new(HypergraphConfig {
            max_nodes: 10000,
            ..Default::default()
        });
        builder.add_coso_framework();
        builder.add_controls(&[make_test_control()]);
        builder.add_accounts(&make_test_coa());
        let graph = builder.build();

        assert!(!graph.nodes.is_empty());
        assert!(!graph.edges.is_empty());

        // Metadata totals must agree with the actual collections.
        assert_eq!(graph.metadata.num_nodes, graph.nodes.len());
        assert_eq!(graph.metadata.num_edges, graph.edges.len());
    }
}