Skip to main content

datasynth_generators/relationships/
entity_graph_generator.rs

1//! Entity graph generator for interconnectivity modeling.
2//!
3//! Provides generation of comprehensive entity relationship graphs including:
4//! - Transactional relationships from journal entries and document flows
5//! - Cross-process linkages (P2P ↔ O2C via inventory)
6//! - Relationship strength calculation
7//! - Network analysis support
8
9use chrono::NaiveDate;
10use datasynth_core::models::{
11    CrossProcessLink, CrossProcessLinkType, EntityGraph, EntityNode, GraphEntityId,
12    GraphEntityType, GraphMetadata, RelationshipEdge, RelationshipStrengthCalculator,
13    RelationshipType, VendorNetwork,
14};
15use datasynth_core::utils::seeded_rng;
16use rand::prelude::*;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::{HashMap, HashSet};
20
21/// Configuration for entity graph generation.
22#[derive(Debug, Clone)]
23pub struct EntityGraphConfig {
24    /// Enable entity graph generation
25    pub enabled: bool,
26    /// Cross-process link configuration
27    pub cross_process: CrossProcessConfig,
28    /// Strength calculation settings
29    pub strength_config: StrengthConfig,
30    /// Include organizational relationships
31    pub include_organizational: bool,
32    /// Include document relationships
33    pub include_document: bool,
34}
35
36impl Default for EntityGraphConfig {
37    fn default() -> Self {
38        Self {
39            enabled: false,
40            cross_process: CrossProcessConfig::default(),
41            strength_config: StrengthConfig::default(),
42            include_organizational: true,
43            include_document: true,
44        }
45    }
46}
47
/// Settings controlling which cross-process links are produced and how densely.
#[derive(Debug, Clone)]
pub struct CrossProcessConfig {
    /// Link goods receipts (P2P) to deliveries (O2C) through shared materials.
    /// When `false`, `generate_cross_process_links` returns an empty list.
    pub enable_inventory_links: bool,
    /// Generate return flows.
    pub enable_return_flows: bool,
    /// Generate payment reconciliation links.
    pub enable_payment_links: bool,
    /// Match intercompany transactions bilaterally.
    pub enable_ic_bilateral: bool,
    /// Fraction of goods receipts considered for an inventory link (0.0 - 1.0).
    pub inventory_link_rate: f64,
    /// Fraction of payments linked for reconciliation (0.0 - 1.0).
    pub payment_link_rate: f64,
}

impl Default for CrossProcessConfig {
    /// Every link category enabled; 30% of goods receipts and 80% of payments are linked.
    fn default() -> Self {
        let inventory_link_rate = 0.30;
        let payment_link_rate = 0.80;

        CrossProcessConfig {
            enable_inventory_links: true,
            enable_return_flows: true,
            enable_payment_links: true,
            enable_ic_bilateral: true,
            inventory_link_rate,
            payment_link_rate,
        }
    }
}
77
/// Weights used when scoring how strong a relationship between two entities is.
///
/// The values are copied into the strength calculator by
/// `EntityGraphGenerator::with_config`.
#[derive(Debug, Clone)]
pub struct StrengthConfig {
    /// Weight given to the total transaction volume.
    pub transaction_volume_weight: f64,
    /// Weight given to the number of transactions.
    pub transaction_count_weight: f64,
    /// Weight given to how long the relationship has existed.
    pub duration_weight: f64,
    /// Weight given to how recent the last transaction is.
    pub recency_weight: f64,
    /// Weight given to mutual connections.
    pub mutual_connections_weight: f64,
    /// Half-life, in days, used for the recency component.
    pub recency_half_life_days: u32,
}

impl Default for StrengthConfig {
    /// Default weights (0.30 / 0.25 / 0.20 / 0.15 / 0.10) with a 90-day recency half-life.
    fn default() -> Self {
        StrengthConfig {
            recency_half_life_days: 90,
            transaction_volume_weight: 0.30,
            transaction_count_weight: 0.25,
            duration_weight: 0.20,
            recency_weight: 0.15,
            mutual_connections_weight: 0.10,
        }
    }
}
107
108/// Summary of transaction history between two entities.
109#[derive(Debug, Clone)]
110pub struct TransactionSummary {
111    /// Total transaction volume
112    pub total_volume: Decimal,
113    /// Number of transactions
114    pub transaction_count: u32,
115    /// First transaction date
116    pub first_transaction_date: NaiveDate,
117    /// Last transaction date
118    pub last_transaction_date: NaiveDate,
119    /// Related entity IDs (for mutual connection calculation)
120    pub related_entities: HashSet<String>,
121}
122
123impl Default for TransactionSummary {
124    fn default() -> Self {
125        Self {
126            total_volume: Decimal::ZERO,
127            transaction_count: 0,
128            first_transaction_date: NaiveDate::from_ymd_opt(2020, 1, 1)
129                .expect("valid default date"),
130            last_transaction_date: NaiveDate::from_ymd_opt(2020, 1, 1).expect("valid default date"),
131            related_entities: HashSet::new(),
132        }
133    }
134}
135
136/// Goods receipt summary for cross-process linking.
137#[derive(Debug, Clone)]
138pub struct GoodsReceiptRef {
139    /// GR document ID
140    pub document_id: String,
141    /// Material ID
142    pub material_id: String,
143    /// Quantity received
144    pub quantity: Decimal,
145    /// Receipt date
146    pub receipt_date: NaiveDate,
147    /// Vendor ID
148    pub vendor_id: String,
149    /// Company code
150    pub company_code: String,
151}
152
153/// Delivery summary for cross-process linking.
154#[derive(Debug, Clone)]
155pub struct DeliveryRef {
156    /// Delivery document ID
157    pub document_id: String,
158    /// Material ID
159    pub material_id: String,
160    /// Quantity delivered
161    pub quantity: Decimal,
162    /// Delivery date
163    pub delivery_date: NaiveDate,
164    /// Customer ID
165    pub customer_id: String,
166    /// Company code
167    pub company_code: String,
168}
169
170/// Generator for entity relationship graphs.
171pub struct EntityGraphGenerator {
172    rng: ChaCha8Rng,
173    seed: u64,
174    config: EntityGraphConfig,
175    strength_calculator: RelationshipStrengthCalculator,
176}
177
178impl EntityGraphGenerator {
179    /// Create a new entity graph generator.
180    pub fn new(seed: u64) -> Self {
181        Self::with_config(seed, EntityGraphConfig::default())
182    }
183
184    /// Create a new entity graph generator with configuration.
185    pub fn with_config(seed: u64, config: EntityGraphConfig) -> Self {
186        let strength_calculator = RelationshipStrengthCalculator {
187            weights: datasynth_core::models::StrengthWeights {
188                transaction_volume_weight: config.strength_config.transaction_volume_weight,
189                transaction_count_weight: config.strength_config.transaction_count_weight,
190                duration_weight: config.strength_config.duration_weight,
191                recency_weight: config.strength_config.recency_weight,
192                mutual_connections_weight: config.strength_config.mutual_connections_weight,
193            },
194            recency_half_life_days: config.strength_config.recency_half_life_days,
195            ..Default::default()
196        };
197
198        Self {
199            rng: seeded_rng(seed, 0),
200            seed,
201            config,
202            strength_calculator,
203        }
204    }
205
206    /// Generate an entity graph from transaction data.
207    pub fn generate_entity_graph(
208        &mut self,
209        company_code: &str,
210        as_of_date: NaiveDate,
211        vendors: &[EntitySummary],
212        customers: &[EntitySummary],
213        transaction_summaries: &HashMap<(String, String), TransactionSummary>,
214    ) -> EntityGraph {
215        let mut graph = EntityGraph::new();
216        graph.metadata = GraphMetadata {
217            company_code: Some(company_code.to_string()),
218            created_date: Some(as_of_date),
219            total_transaction_volume: Decimal::ZERO,
220            date_range: None,
221        };
222
223        if !self.config.enabled {
224            return graph;
225        }
226
227        // Add company node
228        let company_id = GraphEntityId::new(GraphEntityType::Company, company_code);
229        graph.add_node(EntityNode::new(
230            company_id.clone(),
231            format!("Company {}", company_code),
232            as_of_date,
233        ));
234
235        // Add vendor nodes
236        for vendor in vendors {
237            let vendor_id = GraphEntityId::new(GraphEntityType::Vendor, &vendor.entity_id);
238            let node = EntityNode::new(vendor_id.clone(), &vendor.name, as_of_date)
239                .with_company(company_code);
240            graph.add_node(node);
241
242            // Add relationship: Company buys from Vendor
243            let edge = RelationshipEdge::new(
244                company_id.clone(),
245                vendor_id,
246                RelationshipType::BuysFrom,
247                vendor.first_activity_date,
248            );
249            graph.add_edge(edge);
250        }
251
252        // Add customer nodes
253        for customer in customers {
254            let customer_id = GraphEntityId::new(GraphEntityType::Customer, &customer.entity_id);
255            let node = EntityNode::new(customer_id.clone(), &customer.name, as_of_date)
256                .with_company(company_code);
257            graph.add_node(node);
258
259            // Add relationship: Company sells to Customer
260            let edge = RelationshipEdge::new(
261                company_id.clone(),
262                customer_id,
263                RelationshipType::SellsTo,
264                customer.first_activity_date,
265            );
266            graph.add_edge(edge);
267        }
268
269        // Add transactional relationships with strength
270        let total_connections = transaction_summaries.len().max(1);
271        for ((from_id, to_id), summary) in transaction_summaries {
272            let from_entity_id = self.infer_entity_id(from_id);
273            let to_entity_id = self.infer_entity_id(to_id);
274
275            // Calculate relationship strength
276            let days_since_last = (as_of_date - summary.last_transaction_date)
277                .num_days()
278                .max(0) as u32;
279            let relationship_days = (as_of_date - summary.first_transaction_date)
280                .num_days()
281                .max(1) as u32;
282
283            let components = self.strength_calculator.calculate(
284                summary.total_volume,
285                summary.transaction_count,
286                relationship_days,
287                days_since_last,
288                summary.related_entities.len(),
289                total_connections,
290            );
291
292            let rel_type = self.infer_relationship_type(&from_entity_id, &to_entity_id);
293
294            let edge = RelationshipEdge::new(
295                from_entity_id,
296                to_entity_id,
297                rel_type,
298                summary.first_transaction_date,
299            )
300            .with_strength_components(components);
301
302            graph.add_edge(edge);
303        }
304
305        // Calculate total transaction volume
306        graph.metadata.total_transaction_volume =
307            transaction_summaries.values().map(|s| s.total_volume).sum();
308
309        graph
310    }
311
312    /// Generate cross-process links between P2P and O2C.
313    pub fn generate_cross_process_links(
314        &mut self,
315        goods_receipts: &[GoodsReceiptRef],
316        deliveries: &[DeliveryRef],
317    ) -> Vec<CrossProcessLink> {
318        let mut links = Vec::new();
319
320        if !self.config.cross_process.enable_inventory_links {
321            return links;
322        }
323
324        // Group deliveries by material for matching
325        let deliveries_by_material: HashMap<String, Vec<&DeliveryRef>> =
326            deliveries.iter().fold(HashMap::new(), |mut acc, del| {
327                acc.entry(del.material_id.clone()).or_default().push(del);
328                acc
329            });
330
331        // Link GRs to Deliveries via shared material
332        for gr in goods_receipts {
333            if self.rng.random::<f64>() > self.config.cross_process.inventory_link_rate {
334                continue;
335            }
336
337            if let Some(matching_deliveries) = deliveries_by_material.get(&gr.material_id) {
338                // Find a delivery that could have used this inventory
339                // (delivery date after receipt date)
340                let valid_deliveries: Vec<_> = matching_deliveries
341                    .iter()
342                    .filter(|d| {
343                        d.delivery_date >= gr.receipt_date && d.company_code == gr.company_code
344                    })
345                    .collect();
346
347                if !valid_deliveries.is_empty() {
348                    let delivery =
349                        valid_deliveries[self.rng.random_range(0..valid_deliveries.len())];
350
351                    // Calculate linked quantity (minimum of available)
352                    let linked_qty = gr.quantity.min(delivery.quantity);
353
354                    links.push(CrossProcessLink::new(
355                        &gr.material_id,
356                        "P2P",
357                        &gr.document_id,
358                        "O2C",
359                        &delivery.document_id,
360                        CrossProcessLinkType::InventoryMovement,
361                        linked_qty,
362                        delivery.delivery_date,
363                    ));
364                }
365            }
366        }
367
368        links
369    }
370
371    /// Generate graph from vendor network.
372    pub fn generate_from_vendor_network(
373        &mut self,
374        vendor_network: &VendorNetwork,
375        as_of_date: NaiveDate,
376    ) -> EntityGraph {
377        let mut graph = EntityGraph::new();
378        graph.metadata = GraphMetadata {
379            company_code: Some(vendor_network.company_code.clone()),
380            created_date: Some(as_of_date),
381            total_transaction_volume: vendor_network.statistics.total_annual_spend,
382            date_range: None,
383        };
384
385        if !self.config.enabled {
386            return graph;
387        }
388
389        // Add company node
390        let company_id = GraphEntityId::new(GraphEntityType::Company, &vendor_network.company_code);
391        graph.add_node(EntityNode::new(
392            company_id.clone(),
393            format!("Company {}", vendor_network.company_code),
394            as_of_date,
395        ));
396
397        // Add all vendors from the network
398        for (vendor_id, relationship) in &vendor_network.relationships {
399            let entity_id = GraphEntityId::new(GraphEntityType::Vendor, vendor_id);
400            let node = EntityNode::new(entity_id.clone(), vendor_id, as_of_date)
401                .with_company(&vendor_network.company_code)
402                .with_attribute("tier", format!("{:?}", relationship.tier))
403                .with_attribute("cluster", format!("{:?}", relationship.cluster))
404                .with_attribute(
405                    "strategic_level",
406                    format!("{:?}", relationship.strategic_importance),
407                );
408            graph.add_node(node);
409
410            // Add relationship to company (for Tier 1) or parent vendor (for Tier 2/3)
411            if let Some(parent_id) = &relationship.parent_vendor {
412                let parent_entity_id = GraphEntityId::new(GraphEntityType::Vendor, parent_id);
413                let edge = RelationshipEdge::new(
414                    entity_id.clone(),
415                    parent_entity_id,
416                    RelationshipType::SuppliesTo,
417                    relationship.start_date,
418                )
419                .with_strength(relationship.relationship_score());
420                graph.add_edge(edge);
421            } else {
422                // Tier 1 supplies directly to company
423                let edge = RelationshipEdge::new(
424                    entity_id,
425                    company_id.clone(),
426                    RelationshipType::SuppliesTo,
427                    relationship.start_date,
428                )
429                .with_strength(relationship.relationship_score());
430                graph.add_edge(edge);
431            }
432        }
433
434        graph
435    }
436
437    /// Infer entity ID from string (simple heuristic).
438    fn infer_entity_id(&self, id: &str) -> GraphEntityId {
439        if id.starts_with("V-") || id.starts_with("VN-") {
440            GraphEntityId::new(GraphEntityType::Vendor, id)
441        } else if id.starts_with("C-") || id.starts_with("CU-") {
442            GraphEntityId::new(GraphEntityType::Customer, id)
443        } else if id.starts_with("E-") || id.starts_with("EM-") {
444            GraphEntityId::new(GraphEntityType::Employee, id)
445        } else if id.starts_with("MAT-") || id.starts_with("M-") {
446            GraphEntityId::new(GraphEntityType::Material, id)
447        } else if id.starts_with("PO-") {
448            GraphEntityId::new(GraphEntityType::PurchaseOrder, id)
449        } else if id.starts_with("SO-") {
450            GraphEntityId::new(GraphEntityType::SalesOrder, id)
451        } else if id.starts_with("INV-") || id.starts_with("IV-") {
452            GraphEntityId::new(GraphEntityType::Invoice, id)
453        } else if id.starts_with("PAY-") || id.starts_with("PM-") {
454            GraphEntityId::new(GraphEntityType::Payment, id)
455        } else {
456            GraphEntityId::new(GraphEntityType::Company, id)
457        }
458    }
459
460    /// Infer relationship type between two entities.
461    fn infer_relationship_type(
462        &self,
463        from: &GraphEntityId,
464        to: &GraphEntityId,
465    ) -> RelationshipType {
466        match (&from.entity_type, &to.entity_type) {
467            (GraphEntityType::Company, GraphEntityType::Vendor) => RelationshipType::BuysFrom,
468            (GraphEntityType::Company, GraphEntityType::Customer) => RelationshipType::SellsTo,
469            (GraphEntityType::Vendor, GraphEntityType::Company) => RelationshipType::SuppliesTo,
470            (GraphEntityType::Customer, GraphEntityType::Company) => RelationshipType::SourcesFrom,
471            (GraphEntityType::PurchaseOrder, GraphEntityType::Invoice) => {
472                RelationshipType::References
473            }
474            (GraphEntityType::Invoice, GraphEntityType::Payment) => RelationshipType::FulfilledBy,
475            (GraphEntityType::Payment, GraphEntityType::Invoice) => RelationshipType::AppliesTo,
476            (GraphEntityType::Employee, GraphEntityType::Employee) => RelationshipType::ReportsTo,
477            (GraphEntityType::Employee, GraphEntityType::Department) => RelationshipType::WorksIn,
478            _ => RelationshipType::References,
479        }
480    }
481
482    /// Reset the generator.
483    pub fn reset(&mut self) {
484        self.rng = seeded_rng(self.seed, 0);
485    }
486}
487
488/// Summary of an entity for graph generation.
489#[derive(Debug, Clone)]
490pub struct EntitySummary {
491    /// Entity ID
492    pub entity_id: String,
493    /// Entity name
494    pub name: String,
495    /// First activity date
496    pub first_activity_date: NaiveDate,
497    /// Entity type (for categorization)
498    pub entity_type: GraphEntityType,
499    /// Additional attributes
500    pub attributes: HashMap<String, String>,
501}
502
503impl EntitySummary {
504    /// Create a new entity summary.
505    pub fn new(
506        entity_id: impl Into<String>,
507        name: impl Into<String>,
508        entity_type: GraphEntityType,
509        first_activity_date: NaiveDate,
510    ) -> Self {
511        Self {
512            entity_id: entity_id.into(),
513            name: name.into(),
514            first_activity_date,
515            entity_type,
516            attributes: HashMap::new(),
517        }
518    }
519}
520
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Shorthand for a calendar date that is known to be valid.
    fn date(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Default configuration with graph generation switched on.
    fn enabled_config() -> EntityGraphConfig {
        EntityGraphConfig {
            enabled: true,
            ..Default::default()
        }
    }

    /// Company, vendor and customer nodes are created with one edge per counterparty.
    #[test]
    fn test_entity_graph_generation() {
        let mut generator = EntityGraphGenerator::with_config(42, enabled_config());

        let vendors = vec![
            EntitySummary::new(
                "V-001",
                "Acme Supplies",
                GraphEntityType::Vendor,
                date(2023, 1, 1),
            ),
            EntitySummary::new(
                "V-002",
                "Global Parts",
                GraphEntityType::Vendor,
                date(2023, 3, 1),
            ),
        ];
        let customers = vec![EntitySummary::new(
            "C-001",
            "Contoso Corp",
            GraphEntityType::Customer,
            date(2023, 2, 1),
        )];
        let no_transactions = HashMap::new();

        let graph = generator.generate_entity_graph(
            "1000",
            date(2024, 1, 1),
            &vendors,
            &customers,
            &no_transactions,
        );

        // Company + 2 vendors + 1 customer.
        assert_eq!(graph.nodes.len(), 4);
        // Company buys from 2 vendors and sells to 1 customer.
        assert_eq!(graph.edges.len(), 3);
    }

    /// A goods receipt is linked to a later delivery of the same material and company.
    #[test]
    fn test_cross_process_link_generation() {
        let config = EntityGraphConfig {
            cross_process: CrossProcessConfig {
                enable_inventory_links: true,
                // A rate of 1.0 makes every goods receipt a linking candidate.
                inventory_link_rate: 1.0,
                ..Default::default()
            },
            ..enabled_config()
        };
        let mut generator = EntityGraphGenerator::with_config(42, config);

        let receipt = GoodsReceiptRef {
            document_id: "GR-001".to_string(),
            material_id: "MAT-100".to_string(),
            quantity: Decimal::from(100),
            receipt_date: date(2024, 1, 1),
            vendor_id: "V-001".to_string(),
            company_code: "1000".to_string(),
        };
        let delivery = DeliveryRef {
            document_id: "DEL-001".to_string(),
            material_id: "MAT-100".to_string(),
            quantity: Decimal::from(50),
            delivery_date: date(2024, 1, 15),
            customer_id: "C-001".to_string(),
            company_code: "1000".to_string(),
        };

        let links = generator.generate_cross_process_links(&[receipt], &[delivery]);

        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(link.material_id, "MAT-100");
        assert_eq!(link.source_document_id, "GR-001");
        assert_eq!(link.target_document_id, "DEL-001");
        assert_eq!(link.link_type, CrossProcessLinkType::InventoryMovement);
    }

    /// A disabled generator produces a graph without nodes.
    #[test]
    fn test_disabled_graph_generation() {
        let config = EntityGraphConfig {
            enabled: false,
            ..Default::default()
        };
        let mut generator = EntityGraphGenerator::with_config(42, config);

        let graph = generator.generate_entity_graph(
            "1000",
            date(2024, 1, 1),
            &[],
            &[],
            &HashMap::new(),
        );

        assert!(graph.nodes.is_empty());
    }

    /// ID prefixes map to the expected entity types.
    #[test]
    fn test_entity_id_inference() {
        let generator = EntityGraphGenerator::new(42);

        assert_eq!(
            generator.infer_entity_id("V-001").entity_type,
            GraphEntityType::Vendor
        );
        assert_eq!(
            generator.infer_entity_id("C-001").entity_type,
            GraphEntityType::Customer
        );
        assert_eq!(
            generator.infer_entity_id("PO-12345").entity_type,
            GraphEntityType::PurchaseOrder
        );
    }

    /// The direction of a company/vendor pair decides the relationship type.
    #[test]
    fn test_relationship_type_inference() {
        let generator = EntityGraphGenerator::new(42);

        let company_id = GraphEntityId::new(GraphEntityType::Company, "1000");
        let vendor_id = GraphEntityId::new(GraphEntityType::Vendor, "V-001");

        assert_eq!(
            generator.infer_relationship_type(&company_id, &vendor_id),
            RelationshipType::BuysFrom
        );
        assert_eq!(
            generator.infer_relationship_type(&vendor_id, &company_id),
            RelationshipType::SuppliesTo
        );
    }
}