Skip to main content

datasynth_generators/relationships/
entity_graph_generator.rs

//! Entity graph generator for interconnectivity modeling.
//!
//! Provides generation of comprehensive entity relationship graphs including:
//! - Transactional relationships from journal entries and document flows
//! - Cross-process linkages (P2P ↔ O2C via inventory)
//! - Relationship strength calculation
//! - Network analysis support
8
9use chrono::NaiveDate;
10use datasynth_core::models::{
11    CrossProcessLink, CrossProcessLinkType, EntityGraph, EntityNode, GraphEntityId,
12    GraphEntityType, GraphMetadata, RelationshipEdge, RelationshipStrengthCalculator,
13    RelationshipType, VendorNetwork,
14};
15use datasynth_core::utils::seeded_rng;
16use rand::prelude::*;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::{HashMap, HashSet};
20
21/// Configuration for entity graph generation.
22#[derive(Debug, Clone)]
23pub struct EntityGraphConfig {
24    /// Enable entity graph generation
25    pub enabled: bool,
26    /// Cross-process link configuration
27    pub cross_process: CrossProcessConfig,
28    /// Strength calculation settings
29    pub strength_config: StrengthConfig,
30    /// Include organizational relationships
31    pub include_organizational: bool,
32    /// Include document relationships
33    pub include_document: bool,
34}
35
36impl Default for EntityGraphConfig {
37    fn default() -> Self {
38        Self {
39            enabled: false,
40            cross_process: CrossProcessConfig::default(),
41            strength_config: StrengthConfig::default(),
42            include_organizational: true,
43            include_document: true,
44        }
45    }
46}
47
48/// Configuration for cross-process linkages.
49#[derive(Debug, Clone)]
50pub struct CrossProcessConfig {
51    /// Enable inventory links between P2P and O2C
52    pub enable_inventory_links: bool,
53    /// Enable return flow generation
54    pub enable_return_flows: bool,
55    /// Enable payment reconciliation links
56    pub enable_payment_links: bool,
57    /// Enable intercompany bilateral matching
58    pub enable_ic_bilateral: bool,
59    /// Percentage of GR/Deliveries to link via inventory (0.0 - 1.0)
60    pub inventory_link_rate: f64,
61    /// Percentage of payments to link for reconciliation (0.0 - 1.0)
62    pub payment_link_rate: f64,
63}
64
65impl Default for CrossProcessConfig {
66    fn default() -> Self {
67        Self {
68            enable_inventory_links: true,
69            enable_return_flows: true,
70            enable_payment_links: true,
71            enable_ic_bilateral: true,
72            inventory_link_rate: 0.30,
73            payment_link_rate: 0.80,
74        }
75    }
76}
77
78/// Configuration for relationship strength calculation.
79#[derive(Debug, Clone)]
80pub struct StrengthConfig {
81    /// Transaction volume weight
82    pub transaction_volume_weight: f64,
83    /// Transaction count weight
84    pub transaction_count_weight: f64,
85    /// Duration weight
86    pub duration_weight: f64,
87    /// Recency weight
88    pub recency_weight: f64,
89    /// Mutual connections weight
90    pub mutual_connections_weight: f64,
91    /// Recency half-life in days
92    pub recency_half_life_days: u32,
93}
94
95impl Default for StrengthConfig {
96    fn default() -> Self {
97        Self {
98            transaction_volume_weight: 0.30,
99            transaction_count_weight: 0.25,
100            duration_weight: 0.20,
101            recency_weight: 0.15,
102            mutual_connections_weight: 0.10,
103            recency_half_life_days: 90,
104        }
105    }
106}
107
108/// Summary of transaction history between two entities.
109#[derive(Debug, Clone)]
110pub struct TransactionSummary {
111    /// Total transaction volume
112    pub total_volume: Decimal,
113    /// Number of transactions
114    pub transaction_count: u32,
115    /// First transaction date
116    pub first_transaction_date: NaiveDate,
117    /// Last transaction date
118    pub last_transaction_date: NaiveDate,
119    /// Related entity IDs (for mutual connection calculation)
120    pub related_entities: HashSet<String>,
121}
122
123impl Default for TransactionSummary {
124    fn default() -> Self {
125        Self {
126            total_volume: Decimal::ZERO,
127            transaction_count: 0,
128            first_transaction_date: NaiveDate::from_ymd_opt(2020, 1, 1)
129                .expect("valid default date"),
130            last_transaction_date: NaiveDate::from_ymd_opt(2020, 1, 1).expect("valid default date"),
131            related_entities: HashSet::new(),
132        }
133    }
134}
135
136/// Goods receipt summary for cross-process linking.
137#[derive(Debug, Clone)]
138pub struct GoodsReceiptRef {
139    /// GR document ID
140    pub document_id: String,
141    /// Material ID
142    pub material_id: String,
143    /// Quantity received
144    pub quantity: Decimal,
145    /// Receipt date
146    pub receipt_date: NaiveDate,
147    /// Vendor ID
148    pub vendor_id: String,
149    /// Company code
150    pub company_code: String,
151}
152
153/// Delivery summary for cross-process linking.
154#[derive(Debug, Clone)]
155pub struct DeliveryRef {
156    /// Delivery document ID
157    pub document_id: String,
158    /// Material ID
159    pub material_id: String,
160    /// Quantity delivered
161    pub quantity: Decimal,
162    /// Delivery date
163    pub delivery_date: NaiveDate,
164    /// Customer ID
165    pub customer_id: String,
166    /// Company code
167    pub company_code: String,
168}
169
170/// Generator for entity relationship graphs.
171pub struct EntityGraphGenerator {
172    rng: ChaCha8Rng,
173    seed: u64,
174    config: EntityGraphConfig,
175    strength_calculator: RelationshipStrengthCalculator,
176}
177
178impl EntityGraphGenerator {
179    /// Create a new entity graph generator.
180    pub fn new(seed: u64) -> Self {
181        Self::with_config(seed, EntityGraphConfig::default())
182    }
183
184    /// Create a new entity graph generator with configuration.
185    pub fn with_config(seed: u64, config: EntityGraphConfig) -> Self {
186        let strength_calculator = RelationshipStrengthCalculator {
187            weights: datasynth_core::models::StrengthWeights {
188                transaction_volume_weight: config.strength_config.transaction_volume_weight,
189                transaction_count_weight: config.strength_config.transaction_count_weight,
190                duration_weight: config.strength_config.duration_weight,
191                recency_weight: config.strength_config.recency_weight,
192                mutual_connections_weight: config.strength_config.mutual_connections_weight,
193            },
194            recency_half_life_days: config.strength_config.recency_half_life_days,
195            ..Default::default()
196        };
197
198        Self {
199            rng: seeded_rng(seed, 0),
200            seed,
201            config,
202            strength_calculator,
203        }
204    }
205
206    /// Generate an entity graph from transaction data.
207    pub fn generate_entity_graph(
208        &mut self,
209        company_code: &str,
210        as_of_date: NaiveDate,
211        vendors: &[EntitySummary],
212        customers: &[EntitySummary],
213        transaction_summaries: &HashMap<(String, String), TransactionSummary>,
214    ) -> EntityGraph {
215        let mut graph = EntityGraph::new();
216        graph.metadata = GraphMetadata {
217            company_code: Some(company_code.to_string()),
218            created_date: Some(as_of_date),
219            total_transaction_volume: Decimal::ZERO,
220            date_range: None,
221        };
222
223        if !self.config.enabled {
224            return graph;
225        }
226
227        // Add company node
228        let company_id = GraphEntityId::new(GraphEntityType::Company, company_code);
229        graph.add_node(EntityNode::new(
230            company_id.clone(),
231            format!("Company {company_code}"),
232            as_of_date,
233        ));
234
235        // Add vendor nodes (edges added below after transaction summary check)
236        for vendor in vendors {
237            let vendor_id = GraphEntityId::new(GraphEntityType::Vendor, &vendor.entity_id);
238            let node = EntityNode::new(vendor_id.clone(), &vendor.name, as_of_date)
239                .with_company(company_code);
240            graph.add_node(node);
241
242            // Only add a default-strength edge if no transaction summary will
243            // supply a computed-strength edge for this vendor.
244            let has_txn = transaction_summaries
245                .keys()
246                .any(|(_, to)| to == &vendor.entity_id);
247            if !has_txn {
248                let edge = RelationshipEdge::new(
249                    company_id.clone(),
250                    vendor_id,
251                    RelationshipType::BuysFrom,
252                    vendor.first_activity_date,
253                );
254                graph.add_edge(edge);
255            }
256        }
257
258        // Add customer nodes (edges added below after transaction summary check)
259        for customer in customers {
260            let customer_id = GraphEntityId::new(GraphEntityType::Customer, &customer.entity_id);
261            let node = EntityNode::new(customer_id.clone(), &customer.name, as_of_date)
262                .with_company(company_code);
263            graph.add_node(node);
264
265            // Only add a default-strength edge if no transaction summary will
266            // supply a computed-strength edge for this customer.
267            let has_txn = transaction_summaries
268                .keys()
269                .any(|(_, to)| to == &customer.entity_id);
270            if !has_txn {
271                let edge = RelationshipEdge::new(
272                    company_id.clone(),
273                    customer_id,
274                    RelationshipType::SellsTo,
275                    customer.first_activity_date,
276                );
277                graph.add_edge(edge);
278            }
279        }
280
281        // Add transactional relationships with strength
282        let total_connections = transaction_summaries.len().max(1);
283        for ((from_id, to_id), summary) in transaction_summaries {
284            let from_entity_id = self.infer_entity_id(from_id);
285            let to_entity_id = self.infer_entity_id(to_id);
286
287            // Calculate relationship strength
288            let days_since_last = (as_of_date - summary.last_transaction_date)
289                .num_days()
290                .max(0) as u32;
291            let relationship_days = (as_of_date - summary.first_transaction_date)
292                .num_days()
293                .max(1) as u32;
294
295            let components = self.strength_calculator.calculate(
296                summary.total_volume,
297                summary.transaction_count,
298                relationship_days,
299                days_since_last,
300                summary.related_entities.len(),
301                total_connections,
302            );
303
304            let rel_type = self.infer_relationship_type(&from_entity_id, &to_entity_id);
305
306            let edge = RelationshipEdge::new(
307                from_entity_id,
308                to_entity_id,
309                rel_type,
310                summary.first_transaction_date,
311            )
312            .with_strength_components(components);
313
314            graph.add_edge(edge);
315        }
316
317        // Calculate total transaction volume
318        graph.metadata.total_transaction_volume =
319            transaction_summaries.values().map(|s| s.total_volume).sum();
320
321        graph
322    }
323
324    /// Generate cross-process links between P2P and O2C.
325    pub fn generate_cross_process_links(
326        &mut self,
327        goods_receipts: &[GoodsReceiptRef],
328        deliveries: &[DeliveryRef],
329    ) -> Vec<CrossProcessLink> {
330        let mut links = Vec::new();
331
332        if !self.config.cross_process.enable_inventory_links {
333            return links;
334        }
335
336        // Group deliveries by material for matching
337        let deliveries_by_material: HashMap<String, Vec<&DeliveryRef>> =
338            deliveries.iter().fold(HashMap::new(), |mut acc, del| {
339                acc.entry(del.material_id.clone()).or_default().push(del);
340                acc
341            });
342
343        // Link GRs to Deliveries via shared material
344        for gr in goods_receipts {
345            if self.rng.random::<f64>() > self.config.cross_process.inventory_link_rate {
346                continue;
347            }
348
349            if let Some(matching_deliveries) = deliveries_by_material.get(&gr.material_id) {
350                // Find a delivery in the same company that shares this material.
351                // P2P and O2C chains are generated independently, so we match
352                // on material + company without requiring a specific date order.
353                let valid_deliveries: Vec<_> = matching_deliveries
354                    .iter()
355                    .filter(|d| d.company_code == gr.company_code)
356                    .collect();
357
358                if !valid_deliveries.is_empty() {
359                    let delivery =
360                        valid_deliveries[self.rng.random_range(0..valid_deliveries.len())];
361
362                    // Calculate linked quantity (minimum of available)
363                    let linked_qty = gr.quantity.min(delivery.quantity);
364
365                    let link_date = gr.receipt_date.max(delivery.delivery_date);
366                    links.push(CrossProcessLink::new(
367                        &gr.material_id,
368                        "P2P",
369                        &gr.document_id,
370                        "O2C",
371                        &delivery.document_id,
372                        CrossProcessLinkType::InventoryMovement,
373                        linked_qty,
374                        link_date,
375                    ));
376                }
377            }
378        }
379
380        links
381    }
382
383    /// Generate graph from vendor network.
384    pub fn generate_from_vendor_network(
385        &mut self,
386        vendor_network: &VendorNetwork,
387        as_of_date: NaiveDate,
388    ) -> EntityGraph {
389        let mut graph = EntityGraph::new();
390        graph.metadata = GraphMetadata {
391            company_code: Some(vendor_network.company_code.clone()),
392            created_date: Some(as_of_date),
393            total_transaction_volume: vendor_network.statistics.total_annual_spend,
394            date_range: None,
395        };
396
397        if !self.config.enabled {
398            return graph;
399        }
400
401        // Add company node
402        let company_id = GraphEntityId::new(GraphEntityType::Company, &vendor_network.company_code);
403        graph.add_node(EntityNode::new(
404            company_id.clone(),
405            format!("Company {}", vendor_network.company_code),
406            as_of_date,
407        ));
408
409        // Add all vendors from the network
410        for (vendor_id, relationship) in &vendor_network.relationships {
411            let entity_id = GraphEntityId::new(GraphEntityType::Vendor, vendor_id);
412            let node = EntityNode::new(entity_id.clone(), vendor_id, as_of_date)
413                .with_company(&vendor_network.company_code)
414                .with_attribute("tier", format!("{:?}", relationship.tier))
415                .with_attribute("cluster", format!("{:?}", relationship.cluster))
416                .with_attribute(
417                    "strategic_level",
418                    format!("{:?}", relationship.strategic_importance),
419                );
420            graph.add_node(node);
421
422            // Add relationship to company (for Tier 1) or parent vendor (for Tier 2/3)
423            if let Some(parent_id) = &relationship.parent_vendor {
424                let parent_entity_id = GraphEntityId::new(GraphEntityType::Vendor, parent_id);
425                let edge = RelationshipEdge::new(
426                    entity_id.clone(),
427                    parent_entity_id,
428                    RelationshipType::SuppliesTo,
429                    relationship.start_date,
430                )
431                .with_strength(relationship.relationship_score());
432                graph.add_edge(edge);
433            } else {
434                // Tier 1 supplies directly to company
435                let edge = RelationshipEdge::new(
436                    entity_id,
437                    company_id.clone(),
438                    RelationshipType::SuppliesTo,
439                    relationship.start_date,
440                )
441                .with_strength(relationship.relationship_score());
442                graph.add_edge(edge);
443            }
444        }
445
446        graph
447    }
448
449    /// Infer entity ID from string (simple heuristic).
450    fn infer_entity_id(&self, id: &str) -> GraphEntityId {
451        if id.starts_with("V-") || id.starts_with("VN-") {
452            GraphEntityId::new(GraphEntityType::Vendor, id)
453        } else if id.starts_with("C-") || id.starts_with("CU-") {
454            GraphEntityId::new(GraphEntityType::Customer, id)
455        } else if id.starts_with("E-") || id.starts_with("EM-") {
456            GraphEntityId::new(GraphEntityType::Employee, id)
457        } else if id.starts_with("MAT-") || id.starts_with("M-") {
458            GraphEntityId::new(GraphEntityType::Material, id)
459        } else if id.starts_with("PO-") {
460            GraphEntityId::new(GraphEntityType::PurchaseOrder, id)
461        } else if id.starts_with("SO-") {
462            GraphEntityId::new(GraphEntityType::SalesOrder, id)
463        } else if id.starts_with("INV-") || id.starts_with("IV-") {
464            GraphEntityId::new(GraphEntityType::Invoice, id)
465        } else if id.starts_with("PAY-") || id.starts_with("PM-") {
466            GraphEntityId::new(GraphEntityType::Payment, id)
467        } else {
468            GraphEntityId::new(GraphEntityType::Company, id)
469        }
470    }
471
472    /// Infer relationship type between two entities.
473    fn infer_relationship_type(
474        &self,
475        from: &GraphEntityId,
476        to: &GraphEntityId,
477    ) -> RelationshipType {
478        match (&from.entity_type, &to.entity_type) {
479            (GraphEntityType::Company, GraphEntityType::Vendor) => RelationshipType::BuysFrom,
480            (GraphEntityType::Company, GraphEntityType::Customer) => RelationshipType::SellsTo,
481            (GraphEntityType::Vendor, GraphEntityType::Company) => RelationshipType::SuppliesTo,
482            (GraphEntityType::Customer, GraphEntityType::Company) => RelationshipType::SourcesFrom,
483            (GraphEntityType::PurchaseOrder, GraphEntityType::Invoice) => {
484                RelationshipType::References
485            }
486            (GraphEntityType::Invoice, GraphEntityType::Payment) => RelationshipType::FulfilledBy,
487            (GraphEntityType::Payment, GraphEntityType::Invoice) => RelationshipType::AppliesTo,
488            (GraphEntityType::Employee, GraphEntityType::Employee) => RelationshipType::ReportsTo,
489            (GraphEntityType::Employee, GraphEntityType::Department) => RelationshipType::WorksIn,
490            _ => RelationshipType::References,
491        }
492    }
493
494    /// Reset the generator.
495    pub fn reset(&mut self) {
496        self.rng = seeded_rng(self.seed, 0);
497    }
498}
499
500/// Summary of an entity for graph generation.
501#[derive(Debug, Clone)]
502pub struct EntitySummary {
503    /// Entity ID
504    pub entity_id: String,
505    /// Entity name
506    pub name: String,
507    /// First activity date
508    pub first_activity_date: NaiveDate,
509    /// Entity type (for categorization)
510    pub entity_type: GraphEntityType,
511    /// Additional attributes
512    pub attributes: HashMap<String, String>,
513}
514
515impl EntitySummary {
516    /// Create a new entity summary.
517    pub fn new(
518        entity_id: impl Into<String>,
519        name: impl Into<String>,
520        entity_type: GraphEntityType,
521        first_activity_date: NaiveDate,
522    ) -> Self {
523        Self {
524            entity_id: entity_id.into(),
525            name: name.into(),
526            first_activity_date,
527            entity_type,
528            attributes: HashMap::new(),
529        }
530    }
531}
532
533#[cfg(test)]
534#[allow(clippy::unwrap_used)]
535mod tests {
536    use super::*;
537
538    #[test]
539    fn test_entity_graph_generation() {
540        let config = EntityGraphConfig {
541            enabled: true,
542            ..Default::default()
543        };
544
545        let mut gen = EntityGraphGenerator::with_config(42, config);
546
547        let vendors = vec![
548            EntitySummary::new(
549                "V-001",
550                "Acme Supplies",
551                GraphEntityType::Vendor,
552                NaiveDate::from_ymd_opt(2023, 1, 1).unwrap(),
553            ),
554            EntitySummary::new(
555                "V-002",
556                "Global Parts",
557                GraphEntityType::Vendor,
558                NaiveDate::from_ymd_opt(2023, 3, 1).unwrap(),
559            ),
560        ];
561
562        let customers = vec![EntitySummary::new(
563            "C-001",
564            "Contoso Corp",
565            GraphEntityType::Customer,
566            NaiveDate::from_ymd_opt(2023, 2, 1).unwrap(),
567        )];
568
569        let graph = gen.generate_entity_graph(
570            "1000",
571            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
572            &vendors,
573            &customers,
574            &HashMap::new(),
575        );
576
577        // Should have company + 2 vendors + 1 customer = 4 nodes
578        assert_eq!(graph.nodes.len(), 4);
579        // Should have 3 edges (company buys from 2 vendors, sells to 1 customer)
580        assert_eq!(graph.edges.len(), 3);
581    }
582
583    #[test]
584    fn test_cross_process_link_generation() {
585        let config = EntityGraphConfig {
586            enabled: true,
587            cross_process: CrossProcessConfig {
588                enable_inventory_links: true,
589                inventory_link_rate: 1.0, // Always link for testing
590                ..Default::default()
591            },
592            ..Default::default()
593        };
594
595        let mut gen = EntityGraphGenerator::with_config(42, config);
596
597        let goods_receipts = vec![GoodsReceiptRef {
598            document_id: "GR-001".to_string(),
599            material_id: "MAT-100".to_string(),
600            quantity: Decimal::from(100),
601            receipt_date: NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
602            vendor_id: "V-001".to_string(),
603            company_code: "1000".to_string(),
604        }];
605
606        let deliveries = vec![DeliveryRef {
607            document_id: "DEL-001".to_string(),
608            material_id: "MAT-100".to_string(),
609            quantity: Decimal::from(50),
610            delivery_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
611            customer_id: "C-001".to_string(),
612            company_code: "1000".to_string(),
613        }];
614
615        let links = gen.generate_cross_process_links(&goods_receipts, &deliveries);
616
617        assert_eq!(links.len(), 1);
618        assert_eq!(links[0].material_id, "MAT-100");
619        assert_eq!(links[0].source_document_id, "GR-001");
620        assert_eq!(links[0].target_document_id, "DEL-001");
621        assert_eq!(links[0].link_type, CrossProcessLinkType::InventoryMovement);
622    }
623
624    #[test]
625    fn test_disabled_graph_generation() {
626        let config = EntityGraphConfig {
627            enabled: false,
628            ..Default::default()
629        };
630
631        let mut gen = EntityGraphGenerator::with_config(42, config);
632
633        let graph = gen.generate_entity_graph(
634            "1000",
635            NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
636            &[],
637            &[],
638            &HashMap::new(),
639        );
640
641        assert!(graph.nodes.is_empty());
642    }
643
644    #[test]
645    fn test_entity_id_inference() {
646        let gen = EntityGraphGenerator::new(42);
647
648        let vendor_id = gen.infer_entity_id("V-001");
649        assert_eq!(vendor_id.entity_type, GraphEntityType::Vendor);
650
651        let customer_id = gen.infer_entity_id("C-001");
652        assert_eq!(customer_id.entity_type, GraphEntityType::Customer);
653
654        let po_id = gen.infer_entity_id("PO-12345");
655        assert_eq!(po_id.entity_type, GraphEntityType::PurchaseOrder);
656    }
657
658    #[test]
659    fn test_relationship_type_inference() {
660        let gen = EntityGraphGenerator::new(42);
661
662        let company_id = GraphEntityId::new(GraphEntityType::Company, "1000");
663        let vendor_id = GraphEntityId::new(GraphEntityType::Vendor, "V-001");
664
665        let rel_type = gen.infer_relationship_type(&company_id, &vendor_id);
666        assert_eq!(rel_type, RelationshipType::BuysFrom);
667
668        let rel_type = gen.infer_relationship_type(&vendor_id, &company_id);
669        assert_eq!(rel_type, RelationshipType::SuppliesTo);
670    }
671}