Skip to main content

datasynth_eval/coherence/
network.rs

1//! Network evaluation module for interconnectivity analysis.
2//!
3//! Provides evaluation of entity relationship graphs and network metrics including:
4//! - Graph connectivity analysis
5//! - Degree distribution (power law fit)
6//! - Clustering coefficient
7//! - Vendor/customer concentration
8//! - Relationship strength validation
9
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12
13/// Results of network evaluation.
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct NetworkEvaluation {
16    /// Number of nodes in the graph.
17    pub node_count: usize,
18    /// Number of edges in the graph.
19    pub edge_count: usize,
20    /// Largest connected component size as fraction of total nodes.
21    pub connectivity_ratio: f64,
22    /// Power law exponent (alpha) for degree distribution.
23    pub power_law_alpha: Option<f64>,
24    /// Global clustering coefficient.
25    pub clustering_coefficient: f64,
26    /// Vendor concentration metrics.
27    pub vendor_concentration: ConcentrationMetrics,
28    /// Customer concentration metrics.
29    pub customer_concentration: ConcentrationMetrics,
30    /// Average relationship strength.
31    pub avg_relationship_strength: f64,
32    /// Relationship strength distribution statistics.
33    pub strength_stats: StrengthStats,
34    /// Cross-process link coverage (P2P↔O2C via inventory).
35    pub cross_process_link_rate: f64,
36    /// Whether the network passes all thresholds.
37    pub passes: bool,
38    /// List of threshold violations.
39    pub issues: Vec<String>,
40}
41
42impl Default for NetworkEvaluation {
43    fn default() -> Self {
44        Self {
45            node_count: 0,
46            edge_count: 0,
47            connectivity_ratio: 0.0,
48            power_law_alpha: None,
49            clustering_coefficient: 0.0,
50            vendor_concentration: ConcentrationMetrics::default(),
51            customer_concentration: ConcentrationMetrics::default(),
52            avg_relationship_strength: 0.0,
53            strength_stats: StrengthStats::default(),
54            cross_process_link_rate: 0.0,
55            passes: true,
56            issues: Vec::new(),
57        }
58    }
59}
60
61/// Concentration metrics for vendor or customer analysis.
62#[derive(Debug, Clone, Default, Serialize, Deserialize)]
63pub struct ConcentrationMetrics {
64    /// Total count of entities.
65    pub total_count: usize,
66    /// Top vendor/customer share of volume.
67    pub top_1_share: f64,
68    /// Top 5 vendors/customers share of volume.
69    pub top_5_share: f64,
70    /// Herfindahl-Hirschman Index (HHI).
71    pub hhi: f64,
72    /// Whether concentration violates limits.
73    pub exceeds_limits: bool,
74}
75
76/// Relationship strength distribution statistics.
77#[derive(Debug, Clone, Default, Serialize, Deserialize)]
78pub struct StrengthStats {
79    /// Minimum strength value.
80    pub min: f64,
81    /// Maximum strength value.
82    pub max: f64,
83    /// Mean strength value.
84    pub mean: f64,
85    /// Standard deviation of strength.
86    pub std_dev: f64,
87    /// Count of strong relationships (>= 0.7).
88    pub strong_count: usize,
89    /// Count of moderate relationships (0.4-0.7).
90    pub moderate_count: usize,
91    /// Count of weak relationships (0.1-0.4).
92    pub weak_count: usize,
93    /// Count of dormant relationships (< 0.1).
94    pub dormant_count: usize,
95}
96
/// Pass/fail limits applied to a network evaluation.
#[derive(Clone, Debug)]
pub struct NetworkThresholds {
    /// Lowest acceptable connectivity ratio (share of nodes in the largest
    /// component).
    pub connectivity_min: f64,
    /// Lower bound of the accepted power-law alpha range.
    pub power_law_alpha_min: f64,
    /// Upper bound of the accepted power-law alpha range.
    pub power_law_alpha_max: f64,
    /// Lower bound of the accepted clustering-coefficient range.
    pub clustering_min: f64,
    /// Upper bound of the accepted clustering-coefficient range.
    pub clustering_max: f64,
    /// Largest volume share a single vendor may hold.
    pub max_single_vendor_concentration: f64,
    /// Largest combined volume share the top five vendors may hold.
    pub max_top5_vendor_concentration: f64,
    /// Lowest acceptable cross-process link rate.
    pub min_cross_process_link_rate: f64,
}
115
116impl Default for NetworkThresholds {
117    fn default() -> Self {
118        Self {
119            connectivity_min: 0.95,
120            power_law_alpha_min: 2.0,
121            power_law_alpha_max: 3.0,
122            clustering_min: 0.10,
123            clustering_max: 0.50,
124            max_single_vendor_concentration: 0.15,
125            max_top5_vendor_concentration: 0.45,
126            min_cross_process_link_rate: 0.30,
127        }
128    }
129}
130
/// One relationship between two nodes, as fed into the network analysis.
#[derive(Clone, Debug)]
pub struct NetworkEdge {
    /// ID of the node the edge starts at.
    pub from_id: String,
    /// ID of the node the edge ends at.
    pub to_id: String,
    /// Strength of the relationship (0.0 to 1.0).
    pub strength: f64,
    /// Transaction volume carried by this edge.
    pub volume: f64,
}
143
/// One entity in the network, tagged with its type.
#[derive(Clone, Debug)]
pub struct NetworkNode {
    /// Identifier that edges use to refer to this node.
    pub id: String,
    /// Kind of entity (vendor, customer, company, etc.).
    pub node_type: String,
    /// Transaction volume associated with the entity.
    pub volume: f64,
}
154
155/// Network evaluator for graph analysis.
156pub struct NetworkEvaluator {
157    thresholds: NetworkThresholds,
158}
159
160impl NetworkEvaluator {
161    /// Create a new network evaluator with default thresholds.
162    pub fn new() -> Self {
163        Self {
164            thresholds: NetworkThresholds::default(),
165        }
166    }
167
168    /// Create a network evaluator with custom thresholds.
169    pub fn with_thresholds(thresholds: NetworkThresholds) -> Self {
170        Self { thresholds }
171    }
172
173    /// Evaluate a network graph.
174    pub fn evaluate(
175        &self,
176        nodes: &[NetworkNode],
177        edges: &[NetworkEdge],
178        cross_process_links: usize,
179        potential_links: usize,
180    ) -> NetworkEvaluation {
181        let mut eval = NetworkEvaluation {
182            node_count: nodes.len(),
183            edge_count: edges.len(),
184            ..Default::default()
185        };
186
187        if nodes.is_empty() {
188            eval.issues.push("Empty graph".to_string());
189            eval.passes = false;
190            return eval;
191        }
192
193        // Calculate connectivity
194        eval.connectivity_ratio = self.calculate_connectivity(nodes, edges);
195
196        // Calculate degree distribution and power law fit
197        eval.power_law_alpha = self.estimate_power_law_alpha(nodes, edges);
198
199        // Calculate clustering coefficient
200        eval.clustering_coefficient = self.calculate_clustering_coefficient(nodes, edges);
201
202        // Calculate concentration metrics
203        eval.vendor_concentration = self.calculate_concentration(nodes, "vendor");
204        eval.customer_concentration = self.calculate_concentration(nodes, "customer");
205
206        // Calculate relationship strength statistics
207        eval.strength_stats = self.calculate_strength_stats(edges);
208        eval.avg_relationship_strength = eval.strength_stats.mean;
209
210        // Calculate cross-process link rate
211        eval.cross_process_link_rate = if potential_links > 0 {
212            cross_process_links as f64 / potential_links as f64
213        } else {
214            0.0
215        };
216
217        // Check thresholds
218        self.check_thresholds(&mut eval);
219
220        eval
221    }
222
223    /// Calculate graph connectivity (largest component ratio).
224    fn calculate_connectivity(&self, nodes: &[NetworkNode], edges: &[NetworkEdge]) -> f64 {
225        if nodes.is_empty() {
226            return 0.0;
227        }
228
229        // Build adjacency list
230        let mut adjacency: HashMap<&str, HashSet<&str>> = HashMap::new();
231        for node in nodes {
232            adjacency.insert(&node.id, HashSet::new());
233        }
234        for edge in edges {
235            if let Some(neighbors) = adjacency.get_mut(edge.from_id.as_str()) {
236                neighbors.insert(&edge.to_id);
237            }
238            if let Some(neighbors) = adjacency.get_mut(edge.to_id.as_str()) {
239                neighbors.insert(&edge.from_id);
240            }
241        }
242
243        // Find connected components using BFS
244        let mut visited: HashSet<&str> = HashSet::new();
245        let mut largest_component = 0usize;
246
247        for node in nodes {
248            if visited.contains(node.id.as_str()) {
249                continue;
250            }
251
252            let mut component_size = 0;
253            let mut queue = vec![node.id.as_str()];
254
255            while let Some(current) = queue.pop() {
256                if visited.contains(current) {
257                    continue;
258                }
259                visited.insert(current);
260                component_size += 1;
261
262                if let Some(neighbors) = adjacency.get(current) {
263                    for neighbor in neighbors {
264                        if !visited.contains(*neighbor) {
265                            queue.push(neighbor);
266                        }
267                    }
268                }
269            }
270
271            largest_component = largest_component.max(component_size);
272        }
273
274        largest_component as f64 / nodes.len() as f64
275    }
276
277    /// Estimate power law exponent for degree distribution.
278    fn estimate_power_law_alpha(
279        &self,
280        nodes: &[NetworkNode],
281        edges: &[NetworkEdge],
282    ) -> Option<f64> {
283        // Calculate degree for each node
284        let mut degrees: HashMap<&str, usize> = HashMap::new();
285        for node in nodes {
286            degrees.insert(&node.id, 0);
287        }
288        for edge in edges {
289            *degrees.entry(&edge.from_id).or_insert(0) += 1;
290            *degrees.entry(&edge.to_id).or_insert(0) += 1;
291        }
292
293        let degree_values: Vec<f64> = degrees
294            .values()
295            .filter(|&&d| d > 0)
296            .map(|&d| d as f64)
297            .collect();
298
299        if degree_values.len() < 10 {
300            return None;
301        }
302
303        // Simple MLE estimation of power law alpha
304        // alpha = 1 + n / sum(ln(x_i / x_min))
305        let x_min = degree_values.iter().cloned().fold(f64::INFINITY, f64::min);
306        if x_min <= 0.0 {
307            return None;
308        }
309
310        let sum_log: f64 = degree_values.iter().map(|x| (x / x_min).ln()).sum();
311
312        if sum_log <= 0.0 {
313            return None;
314        }
315
316        let alpha = 1.0 + degree_values.len() as f64 / sum_log;
317        Some(alpha)
318    }
319
320    /// Calculate global clustering coefficient.
321    fn calculate_clustering_coefficient(
322        &self,
323        nodes: &[NetworkNode],
324        edges: &[NetworkEdge],
325    ) -> f64 {
326        if nodes.len() < 3 {
327            return 0.0;
328        }
329
330        // Build adjacency set
331        let mut neighbors: HashMap<&str, HashSet<&str>> = HashMap::new();
332        for node in nodes {
333            neighbors.insert(&node.id, HashSet::new());
334        }
335        for edge in edges {
336            if let Some(set) = neighbors.get_mut(edge.from_id.as_str()) {
337                set.insert(&edge.to_id);
338            }
339            if let Some(set) = neighbors.get_mut(edge.to_id.as_str()) {
340                set.insert(&edge.from_id);
341            }
342        }
343
344        // Calculate local clustering for each node
345        let mut total_clustering = 0.0;
346        let mut valid_nodes = 0;
347
348        for node in nodes {
349            let node_neighbors = match neighbors.get(node.id.as_str()) {
350                Some(n) => n,
351                None => continue,
352            };
353
354            let k = node_neighbors.len();
355            if k < 2 {
356                continue;
357            }
358
359            // Count edges between neighbors
360            let mut neighbor_edges = 0;
361            let neighbor_list: Vec<_> = node_neighbors.iter().collect();
362            for i in 0..neighbor_list.len() {
363                for j in (i + 1)..neighbor_list.len() {
364                    if let Some(n_neighbors) = neighbors.get(*neighbor_list[i]) {
365                        if n_neighbors.contains(*neighbor_list[j]) {
366                            neighbor_edges += 1;
367                        }
368                    }
369                }
370            }
371
372            let max_edges = k * (k - 1) / 2;
373            if max_edges > 0 {
374                total_clustering += neighbor_edges as f64 / max_edges as f64;
375                valid_nodes += 1;
376            }
377        }
378
379        if valid_nodes > 0 {
380            total_clustering / valid_nodes as f64
381        } else {
382            0.0
383        }
384    }
385
386    /// Calculate concentration metrics for a node type.
387    fn calculate_concentration(
388        &self,
389        nodes: &[NetworkNode],
390        node_type: &str,
391    ) -> ConcentrationMetrics {
392        let type_nodes: Vec<_> = nodes
393            .iter()
394            .filter(|n| n.node_type.to_lowercase() == node_type.to_lowercase())
395            .collect();
396
397        if type_nodes.is_empty() {
398            return ConcentrationMetrics::default();
399        }
400
401        let total_volume: f64 = type_nodes.iter().map(|n| n.volume).sum();
402        if total_volume <= 0.0 {
403            return ConcentrationMetrics {
404                total_count: type_nodes.len(),
405                ..Default::default()
406            };
407        }
408
409        // Sort by volume descending
410        let mut volumes: Vec<f64> = type_nodes.iter().map(|n| n.volume).collect();
411        volumes.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
412
413        let top_1_share = volumes.first().map(|v| v / total_volume).unwrap_or(0.0);
414        let top_5_share: f64 = volumes.iter().take(5).sum::<f64>() / total_volume;
415
416        // Calculate HHI (sum of squared market shares)
417        let hhi: f64 = volumes.iter().map(|v| (v / total_volume).powi(2)).sum();
418
419        let exceeds_limits = top_1_share > self.thresholds.max_single_vendor_concentration
420            || top_5_share > self.thresholds.max_top5_vendor_concentration;
421
422        ConcentrationMetrics {
423            total_count: type_nodes.len(),
424            top_1_share,
425            top_5_share,
426            hhi,
427            exceeds_limits,
428        }
429    }
430
431    /// Calculate relationship strength statistics.
432    fn calculate_strength_stats(&self, edges: &[NetworkEdge]) -> StrengthStats {
433        if edges.is_empty() {
434            return StrengthStats::default();
435        }
436
437        let strengths: Vec<f64> = edges.iter().map(|e| e.strength).collect();
438        let n = strengths.len() as f64;
439
440        let min = strengths.iter().cloned().fold(f64::INFINITY, f64::min);
441        let max = strengths.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
442        let mean = strengths.iter().sum::<f64>() / n;
443        let variance = strengths.iter().map(|s| (s - mean).powi(2)).sum::<f64>() / n;
444        let std_dev = variance.sqrt();
445
446        let strong_count = strengths.iter().filter(|&&s| s >= 0.7).count();
447        let moderate_count = strengths
448            .iter()
449            .filter(|&&s| (0.4..0.7).contains(&s))
450            .count();
451        let weak_count = strengths
452            .iter()
453            .filter(|&&s| (0.1..0.4).contains(&s))
454            .count();
455        let dormant_count = strengths.iter().filter(|&&s| s < 0.1).count();
456
457        StrengthStats {
458            min,
459            max,
460            mean,
461            std_dev,
462            strong_count,
463            moderate_count,
464            weak_count,
465            dormant_count,
466        }
467    }
468
469    /// Check evaluation against thresholds.
470    fn check_thresholds(&self, eval: &mut NetworkEvaluation) {
471        eval.issues.clear();
472
473        // Check connectivity
474        if eval.connectivity_ratio < self.thresholds.connectivity_min {
475            eval.issues.push(format!(
476                "Connectivity ratio {:.2} < {:.2} (threshold)",
477                eval.connectivity_ratio, self.thresholds.connectivity_min
478            ));
479        }
480
481        // Check power law alpha
482        if let Some(alpha) = eval.power_law_alpha {
483            if alpha < self.thresholds.power_law_alpha_min
484                || alpha > self.thresholds.power_law_alpha_max
485            {
486                eval.issues.push(format!(
487                    "Power law alpha {:.2} not in range [{:.1}, {:.1}]",
488                    alpha, self.thresholds.power_law_alpha_min, self.thresholds.power_law_alpha_max
489                ));
490            }
491        }
492
493        // Check clustering coefficient
494        if eval.clustering_coefficient < self.thresholds.clustering_min
495            || eval.clustering_coefficient > self.thresholds.clustering_max
496        {
497            eval.issues.push(format!(
498                "Clustering coefficient {:.3} not in range [{:.2}, {:.2}]",
499                eval.clustering_coefficient,
500                self.thresholds.clustering_min,
501                self.thresholds.clustering_max
502            ));
503        }
504
505        // Check vendor concentration
506        if eval.vendor_concentration.exceeds_limits {
507            if eval.vendor_concentration.top_1_share
508                > self.thresholds.max_single_vendor_concentration
509            {
510                eval.issues.push(format!(
511                    "Single vendor concentration {:.2}% > {:.0}% (limit)",
512                    eval.vendor_concentration.top_1_share * 100.0,
513                    self.thresholds.max_single_vendor_concentration * 100.0
514                ));
515            }
516            if eval.vendor_concentration.top_5_share > self.thresholds.max_top5_vendor_concentration
517            {
518                eval.issues.push(format!(
519                    "Top 5 vendor concentration {:.2}% > {:.0}% (limit)",
520                    eval.vendor_concentration.top_5_share * 100.0,
521                    self.thresholds.max_top5_vendor_concentration * 100.0
522                ));
523            }
524        }
525
526        // Check cross-process link rate
527        if eval.cross_process_link_rate < self.thresholds.min_cross_process_link_rate {
528            eval.issues.push(format!(
529                "Cross-process link rate {:.2}% < {:.0}% (threshold)",
530                eval.cross_process_link_rate * 100.0,
531                self.thresholds.min_cross_process_link_rate * 100.0
532            ));
533        }
534
535        eval.passes = eval.issues.is_empty();
536    }
537}
538
539impl Default for NetworkEvaluator {
540    fn default() -> Self {
541        Self::new()
542    }
543}
544
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a fixture node.
    fn node(id: &str, node_type: &str, volume: f64) -> NetworkNode {
        NetworkNode {
            id: id.to_string(),
            node_type: node_type.to_string(),
            volume,
        }
    }

    /// Shorthand for building a fixture edge.
    fn edge(from_id: &str, to_id: &str, strength: f64, volume: f64) -> NetworkEdge {
        NetworkEdge {
            from_id: from_id.to_string(),
            to_id: to_id.to_string(),
            strength,
            volume,
        }
    }

    /// One company, three vendors and two customers.
    fn create_test_nodes() -> Vec<NetworkNode> {
        vec![
            node("company", "company", 1000000.0),
            node("vendor1", "vendor", 100000.0),
            node("vendor2", "vendor", 80000.0),
            node("vendor3", "vendor", 60000.0),
            node("customer1", "customer", 150000.0),
            node("customer2", "customer", 120000.0),
        ]
    }

    /// The company linked to every other node, plus one vendor-vendor link.
    fn create_test_edges() -> Vec<NetworkEdge> {
        vec![
            edge("company", "vendor1", 0.8, 100000.0),
            edge("company", "vendor2", 0.6, 80000.0),
            edge("company", "vendor3", 0.4, 60000.0),
            edge("company", "customer1", 0.9, 150000.0),
            edge("company", "customer2", 0.7, 120000.0),
            // Some vendor-vendor relationships
            edge("vendor1", "vendor2", 0.3, 20000.0),
        ]
    }

    #[test]
    fn test_network_evaluation_basic() {
        let (nodes, edges) = (create_test_nodes(), create_test_edges());

        let eval = NetworkEvaluator::new().evaluate(&nodes, &edges, 10, 30);

        assert_eq!(eval.node_count, 6);
        assert_eq!(eval.edge_count, 6);
        assert!(eval.connectivity_ratio > 0.0);
    }

    #[test]
    fn test_connectivity_calculation() {
        let (nodes, edges) = (create_test_nodes(), create_test_edges());

        let connectivity = NetworkEvaluator::new().calculate_connectivity(&nodes, &edges);

        // Every node is reachable through the company node.
        assert_eq!(connectivity, 1.0);
    }

    #[test]
    fn test_concentration_metrics() {
        let nodes = create_test_nodes();

        let vendor_conc = NetworkEvaluator::new().calculate_concentration(&nodes, "vendor");

        assert_eq!(vendor_conc.total_count, 3);
        assert!(vendor_conc.top_1_share > 0.0);
        assert!(vendor_conc.top_5_share > 0.0);
        assert!(vendor_conc.hhi > 0.0);
    }

    #[test]
    fn test_strength_stats() {
        let edges = create_test_edges();

        let stats = NetworkEvaluator::new().calculate_strength_stats(&edges);

        assert!(stats.min > 0.0);
        assert!(stats.max <= 1.0);
        assert!(stats.mean > 0.0);
        // The fixture contains several strong relationships.
        assert!(stats.strong_count > 0);
    }

    #[test]
    fn test_empty_graph() {
        let eval = NetworkEvaluator::new().evaluate(&[], &[], 0, 0);

        assert!(!eval.passes);
        assert!(!eval.issues.is_empty());
    }
}
685}