use crate::sampler::SampleResult;
use scirs2_core::ndarray::Array2;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

#[cfg(feature = "scirs")]
use crate::scirs_stub::{
    scirs2_plot::{BoxPlot, Plot2D, Violin},
    scirs2_statistics::{
        clustering::{hierarchical_clustering, KMeans, DBSCAN},
        descriptive::{mean, quantile, std_dev},
    },
};

/// Configuration for solution distribution analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionConfig {
    /// Clustering method used to group solutions.
    pub clustering_method: ClusteringMethod,
    /// Number of clusters (k-means and hierarchical clustering).
    pub n_clusters: Option<usize>,
    /// Neighborhood radius (DBSCAN).
    pub epsilon: Option<f64>,
    /// Minimum neighborhood size (DBSCAN).
    pub min_samples: Option<usize>,
    /// Whether to compute the pairwise distance matrix.
    pub compute_distances: bool,
    /// Distance metric for pairwise comparisons.
    pub distance_metric: DistanceMetric,
}

/// Supported clustering methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ClusteringMethod {
    KMeans,
    DBSCAN,
    Hierarchical,
    None,
}

/// Distance metrics for comparing solutions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DistanceMetric {
    Hamming,
    Euclidean,
    Manhattan,
    Jaccard,
}

impl Default for DistributionConfig {
    fn default() -> Self {
        Self {
            clustering_method: ClusteringMethod::KMeans,
            n_clusters: Some(5),
            epsilon: Some(0.5),
            min_samples: Some(5),
            compute_distances: true,
            distance_metric: DistanceMetric::Hamming,
        }
    }
}

/// Analyzer that accumulates samples and lazily computes derived artifacts
/// (pairwise distance matrix, cluster labels).
pub struct SolutionDistribution {
    config: DistributionConfig,
    samples: Vec<SampleResult>,
    distance_matrix: Option<Array2<f64>>,
    clusters: Option<Vec<usize>>,
}

/// Complete results of a distribution analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionAnalysis {
    pub statistics: SolutionStatistics,
    pub diversity_metrics: DiversityMetrics,
    pub cluster_info: Option<ClusterInfo>,
    pub quality_distribution: QualityDistribution,
    pub correlation_analysis: CorrelationAnalysis,
}

/// Summary statistics over sample energies and solution frequencies.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SolutionStatistics {
    pub n_samples: usize,
    pub n_unique: usize,
    pub mean_energy: f64,
    pub std_energy: f64,
    pub min_energy: f64,
    pub max_energy: f64,
    pub energy_quantiles: HashMap<String, f64>,
    pub most_frequent_solution: Option<HashMap<String, bool>>,
    pub frequency_top_solution: usize,
}

/// Pairwise-distance and frequency-based diversity measures.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiversityMetrics {
    pub average_distance: f64,
    pub min_distance: f64,
    pub max_distance: f64,
    pub diversity_index: f64,
    pub entropy: f64,
    pub effective_sample_size: f64,
}

/// Clustering results over the sampled solutions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterInfo {
    pub n_clusters: usize,
    pub cluster_sizes: Vec<usize>,
    pub cluster_centers: Vec<HashMap<String, f64>>,
    pub cluster_energies: Vec<ClusterEnergy>,
    pub silhouette_score: f64,
    pub inter_cluster_distances: Array2<f64>,
}

/// Energy statistics for a single cluster.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterEnergy {
    pub cluster_id: usize,
    pub mean_energy: f64,
    pub std_energy: f64,
    pub min_energy: f64,
    pub max_energy: f64,
}

/// Histogram and percentile view of the energy distribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDistribution {
    pub energy_bins: Vec<f64>,
    pub bin_counts: Vec<usize>,
    pub cumulative_distribution: Vec<f64>,
    pub percentile_values: HashMap<usize, f64>,
}

/// Variable-variable and variable-energy correlation results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelationAnalysis {
    pub variable_correlations: HashMap<(String, String), f64>,
    pub energy_correlations: HashMap<String, f64>,
    pub significant_pairs: Vec<(String, String, f64)>,
}

impl SolutionDistribution {
    /// Create an analyzer with the given configuration.
    pub const fn new(config: DistributionConfig) -> Self {
        Self {
            config,
            samples: Vec::new(),
            distance_matrix: None,
            clusters: None,
        }
    }
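
    /// Canonical, order-independent key for an assignment map. `HashMap`'s
    /// `Debug` output is not guaranteed to list entries in the same order for
    /// two different map instances, so formatting a map directly can produce
    /// distinct keys for identical solutions; sorting by variable name first
    /// makes the key deterministic.
    fn canonical_key(assignments: &HashMap<String, bool>) -> String {
        let mut entries: Vec<(&String, &bool)> = assignments.iter().collect();
        entries.sort_by(|a, b| a.0.cmp(b.0));
        format!("{entries:?}")
    }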

    /// Add samples and invalidate any cached distance matrix and clusters.
    pub fn add_samples(&mut self, samples: Vec<SampleResult>) {
        self.samples.extend(samples);
        self.distance_matrix = None;
        self.clusters = None;
    }

    /// Run the full analysis pipeline over the collected samples.
    pub fn analyze(&mut self) -> Result<DistributionAnalysis, Box<dyn std::error::Error>> {
        if self.samples.is_empty() {
            return Err("No samples to analyze".into());
        }

        // Energy statistics
        let statistics = self.compute_statistics()?;

        // Pairwise-distance diversity metrics
        let diversity_metrics = self.compute_diversity_metrics()?;

        // Optional clustering
        let cluster_info = if self.config.clustering_method == ClusteringMethod::None {
            None
        } else {
            Some(self.perform_clustering()?)
        };

        // Energy histogram and percentiles
        let quality_distribution = self.analyze_quality_distribution()?;

        // Variable and energy correlations
        let correlation_analysis = self.analyze_correlations()?;

        Ok(DistributionAnalysis {
            statistics,
            diversity_metrics,
            cluster_info,
            quality_distribution,
            correlation_analysis,
        })
    }

    fn compute_statistics(&self) -> Result<SolutionStatistics, Box<dyn std::error::Error>> {
        let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();

        let mean_energy = energies.iter().sum::<f64>() / energies.len() as f64;
        let variance = energies
            .iter()
            .map(|e| (e - mean_energy).powi(2))
            .sum::<f64>()
            / energies.len() as f64;
        let std_energy = variance.sqrt();

        let min_energy = energies.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_energy = energies.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));

        // Quartiles of the sorted energies
        let mut sorted_energies = energies;
        sorted_energies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        let mut energy_quantiles = HashMap::new();
        for &q in &[25, 50, 75] {
            let idx = (q as f64 / 100.0 * sorted_energies.len() as f64) as usize;
            let idx = idx.min(sorted_energies.len() - 1);
            energy_quantiles.insert(format!("q{q}"), sorted_energies[idx]);
        }

        // Count occurrences of each distinct solution
        let mut solution_counts = HashMap::new();
        for sample in &self.samples {
            let key = Self::canonical_key(&sample.assignments);
            *solution_counts.entry(key).or_insert(0) += 1;
        }

        let (most_frequent_key, frequency) = solution_counts
            .iter()
            .max_by_key(|&(_, count)| count)
            .map_or((String::new(), 0), |(sol, &count)| (sol.clone(), count));

        // Recover the full assignment map behind the most frequent key
        let most_frequent_solution = self
            .samples
            .iter()
            .find(|s| Self::canonical_key(&s.assignments) == most_frequent_key)
            .map(|s| s.assignments.clone());

        let n_unique = solution_counts.len();

        Ok(SolutionStatistics {
            n_samples: self.samples.len(),
            n_unique,
            mean_energy,
            std_energy,
            min_energy,
            max_energy,
            energy_quantiles,
            most_frequent_solution,
            frequency_top_solution: frequency,
        })
    }
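
    /// Diversity metrics from the pairwise distance matrix: average, minimum
    /// and maximum distance, a diversity index (average distance normalized by
    /// the metric's maximum attainable distance), plus entropy and effective
    /// sample size of the empirical solution distribution.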
    fn compute_diversity_metrics(
        &mut self,
    ) -> Result<DiversityMetrics, Box<dyn std::error::Error>> {
        if self.config.compute_distances && self.distance_matrix.is_none() {
            self.compute_distance_matrix()?;
        }

        let dist_matrix = self
            .distance_matrix
            .as_ref()
            .ok_or("Distance matrix not computed")?;

        // Collect the upper triangle of the symmetric distance matrix
        let n = dist_matrix.nrows();
        let mut all_distances = Vec::new();

        for i in 0..n {
            for j in i + 1..n {
                all_distances.push(dist_matrix[[i, j]]);
            }
        }

        if all_distances.is_empty() {
            return Ok(DiversityMetrics {
                average_distance: 0.0,
                min_distance: 0.0,
                max_distance: 0.0,
                diversity_index: 0.0,
                entropy: 0.0,
                effective_sample_size: 1.0,
            });
        }

        let average_distance = all_distances.iter().sum::<f64>() / all_distances.len() as f64;
        let min_distance = all_distances.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_distance = all_distances.iter().fold(0.0f64, |a, &b| a.max(b));

        // Normalize by the largest distance the configured metric allows
        let max_possible_distance = self.get_max_distance();
        let diversity_index = if max_possible_distance > 0.0 {
            average_distance / max_possible_distance
        } else {
            0.0
        };

        let entropy = self.calculate_entropy();
        let ess = self.calculate_effective_sample_size();

        Ok(DiversityMetrics {
            average_distance,
            min_distance,
            max_distance,
            diversity_index,
            entropy,
            effective_sample_size: ess,
        })
    }

    /// Build the symmetric matrix of pairwise distances between samples.
    fn compute_distance_matrix(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        let n = self.samples.len();
        let mut matrix = Array2::zeros((n, n));

        for i in 0..n {
            for j in i + 1..n {
                let distance = self.calculate_distance(&self.samples[i], &self.samples[j]);
                matrix[[i, j]] = distance;
                matrix[[j, i]] = distance;
            }
        }

        self.distance_matrix = Some(matrix);
        Ok(())
    }

    fn calculate_distance(&self, a: &SampleResult, b: &SampleResult) -> f64 {
        match self.config.distance_metric {
            DistanceMetric::Hamming | DistanceMetric::Euclidean | DistanceMetric::Manhattan => {
                // Count differing variables; for 0/1 assignments Hamming and
                // Manhattan coincide, and Euclidean is the square root of
                // that count.
                let all_vars: std::collections::HashSet<_> =
                    a.assignments.keys().chain(b.assignments.keys()).collect();

                let mut distance = 0.0;
                for var in all_vars {
                    let val_a = a.assignments.get(var).copied().unwrap_or(false);
                    let val_b = b.assignments.get(var).copied().unwrap_or(false);
                    if val_a != val_b {
                        distance += 1.0;
                    }
                }

                if self.config.distance_metric == DistanceMetric::Euclidean {
                    distance.sqrt()
                } else {
                    distance
                }
            }
            DistanceMetric::Jaccard => {
                // Jaccard distance over the sets of variables assigned `true`
                let set_a: std::collections::HashSet<_> = a
                    .assignments
                    .iter()
                    .filter(|(_, &v)| v)
                    .map(|(k, _)| k)
                    .collect();
                let set_b: std::collections::HashSet<_> = b
                    .assignments
                    .iter()
                    .filter(|(_, &v)| v)
                    .map(|(k, _)| k)
                    .collect();

                let intersection = set_a.intersection(&set_b).count();
                let union = set_a.union(&set_b).count();

                if union > 0 {
                    1.0 - (intersection as f64 / union as f64)
                } else {
                    0.0
                }
            }
        }
    }

    /// Largest distance the configured metric can attain, used to normalize
    /// the diversity index.
    fn get_max_distance(&self) -> f64 {
        if self.samples.is_empty() {
            return 0.0;
        }

        let n_vars = self.samples[0].assignments.len();
        match self.config.distance_metric {
            // Binary solutions can differ in every variable
            DistanceMetric::Hamming | DistanceMetric::Manhattan => n_vars as f64,
            DistanceMetric::Euclidean => (n_vars as f64).sqrt(),
            DistanceMetric::Jaccard => 1.0,
        }
    }

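    /// Shannon entropy (in nats) of the empirical solution distribution,
    /// H = -sum_i p_i ln p_i, where p_i is the observed frequency of the
    /// i-th distinct solution; higher values indicate more diverse sampling.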
    fn calculate_entropy(&self) -> f64 {
        let mut solution_counts = HashMap::new();
        for sample in &self.samples {
            let key = Self::canonical_key(&sample.assignments);
            *solution_counts.entry(key).or_insert(0) += 1;
        }

        let total = self.samples.len() as f64;
        let mut entropy = 0.0;

        for &count in solution_counts.values() {
            let p = count as f64 / total;
            if p > 0.0 {
                entropy -= p * p.ln();
            }
        }

        entropy
    }

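    /// Effective sample size via the inverse Simpson index,
    /// ESS = 1 / sum_i p_i^2: it equals n when all n samples are distinct and
    /// approaches 1 as a single solution dominates.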
    fn calculate_effective_sample_size(&self) -> f64 {
        let mut solution_counts = HashMap::new();
        for sample in &self.samples {
            let key = Self::canonical_key(&sample.assignments);
            *solution_counts.entry(key).or_insert(0) += 1;
        }

        let total = self.samples.len() as f64;
        let sum_squared: f64 = solution_counts
            .values()
            .map(|&c| (c as f64 / total).powi(2))
            .sum();

        if sum_squared > 0.0 {
            1.0 / sum_squared
        } else {
            0.0
        }
    }

    fn perform_clustering(&mut self) -> Result<ClusterInfo, Box<dyn std::error::Error>> {
        // Encode the samples as a binary feature matrix
        let (feature_matrix, _) = self.samples_to_matrix()?;

        let clusters = match self.config.clustering_method {
            ClusteringMethod::KMeans => {
                let k = self.config.n_clusters.unwrap_or(5);
                self.cluster_kmeans(&feature_matrix, k)?
            }
            ClusteringMethod::DBSCAN => {
                let eps = self.config.epsilon.unwrap_or(0.5);
                let min_samples = self.config.min_samples.unwrap_or(5);
                self.cluster_dbscan(&feature_matrix, eps, min_samples)?
            }
            ClusteringMethod::Hierarchical => self.cluster_hierarchical(&feature_matrix)?,
            ClusteringMethod::None => vec![],
        };

        self.clusters = Some(clusters.clone());

        self.analyze_clusters(&clusters, &feature_matrix)
    }

    fn cluster_kmeans(
        &self,
        data: &Array2<f64>,
        k: usize,
    ) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
        #[cfg(feature = "scirs")]
        {
            let kmeans = KMeans::new(k);
            let clusters = kmeans.fit_predict(data)?;
            Ok(clusters)
        }

        #[cfg(not(feature = "scirs"))]
        {
            // Fallback without scirs: seeded random assignment, deterministic
            // across runs, so the rest of the pipeline stays exercisable.
            use scirs2_core::random::prelude::*;
            let mut rng = StdRng::seed_from_u64(42);

            Ok((0..data.nrows()).map(|_| rng.gen_range(0..k)).collect())
        }
    }

    fn cluster_dbscan(
        &self,
        data: &Array2<f64>,
        eps: f64,
        min_samples: usize,
    ) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
        #[cfg(feature = "scirs")]
        {
            let dbscan = DBSCAN::new(eps, min_samples);
            let clusters = dbscan.fit_predict(data)?;
            Ok(clusters)
        }

        #[cfg(not(feature = "scirs"))]
        {
            // Fallback without scirs: a single catch-all cluster
            Ok(vec![0; data.nrows()])
        }
    }

    fn cluster_hierarchical(
        &self,
        data: &Array2<f64>,
    ) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
        #[cfg(feature = "scirs")]
        {
            let n_clusters = self.config.n_clusters.unwrap_or(5);
            let clusters = hierarchical_clustering(data, n_clusters, "average")?;
            Ok(clusters)
        }

        #[cfg(not(feature = "scirs"))]
        {
            // Fallback without scirs: sort samples by energy and split them
            // into contiguous, roughly equal-sized groups.
            let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();
            let mut sorted_indices: Vec<usize> = (0..energies.len()).collect();
            sorted_indices.sort_by(|&a, &b| {
                energies[a]
                    .partial_cmp(&energies[b])
                    .unwrap_or(std::cmp::Ordering::Equal)
            });

            let n_clusters = self.config.n_clusters.unwrap_or(5);
            // Guard against requesting more clusters than samples
            let cluster_size = (energies.len() / n_clusters).max(1);

            let mut clusters = vec![0; energies.len()];
            for (idx, &i) in sorted_indices.iter().enumerate() {
                clusters[i] = (idx / cluster_size).min(n_clusters - 1);
            }

            Ok(clusters)
        }
    }

    /// Encode all samples as an (n_samples x n_vars) 0/1 matrix, returning
    /// the matrix together with the variable name for each column.
    fn samples_to_matrix(&self) -> Result<(Array2<f64>, Vec<String>), Box<dyn std::error::Error>> {
        if self.samples.is_empty() {
            return Err("No samples to convert".into());
        }

        // Union of variables across all samples
        let mut all_vars = std::collections::HashSet::new();
        for sample in &self.samples {
            for var in sample.assignments.keys() {
                all_vars.insert(var.clone());
            }
        }

        // Sort for a deterministic column order: HashSet iteration order is
        // not stable across instances, and this method is called more than
        // once per analysis.
        let mut var_names: Vec<String> = all_vars.into_iter().collect();
        var_names.sort();
        let n_vars = var_names.len();
        let n_samples = self.samples.len();

        let mut matrix = Array2::zeros((n_samples, n_vars));

        for (i, sample) in self.samples.iter().enumerate() {
            for (j, var_name) in var_names.iter().enumerate() {
                if let Some(&value) = sample.assignments.get(var_name) {
                    matrix[[i, j]] = if value { 1.0 } else { 0.0 };
                }
            }
        }

        Ok((matrix, var_names))
    }

    fn analyze_clusters(
        &self,
        clusters: &[usize],
        feature_matrix: &Array2<f64>,
    ) -> Result<ClusterInfo, Box<dyn std::error::Error>> {
        let n_clusters = clusters.iter().max().copied().unwrap_or(0) + 1;
        let mut cluster_sizes = vec![0; n_clusters];

        for &c in clusters {
            if c < n_clusters {
                cluster_sizes[c] += 1;
            }
        }

        // Cluster centers: per-variable mean over the member samples
        let mut cluster_centers = Vec::new();
        let (_, var_names) = self.samples_to_matrix()?;

        for cluster_id in 0..n_clusters {
            let mut center = HashMap::new();
            let cluster_samples: Vec<usize> = clusters
                .iter()
                .enumerate()
                .filter(|(_, &c)| c == cluster_id)
                .map(|(i, _)| i)
                .collect();

            if !cluster_samples.is_empty() {
                for (j, var_name) in var_names.iter().enumerate() {
                    let mean_value: f64 = cluster_samples
                        .iter()
                        .map(|&i| feature_matrix[[i, j]])
                        .sum::<f64>()
                        / cluster_samples.len() as f64;
                    center.insert(var_name.clone(), mean_value);
                }
            }

            cluster_centers.push(center);
        }

        // Energy statistics per cluster
        let mut cluster_energies = Vec::new();

        for cluster_id in 0..n_clusters {
            let cluster_energy_values: Vec<f64> = clusters
                .iter()
                .enumerate()
                .filter(|(_, &c)| c == cluster_id)
                .map(|(i, _)| self.samples[i].energy)
                .collect();

            if !cluster_energy_values.is_empty() {
                let mean =
                    cluster_energy_values.iter().sum::<f64>() / cluster_energy_values.len() as f64;
                let variance = cluster_energy_values
                    .iter()
                    .map(|e| (e - mean).powi(2))
                    .sum::<f64>()
                    / cluster_energy_values.len() as f64;
                let std = variance.sqrt();
                let min = cluster_energy_values
                    .iter()
                    .fold(f64::INFINITY, |a, &b| a.min(b));
                let max = cluster_energy_values
                    .iter()
                    .fold(f64::NEG_INFINITY, |a, &b| a.max(b));

                cluster_energies.push(ClusterEnergy {
                    cluster_id,
                    mean_energy: mean,
                    std_energy: std,
                    min_energy: min,
                    max_energy: max,
                });
            }
        }

        let silhouette_score = self.calculate_silhouette_score(clusters)?;

        let inter_cluster_distances =
            self.calculate_inter_cluster_distances(&cluster_centers, n_clusters)?;

        Ok(ClusterInfo {
            n_clusters,
            cluster_sizes,
            cluster_centers,
            cluster_energies,
            silhouette_score,
            inter_cluster_distances,
        })
    }

    fn calculate_silhouette_score(
        &self,
        _clusters: &[usize],
    ) -> Result<f64, Box<dyn std::error::Error>> {
        // Placeholder: a full silhouette computation is not implemented;
        // return a neutral mid-range score.
        Ok(0.5)
    }

    fn calculate_inter_cluster_distances(
        &self,
        centers: &[HashMap<String, f64>],
        n_clusters: usize,
    ) -> Result<Array2<f64>, Box<dyn std::error::Error>> {
        let mut distances = Array2::zeros((n_clusters, n_clusters));

        for i in 0..n_clusters {
            for j in i + 1..n_clusters {
                let dist = self.calculate_center_distance(&centers[i], &centers[j]);
                distances[[i, j]] = dist;
                distances[[j, i]] = dist;
            }
        }

        Ok(distances)
    }

    /// Euclidean distance between two cluster centers in variable space.
    fn calculate_center_distance(&self, a: &HashMap<String, f64>, b: &HashMap<String, f64>) -> f64 {
        let all_vars: std::collections::HashSet<_> = a.keys().chain(b.keys()).collect();

        let mut distance = 0.0;
        for var in all_vars {
            let val_a = a.get(var).copied().unwrap_or(0.0);
            let val_b = b.get(var).copied().unwrap_or(0.0);
            distance += (val_a - val_b).powi(2);
        }

        distance.sqrt()
    }

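    /// Histogram view of sample energies: 20 equal-width bins (bin centers in
    /// `energy_bins`), per-bin counts, the empirical CDF over bins, and
    /// selected percentiles of the sorted energies.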
    fn analyze_quality_distribution(
        &self,
    ) -> Result<QualityDistribution, Box<dyn std::error::Error>> {
        let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();

        let min_energy = energies.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_energy = energies.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));

        let n_bins = 20;
        // Guard against a zero-width range when all energies are identical
        let bin_width = if max_energy > min_energy {
            (max_energy - min_energy) / n_bins as f64
        } else {
            1.0
        };

        let mut energy_bins = Vec::new();
        let mut bin_counts = vec![0; n_bins];

        // Bin centers
        for i in 0..n_bins {
            energy_bins.push((i as f64 + 0.5).mul_add(bin_width, min_energy));
        }

        for &energy in &energies {
            let bin_idx = ((energy - min_energy) / bin_width).floor() as usize;
            let bin_idx = bin_idx.min(n_bins - 1);
            bin_counts[bin_idx] += 1;
        }

        // Empirical CDF over the bins
        let total = energies.len() as f64;
        let mut cumulative_distribution = Vec::new();
        let mut cumsum = 0;

        for &count in &bin_counts {
            cumsum += count;
            cumulative_distribution.push(cumsum as f64 / total);
        }

        let mut sorted_energies = energies;
        sorted_energies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        let mut percentile_values = HashMap::new();
        for p in &[1, 5, 10, 25, 50, 75, 90, 95, 99] {
            let idx = ((*p as f64 / 100.0) * sorted_energies.len() as f64) as usize;
            let idx = idx.min(sorted_energies.len() - 1);
            percentile_values.insert(*p, sorted_energies[idx]);
        }

        Ok(QualityDistribution {
            energy_bins,
            bin_counts,
            cumulative_distribution,
            percentile_values,
        })
    }

    fn analyze_correlations(&self) -> Result<CorrelationAnalysis, Box<dyn std::error::Error>> {
        let (feature_matrix, var_names) = self.samples_to_matrix()?;
        let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();

        let mut variable_correlations = HashMap::new();
        let mut energy_correlations = HashMap::new();
        let mut significant_pairs = Vec::new();

        // Pairwise variable correlations
        for i in 0..var_names.len() {
            for j in i + 1..var_names.len() {
                let var_i: Vec<f64> = (0..feature_matrix.nrows())
                    .map(|k| feature_matrix[[k, i]])
                    .collect();
                let var_j: Vec<f64> = (0..feature_matrix.nrows())
                    .map(|k| feature_matrix[[k, j]])
                    .collect();

                let corr = calculate_correlation(&var_i, &var_j);

                // Flag moderately correlated pairs (|r| > 0.3)
                if corr.abs() > 0.3 {
                    significant_pairs.push((var_names[i].clone(), var_names[j].clone(), corr));
                }

                variable_correlations.insert((var_names[i].clone(), var_names[j].clone()), corr);
            }

            // Correlation of each variable with the sample energy
            let var_values: Vec<f64> = (0..feature_matrix.nrows())
                .map(|k| feature_matrix[[k, i]])
                .collect();

            let energy_corr = calculate_correlation(&var_values, &energies);
            energy_correlations.insert(var_names[i].clone(), energy_corr);
        }

        Ok(CorrelationAnalysis {
            variable_correlations,
            energy_correlations,
            significant_pairs,
        })
    }
}

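/// Pearson correlation coefficient, r = cov(x, y) / (sigma_x * sigma_y),
/// computed with population (1/n) normalization. Returns 0.0 for empty or
/// length-mismatched inputs and for zero-variance series.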
fn calculate_correlation(x: &[f64], y: &[f64]) -> f64 {
    if x.len() != y.len() || x.is_empty() {
        return 0.0;
    }

    let n = x.len() as f64;
    let mean_x = x.iter().sum::<f64>() / n;
    let mean_y = y.iter().sum::<f64>() / n;

    let cov: f64 = x
        .iter()
        .zip(y.iter())
        .map(|(xi, yi)| (xi - mean_x) * (yi - mean_y))
        .sum::<f64>()
        / n;

    let std_x = (x.iter().map(|xi| (xi - mean_x).powi(2)).sum::<f64>() / n).sqrt();
    let std_y = (y.iter().map(|yi| (yi - mean_y).powi(2)).sum::<f64>() / n).sqrt();

    if std_x > 0.0 && std_y > 0.0 {
        cov / (std_x * std_y)
    } else {
        0.0
    }
}

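/// Convenience entry point: build a [`SolutionDistribution`], add the
/// samples, and run the full analysis with the given (or default)
/// configuration.
///
/// A minimal usage sketch; `run_sampler` is a hypothetical placeholder for
/// whatever produces `Vec<SampleResult>` in your code:
///
/// ```ignore
/// let samples = run_sampler()?; // hypothetical source of Vec<SampleResult>
/// let analysis = analyze_solution_distribution(samples, None)?;
/// println!(
///     "{} samples, {} unique, mean energy {:.3}",
///     analysis.statistics.n_samples,
///     analysis.statistics.n_unique,
///     analysis.statistics.mean_energy,
/// );
/// ```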
pub fn analyze_solution_distribution(
    samples: Vec<SampleResult>,
    config: Option<DistributionConfig>,
) -> Result<DistributionAnalysis, Box<dyn std::error::Error>> {
    let config = config.unwrap_or_default();
    let mut analyzer = SolutionDistribution::new(config);
    analyzer.add_samples(samples);
    analyzer.analyze()
}

/// Plot the analysis as a 2x2 figure (with the `scirs` feature), or fall
/// back to exporting it as JSON.
pub fn plot_distribution_analysis(
    analysis: &DistributionAnalysis,
) -> Result<(), Box<dyn std::error::Error>> {
    #[cfg(feature = "scirs")]
    {
        use crate::scirs_stub::scirs2_plot::{Figure, Subplot};

        let mut fig = Figure::new();

        // Energy histogram
        let bin_counts_f64: Vec<f64> = analysis
            .quality_distribution
            .bin_counts
            .iter()
            .map(|&c| c as f64)
            .collect();
        fig.add_subplot(2, 2, 1)?
            .bar(&analysis.quality_distribution.energy_bins, &bin_counts_f64)
            .set_xlabel("Energy")
            .set_ylabel("Count")
            .set_title("Energy Distribution");

        // Cluster sizes (if clustering was performed)
        if let Some(ref cluster_info) = analysis.cluster_info {
            let cluster_ids: Vec<f64> = (0..cluster_info.n_clusters).map(|i| i as f64).collect();
            let cluster_sizes: Vec<f64> = cluster_info
                .cluster_sizes
                .iter()
                .map(|&s| s as f64)
                .collect();

            fig.add_subplot(2, 2, 2)?
                .bar(&cluster_ids, &cluster_sizes)
                .set_xlabel("Cluster ID")
                .set_ylabel("Size")
                .set_title("Cluster Sizes");
        }

        // Cumulative energy distribution
        fig.add_subplot(2, 2, 3)?
            .plot(
                &analysis.quality_distribution.energy_bins,
                &analysis.quality_distribution.cumulative_distribution,
            )
            .set_xlabel("Energy")
            .set_ylabel("Cumulative Probability")
            .set_title("Cumulative Distribution");

        // Variable-energy correlations
        let var_names: Vec<String> = analysis
            .correlation_analysis
            .energy_correlations
            .keys()
            .cloned()
            .collect();
        let correlations: Vec<f64> = var_names
            .iter()
            .map(|v| analysis.correlation_analysis.energy_correlations[v])
            .collect();

        if !var_names.is_empty() {
            fig.add_subplot(2, 2, 4)?
                .bar_horizontal(&var_names, &correlations)
                .set_xlabel("Correlation with Energy")
                .set_ylabel("Variable")
                .set_title("Variable-Energy Correlations");
        }

        fig.show()?;
    }

    #[cfg(not(feature = "scirs"))]
    {
        // Without scirs, fall back to a JSON export
        export_distribution_analysis(analysis, "distribution_analysis.json")?;
        println!("Distribution analysis exported to distribution_analysis.json");
    }

    Ok(())
}

/// Serialize the analysis as pretty-printed JSON to `path`.
fn export_distribution_analysis(
    analysis: &DistributionAnalysis,
    path: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let json = serde_json::to_string_pretty(analysis)?;
    std::fs::write(path, json)?;
    Ok(())
}