// quantrs2_tytan/visualization/solution_analysis.rs

1//! Solution distribution analysis for quantum annealing
2//!
3//! This module provides statistical analysis and clustering of solution sets
4//! including diversity metrics, clustering analysis, and quality distribution.
5
6use crate::sampler::SampleResult;
7use scirs2_core::ndarray::Array2;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11#[cfg(feature = "scirs")]
12use crate::scirs_stub::{
13    scirs2_plot::{BoxPlot, Plot2D, Violin},
14    scirs2_statistics::{
15        clustering::{hierarchical_clustering, KMeans, DBSCAN},
16        descriptive::{mean, quantile, std_dev},
17    },
18};
19
/// Solution distribution configuration
///
/// Controls which clustering algorithm runs, its hyper-parameters, and
/// whether/how pairwise solution distances are computed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionConfig {
    /// Clustering algorithm to use (`ClusteringMethod::None` disables clustering)
    pub clustering_method: ClusteringMethod,
    /// Number of clusters (used by KMeans and Hierarchical; 5 when `None`)
    pub n_clusters: Option<usize>,
    /// DBSCAN epsilon (neighborhood radius) parameter; 0.5 when `None`
    pub epsilon: Option<f64>,
    /// Minimum samples for a DBSCAN core point; 5 when `None`
    pub min_samples: Option<usize>,
    /// Compute the full pairwise distance matrix (needed for diversity metrics)
    pub compute_distances: bool,
    /// Distance metric used for the pairwise distance matrix
    pub distance_metric: DistanceMetric,
}
36
/// Clustering methods
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ClusteringMethod {
    /// K-means with `n_clusters` centroids
    KMeans,
    /// Density-based clustering driven by `epsilon` / `min_samples`
    DBSCAN,
    /// Hierarchical clustering cut into `n_clusters` groups
    Hierarchical,
    /// Skip clustering; `analyze` reports `cluster_info: None`
    None,
}
45
/// Distance metrics
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum DistanceMetric {
    /// Number of variables whose boolean values differ
    Hamming,
    /// Euclidean (L2) distance over 0/1 assignment vectors
    Euclidean,
    /// Manhattan (L1) distance over 0/1 assignment vectors
    Manhattan,
    /// 1 - |A ∩ B| / |A ∪ B| over the sets of variables assigned `true`
    Jaccard,
}
54
55impl Default for DistributionConfig {
56    fn default() -> Self {
57        Self {
58            clustering_method: ClusteringMethod::KMeans,
59            n_clusters: Some(5),
60            epsilon: Some(0.5),
61            min_samples: Some(5),
62            compute_distances: true,
63            distance_metric: DistanceMetric::Hamming,
64        }
65    }
66}
67
/// Solution distribution analyzer
///
/// Accumulates samples and lazily caches the pairwise distance matrix and
/// cluster labels; both caches are invalidated when new samples are added.
pub struct SolutionDistribution {
    /// Analysis configuration
    config: DistributionConfig,
    /// Samples under analysis
    samples: Vec<SampleResult>,
    /// Cached pairwise distance matrix (computed on demand)
    distance_matrix: Option<Array2<f64>>,
    /// Cached per-sample cluster labels (computed on demand)
    clusters: Option<Vec<usize>>,
}
75
/// Distribution analysis results
///
/// Aggregate output of `SolutionDistribution::analyze`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionAnalysis {
    /// Basic summary statistics over sample energies and solutions
    pub statistics: SolutionStatistics,
    /// Distance- and frequency-based diversity measures
    pub diversity_metrics: DiversityMetrics,
    /// Clustering summary; `None` when clustering is disabled
    pub cluster_info: Option<ClusterInfo>,
    /// Energy histogram, CDF, and percentile values
    pub quality_distribution: QualityDistribution,
    /// Variable-variable and variable-energy correlations
    pub correlation_analysis: CorrelationAnalysis,
}
85
/// Basic solution statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SolutionStatistics {
    /// Total number of samples analyzed
    pub n_samples: usize,
    /// Number of distinct solutions among the samples
    pub n_unique: usize,
    /// Mean sample energy
    pub mean_energy: f64,
    /// Population standard deviation of the energies
    pub std_energy: f64,
    /// Lowest energy observed
    pub min_energy: f64,
    /// Highest energy observed
    pub max_energy: f64,
    /// Energy quartiles keyed as "q25" / "q50" / "q75"
    pub energy_quantiles: HashMap<String, f64>,
    /// Assignments of the most frequently observed solution (may be `None`)
    pub most_frequent_solution: Option<HashMap<String, bool>>,
    /// Occurrence count of the most frequent solution
    pub frequency_top_solution: usize,
}
99
/// Diversity metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiversityMetrics {
    /// Mean pairwise distance between samples
    pub average_distance: f64,
    /// Smallest pairwise distance
    pub min_distance: f64,
    /// Largest pairwise distance
    pub max_distance: f64,
    /// Average distance normalized by the metric's maximum possible distance
    pub diversity_index: f64,
    /// Shannon entropy (nats) of the empirical solution distribution
    pub entropy: f64,
    /// Inverse Simpson index: 1 / sum of squared solution frequencies
    pub effective_sample_size: f64,
}
110
/// Cluster information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterInfo {
    /// Number of clusters found
    pub n_clusters: usize,
    /// Sample count per cluster, indexed by cluster id
    pub cluster_sizes: Vec<usize>,
    /// Per-cluster mean value of each variable (fraction of members with `true`)
    pub cluster_centers: Vec<HashMap<String, f64>>,
    /// Energy statistics for each non-empty cluster
    pub cluster_energies: Vec<ClusterEnergy>,
    /// Clustering quality score
    pub silhouette_score: f64,
    // NOTE(review): serializing `Array2` via serde requires ndarray's
    // `serde` feature on the re-exporting crate — verify it is enabled.
    /// Pairwise distances between cluster centers
    pub inter_cluster_distances: Array2<f64>,
}
121
/// Cluster energy statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClusterEnergy {
    /// Cluster label these statistics belong to
    pub cluster_id: usize,
    /// Mean energy of the cluster's samples
    pub mean_energy: f64,
    /// Population standard deviation of the cluster's energies
    pub std_energy: f64,
    /// Lowest energy in the cluster
    pub min_energy: f64,
    /// Highest energy in the cluster
    pub max_energy: f64,
}
131
/// Quality distribution analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityDistribution {
    /// Histogram bin centers over the energy range
    pub energy_bins: Vec<f64>,
    /// Sample count per bin
    pub bin_counts: Vec<usize>,
    /// Cumulative fraction of samples up to and including each bin
    pub cumulative_distribution: Vec<f64>,
    /// Energy value at selected percentiles (keyed by percentile, e.g. 50)
    pub percentile_values: HashMap<usize, f64>,
}
140
141/// Correlation analysis between variables
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct CorrelationAnalysis {
144    pub variable_correlations: HashMap<(String, String), f64>,
145    pub energy_correlations: HashMap<String, f64>,
146    pub significant_pairs: Vec<(String, String, f64)>,
147}
148
149impl SolutionDistribution {
150    /// Create new solution distribution analyzer
151    pub const fn new(config: DistributionConfig) -> Self {
152        Self {
153            config,
154            samples: Vec::new(),
155            distance_matrix: None,
156            clusters: None,
157        }
158    }
159
160    /// Add samples for analysis
161    pub fn add_samples(&mut self, samples: Vec<SampleResult>) {
162        self.samples.extend(samples);
163        // Invalidate cached results
164        self.distance_matrix = None;
165        self.clusters = None;
166    }
167
168    /// Perform complete distribution analysis
169    pub fn analyze(&mut self) -> Result<DistributionAnalysis, Box<dyn std::error::Error>> {
170        if self.samples.is_empty() {
171            return Err("No samples to analyze".into());
172        }
173
174        // Compute basic statistics
175        let statistics = self.compute_statistics()?;
176
177        // Compute diversity metrics
178        let diversity_metrics = self.compute_diversity_metrics()?;
179
180        // Perform clustering if requested
181        let cluster_info = if self.config.clustering_method == ClusteringMethod::None {
182            None
183        } else {
184            Some(self.perform_clustering()?)
185        };
186
187        // Analyze quality distribution
188        let quality_distribution = self.analyze_quality_distribution()?;
189
190        // Perform correlation analysis
191        let correlation_analysis = self.analyze_correlations()?;
192
193        Ok(DistributionAnalysis {
194            statistics,
195            diversity_metrics,
196            cluster_info,
197            quality_distribution,
198            correlation_analysis,
199        })
200    }
201
202    /// Compute basic statistics
203    fn compute_statistics(&self) -> Result<SolutionStatistics, Box<dyn std::error::Error>> {
204        let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();
205
206        let mean_energy = energies.iter().sum::<f64>() / energies.len() as f64;
207        let variance = energies
208            .iter()
209            .map(|e| (e - mean_energy).powi(2))
210            .sum::<f64>()
211            / energies.len() as f64;
212        let std_energy = variance.sqrt();
213
214        let min_energy = energies.iter().fold(f64::INFINITY, |a, &b| a.min(b));
215        let max_energy = energies.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
216
217        // Compute quantiles
218        let mut sorted_energies = energies;
219        sorted_energies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
220
221        let mut energy_quantiles = HashMap::new();
222        for &q in &[25, 50, 75] {
223            let idx = (q as f64 / 100.0 * sorted_energies.len() as f64) as usize;
224            let idx = idx.min(sorted_energies.len() - 1);
225            energy_quantiles.insert(format!("q{q}"), sorted_energies[idx]);
226        }
227
228        // Find most frequent solution
229        let mut solution_counts = HashMap::new();
230        for sample in &self.samples {
231            let key = format!("{:?}", sample.assignments);
232            *solution_counts.entry(key).or_insert(0) += 1;
233        }
234
235        let (_most_frequent, frequency) = solution_counts
236            .iter()
237            .max_by_key(|&(_, count)| count)
238            .map_or((String::new(), 0), |(sol, &count)| (sol.clone(), count));
239
240        // Count unique solutions
241        let n_unique = solution_counts.len();
242
243        Ok(SolutionStatistics {
244            n_samples: self.samples.len(),
245            n_unique,
246            mean_energy,
247            std_energy,
248            min_energy,
249            max_energy,
250            energy_quantiles,
251            most_frequent_solution: None, // Simplified for now
252            frequency_top_solution: frequency,
253        })
254    }
255
    /// Compute diversity metrics
    ///
    /// Lazily computes (and caches) the pairwise distance matrix if
    /// enabled, then summarizes the strict upper-triangle distances along
    /// with entropy-based frequency measures.
    ///
    /// # Errors
    /// Fails when the distance matrix is unavailable (e.g.
    /// `compute_distances` is disabled and nothing was cached earlier).
    fn compute_diversity_metrics(
        &mut self,
    ) -> Result<DiversityMetrics, Box<dyn std::error::Error>> {
        // Build the distance matrix on first use only.
        if self.config.compute_distances && self.distance_matrix.is_none() {
            self.compute_distance_matrix()?;
        }

        let dist_matrix = self
            .distance_matrix
            .as_ref()
            .ok_or("Distance matrix not computed")?;

        let n = dist_matrix.nrows();
        let mut all_distances = Vec::new();

        // Collect each unordered pair once (strict upper triangle).
        for i in 0..n {
            for j in i + 1..n {
                all_distances.push(dist_matrix[[i, j]]);
            }
        }

        // Fewer than two samples: no pairs, report a degenerate result.
        if all_distances.is_empty() {
            return Ok(DiversityMetrics {
                average_distance: 0.0,
                min_distance: 0.0,
                max_distance: 0.0,
                diversity_index: 0.0,
                entropy: 0.0,
                effective_sample_size: 1.0,
            });
        }

        let average_distance = all_distances.iter().sum::<f64>() / all_distances.len() as f64;
        let min_distance = all_distances.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_distance = all_distances.iter().fold(0.0f64, |a, &b| a.max(b));

        // Diversity index (average distance normalized by the metric maximum)
        let max_possible_distance = self.get_max_distance();
        let diversity_index = if max_possible_distance > 0.0 {
            average_distance / max_possible_distance
        } else {
            0.0
        };

        // Solution entropy
        let entropy = self.calculate_entropy();

        // Effective sample size (based on solution frequencies)
        let ess = self.calculate_effective_sample_size();

        Ok(DiversityMetrics {
            average_distance,
            min_distance,
            max_distance,
            diversity_index,
            entropy,
            effective_sample_size: ess,
        })
    }
316
317    /// Compute distance matrix
318    fn compute_distance_matrix(&mut self) -> Result<(), Box<dyn std::error::Error>> {
319        let n = self.samples.len();
320        let mut matrix = Array2::zeros((n, n));
321
322        for i in 0..n {
323            for j in i + 1..n {
324                let distance = self.calculate_distance(&self.samples[i], &self.samples[j]);
325                matrix[[i, j]] = distance;
326                matrix[[j, i]] = distance;
327            }
328        }
329
330        self.distance_matrix = Some(matrix);
331        Ok(())
332    }
333
334    /// Calculate distance between two solutions
335    fn calculate_distance(&self, a: &SampleResult, b: &SampleResult) -> f64 {
336        match self.config.distance_metric {
337            DistanceMetric::Hamming => {
338                let mut distance = 0.0;
339                let all_vars: std::collections::HashSet<_> =
340                    a.assignments.keys().chain(b.assignments.keys()).collect();
341
342                for var in all_vars {
343                    let val_a = a.assignments.get(var).copied().unwrap_or(false);
344                    let val_b = b.assignments.get(var).copied().unwrap_or(false);
345                    if val_a != val_b {
346                        distance += 1.0;
347                    }
348                }
349                distance
350            }
351            DistanceMetric::Jaccard => {
352                let set_a: std::collections::HashSet<_> = a
353                    .assignments
354                    .iter()
355                    .filter(|(_, &v)| v)
356                    .map(|(k, _)| k)
357                    .collect();
358                let set_b: std::collections::HashSet<_> = b
359                    .assignments
360                    .iter()
361                    .filter(|(_, &v)| v)
362                    .map(|(k, _)| k)
363                    .collect();
364
365                let intersection = set_a.intersection(&set_b).count();
366                let union = set_a.union(&set_b).count();
367
368                if union > 0 {
369                    1.0 - (intersection as f64 / union as f64)
370                } else {
371                    0.0
372                }
373            }
374            _ => 0.0, // Placeholder for other metrics
375        }
376    }
377
378    /// Get maximum possible distance
379    fn get_max_distance(&self) -> f64 {
380        if self.samples.is_empty() {
381            return 0.0;
382        }
383
384        let n_vars = self.samples[0].assignments.len();
385        match self.config.distance_metric {
386            DistanceMetric::Hamming => n_vars as f64,
387            DistanceMetric::Jaccard => 1.0,
388            _ => 1.0,
389        }
390    }
391
392    /// Calculate solution entropy
393    fn calculate_entropy(&self) -> f64 {
394        let mut solution_counts = HashMap::new();
395        for sample in &self.samples {
396            let key = format!("{:?}", sample.assignments);
397            *solution_counts.entry(key).or_insert(0) += 1;
398        }
399
400        let total = self.samples.len() as f64;
401        let mut entropy = 0.0;
402
403        for &count in solution_counts.values() {
404            let p = count as f64 / total;
405            if p > 0.0 {
406                entropy -= p * p.ln();
407            }
408        }
409
410        entropy
411    }
412
413    /// Calculate effective sample size
414    fn calculate_effective_sample_size(&self) -> f64 {
415        let mut solution_counts = HashMap::new();
416        for sample in &self.samples {
417            let key = format!("{:?}", sample.assignments);
418            *solution_counts.entry(key).or_insert(0) += 1;
419        }
420
421        let total = self.samples.len() as f64;
422        let sum_squared: f64 = solution_counts
423            .values()
424            .map(|&c| (c as f64 / total).powi(2))
425            .sum();
426
427        if sum_squared > 0.0 {
428            1.0 / sum_squared
429        } else {
430            0.0
431        }
432    }
433
434    /// Perform clustering
435    fn perform_clustering(&mut self) -> Result<ClusterInfo, Box<dyn std::error::Error>> {
436        // Convert samples to feature matrix
437        let (feature_matrix, _) = self.samples_to_matrix()?;
438
439        let clusters = match self.config.clustering_method {
440            ClusteringMethod::KMeans => {
441                let k = self.config.n_clusters.unwrap_or(5);
442                self.cluster_kmeans(&feature_matrix, k)?
443            }
444            ClusteringMethod::DBSCAN => {
445                let eps = self.config.epsilon.unwrap_or(0.5);
446                let min_samples = self.config.min_samples.unwrap_or(5);
447                self.cluster_dbscan(&feature_matrix, eps, min_samples)?
448            }
449            ClusteringMethod::Hierarchical => self.cluster_hierarchical(&feature_matrix)?,
450            ClusteringMethod::None => vec![],
451        };
452
453        self.clusters = Some(clusters.clone());
454
455        // Analyze clusters
456        self.analyze_clusters(&clusters, &feature_matrix)
457    }
458
459    /// K-means clustering
460    fn cluster_kmeans(
461        &self,
462        data: &Array2<f64>,
463        k: usize,
464    ) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
465        #[cfg(feature = "scirs")]
466        {
467            let kmeans = KMeans::new(k);
468            let mut clusters = kmeans.fit_predict(data)?;
469            Ok(clusters)
470        }
471
472        #[cfg(not(feature = "scirs"))]
473        {
474            // Simple random assignment as fallback
475
476            use scirs2_core::random::prelude::*;
477            let mut rng = StdRng::seed_from_u64(42);
478
479            Ok((0..data.nrows()).map(|_| rng.gen_range(0..k)).collect())
480        }
481    }
482
483    /// DBSCAN clustering
484    fn cluster_dbscan(
485        &self,
486        data: &Array2<f64>,
487        eps: f64,
488        min_samples: usize,
489    ) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
490        #[cfg(feature = "scirs")]
491        {
492            let dbscan = DBSCAN::new(eps, min_samples);
493            let mut clusters = dbscan.fit_predict(data)?;
494            Ok(clusters)
495        }
496
497        #[cfg(not(feature = "scirs"))]
498        {
499            // All samples in one cluster as fallback
500            Ok(vec![0; data.nrows()])
501        }
502    }
503
504    /// Hierarchical clustering
505    fn cluster_hierarchical(
506        &self,
507        data: &Array2<f64>,
508    ) -> Result<Vec<usize>, Box<dyn std::error::Error>> {
509        #[cfg(feature = "scirs")]
510        {
511            let n_clusters = self.config.n_clusters.unwrap_or(5);
512            let mut clusters = hierarchical_clustering(data, n_clusters, "average")?;
513            Ok(clusters)
514        }
515
516        #[cfg(not(feature = "scirs"))]
517        {
518            // Simple clustering by energy
519            let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();
520            let mut sorted_indices: Vec<usize> = (0..energies.len()).collect();
521            sorted_indices.sort_by(|&a, &b| {
522                energies[a]
523                    .partial_cmp(&energies[b])
524                    .unwrap_or(std::cmp::Ordering::Equal)
525            });
526
527            let n_clusters = self.config.n_clusters.unwrap_or(5);
528            let cluster_size = energies.len() / n_clusters;
529
530            let mut clusters = vec![0; energies.len()];
531            for (idx, &i) in sorted_indices.iter().enumerate() {
532                clusters[i] = (idx / cluster_size).min(n_clusters - 1);
533            }
534
535            Ok(clusters)
536        }
537    }
538
539    /// Convert samples to feature matrix
540    fn samples_to_matrix(&self) -> Result<(Array2<f64>, Vec<String>), Box<dyn std::error::Error>> {
541        if self.samples.is_empty() {
542            return Err("No samples to convert".into());
543        }
544
545        // Get all variable names
546        let mut all_vars = std::collections::HashSet::new();
547        for sample in &self.samples {
548            for var in sample.assignments.keys() {
549                all_vars.insert(var.clone());
550            }
551        }
552
553        let var_names: Vec<String> = all_vars.into_iter().collect();
554        let n_vars = var_names.len();
555        let n_samples = self.samples.len();
556
557        // Create feature matrix
558        let mut matrix = Array2::zeros((n_samples, n_vars));
559
560        for (i, sample) in self.samples.iter().enumerate() {
561            for (j, var_name) in var_names.iter().enumerate() {
562                if let Some(&value) = sample.assignments.get(var_name) {
563                    matrix[[i, j]] = if value { 1.0 } else { 0.0 };
564                }
565            }
566        }
567
568        Ok((matrix, var_names))
569    }
570
    /// Analyze clusters
    ///
    /// Summarizes a label assignment: per-cluster sizes, per-variable mean
    /// centers, energy statistics, a silhouette score, and pairwise
    /// center distances.
    ///
    /// NOTE(review): `var_names` comes from a second `samples_to_matrix`
    /// call while `feature_matrix` columns come from the caller's earlier
    /// call; this pairing is only consistent if the column ordering is
    /// stable across calls — verify.
    fn analyze_clusters(
        &self,
        clusters: &[usize],
        feature_matrix: &Array2<f64>,
    ) -> Result<ClusterInfo, Box<dyn std::error::Error>> {
        // Number of clusters inferred from the largest label present.
        let n_clusters = clusters.iter().max().copied().unwrap_or(0) + 1;
        let mut cluster_sizes = vec![0; n_clusters];

        for &c in clusters {
            if c < n_clusters {
                cluster_sizes[c] += 1;
            }
        }

        // Calculate cluster centers (per-variable mean over member rows).
        let mut cluster_centers = Vec::new();
        let (_, var_names) = self.samples_to_matrix()?;

        for cluster_id in 0..n_clusters {
            let mut center = HashMap::new();
            let cluster_samples: Vec<usize> = clusters
                .iter()
                .enumerate()
                .filter(|(_, &c)| c == cluster_id)
                .map(|(i, _)| i)
                .collect();

            if !cluster_samples.is_empty() {
                for (j, var_name) in var_names.iter().enumerate() {
                    let mean_value: f64 = cluster_samples
                        .iter()
                        .map(|&i| feature_matrix[[i, j]])
                        .sum::<f64>()
                        / cluster_samples.len() as f64;
                    center.insert(var_name.clone(), mean_value);
                }
            }

            // Empty clusters keep an empty center map.
            cluster_centers.push(center);
        }

        // Calculate per-cluster energy statistics (empty clusters skipped).
        let mut cluster_energies = Vec::new();

        for cluster_id in 0..n_clusters {
            let cluster_energy_values: Vec<f64> = clusters
                .iter()
                .enumerate()
                .filter(|(_, &c)| c == cluster_id)
                .map(|(i, _)| self.samples[i].energy)
                .collect();

            if !cluster_energy_values.is_empty() {
                let mean =
                    cluster_energy_values.iter().sum::<f64>() / cluster_energy_values.len() as f64;
                let variance = cluster_energy_values
                    .iter()
                    .map(|e| (e - mean).powi(2))
                    .sum::<f64>()
                    / cluster_energy_values.len() as f64;
                let std = variance.sqrt();
                let min = cluster_energy_values
                    .iter()
                    .fold(f64::INFINITY, |a, &b| a.min(b));
                let max = cluster_energy_values
                    .iter()
                    .fold(f64::NEG_INFINITY, |a, &b| a.max(b));

                cluster_energies.push(ClusterEnergy {
                    cluster_id,
                    mean_energy: mean,
                    std_energy: std,
                    min_energy: min,
                    max_energy: max,
                });
            }
        }

        // Calculate silhouette score
        let silhouette_score = self.calculate_silhouette_score(clusters)?;

        // Calculate inter-cluster distances
        let inter_cluster_distances =
            self.calculate_inter_cluster_distances(&cluster_centers, n_clusters)?;

        Ok(ClusterInfo {
            n_clusters,
            cluster_sizes,
            cluster_centers,
            cluster_energies,
            silhouette_score,
            inter_cluster_distances,
        })
    }
666
667    /// Calculate silhouette score
668    fn calculate_silhouette_score(
669        &self,
670        _clusters: &[usize],
671    ) -> Result<f64, Box<dyn std::error::Error>> {
672        // Simplified silhouette calculation
673        // In full implementation would use proper silhouette coefficient
674        Ok(0.5) // Placeholder
675    }
676
677    /// Calculate inter-cluster distances
678    fn calculate_inter_cluster_distances(
679        &self,
680        centers: &[HashMap<String, f64>],
681        n_clusters: usize,
682    ) -> Result<Array2<f64>, Box<dyn std::error::Error>> {
683        let mut distances = Array2::zeros((n_clusters, n_clusters));
684
685        for i in 0..n_clusters {
686            for j in i + 1..n_clusters {
687                let dist = self.calculate_center_distance(&centers[i], &centers[j]);
688                distances[[i, j]] = dist;
689                distances[[j, i]] = dist;
690            }
691        }
692
693        Ok(distances)
694    }
695
696    /// Calculate distance between cluster centers
697    fn calculate_center_distance(&self, a: &HashMap<String, f64>, b: &HashMap<String, f64>) -> f64 {
698        let all_vars: std::collections::HashSet<_> = a.keys().chain(b.keys()).collect();
699
700        let mut distance = 0.0;
701        for var in all_vars {
702            let val_a = a.get(var).copied().unwrap_or(0.0);
703            let val_b = b.get(var).copied().unwrap_or(0.0);
704            distance += (val_a - val_b).powi(2);
705        }
706
707        distance.sqrt()
708    }
709
710    /// Analyze quality distribution
711    fn analyze_quality_distribution(
712        &self,
713    ) -> Result<QualityDistribution, Box<dyn std::error::Error>> {
714        let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();
715
716        let min_energy = energies.iter().fold(f64::INFINITY, |a, &b| a.min(b));
717        let max_energy = energies.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
718
719        let n_bins = 20;
720        let bin_width = (max_energy - min_energy) / n_bins as f64;
721
722        let mut energy_bins = Vec::new();
723        let mut bin_counts = vec![0; n_bins];
724
725        for i in 0..n_bins {
726            energy_bins.push((i as f64 + 0.5).mul_add(bin_width, min_energy));
727        }
728
729        for &energy in &energies {
730            let bin_idx = ((energy - min_energy) / bin_width).floor() as usize;
731            let bin_idx = bin_idx.min(n_bins - 1);
732            bin_counts[bin_idx] += 1;
733        }
734
735        // Calculate cumulative distribution
736        let total = energies.len() as f64;
737        let mut cumulative_distribution = Vec::new();
738        let mut cumsum = 0;
739
740        for &count in &bin_counts {
741            cumsum += count;
742            cumulative_distribution.push(cumsum as f64 / total);
743        }
744
745        // Calculate percentiles
746        let mut sorted_energies = energies;
747        sorted_energies.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
748
749        let mut percentile_values = HashMap::new();
750        for p in &[1, 5, 10, 25, 50, 75, 90, 95, 99] {
751            let idx = ((*p as f64 / 100.0) * sorted_energies.len() as f64) as usize;
752            let idx = idx.min(sorted_energies.len() - 1);
753            percentile_values.insert(*p, sorted_energies[idx]);
754        }
755
756        Ok(QualityDistribution {
757            energy_bins,
758            bin_counts,
759            cumulative_distribution,
760            percentile_values,
761        })
762    }
763
    /// Analyze correlations between variables
    ///
    /// Computes Pearson correlations for every variable pair — O(V^2) in
    /// the number of variables — and between each variable and the sample
    /// energy. Pairs with |correlation| above the fixed 0.3 threshold are
    /// flagged as significant.
    fn analyze_correlations(&self) -> Result<CorrelationAnalysis, Box<dyn std::error::Error>> {
        let (feature_matrix, var_names) = self.samples_to_matrix()?;
        let energies: Vec<f64> = self.samples.iter().map(|s| s.energy).collect();

        let mut variable_correlations = HashMap::new();
        let mut energy_correlations = HashMap::new();
        let mut significant_pairs = Vec::new();

        // Variable-variable correlations (upper triangle of the pair grid)
        for i in 0..var_names.len() {
            for j in i + 1..var_names.len() {
                // Extract columns i and j as plain vectors.
                let var_i: Vec<f64> = (0..feature_matrix.nrows())
                    .map(|k| feature_matrix[[k, i]])
                    .collect();
                let var_j: Vec<f64> = (0..feature_matrix.nrows())
                    .map(|k| feature_matrix[[k, j]])
                    .collect();

                let corr = calculate_correlation(&var_i, &var_j);

                if corr.abs() > 0.3 {
                    // Threshold for significance
                    significant_pairs.push((var_names[i].clone(), var_names[j].clone(), corr));
                }

                variable_correlations.insert((var_names[i].clone(), var_names[j].clone()), corr);
            }

            // Variable-energy correlations
            let var_values: Vec<f64> = (0..feature_matrix.nrows())
                .map(|k| feature_matrix[[k, i]])
                .collect();

            let energy_corr = calculate_correlation(&var_values, &energies);
            energy_correlations.insert(var_names[i].clone(), energy_corr);
        }

        Ok(CorrelationAnalysis {
            variable_correlations,
            energy_correlations,
            significant_pairs,
        })
    }
808}
809
/// Calculate the Pearson correlation coefficient of two equal-length series
///
/// Returns 0.0 for empty or length-mismatched inputs, and 0.0 when either
/// series has zero variance (correlation undefined).
fn calculate_correlation(x: &[f64], y: &[f64]) -> f64 {
    if x.len() != y.len() || x.is_empty() {
        return 0.0;
    }

    let n = x.len() as f64;
    let mean_x = x.iter().sum::<f64>() / n;
    let mean_y = y.iter().sum::<f64>() / n;

    // Accumulate covariance and variances in a single pass over the pairs.
    let mut cov_sum = 0.0;
    let mut var_x_sum = 0.0;
    let mut var_y_sum = 0.0;
    for (xi, yi) in x.iter().zip(y.iter()) {
        let dx = xi - mean_x;
        let dy = yi - mean_y;
        cov_sum += dx * dy;
        var_x_sum += dx * dx;
        var_y_sum += dy * dy;
    }

    // The 1/n factors cancel between covariance and the std product.
    if var_x_sum > 0.0 && var_y_sum > 0.0 {
        cov_sum / (var_x_sum.sqrt() * var_y_sum.sqrt())
    } else {
        0.0
    }
}
836
837/// Analyze solution distribution
838pub fn analyze_solution_distribution(
839    samples: Vec<SampleResult>,
840    config: Option<DistributionConfig>,
841) -> Result<DistributionAnalysis, Box<dyn std::error::Error>> {
842    let config = config.unwrap_or_default();
843    let mut analyzer = SolutionDistribution::new(config);
844    analyzer.add_samples(samples);
845    analyzer.analyze()
846}
847
/// Plot solution distribution analysis
///
/// With the `scirs` feature: renders a 2x2 figure (energy histogram,
/// cluster sizes when available, cumulative distribution, variable-energy
/// correlations). Without it: writes the analysis to
/// `distribution_analysis.json` instead.
// NOTE(review): the plotting calls go through `scirs_stub::scirs2_plot`;
// the exact Figure/Subplot API is assumed from the stub — verify.
pub fn plot_distribution_analysis(
    analysis: &DistributionAnalysis,
) -> Result<(), Box<dyn std::error::Error>> {
    #[cfg(feature = "scirs")]
    {
        use crate::scirs_stub::scirs2_plot::{Figure, Subplot};

        let mut fig = Figure::new();

        // Energy distribution (histogram of bin counts).
        let bin_counts_f64: Vec<f64> = analysis
            .quality_distribution
            .bin_counts
            .iter()
            .map(|&c| c as f64)
            .collect();
        fig.add_subplot(2, 2, 1)?
            .bar(&analysis.quality_distribution.energy_bins, &bin_counts_f64)
            .set_xlabel("Energy")
            .set_ylabel("Count")
            .set_title("Energy Distribution");

        // Cluster sizes (only when clustering ran).
        if let Some(ref cluster_info) = analysis.cluster_info {
            let cluster_ids: Vec<f64> = (0..cluster_info.n_clusters).map(|i| i as f64).collect();
            let cluster_sizes: Vec<f64> = cluster_info
                .cluster_sizes
                .iter()
                .map(|&s| s as f64)
                .collect();

            fig.add_subplot(2, 2, 2)?
                .bar(&cluster_ids, &cluster_sizes)
                .set_xlabel("Cluster ID")
                .set_ylabel("Size")
                .set_title("Cluster Sizes");
        }

        // Cumulative distribution of energies.
        fig.add_subplot(2, 2, 3)?
            .plot(
                &analysis.quality_distribution.energy_bins,
                &analysis.quality_distribution.cumulative_distribution,
            )
            .set_xlabel("Energy")
            .set_ylabel("Cumulative Probability")
            .set_title("Cumulative Distribution");

        // Variable-energy correlations as a horizontal bar chart.
        let var_names: Vec<String> = analysis
            .correlation_analysis
            .energy_correlations
            .keys()
            .cloned()
            .collect();
        let correlations: Vec<f64> = var_names
            .iter()
            .map(|v| analysis.correlation_analysis.energy_correlations[v])
            .collect();

        if !var_names.is_empty() {
            fig.add_subplot(2, 2, 4)?
                .bar_horizontal(&var_names, &correlations)
                .set_xlabel("Correlation with Energy")
                .set_ylabel("Variable")
                .set_title("Variable-Energy Correlations");
        }

        fig.show()?;
    }

    #[cfg(not(feature = "scirs"))]
    {
        // No plotting backend: export the analysis data instead.
        export_distribution_analysis(analysis, "distribution_analysis.json")?;
        println!("Distribution analysis exported to distribution_analysis.json");
    }

    Ok(())
}
929
930/// Export distribution analysis
931fn export_distribution_analysis(
932    analysis: &DistributionAnalysis,
933    path: &str,
934) -> Result<(), Box<dyn std::error::Error>> {
935    let json = serde_json::to_string_pretty(analysis)?;
936    std::fs::write(path, json)?;
937    Ok(())
938}