scirs2_optimize/global/
clustering.rs

1//! Clustering algorithms for local minima in multi-start optimization
2//!
3//! This module provides algorithms to identify, cluster, and analyze local minima
4//! found during multi-start optimization strategies. It helps distinguish between
5//! unique local minima and provides insights into the optimization landscape.
6
7use crate::error::OptimizeError;
8use crate::unconstrained::{minimize, Method, Options};
9use ndarray::{Array1, Array2, ArrayView1};
10use std::collections::HashMap;
11
12/// Configuration for clustering local minima
13#[derive(Debug, Clone)]
14pub struct ClusteringOptions {
15    /// Distance threshold for considering minima as belonging to the same cluster
16    pub distance_threshold: f64,
17    /// Relative tolerance for function values when clustering
18    pub function_tolerance: f64,
19    /// Maximum number of clusters to form
20    pub max_clusters: Option<usize>,
21    /// Clustering algorithm to use
22    pub algorithm: ClusteringAlgorithm,
23    /// Whether to normalize coordinates before clustering
24    pub normalize_coordinates: bool,
25    /// Whether to use function values in clustering
26    pub use_function_values: bool,
27    /// Weight for function values vs coordinates in distance calculation
28    pub function_weight: f64,
29}
30
31impl Default for ClusteringOptions {
32    fn default() -> Self {
33        Self {
34            distance_threshold: 1e-3,
35            function_tolerance: 1e-6,
36            max_clusters: None,
37            algorithm: ClusteringAlgorithm::Hierarchical,
38            normalize_coordinates: true,
39            use_function_values: true,
40            function_weight: 0.1,
41        }
42    }
43}
44
45/// Clustering algorithms available
46#[derive(Debug, Clone, Copy)]
47pub enum ClusteringAlgorithm {
48    /// Hierarchical clustering with single linkage
49    Hierarchical,
50    /// K-means clustering
51    KMeans,
52    /// Density-based clustering (DBSCAN-like)
53    Density,
54    /// Custom threshold-based clustering
55    Threshold,
56}
57
58/// Represents a local minimum found during optimization
59#[derive(Debug, Clone)]
60pub struct LocalMinimum<S> {
61    /// Location of the minimum
62    pub x: Array1<f64>,
63    /// Function value at the minimum
64    pub f: f64,
65    /// Original function value (for generic type)
66    pub fun_value: S,
67    /// Number of optimization iterations to reach this minimum
68    pub iterations: usize,
69    /// Number of function evaluations
70    pub func_evals: usize,
71    /// Whether optimization was successful
72    pub success: bool,
73    /// Starting point that led to this minimum
74    pub start_point: Array1<f64>,
75    /// Cluster ID (assigned after clustering)
76    pub cluster_id: Option<usize>,
77    /// Distance to cluster centroid
78    pub cluster_distance: Option<f64>,
79}
80
81/// Result of clustering analysis
82#[derive(Debug, Clone)]
83pub struct ClusteringResult<S> {
84    /// All local minima found
85    pub minima: Vec<LocalMinimum<S>>,
86    /// Cluster centroids
87    pub centroids: Vec<ClusterCentroid>,
88    /// Number of clusters formed
89    pub num_clusters: usize,
90    /// Silhouette score (quality measure)
91    pub silhouette_score: Option<f64>,
92    /// Within-cluster sum of squares
93    pub wcss: f64,
94    /// Best minimum found (global optimum candidate)
95    pub best_minimum: Option<LocalMinimum<S>>,
96}
97
98/// Cluster centroid information
99#[derive(Debug, Clone)]
100pub struct ClusterCentroid {
101    /// Centroid coordinates
102    pub x: Array1<f64>,
103    /// Average function value in cluster
104    pub f_avg: f64,
105    /// Best (minimum) function value in cluster
106    pub f_min: f64,
107    /// Number of minima in this cluster
108    pub size: usize,
109    /// Cluster radius (max distance from centroid)
110    pub radius: f64,
111}
112
113/// Multi-start optimization with clustering
114pub fn multi_start_with_clustering<F, S>(
115    fun: F,
116    start_points: &[Array1<f64>],
117    method: Method,
118    options: Option<Options>,
119    clustering_options: Option<ClusteringOptions>,
120) -> Result<ClusteringResult<S>, OptimizeError>
121where
122    F: FnMut(&ArrayView1<f64>) -> S + Clone,
123    S: Into<f64> + Clone + From<f64>,
124{
125    let clustering_opts = clustering_options.unwrap_or_default();
126    let mut minima = Vec::new();
127
128    // Run optimization from each starting point
129    for start_point in start_points {
130        let fun_clone = fun.clone();
131
132        match minimize(
133            fun_clone,
134            start_point.as_slice().unwrap(),
135            method,
136            options.clone(),
137        ) {
138            Ok(result) => {
139                let minimum = LocalMinimum {
140                    x: result.x.clone(),
141                    f: result.fun.clone().into(),
142                    fun_value: result.fun,
143                    iterations: result.iterations,
144                    func_evals: result.func_evals,
145                    success: result.success,
146                    start_point: start_point.clone(),
147                    cluster_id: None,
148                    cluster_distance: None,
149                };
150                minima.push(minimum);
151            }
152            Err(_) => {
153                // Skip failed optimizations but could log them
154                continue;
155            }
156        }
157    }
158
159    if minima.is_empty() {
160        return Err(OptimizeError::ComputationError(
161            "No successful optimizations found".to_string(),
162        ));
163    }
164
165    // Perform clustering
166    cluster_minima(&mut minima, &clustering_opts)?;
167
168    // Compute cluster centroids and statistics
169    let centroids = compute_cluster_centroids(&minima)?;
170    let num_clusters = centroids.len();
171    let wcss = compute_wcss(&minima, &centroids);
172    let silhouette_score = compute_silhouette_score(&minima);
173
174    // Find best minimum
175    let best_minimum = minima
176        .iter()
177        .filter(|m| m.success)
178        .min_by(|a, b| a.f.partial_cmp(&b.f).unwrap())
179        .cloned();
180
181    Ok(ClusteringResult {
182        minima,
183        centroids,
184        num_clusters,
185        silhouette_score,
186        wcss,
187        best_minimum,
188    })
189}
190
191/// Cluster local minima using the specified algorithm
192fn cluster_minima<S>(
193    minima: &mut [LocalMinimum<S>],
194    options: &ClusteringOptions,
195) -> Result<(), OptimizeError>
196where
197    S: Clone,
198{
199    if minima.is_empty() {
200        return Ok(());
201    }
202
203    match options.algorithm {
204        ClusteringAlgorithm::Hierarchical => hierarchical_clustering(minima, options),
205        ClusteringAlgorithm::KMeans => kmeans_clustering(minima, options),
206        ClusteringAlgorithm::Density => density_clustering(minima, options),
207        ClusteringAlgorithm::Threshold => threshold_clustering(minima, options),
208    }
209}
210
211/// Hierarchical clustering implementation
212fn hierarchical_clustering<S>(
213    minima: &mut [LocalMinimum<S>],
214    options: &ClusteringOptions,
215) -> Result<(), OptimizeError>
216where
217    S: Clone,
218{
219    let n = minima.len();
220    if n <= 1 {
221        if n == 1 {
222            minima[0].cluster_id = Some(0);
223        }
224        return Ok(());
225    }
226
227    // Compute distance matrix
228    let distances = compute_distance_matrix(minima, options);
229
230    // Perform hierarchical clustering using single linkage
231    let mut cluster_assignments = vec![None; n];
232    let mut next_cluster_id = n;
233
234    // Initialize: each point is its own cluster
235    for (i, assignment) in cluster_assignments.iter_mut().enumerate().take(n) {
236        *assignment = Some(i);
237    }
238
239    // Build hierarchy by merging closest clusters
240    let mut active_clusters: Vec<Vec<usize>> = (0..n).map(|i| vec![i]).collect();
241
242    while active_clusters.len() > 1 {
243        let mut min_dist = f64::INFINITY;
244        let mut merge_pair = (0, 1);
245
246        // Find closest pair of clusters
247        for i in 0..active_clusters.len() {
248            for j in (i + 1)..active_clusters.len() {
249                let cluster_dist =
250                    compute_cluster_distance(&active_clusters[i], &active_clusters[j], &distances);
251                if cluster_dist < min_dist {
252                    min_dist = cluster_dist;
253                    merge_pair = (i, j);
254                }
255            }
256        }
257
258        // Stop if distance threshold exceeded
259        if min_dist > options.distance_threshold {
260            break;
261        }
262
263        // Check max clusters constraint
264        if let Some(max_clusters) = options.max_clusters {
265            if active_clusters.len() <= max_clusters {
266                break;
267            }
268        }
269
270        // Merge clusters
271        let (i, j) = merge_pair;
272        let mut merged_cluster = active_clusters[i].clone();
273        merged_cluster.extend(&active_clusters[j]);
274
275        // Update cluster assignments
276        for &point_idx in &merged_cluster {
277            cluster_assignments[point_idx] = Some(next_cluster_id);
278        }
279
280        // Remove old clusters and add merged one
281        let mut new_clusters = Vec::new();
282        for (k, cluster) in active_clusters.iter().enumerate() {
283            if k != i && k != j {
284                new_clusters.push(cluster.clone());
285            }
286        }
287        new_clusters.push(merged_cluster);
288        active_clusters = new_clusters;
289        next_cluster_id += 1;
290    }
291
292    // Assign final cluster IDs (renumber to be sequential)
293    let mut cluster_map = HashMap::new();
294    let mut final_cluster_id = 0;
295
296    for cluster_id in cluster_assignments.iter().flatten() {
297        if !cluster_map.contains_key(cluster_id) {
298            cluster_map.insert(*cluster_id, final_cluster_id);
299            final_cluster_id += 1;
300        }
301    }
302
303    // Update minima with final cluster assignments
304    for (i, minimum) in minima.iter_mut().enumerate() {
305        if let Some(cluster_id) = cluster_assignments[i] {
306            minimum.cluster_id = cluster_map.get(&cluster_id).copied();
307        }
308    }
309
310    Ok(())
311}
312
313/// K-means clustering implementation
314fn kmeans_clustering<S>(
315    minima: &mut [LocalMinimum<S>],
316    options: &ClusteringOptions,
317) -> Result<(), OptimizeError>
318where
319    S: Clone,
320{
321    let n = minima.len();
322    if n <= 1 {
323        if n == 1 {
324            minima[0].cluster_id = Some(0);
325        }
326        return Ok(());
327    }
328
329    // Determine number of clusters
330    let k = if let Some(max_k) = options.max_clusters {
331        std::cmp::min(max_k, n)
332    } else {
333        // Use elbow method or default to sqrt(n)
334        std::cmp::min((n as f64).sqrt() as usize + 1, n)
335    };
336
337    if k >= n {
338        // Each point is its own cluster
339        for (i, minimum) in minima.iter_mut().enumerate() {
340            minimum.cluster_id = Some(i);
341        }
342        return Ok(());
343    }
344
345    // Get feature vectors for clustering
346    let features = extract_features(minima, options);
347    let dim = features.ncols();
348
349    // Initialize centroids randomly (or use k-means++)
350    let mut centroids = initialize_centroids_plus_plus(&features, k);
351    let mut assignments = vec![0; n];
352    let max_iter = 100;
353    let tolerance = 1e-6;
354
355    for _iter in 0..max_iter {
356        let mut changed = false;
357
358        // Assign points to nearest centroids
359        for (i, assignment) in assignments.iter_mut().enumerate().take(n) {
360            let mut min_dist = f64::INFINITY;
361            let mut best_cluster = 0;
362
363            for j in 0..k {
364                let dist = euclidean_distance(&features.row(i), &centroids.row(j));
365                if dist < min_dist {
366                    min_dist = dist;
367                    best_cluster = j;
368                }
369            }
370
371            if *assignment != best_cluster {
372                *assignment = best_cluster;
373                changed = true;
374            }
375        }
376
377        if !changed {
378            break;
379        }
380
381        // Update centroids
382        let mut new_centroids = Array2::zeros((k, dim));
383        let mut cluster_sizes = vec![0; k];
384
385        for i in 0..n {
386            let cluster = assignments[i];
387            cluster_sizes[cluster] += 1;
388            for d in 0..dim {
389                new_centroids[[cluster, d]] += features[[i, d]];
390            }
391        }
392
393        for j in 0..k {
394            if cluster_sizes[j] > 0 {
395                for d in 0..dim {
396                    new_centroids[[j, d]] /= cluster_sizes[j] as f64;
397                }
398            }
399        }
400
401        // Check convergence
402        let centroid_change = (&centroids - &new_centroids).mapv(|x| x.abs()).sum();
403
404        centroids = new_centroids;
405
406        if centroid_change < tolerance {
407            break;
408        }
409    }
410
411    // Assign cluster IDs to minima
412    for (i, minimum) in minima.iter_mut().enumerate() {
413        minimum.cluster_id = Some(assignments[i]);
414    }
415
416    Ok(())
417}
418
419/// Density-based clustering (DBSCAN-like)
420fn density_clustering<S>(
421    minima: &mut [LocalMinimum<S>],
422    options: &ClusteringOptions,
423) -> Result<(), OptimizeError>
424where
425    S: Clone,
426{
427    let n = minima.len();
428    if n <= 1 {
429        if n == 1 {
430            minima[0].cluster_id = Some(0);
431        }
432        return Ok(());
433    }
434
435    let eps = options.distance_threshold;
436    let min_pts = 2; // Minimum points to form a cluster
437
438    let distances = compute_distance_matrix(minima, options);
439    let mut cluster_assignments = vec![None; n];
440    let mut visited = vec![false; n];
441    let mut cluster_id = 0;
442
443    for i in 0..n {
444        if visited[i] {
445            continue;
446        }
447        visited[i] = true;
448
449        // Find neighbors
450        let neighbors: Vec<usize> = (0..n).filter(|&j| distances[[i, j]] <= eps).collect();
451
452        if neighbors.len() < min_pts {
453            // Mark as noise (will remain None)
454            continue;
455        }
456
457        // Start new cluster
458        let mut to_visit = neighbors.clone();
459        cluster_assignments[i] = Some(cluster_id);
460
461        let mut idx = 0;
462        while idx < to_visit.len() {
463            let point = to_visit[idx];
464
465            if !visited[point] {
466                visited[point] = true;
467
468                let point_neighbors: Vec<usize> =
469                    (0..n).filter(|&j| distances[[point, j]] <= eps).collect();
470
471                if point_neighbors.len() >= min_pts {
472                    // Add new neighbors to visit list
473                    for &neighbor in &point_neighbors {
474                        if !to_visit.contains(&neighbor) {
475                            to_visit.push(neighbor);
476                        }
477                    }
478                }
479            }
480
481            if cluster_assignments[point].is_none() {
482                cluster_assignments[point] = Some(cluster_id);
483            }
484
485            idx += 1;
486        }
487
488        cluster_id += 1;
489    }
490
491    // Assign cluster IDs to minima
492    for (i, minimum) in minima.iter_mut().enumerate() {
493        minimum.cluster_id = cluster_assignments[i];
494    }
495
496    Ok(())
497}
498
499/// Simple threshold-based clustering
500fn threshold_clustering<S>(
501    minima: &mut [LocalMinimum<S>],
502    options: &ClusteringOptions,
503) -> Result<(), OptimizeError>
504where
505    S: Clone,
506{
507    let n = minima.len();
508    if n <= 1 {
509        if n == 1 {
510            minima[0].cluster_id = Some(0);
511        }
512        return Ok(());
513    }
514
515    let mut cluster_assignments = vec![None; n];
516    let mut cluster_id = 0;
517
518    for i in 0..n {
519        if cluster_assignments[i].is_some() {
520            continue;
521        }
522
523        // Start new cluster
524        cluster_assignments[i] = Some(cluster_id);
525
526        // Find all points within threshold
527        for j in (i + 1)..n {
528            if cluster_assignments[j].is_some() {
529                continue;
530            }
531
532            let distance = compute_distance(&minima[i], &minima[j], options);
533            if distance <= options.distance_threshold {
534                cluster_assignments[j] = Some(cluster_id);
535            }
536        }
537
538        cluster_id += 1;
539    }
540
541    // Assign cluster IDs to minima
542    for (i, minimum) in minima.iter_mut().enumerate() {
543        minimum.cluster_id = cluster_assignments[i];
544    }
545
546    Ok(())
547}
548
549/// Compute distance matrix between all minima
550fn compute_distance_matrix<S>(
551    minima: &[LocalMinimum<S>],
552    options: &ClusteringOptions,
553) -> Array2<f64>
554where
555    S: Clone,
556{
557    let n = minima.len();
558    let mut distances = Array2::zeros((n, n));
559
560    for i in 0..n {
561        for j in (i + 1)..n {
562            let dist = compute_distance(&minima[i], &minima[j], options);
563            distances[[i, j]] = dist;
564            distances[[j, i]] = dist;
565        }
566    }
567
568    distances
569}
570
571/// Compute distance between two local minima
572fn compute_distance<S>(
573    min1: &LocalMinimum<S>,
574    min2: &LocalMinimum<S>,
575    options: &ClusteringOptions,
576) -> f64
577where
578    S: Clone,
579{
580    // Coordinate distance
581    let coord_dist = (&min1.x - &min2.x).mapv(|x| x.powi(2)).sum().sqrt();
582
583    if !options.use_function_values {
584        return coord_dist;
585    }
586
587    // Function value distance
588    let func_dist = (min1.f - min2.f).abs();
589
590    // Combined distance
591    coord_dist + options.function_weight * func_dist
592}
593
594/// Compute distance between two clusters (single linkage)
595fn compute_cluster_distance(
596    cluster1: &[usize],
597    cluster2: &[usize],
598    distances: &Array2<f64>,
599) -> f64 {
600    let mut min_dist = f64::INFINITY;
601
602    for &i in cluster1 {
603        for &j in cluster2 {
604            let dist = distances[[i, j]];
605            if dist < min_dist {
606                min_dist = dist;
607            }
608        }
609    }
610
611    min_dist
612}
613
614/// Extract feature vectors for clustering
615fn extract_features<S>(minima: &[LocalMinimum<S>], options: &ClusteringOptions) -> Array2<f64>
616where
617    S: Clone,
618{
619    let n = minima.len();
620    if n == 0 {
621        return Array2::zeros((0, 0));
622    }
623
624    let coord_dim = minima[0].x.len();
625    let func_dim = if options.use_function_values { 1 } else { 0 };
626    let total_dim = coord_dim + func_dim;
627
628    let mut features = Array2::zeros((n, total_dim));
629
630    // Extract coordinates
631    for (i, minimum) in minima.iter().enumerate() {
632        for j in 0..coord_dim {
633            features[[i, j]] = minimum.x[j];
634        }
635    }
636
637    // Add function values if requested
638    if options.use_function_values {
639        for (i, minimum) in minima.iter().enumerate() {
640            features[[i, coord_dim]] = minimum.f * options.function_weight;
641        }
642    }
643
644    // Normalize if requested
645    if options.normalize_coordinates {
646        normalize_features(&mut features, coord_dim);
647    }
648
649    features
650}
651
652/// Normalize feature matrix
653fn normalize_features(features: &mut Array2<f64>, coord_dim: usize) {
654    let (n, _) = features.dim();
655    if n == 0 {
656        return;
657    }
658
659    // Normalize coordinates only
660    for j in 0..coord_dim {
661        let col = features.column(j);
662        let min_val = col.iter().fold(f64::INFINITY, |a, &b| f64::min(a, b));
663        let max_val = col.iter().fold(f64::NEG_INFINITY, |a, &b| f64::max(a, b));
664
665        if (max_val - min_val).abs() > 1e-10 {
666            for i in 0..n {
667                features[[i, j]] = (features[[i, j]] - min_val) / (max_val - min_val);
668            }
669        }
670    }
671}
672
673/// Initialize centroids using k-means++ algorithm
674fn initialize_centroids_plus_plus(features: &Array2<f64>, k: usize) -> Array2<f64> {
675    let (n, dim) = features.dim();
676    let mut centroids = Array2::zeros((k, dim));
677
678    if n == 0 || k == 0 {
679        return centroids;
680    }
681
682    // Choose first centroid randomly
683    let first_idx = 0; // In practice, should be random
684    centroids.row_mut(0).assign(&features.row(first_idx));
685
686    // Choose remaining centroids
687    for c in 1..k {
688        let mut distances = vec![f64::INFINITY; n];
689
690        // Compute distance to nearest centroid for each point
691        for (i, distance) in distances.iter_mut().enumerate().take(n) {
692            let point = features.row(i);
693            for j in 0..c {
694                let centroid = centroids.row(j);
695                let dist = euclidean_distance(&point, &centroid);
696                *distance = distance.min(dist);
697            }
698        }
699
700        // Choose next centroid (should be weighted random, using max for simplicity)
701        let next_idx = distances
702            .iter()
703            .enumerate()
704            .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
705            .map(|(i, _)| i)
706            .unwrap_or(0);
707
708        centroids.row_mut(c).assign(&features.row(next_idx));
709    }
710
711    centroids
712}
713
714/// Compute Euclidean distance between two points
715fn euclidean_distance(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
716    (a - b).mapv(|x| x.powi(2)).sum().sqrt()
717}
718
719/// Compute cluster centroids and statistics
720fn compute_cluster_centroids<S>(
721    minima: &[LocalMinimum<S>],
722) -> Result<Vec<ClusterCentroid>, OptimizeError>
723where
724    S: Clone,
725{
726    if minima.is_empty() {
727        return Ok(Vec::new());
728    }
729
730    // Group minima by cluster
731    let mut clusters: HashMap<usize, Vec<&LocalMinimum<S>>> = HashMap::new();
732
733    for minimum in minima {
734        if let Some(cluster_id) = minimum.cluster_id {
735            clusters.entry(cluster_id).or_default().push(minimum);
736        }
737    }
738
739    let mut centroids = Vec::new();
740
741    for (_, cluster_minima) in clusters {
742        if cluster_minima.is_empty() {
743            continue;
744        }
745
746        let dim = cluster_minima[0].x.len();
747        let mut centroid_x = Array1::zeros(dim);
748        let mut f_sum = 0.0;
749        let mut f_min = f64::INFINITY;
750
751        // Compute centroid coordinates and function statistics
752        for minimum in &cluster_minima {
753            centroid_x = &centroid_x + &minimum.x;
754            f_sum += minimum.f;
755            f_min = f_min.min(minimum.f);
756        }
757
758        let size = cluster_minima.len();
759        centroid_x /= size as f64;
760        let f_avg = f_sum / size as f64;
761
762        // Compute cluster radius
763        let mut max_radius = 0.0;
764        for minimum in &cluster_minima {
765            let dist = (&minimum.x - &centroid_x).mapv(|x| x.powi(2)).sum().sqrt();
766            max_radius = f64::max(max_radius, dist);
767        }
768
769        centroids.push(ClusterCentroid {
770            x: centroid_x,
771            f_avg,
772            f_min,
773            size,
774            radius: max_radius,
775        });
776    }
777
778    Ok(centroids)
779}
780
781/// Compute within-cluster sum of squares
782fn compute_wcss<S>(minima: &[LocalMinimum<S>], centroids: &[ClusterCentroid]) -> f64
783where
784    S: Clone,
785{
786    let mut wcss = 0.0;
787
788    for minimum in minima {
789        if let Some(cluster_id) = minimum.cluster_id {
790            if cluster_id < centroids.len() {
791                let centroid = &centroids[cluster_id];
792                let dist = (&minimum.x - &centroid.x).mapv(|x| x.powi(2)).sum();
793                wcss += dist;
794            }
795        }
796    }
797
798    wcss
799}
800
801/// Compute silhouette score for clustering quality
802fn compute_silhouette_score<S>(minima: &[LocalMinimum<S>]) -> Option<f64>
803where
804    S: Clone,
805{
806    if minima.len() < 2 {
807        return None;
808    }
809
810    let mut silhouette_sum = 0.0;
811    let mut valid_points = 0;
812
813    for (i, minimum) in minima.iter().enumerate() {
814        if let Some(cluster_id) = minimum.cluster_id {
815            // Compute intra-cluster distance
816            let mut intra_sum = 0.0;
817            let mut intra_count = 0;
818
819            // Compute inter-cluster distances
820            let mut min_inter = f64::INFINITY;
821            let mut cluster_inter_sums: HashMap<usize, f64> = HashMap::new();
822            let mut cluster_inter_counts: HashMap<usize, usize> = HashMap::new();
823
824            for (j, other) in minima.iter().enumerate() {
825                if i == j {
826                    continue;
827                }
828
829                if let Some(other_cluster_id) = other.cluster_id {
830                    let dist = (&minimum.x - &other.x).mapv(|x| x.powi(2)).sum().sqrt();
831
832                    if other_cluster_id == cluster_id {
833                        intra_sum += dist;
834                        intra_count += 1;
835                    } else {
836                        *cluster_inter_sums.entry(other_cluster_id).or_insert(0.0) += dist;
837                        *cluster_inter_counts.entry(other_cluster_id).or_insert(0) += 1;
838                    }
839                }
840            }
841
842            // Find minimum inter-cluster distance
843            for (other_cluster_id, sum) in cluster_inter_sums {
844                let count = cluster_inter_counts[&other_cluster_id];
845                if count > 0 {
846                    let avg_inter = sum / count as f64;
847                    min_inter = min_inter.min(avg_inter);
848                }
849            }
850
851            if intra_count > 0 && min_inter < f64::INFINITY {
852                let a = intra_sum / intra_count as f64;
853                let b = min_inter;
854                let silhouette = (b - a) / f64::max(a, b);
855                silhouette_sum += silhouette;
856                valid_points += 1;
857            }
858        }
859    }
860
861    if valid_points > 0 {
862        Some(silhouette_sum / valid_points as f64)
863    } else {
864        None
865    }
866}
867
868/// Generate diverse starting points for multi-start optimization
869pub fn generate_diverse_start_points(
870    bounds: &[(f64, f64)],
871    num_points: usize,
872    strategy: StartPointStrategy,
873) -> Vec<Array1<f64>> {
874    match strategy {
875        StartPointStrategy::Random => generate_random_points(bounds, num_points),
876        StartPointStrategy::LatinHypercube => generate_latin_hypercube_points(bounds, num_points),
877        StartPointStrategy::Grid => generate_grid_points(bounds, num_points),
878        StartPointStrategy::Sobol => generate_sobol_points(bounds, num_points),
879    }
880}
881
882/// Strategy for generating starting points
883#[derive(Debug, Clone, Copy)]
884pub enum StartPointStrategy {
885    Random,
886    LatinHypercube,
887    Grid,
888    Sobol,
889}
890
891/// Generate random starting points
892fn generate_random_points(bounds: &[(f64, f64)], num_points: usize) -> Vec<Array1<f64>> {
893    let dim = bounds.len();
894    let mut points = Vec::new();
895
896    for _ in 0..num_points {
897        let mut point = Array1::zeros(dim);
898        for (i, &(low, high)) in bounds.iter().enumerate() {
899            // Simple pseudo-random (in practice, use proper RNG)
900            let t = (i * num_points + points.len()) as f64 / (num_points * dim) as f64;
901            let random_val = (t * 17.0).fract(); // Simple pseudo-random
902            point[i] = low + random_val * (high - low);
903        }
904        points.push(point);
905    }
906
907    points
908}
909
910/// Generate Latin Hypercube sampling points
911fn generate_latin_hypercube_points(bounds: &[(f64, f64)], num_points: usize) -> Vec<Array1<f64>> {
912    let dim = bounds.len();
913    let mut points = Vec::new();
914
915    // Simple Latin Hypercube implementation
916    for i in 0..num_points {
917        let mut point = Array1::zeros(dim);
918        for (j, &(low, high)) in bounds.iter().enumerate() {
919            let segment = (i as f64 + 0.5) / num_points as f64; // Center of segment
920            point[j] = low + segment * (high - low);
921        }
922        points.push(point);
923    }
924
925    points
926}
927
928/// Generate grid points
929fn generate_grid_points(bounds: &[(f64, f64)], num_points: usize) -> Vec<Array1<f64>> {
930    let dim = bounds.len();
931    if dim == 0 {
932        return Vec::new();
933    }
934
935    let points_per_dim = ((num_points as f64).powf(1.0 / dim as f64)).ceil() as usize;
936    let mut points = Vec::new();
937
938    // Generate grid coordinates recursively
939    fn generate_grid_recursive(
940        bounds: &[(f64, f64)],
941        points_per_dim: usize,
942        current_point: &mut Array1<f64>,
943        dim_idx: usize,
944        points: &mut Vec<Array1<f64>>,
945    ) {
946        if dim_idx >= bounds.len() {
947            points.push(current_point.clone());
948            return;
949        }
950
951        let (low, high) = bounds[dim_idx];
952        for i in 0..points_per_dim {
953            let t = if points_per_dim == 1 {
954                0.5
955            } else {
956                i as f64 / (points_per_dim - 1) as f64
957            };
958            current_point[dim_idx] = low + t * (high - low);
959            generate_grid_recursive(bounds, points_per_dim, current_point, dim_idx + 1, points);
960        }
961    }
962
963    let mut current_point = Array1::zeros(dim);
964    generate_grid_recursive(bounds, points_per_dim, &mut current_point, 0, &mut points);
965
966    // Truncate to requested number of points
967    points.truncate(num_points);
968    points
969}
970
971/// Generate Sobol sequence points (simplified)
972fn generate_sobol_points(bounds: &[(f64, f64)], num_points: usize) -> Vec<Array1<f64>> {
973    // Simplified Sobol sequence (in practice, use proper implementation)
974    let dim = bounds.len();
975    let mut points = Vec::new();
976
977    for i in 0..num_points {
978        let mut point = Array1::zeros(dim);
979        for (j, &(low, high)) in bounds.iter().enumerate() {
980            // Van der Corput sequence for dimension j
981            let mut n = i + 1;
982            let base = 2 + j; // Different base for each dimension
983            let mut result = 0.0;
984            let mut f = 1.0 / base as f64;
985
986            while n > 0 {
987                result += (n % base) as f64 * f;
988                n /= base;
989                f /= base as f64;
990            }
991
992            point[j] = low + result * (high - low);
993        }
994        points.push(point);
995    }
996
997    points
998}
999
1000#[cfg(test)]
1001mod tests {
1002    use super::*;
1003    use approx::assert_abs_diff_eq;
1004
1005    #[test]
1006    fn test_simple_clustering() {
1007        // Create some test minima
1008        let mut minima = vec![
1009            LocalMinimum {
1010                x: Array1::from_vec(vec![0.0, 0.0]),
1011                f: 1.0,
1012                fun_value: 1.0,
1013                iterations: 10,
1014                func_evals: 20,
1015                success: true,
1016                start_point: Array1::from_vec(vec![1.0, 1.0]),
1017                cluster_id: None,
1018                cluster_distance: None,
1019            },
1020            LocalMinimum {
1021                x: Array1::from_vec(vec![0.1, 0.1]),
1022                f: 1.1,
1023                fun_value: 1.1,
1024                iterations: 12,
1025                func_evals: 24,
1026                success: true,
1027                start_point: Array1::from_vec(vec![1.1, 1.1]),
1028                cluster_id: None,
1029                cluster_distance: None,
1030            },
1031            LocalMinimum {
1032                x: Array1::from_vec(vec![5.0, 5.0]),
1033                f: 2.0,
1034                fun_value: 2.0,
1035                iterations: 15,
1036                func_evals: 30,
1037                success: true,
1038                start_point: Array1::from_vec(vec![5.5, 5.5]),
1039                cluster_id: None,
1040                cluster_distance: None,
1041            },
1042        ];
1043
1044        let options = ClusteringOptions {
1045            distance_threshold: 1.0,
1046            algorithm: ClusteringAlgorithm::Threshold,
1047            ..Default::default()
1048        };
1049
1050        threshold_clustering(&mut minima, &options).unwrap();
1051
1052        // First two should be in same cluster, third in different cluster
1053        assert_eq!(minima[0].cluster_id, minima[1].cluster_id);
1054        assert_ne!(minima[0].cluster_id, minima[2].cluster_id);
1055    }
1056
1057    #[test]
1058    fn test_distance_computation() {
1059        let min1 = LocalMinimum {
1060            x: Array1::from_vec(vec![0.0, 0.0]),
1061            f: 1.0,
1062            fun_value: 1.0,
1063            iterations: 10,
1064            func_evals: 20,
1065            success: true,
1066            start_point: Array1::from_vec(vec![1.0, 1.0]),
1067            cluster_id: None,
1068            cluster_distance: None,
1069        };
1070
1071        let min2 = LocalMinimum {
1072            x: Array1::from_vec(vec![3.0, 4.0]),
1073            f: 2.0,
1074            fun_value: 2.0,
1075            iterations: 12,
1076            func_evals: 24,
1077            success: true,
1078            start_point: Array1::from_vec(vec![3.5, 4.5]),
1079            cluster_id: None,
1080            cluster_distance: None,
1081        };
1082
1083        let options = ClusteringOptions::default();
1084        let distance = compute_distance(&min1, &min2, &options);
1085
1086        // Euclidean distance is 5.0, plus function weight * |1.0 - 2.0| = 5.0 + 0.1 * 1.0 = 5.1
1087        assert_abs_diff_eq!(distance, 5.1, epsilon = 1e-10);
1088    }
1089
1090    #[test]
1091    fn test_start_point_generation() {
1092        let bounds = vec![(0.0, 10.0), (-5.0, 5.0)];
1093        let num_points = 5;
1094
1095        let random_points =
1096            generate_diverse_start_points(&bounds, num_points, StartPointStrategy::Random);
1097        assert_eq!(random_points.len(), num_points);
1098
1099        for point in &random_points {
1100            assert!(point[0] >= 0.0 && point[0] <= 10.0);
1101            assert!(point[1] >= -5.0 && point[1] <= 5.0);
1102        }
1103
1104        let grid_points =
1105            generate_diverse_start_points(&bounds, num_points, StartPointStrategy::Grid);
1106        assert_eq!(grid_points.len(), num_points);
1107
1108        for point in &grid_points {
1109            assert!(point[0] >= 0.0 && point[0] <= 10.0);
1110            assert!(point[1] >= -5.0 && point[1] <= 5.0);
1111        }
1112    }
1113}