1use super::{
8 spatial_constraints::SpatialConstraint,
9 spatial_utils::{euclidean_distance, k_nearest_neighbors},
10};
11use scirs2_core::ndarray::{Array1, Array2};
12use sklears_core::error::{Result as SklResult, SklearsError};
13
14#[derive(Debug, Clone)]
16pub struct MoransI {
17 pub statistic: f64,
19 pub expected_value: f64,
21 pub variance: f64,
23 pub z_score: f64,
25 pub p_value: f64,
27}
28
29#[derive(Debug, Clone)]
31pub struct GearysC {
32 pub statistic: f64,
34 pub expected_value: f64,
36 pub variance: f64,
38 pub z_score: f64,
40 pub p_value: f64,
42}
43
44#[derive(Debug, Clone)]
46pub struct LocalIndicators {
47 pub local_morans_i: Array1<f64>,
49 pub local_z_scores: Array1<f64>,
51 pub local_p_values: Array1<f64>,
53 pub clusters: Array1<String>, }
56
57pub struct SpatialAutocorrelationAnalyzer {
59 spatial_weights: Array2<f64>,
60 coords: Array2<f64>,
61}
62
63impl SpatialAutocorrelationAnalyzer {
64 pub fn new(coords: Array2<f64>, constraint: SpatialConstraint) -> SklResult<Self> {
66 let spatial_weights = Self::compute_spatial_weights(&coords, &constraint)?;
67
68 Ok(Self {
69 spatial_weights,
70 coords,
71 })
72 }
73
74 fn compute_spatial_weights(
76 coords: &Array2<f64>,
77 constraint: &SpatialConstraint,
78 ) -> SklResult<Array2<f64>> {
79 let n_samples = coords.nrows();
80 let mut weights = Array2::zeros((n_samples, n_samples));
81
82 match constraint {
83 SpatialConstraint::Distance { radius } => {
84 for i in 0..n_samples {
85 for j in 0..n_samples {
86 if i != j {
87 let dist = euclidean_distance(
88 &coords.row(i).to_owned().into_raw_vec(),
89 &coords.row(j).to_owned().into_raw_vec(),
90 );
91 if dist <= *radius {
92 weights[[i, j]] = 1.0 / dist.max(1e-6); }
94 }
95 }
96 }
97 }
98 SpatialConstraint::Adjacency => {
99 let k = 4; let neighbors = k_nearest_neighbors(coords, k);
101 for (i, neighbor_list) in neighbors.iter().enumerate() {
102 for &j in neighbor_list {
103 weights[[i, j]] = 1.0;
104 weights[[j, i]] = 1.0; }
106 }
107 }
108 SpatialConstraint::Grid { rows, cols } => {
109 if n_samples != rows * cols {
110 return Err(SklearsError::InvalidInput(
111 "Grid dimensions don't match number of samples".to_string(),
112 ));
113 }
114
115 for i in 0..*rows {
116 for j in 0..*cols {
117 let idx = i * cols + j;
118 if i > 0 {
120 weights[[idx, (i - 1) * cols + j]] = 1.0;
121 }
122 if i < rows - 1 {
123 weights[[idx, (i + 1) * cols + j]] = 1.0;
124 }
125 if j > 0 {
126 weights[[idx, i * cols + (j - 1)]] = 1.0;
127 }
128 if j < cols - 1 {
129 weights[[idx, i * cols + (j + 1)]] = 1.0;
130 }
131 }
132 }
133 }
134 SpatialConstraint::Custom => {
135 return Err(SklearsError::InvalidInput(
136 "Custom spatial constraints not supported for autocorrelation analysis"
137 .to_string(),
138 ));
139 }
140 }
141
142 for i in 0..n_samples {
144 let row_sum: f64 = weights.row(i).sum();
145 if row_sum > 1e-8 {
146 for j in 0..n_samples {
147 weights[[i, j]] /= row_sum;
148 }
149 }
150 }
151
152 Ok(weights)
153 }
154
155 pub fn morans_i(&self, values: &Array1<f64>) -> SklResult<MoransI> {
157 let n = values.len() as f64;
158 let mean_val = values.mean().unwrap_or(0.0);
159
160 let deviations: Array1<f64> = values.mapv(|v| v - mean_val);
162
163 let mut numerator = 0.0;
165 let mut w_sum = 0.0;
166
167 for i in 0..values.len() {
168 for j in 0..values.len() {
169 let w_ij = self.spatial_weights[[i, j]];
170 numerator += w_ij * deviations[i] * deviations[j];
171 w_sum += w_ij;
172 }
173 }
174
175 let denominator: f64 = deviations.mapv(|d| d * d).sum();
177
178 let morans_i = if denominator > 1e-8 && w_sum > 1e-8 {
180 (n / w_sum) * (numerator / denominator)
181 } else {
182 0.0
183 };
184
185 let expected = -1.0 / (n - 1.0);
187
188 let variance = self.compute_morans_i_variance(n, w_sum)?;
190
191 let z_score = if variance > 1e-8 {
193 (morans_i - expected) / variance.sqrt()
194 } else {
195 0.0
196 };
197
198 let p_value = 2.0 * (1.0 - self.standard_normal_cdf(z_score.abs()));
199
200 Ok(MoransI {
201 statistic: morans_i,
202 expected_value: expected,
203 variance,
204 z_score,
205 p_value,
206 })
207 }
208
209 pub fn gearys_c(&self, values: &Array1<f64>) -> SklResult<GearysC> {
211 let n = values.len() as f64;
212
213 let mut numerator = 0.0;
215 let mut w_sum = 0.0;
216
217 for i in 0..values.len() {
218 for j in 0..values.len() {
219 let w_ij = self.spatial_weights[[i, j]];
220 numerator += w_ij * (values[i] - values[j]).powi(2);
221 w_sum += w_ij;
222 }
223 }
224
225 let mean_val = values.mean().unwrap_or(0.0);
227 let denominator: f64 = values.mapv(|v| (v - mean_val).powi(2)).sum();
228
229 let gearys_c = if denominator > 1e-8 && w_sum > 1e-8 {
231 ((n - 1.0) / (2.0 * w_sum)) * (numerator / denominator)
232 } else {
233 1.0
234 };
235
236 let expected = 1.0;
238
239 let variance = self.compute_gearys_c_variance(n, w_sum)?;
241
242 let z_score = if variance > 1e-8 {
244 (gearys_c - expected) / variance.sqrt()
245 } else {
246 0.0
247 };
248
249 let p_value = 2.0 * (1.0 - self.standard_normal_cdf(z_score.abs()));
250
251 Ok(GearysC {
252 statistic: gearys_c,
253 expected_value: expected,
254 variance,
255 z_score,
256 p_value,
257 })
258 }
259
260 pub fn local_indicators(&self, values: &Array1<f64>) -> SklResult<LocalIndicators> {
262 let n = values.len();
263 let mean_val = values.mean().unwrap_or(0.0);
264
265 let mut local_morans_i = Array1::zeros(n);
266 let mut local_z_scores = Array1::zeros(n);
267 let mut local_p_values = Array1::zeros(n);
268 let mut clusters = Vec::with_capacity(n);
269
270 let variance: f64 = values.mapv(|v| (v - mean_val).powi(2)).sum() / n as f64;
272
273 for i in 0..n {
274 let z_i = (values[i] - mean_val) / variance.sqrt();
275
276 let mut weighted_z_sum = 0.0;
278 for j in 0..n {
279 if i != j {
280 let w_ij = self.spatial_weights[[i, j]];
281 let z_j = (values[j] - mean_val) / variance.sqrt();
282 weighted_z_sum += w_ij * z_j;
283 }
284 }
285
286 local_morans_i[i] = z_i * weighted_z_sum;
287
288 local_z_scores[i] = local_morans_i[i] / (1.0f64 + 1e-6).sqrt();
290 local_p_values[i] = 2.0 * (1.0 - self.standard_normal_cdf(local_z_scores[i].abs()));
291
292 let cluster_type = if local_p_values[i] < 0.05 {
294 if z_i > 0.0 && weighted_z_sum > 0.0 {
295 "High-High"
296 } else if z_i < 0.0 && weighted_z_sum < 0.0 {
297 "Low-Low"
298 } else if z_i > 0.0 && weighted_z_sum < 0.0 {
299 "High-Low"
300 } else {
301 "Low-High"
302 }
303 } else {
304 "Not significant"
305 };
306
307 clusters.push(cluster_type.to_string());
308 }
309
310 Ok(LocalIndicators {
311 local_morans_i,
312 local_z_scores,
313 local_p_values,
314 clusters: Array1::from_vec(clusters),
315 })
316 }
317
318 pub fn analyze_mixture_assignments(
320 &self,
321 assignments: &Array1<usize>,
322 ) -> SklResult<(MoransI, GearysC)> {
323 let values = assignments.mapv(|a| a as f64);
325
326 let morans_i = self.morans_i(&values)?;
327 let gearys_c = self.gearys_c(&values)?;
328
329 Ok((morans_i, gearys_c))
330 }
331
332 fn compute_morans_i_variance(&self, n: f64, _w_sum: f64) -> SklResult<f64> {
334 let variance = 1.0 / (n - 1.0) * (1.0 - 1.0 / n);
337 Ok(variance.max(1e-8))
338 }
339
340 fn compute_gearys_c_variance(&self, n: f64, _w_sum: f64) -> SklResult<f64> {
341 let variance = 1.0 / (2.0 * (n - 1.0)) * (1.0 - 1.0 / n);
343 Ok(variance.max(1e-8))
344 }
345
346 fn standard_normal_cdf(&self, x: f64) -> f64 {
348 let t = 1.0 / (1.0 + 0.2316419 * x.abs());
350 let d = 0.3989423 * (-x * x / 2.0).exp();
351 let prob = d
352 * t
353 * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274))));
354
355 if x >= 0.0 {
356 1.0 - prob
357 } else {
358 prob
359 }
360 }
361}
362
363#[derive(Debug, Clone)]
365pub struct SpatialClusteringQuality {
366 pub silhouette_score: f64,
368 pub spatial_separation: f64,
370 pub spatial_compactness: f64,
372 pub boundary_coherence: f64,
374}
375
376impl SpatialClusteringQuality {
377 pub fn assess(
379 coords: &Array2<f64>,
380 assignments: &Array1<usize>,
381 n_components: usize,
382 ) -> SklResult<Self> {
383 let silhouette_score = Self::compute_silhouette_score(coords, assignments)?;
384 let spatial_separation =
385 Self::compute_spatial_separation(coords, assignments, n_components)?;
386 let spatial_compactness =
387 Self::compute_spatial_compactness(coords, assignments, n_components)?;
388 let boundary_coherence = Self::compute_boundary_coherence(coords, assignments)?;
389
390 Ok(Self {
391 silhouette_score,
392 spatial_separation,
393 spatial_compactness,
394 boundary_coherence,
395 })
396 }
397
398 fn compute_silhouette_score(
399 coords: &Array2<f64>,
400 assignments: &Array1<usize>,
401 ) -> SklResult<f64> {
402 let n_samples = coords.nrows();
403 let mut total_silhouette = 0.0;
404
405 for i in 0..n_samples {
406 let cluster_i = assignments[i];
407
408 let mut intra_cluster_dist = 0.0;
410 let mut same_cluster_count = 0;
411
412 for j in 0..n_samples {
413 if i != j && assignments[j] == cluster_i {
414 intra_cluster_dist += euclidean_distance(
415 &coords.row(i).to_owned().into_raw_vec(),
416 &coords.row(j).to_owned().into_raw_vec(),
417 );
418 same_cluster_count += 1;
419 }
420 }
421
422 let a_i = if same_cluster_count > 0 {
423 intra_cluster_dist / same_cluster_count as f64
424 } else {
425 0.0
426 };
427
428 let mut min_inter_cluster_dist = f64::INFINITY;
430
431 for other_cluster in 0..10 {
432 if other_cluster != cluster_i {
434 let mut inter_cluster_dist = 0.0;
435 let mut other_cluster_count = 0;
436
437 for j in 0..n_samples {
438 if assignments[j] == other_cluster {
439 inter_cluster_dist += euclidean_distance(
440 &coords.row(i).to_owned().into_raw_vec(),
441 &coords.row(j).to_owned().into_raw_vec(),
442 );
443 other_cluster_count += 1;
444 }
445 }
446
447 if other_cluster_count > 0 {
448 let avg_dist = inter_cluster_dist / other_cluster_count as f64;
449 min_inter_cluster_dist = min_inter_cluster_dist.min(avg_dist);
450 }
451 }
452 }
453
454 let b_i = min_inter_cluster_dist;
455
456 let s_i = if a_i.max(b_i) > 1e-8 {
458 (b_i - a_i) / a_i.max(b_i)
459 } else {
460 0.0
461 };
462
463 total_silhouette += s_i;
464 }
465
466 Ok(total_silhouette / n_samples as f64)
467 }
468
469 fn compute_spatial_separation(
470 coords: &Array2<f64>,
471 assignments: &Array1<usize>,
472 n_components: usize,
473 ) -> SklResult<f64> {
474 let mut centroids = Array2::zeros((n_components, coords.ncols()));
476 let mut cluster_counts = vec![0; n_components];
477
478 for i in 0..coords.nrows() {
479 let cluster = assignments[i];
480 if cluster < n_components {
481 for j in 0..coords.ncols() {
482 centroids[[cluster, j]] += coords[[i, j]];
483 }
484 cluster_counts[cluster] += 1;
485 }
486 }
487
488 for i in 0..n_components {
490 if cluster_counts[i] > 0 {
491 for j in 0..coords.ncols() {
492 centroids[[i, j]] /= cluster_counts[i] as f64;
493 }
494 }
495 }
496
497 let mut min_separation = f64::INFINITY;
499 for i in 0..n_components {
500 for j in (i + 1)..n_components {
501 if cluster_counts[i] > 0 && cluster_counts[j] > 0 {
502 let dist = euclidean_distance(
503 ¢roids.row(i).to_owned().into_raw_vec(),
504 ¢roids.row(j).to_owned().into_raw_vec(),
505 );
506 min_separation = min_separation.min(dist);
507 }
508 }
509 }
510
511 Ok(min_separation)
512 }
513
514 fn compute_spatial_compactness(
515 coords: &Array2<f64>,
516 assignments: &Array1<usize>,
517 n_components: usize,
518 ) -> SklResult<f64> {
519 let mut total_compactness = 0.0;
520 let mut valid_clusters = 0;
521
522 for cluster in 0..n_components {
523 let cluster_points: Vec<usize> = assignments
524 .iter()
525 .enumerate()
526 .filter(|(_, &a)| a == cluster)
527 .map(|(i, _)| i)
528 .collect();
529
530 if cluster_points.len() > 1 {
531 let mut centroid = vec![0.0; coords.ncols()];
533 for &point_idx in &cluster_points {
534 for j in 0..coords.ncols() {
535 centroid[j] += coords[[point_idx, j]];
536 }
537 }
538 for j in 0..coords.ncols() {
539 centroid[j] /= cluster_points.len() as f64;
540 }
541
542 let mut total_dist = 0.0;
544 for &point_idx in &cluster_points {
545 let dist = euclidean_distance(
546 &coords.row(point_idx).to_owned().into_raw_vec(),
547 ¢roid,
548 );
549 total_dist += dist;
550 }
551
552 total_compactness += total_dist / cluster_points.len() as f64;
553 valid_clusters += 1;
554 }
555 }
556
557 Ok(if valid_clusters > 0 {
558 1.0 / (1.0 + total_compactness / valid_clusters as f64) } else {
560 0.0
561 })
562 }
563
564 fn compute_boundary_coherence(
565 coords: &Array2<f64>,
566 assignments: &Array1<usize>,
567 ) -> SklResult<f64> {
568 let n_samples = coords.nrows();
569 let mut coherent_boundaries = 0;
570 let mut total_boundaries = 0;
571
572 let k = 5.min(n_samples - 1);
574 let neighbors = k_nearest_neighbors(coords, k);
575
576 for (i, neighbor_list) in neighbors.iter().enumerate() {
577 let cluster_i = assignments[i];
578
579 for &neighbor_idx in neighbor_list {
580 if assignments[neighbor_idx] == cluster_i {
581 coherent_boundaries += 1;
582 }
583 total_boundaries += 1;
584 }
585 }
586
587 Ok(if total_boundaries > 0 {
588 coherent_boundaries as f64 / total_boundaries as f64
589 } else {
590 1.0
591 })
592 }
593
594 pub fn overall_score(&self) -> f64 {
596 let weights = [0.3, 0.25, 0.25, 0.2]; let scores = [
599 self.silhouette_score.max(0.0), self.spatial_separation / (1.0 + self.spatial_separation), self.spatial_compactness,
602 self.boundary_coherence,
603 ];
604
605 weights.iter().zip(scores.iter()).map(|(w, s)| w * s).sum()
606 }
607}
608
609#[allow(non_snake_case)]
610#[cfg(test)]
611mod tests {
612 use super::*;
613 use scirs2_core::ndarray::array;
614
615 #[test]
616 fn test_spatial_autocorrelation_analyzer_creation() {
617 let coords = array![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0], [1.0, 1.0]];
618 let constraint = SpatialConstraint::Distance { radius: 1.5 };
619
620 let analyzer = SpatialAutocorrelationAnalyzer::new(coords, constraint).unwrap();
621 assert_eq!(analyzer.coords.nrows(), 4);
622 assert_eq!(analyzer.spatial_weights.dim(), (4, 4));
623 }
624
625 #[test]
626 fn test_morans_i_calculation() {
627 let coords = array![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]];
628 let constraint = SpatialConstraint::Distance { radius: 1.5 };
629 let analyzer = SpatialAutocorrelationAnalyzer::new(coords, constraint).unwrap();
630
631 let values = array![1.0, 1.0, 0.0]; let morans_i = analyzer.morans_i(&values).unwrap();
633
634 assert!(morans_i.statistic.is_finite());
635 assert!(morans_i.z_score.is_finite());
636 assert!(morans_i.p_value >= 0.0 && morans_i.p_value <= 1.0);
637 }
638
639 #[test]
640 fn test_gearys_c_calculation() {
641 let coords = array![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]];
642 let constraint = SpatialConstraint::Distance { radius: 1.5 };
643 let analyzer = SpatialAutocorrelationAnalyzer::new(coords, constraint).unwrap();
644
645 let values = array![1.0, 1.0, 0.0];
646 let gearys_c = analyzer.gearys_c(&values).unwrap();
647
648 assert!(gearys_c.statistic.is_finite());
649 assert!(gearys_c.z_score.is_finite());
650 assert!(gearys_c.p_value >= 0.0 && gearys_c.p_value <= 1.0);
651 }
652
653 #[test]
654 fn test_local_indicators() {
655 let coords = array![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0], [0.0, 1.0]];
656 let constraint = SpatialConstraint::Distance { radius: 1.5 };
657 let analyzer = SpatialAutocorrelationAnalyzer::new(coords, constraint).unwrap();
658
659 let values = array![1.0, 1.0, 0.0, 1.0];
660 let lisa = analyzer.local_indicators(&values).unwrap();
661
662 assert_eq!(lisa.local_morans_i.len(), 4);
663 assert_eq!(lisa.local_z_scores.len(), 4);
664 assert_eq!(lisa.local_p_values.len(), 4);
665 assert_eq!(lisa.clusters.len(), 4);
666
667 for cluster_type in lisa.clusters.iter() {
669 assert!([
670 "High-High",
671 "Low-Low",
672 "High-Low",
673 "Low-High",
674 "Not significant"
675 ]
676 .contains(&cluster_type.as_str()));
677 }
678 }
679
680 #[test]
681 fn test_spatial_clustering_quality() {
682 let coords = array![[0.0, 0.0], [0.1, 0.1], [5.0, 5.0], [5.1, 5.1]];
683 let assignments = array![0, 0, 1, 1]; let quality = SpatialClusteringQuality::assess(&coords, &assignments, 2).unwrap();
686
687 assert!(quality.silhouette_score.is_finite());
688 assert!(quality.spatial_separation > 0.0);
689 assert!(quality.spatial_compactness > 0.0);
690 assert!(quality.boundary_coherence >= 0.0 && quality.boundary_coherence <= 1.0);
691
692 let overall = quality.overall_score();
693 assert!(overall >= 0.0 && overall <= 1.0);
694 }
695
696 #[test]
697 fn test_mixture_assignments_analysis() {
698 let coords = array![[0.0, 0.0], [1.0, 0.0], [2.0, 0.0], [0.0, 1.0]];
699 let constraint = SpatialConstraint::Adjacency;
700 let analyzer = SpatialAutocorrelationAnalyzer::new(coords, constraint).unwrap();
701
702 let assignments = array![0, 0, 1, 1];
703 let (morans_i, gearys_c) = analyzer.analyze_mixture_assignments(&assignments).unwrap();
704
705 assert!(morans_i.statistic.is_finite());
706 assert!(gearys_c.statistic.is_finite());
707 }
708
709 #[test]
710 fn test_standard_normal_cdf() {
711 let coords = array![[0.0, 0.0]];
712 let constraint = SpatialConstraint::Distance { radius: 1.0 };
713 let analyzer = SpatialAutocorrelationAnalyzer::new(coords, constraint).unwrap();
714
715 let cdf_0 = analyzer.standard_normal_cdf(0.0);
717 assert!((cdf_0 - 0.5).abs() < 0.01);
718
719 let cdf_neg = analyzer.standard_normal_cdf(-1.96);
720 assert!(cdf_neg < 0.05);
721
722 let cdf_pos = analyzer.standard_normal_cdf(1.96);
723 assert!(cdf_pos > 0.95);
724 }
725}