Skip to main content

scirs2_spatial/spatial_stats/
lisa.rs

1//! Local Indicators of Spatial Association (LISA)
2//!
3//! - Local Moran's I with permutation-based pseudo-significance
4//! - Getis-Ord Gi* statistic with z-scores and p-values
5//! - LISA cluster map classification (HH, LL, HL, LH)
6
7use scirs2_core::ndarray::{Array1, Array2, ArrayView1, ArrayView2};
8use scirs2_core::random::{seeded_rng, Rng, RngExt, SeedableRng};
9
10use crate::error::{SpatialError, SpatialResult};
11
/// Label assigned to each observation in a LISA cluster map.
///
/// Significant observations fall into one of the four classical quadrants of
/// the Moran scatterplot; everything else is `NotSignificant`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LisaCluster {
    /// High value whose neighbours are also high (hot spot).
    HighHigh,
    /// Low value whose neighbours are also low (cold spot).
    LowLow,
    /// High value whose neighbours are low (spatial outlier).
    HighLow,
    /// Low value whose neighbours are high (spatial outlier).
    LowHigh,
    /// Did not reach significance at the chosen alpha level.
    NotSignificant,
}
26
/// Result of a Local Moran's I analysis.
///
/// Produced by [`local_moran_permutation_test`]; all three fields are
/// index-aligned with the input `values` array.
#[derive(Debug, Clone)]
pub struct LisaResult {
    /// Local Moran's I_i values for each observation.
    pub local_i: Array1<f64>,
    /// Pseudo p-values from permutation test (one per observation).
    pub p_values: Array1<f64>,
    /// Cluster classification for each observation (alpha fixed at 0.05).
    pub clusters: Vec<LisaCluster>,
}
37
/// A complete LISA cluster map.
///
/// Produced by [`lisa_cluster_map`]; bundles the per-observation results with
/// the analysis metadata needed to interpret them.
#[derive(Debug, Clone)]
pub struct LisaClusterMap {
    /// Local Moran's I values (index-aligned with the input values).
    pub local_i: Array1<f64>,
    /// Pseudo p-values from the permutation test.
    pub p_values: Array1<f64>,
    /// Cluster labels, classified at `alpha`.
    pub clusters: Vec<LisaCluster>,
    /// Mean of the input values.
    pub mean: f64,
    /// Number of permutations used.
    pub n_permutations: usize,
    /// Significance level used for classification.
    pub alpha: f64,
}
54
/// Result of the Getis-Ord Gi* analysis over all locations.
///
/// Produced by [`getis_ord_gi_star`]; both arrays are index-aligned with the
/// input `values`.
#[derive(Debug, Clone)]
pub struct GetisOrdResult {
    /// Gi* z-scores for each location (0.0 when the variance term degenerates).
    pub z_scores: Array1<f64>,
    /// Two-sided p-values from a normal approximation.
    pub p_values: Array1<f64>,
}
63
64// ---------------------------------------------------------------------------
65// Local Moran's I with permutation test
66// ---------------------------------------------------------------------------
67
68/// Compute Local Moran's I with a conditional permutation test.
69///
70/// For each observation i, the value at i is held fixed while remaining values
71/// are randomly permuted `n_permutations` times. The pseudo p-value is
72/// `(count of |I_perm| >= |I_obs| + 1) / (n_permutations + 1)`.
73///
74/// # Arguments
75///
76/// * `values` - Observed values (n,)
77/// * `weights` - Spatial weights matrix (n x n)
78/// * `n_permutations` - Number of random permutations (e.g. 999)
79/// * `seed` - RNG seed for reproducibility
80///
81/// # Returns
82///
83/// A `LisaResult` with local I values, p-values, and cluster classifications
84/// (using alpha = 0.05).
85pub fn local_moran_permutation_test(
86    values: &ArrayView1<f64>,
87    weights: &ArrayView2<f64>,
88    n_permutations: usize,
89    seed: u64,
90) -> SpatialResult<LisaResult> {
91    let n = values.len();
92    if weights.nrows() != n || weights.ncols() != n {
93        return Err(SpatialError::DimensionError(
94            "Weights matrix dimensions must match number of values".to_string(),
95        ));
96    }
97    if n < 3 {
98        return Err(SpatialError::ValueError(
99            "Need at least 3 observations".to_string(),
100        ));
101    }
102
103    let nf = n as f64;
104    let mean: f64 = values.sum() / nf;
105    let variance: f64 = values.iter().map(|&x| (x - mean) * (x - mean)).sum::<f64>() / nf;
106    if variance == 0.0 {
107        return Err(SpatialError::ValueError("Variance is zero".to_string()));
108    }
109    let std_dev = variance.sqrt();
110
111    // Compute standardized deviations
112    let z: Vec<f64> = values.iter().map(|&x| (x - mean) / std_dev).collect();
113
114    // Observed Local Moran's I
115    let mut local_i = Array1::zeros(n);
116    for i in 0..n {
117        let mut wz_sum = 0.0;
118        for j in 0..n {
119            if i != j {
120                wz_sum += weights[[i, j]] * z[j];
121            }
122        }
123        local_i[i] = z[i] * wz_sum;
124    }
125
126    // Permutation test
127    let mut p_values = Array1::zeros(n);
128    let mut rng = seeded_rng(seed);
129
130    // Build index list for permuting
131    let mut indices: Vec<usize> = (0..n).collect();
132
133    for i in 0..n {
134        let obs_i = local_i[i].abs();
135        let mut count_extreme = 0usize;
136
137        for _perm in 0..n_permutations {
138            // Fisher-Yates shuffle of indices, skipping position i
139            // We shuffle all except i, then compute local I at i using permuted values
140            // Simple approach: shuffle the entire z vector copy, but keep z[i] fixed
141            fisher_yates_shuffle_except(&mut indices, i, &mut rng);
142
143            let mut wz_perm = 0.0;
144            for j in 0..n {
145                if j != i {
146                    // Use z[indices[j]] as the permuted value at location j
147                    wz_perm += weights[[i, j]] * z[indices[j]];
148                }
149            }
150            let i_perm = z[i] * wz_perm;
151
152            if i_perm.abs() >= obs_i {
153                count_extreme += 1;
154            }
155        }
156
157        p_values[i] = (count_extreme as f64 + 1.0) / (n_permutations as f64 + 1.0);
158    }
159
160    // Classify clusters (alpha = 0.05)
161    let alpha = 0.05;
162    let clusters = classify_clusters(&z, &local_i, &p_values, weights, alpha);
163
164    Ok(LisaResult {
165        local_i,
166        p_values,
167        clusters,
168    })
169}
170
171/// Build a complete LISA cluster map with configurable alpha.
172///
173/// This is a convenience wrapper around `local_moran_permutation_test`
174/// that also stores metadata.
175pub fn lisa_cluster_map(
176    values: &ArrayView1<f64>,
177    weights: &ArrayView2<f64>,
178    n_permutations: usize,
179    alpha: f64,
180    seed: u64,
181) -> SpatialResult<LisaClusterMap> {
182    let n = values.len();
183    if weights.nrows() != n || weights.ncols() != n {
184        return Err(SpatialError::DimensionError(
185            "Weights matrix dimensions must match number of values".to_string(),
186        ));
187    }
188    if n < 3 {
189        return Err(SpatialError::ValueError(
190            "Need at least 3 observations".to_string(),
191        ));
192    }
193
194    let nf = n as f64;
195    let mean: f64 = values.sum() / nf;
196    let variance: f64 = values.iter().map(|&x| (x - mean) * (x - mean)).sum::<f64>() / nf;
197    if variance == 0.0 {
198        return Err(SpatialError::ValueError("Variance is zero".to_string()));
199    }
200    let std_dev = variance.sqrt();
201    let z: Vec<f64> = values.iter().map(|&x| (x - mean) / std_dev).collect();
202
203    let mut local_i = Array1::zeros(n);
204    for i in 0..n {
205        let mut wz_sum = 0.0;
206        for j in 0..n {
207            if i != j {
208                wz_sum += weights[[i, j]] * z[j];
209            }
210        }
211        local_i[i] = z[i] * wz_sum;
212    }
213
214    // Permutation test
215    let mut p_values = Array1::zeros(n);
216    let mut rng = seeded_rng(seed);
217    let mut indices: Vec<usize> = (0..n).collect();
218
219    for i in 0..n {
220        let obs_i = local_i[i].abs();
221        let mut count_extreme = 0usize;
222
223        for _perm in 0..n_permutations {
224            fisher_yates_shuffle_except(&mut indices, i, &mut rng);
225
226            let mut wz_perm = 0.0;
227            for j in 0..n {
228                if j != i {
229                    wz_perm += weights[[i, j]] * z[indices[j]];
230                }
231            }
232            let i_perm = z[i] * wz_perm;
233
234            if i_perm.abs() >= obs_i {
235                count_extreme += 1;
236            }
237        }
238
239        p_values[i] = (count_extreme as f64 + 1.0) / (n_permutations as f64 + 1.0);
240    }
241
242    let clusters = classify_clusters(&z, &local_i, &p_values, weights, alpha);
243
244    Ok(LisaClusterMap {
245        local_i,
246        p_values,
247        clusters,
248        mean,
249        n_permutations,
250        alpha,
251    })
252}
253
254// ---------------------------------------------------------------------------
255// Getis-Ord Gi*
256// ---------------------------------------------------------------------------
257
258/// Compute the Getis-Ord Gi* statistic for each location.
259///
260/// Gi* includes the focal location in its own computation (self-weight = 1).
261///
262/// Gi* = (sum_j w_ij x_j - xbar sum_j w_ij) / (s * sqrt((n sum_j w_ij^2 - (sum_j w_ij)^2) / (n-1)))
263///
264/// where the sums include j = i.
265pub fn getis_ord_gi_star(
266    values: &ArrayView1<f64>,
267    weights: &ArrayView2<f64>,
268) -> SpatialResult<GetisOrdResult> {
269    let n = values.len();
270    if weights.nrows() != n || weights.ncols() != n {
271        return Err(SpatialError::DimensionError(
272            "Weights matrix dimensions must match number of values".to_string(),
273        ));
274    }
275    if n < 2 {
276        return Err(SpatialError::ValueError(
277            "Need at least 2 observations".to_string(),
278        ));
279    }
280
281    let nf = n as f64;
282    let xbar: f64 = values.sum() / nf;
283    let s2: f64 = values.iter().map(|&x| (x - xbar) * (x - xbar)).sum::<f64>() / nf;
284    if s2 == 0.0 {
285        return Err(SpatialError::ValueError(
286            "Variance of values is zero".to_string(),
287        ));
288    }
289    let s = s2.sqrt();
290
291    let mut z_scores = Array1::zeros(n);
292    let mut p_values = Array1::zeros(n);
293
294    for i in 0..n {
295        // Include self-weight (set diagonal to 1 conceptually for Gi*)
296        let mut wx_sum = 0.0;
297        let mut w_sum = 0.0;
298        let mut w_sq_sum = 0.0;
299
300        for j in 0..n {
301            let wij = if i == j {
302                // Gi* uses self-weight = 1 if diagonal is 0
303                if weights[[i, j]] == 0.0 {
304                    1.0
305                } else {
306                    weights[[i, j]]
307                }
308            } else {
309                weights[[i, j]]
310            };
311            wx_sum += wij * values[j];
312            w_sum += wij;
313            w_sq_sum += wij * wij;
314        }
315
316        let numerator = wx_sum - xbar * w_sum;
317        let denom_inner = (nf * w_sq_sum - w_sum * w_sum) / (nf - 1.0);
318
319        if denom_inner > 0.0 {
320            let denominator = s * denom_inner.sqrt();
321            z_scores[i] = numerator / denominator;
322        }
323
324        p_values[i] = two_sided_p(z_scores[i]);
325    }
326
327    Ok(GetisOrdResult { z_scores, p_values })
328}
329
330// ---------------------------------------------------------------------------
331// Internal helpers
332// ---------------------------------------------------------------------------
333
334/// Two-sided p-value via normal CDF approximation.
335fn two_sided_p(z: f64) -> f64 {
336    2.0 * (1.0 - normal_cdf(z.abs()))
337}
338
/// Standard normal CDF (Abramowitz & Stegun).
///
/// Uses the A&S formula 7.1.26 rational-polynomial approximation of erf,
/// whose maximum absolute error is about 1.5e-7 — ample for pseudo
/// p-values. The CDF is recovered via Phi(x) = 0.5 * (1 + erf(x / sqrt(2))).
fn normal_cdf(x: f64) -> f64 {
    // Polynomial coefficients from Abramowitz & Stegun, formula 7.1.26.
    let a1 = 0.254829592;
    let a2 = -0.284496736;
    let a3 = 1.421413741;
    let a4 = -1.453152027;
    let a5 = 1.061405429;
    let p = 0.3275911;

    // erf is odd, so evaluate on |x| and reapply the sign at the end.
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let x_abs = x.abs() / std::f64::consts::SQRT_2;
    let t = 1.0 / (1.0 + p * x_abs);
    // Horner evaluation of the degree-5 polynomial in t, times exp(-x^2).
    let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x_abs * x_abs).exp();
    0.5 * (1.0 + sign * y)
}
354
355/// Fisher-Yates shuffle of `indices`, keeping position `skip` in place.
356fn fisher_yates_shuffle_except<R: Rng + ?Sized>(indices: &mut [usize], skip: usize, rng: &mut R) {
357    let n = indices.len();
358    // Reset indices
359    for (idx, val) in indices.iter_mut().enumerate() {
360        *val = idx;
361    }
362
363    // Shuffle all positions except skip
364    // Collect swappable positions
365    let positions: Vec<usize> = (0..n).filter(|&p| p != skip).collect();
366    let m = positions.len();
367    for k in (1..m).rev() {
368        let j = rng.random_range(0..=k);
369        indices.swap(positions[k], positions[j]);
370    }
371}
372
373/// Classify each observation into a LISA cluster type.
374fn classify_clusters(
375    z: &[f64],
376    local_i: &Array1<f64>,
377    p_values: &Array1<f64>,
378    weights: &ArrayView2<f64>,
379    alpha: f64,
380) -> Vec<LisaCluster> {
381    let n = z.len();
382    let mut clusters = vec![LisaCluster::NotSignificant; n];
383
384    for i in 0..n {
385        if p_values[i] > alpha {
386            continue;
387        }
388
389        // Compute spatial lag (weighted average of neighbours' z)
390        let mut w_sum = 0.0;
391        let mut wz_sum = 0.0;
392        for j in 0..n {
393            if j != i && weights[[i, j]] > 0.0 {
394                w_sum += weights[[i, j]];
395                wz_sum += weights[[i, j]] * z[j];
396            }
397        }
398
399        let lag = if w_sum > 0.0 { wz_sum / w_sum } else { 0.0 };
400
401        clusters[i] = if z[i] > 0.0 && lag > 0.0 {
402            LisaCluster::HighHigh
403        } else if z[i] < 0.0 && lag < 0.0 {
404            LisaCluster::LowLow
405        } else if z[i] > 0.0 && lag < 0.0 {
406            LisaCluster::HighLow
407        } else if z[i] < 0.0 && lag > 0.0 {
408            LisaCluster::LowHigh
409        } else {
410            LisaCluster::NotSignificant
411        };
412    }
413
414    clusters
415}
416
417// ---------------------------------------------------------------------------
418// Tests
419// ---------------------------------------------------------------------------
420
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Build a binary "chain" adjacency matrix: site i neighbours i-1 and i+1.
    fn build_chain_weights(n: usize) -> Array2<f64> {
        let mut w = Array2::zeros((n, n));
        for i in 0..(n - 1) {
            w[[i, i + 1]] = 1.0;
            w[[i + 1, i]] = 1.0;
        }
        w
    }

    #[test]
    fn test_local_moran_permutation_hot_spot() {
        // Three high values clustered at one end of the chain.
        let values = array![10.0, 10.0, 10.0, 1.0, 1.0, 1.0];
        let w = build_chain_weights(6);

        let result =
            local_moran_permutation_test(&values.view(), &w.view(), 199, 42).expect("lisa failed");

        // One entry per observation in every output.
        assert_eq!(result.local_i.len(), 6);
        assert_eq!(result.p_values.len(), 6);
        assert_eq!(result.clusters.len(), 6);

        // First observation sits among equal high neighbours, so its local I
        // (positive spatial autocorrelation) should be positive.
        assert!(
            result.local_i[0] > 0.0,
            "Local I at position 0 should be positive for clustered high values"
        );
    }

    #[test]
    fn test_local_moran_permutation_spatial_outlier() {
        // One high outlier surrounded by lows.
        let values = array![1.0, 1.0, 10.0, 1.0, 1.0];
        let w = build_chain_weights(5);

        let result =
            local_moran_permutation_test(&values.view(), &w.view(), 199, 123).expect("lisa");

        // The outlier at index 2 should have negative local I: a high value
        // next to low neighbours is negative spatial autocorrelation.
        assert!(
            result.local_i[2] < 0.0,
            "Local I at outlier position should be negative, got {}",
            result.local_i[2]
        );
    }

    #[test]
    fn test_lisa_cluster_map_classifications() {
        let values = array![10.0, 10.0, 10.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let w = build_chain_weights(8);

        let map = lisa_cluster_map(&values.view(), &w.view(), 499, 0.10, 42).expect("cluster_map");

        assert_eq!(map.clusters.len(), 8);
        assert_eq!(map.n_permutations, 499);

        // Check which labels appear in the map.
        let has_hh = map.clusters.contains(&LisaCluster::HighHigh);
        let has_ll = map.clusters.contains(&LisaCluster::LowLow);
        let has_ns = map.clusters.contains(&LisaCluster::NotSignificant);

        // With strong clustering and moderate alpha, we expect at least some
        // significant clusters; every cell must carry one of these labels.
        assert!(
            has_hh || has_ll || has_ns,
            "Should produce at least one classification"
        );
    }

    #[test]
    fn test_getis_ord_gi_star_hotspot() {
        // Cluster of high values at one end of the chain.
        let values = array![10.0, 10.0, 10.0, 1.0, 1.0, 1.0];
        let w = build_chain_weights(6);

        let result = getis_ord_gi_star(&values.view(), &w.view()).expect("gi_star");

        assert_eq!(result.z_scores.len(), 6);
        assert_eq!(result.p_values.len(), 6);

        // Locations in the high cluster should have positive z-scores.
        assert!(
            result.z_scores[0] > 0.0,
            "z-score at position 0 should be positive for high cluster, got {}",
            result.z_scores[0]
        );

        // Locations in the low cluster should have negative z-scores.
        assert!(
            result.z_scores[5] < 0.0,
            "z-score at position 5 should be negative for low cluster, got {}",
            result.z_scores[5]
        );
    }

    #[test]
    fn test_getis_ord_gi_star_uniform() {
        // All values identical: there is no spatial variation to detect.
        let values = array![5.0, 5.0, 5.0, 5.0, 5.0];
        let w = build_chain_weights(5);

        // Uniform values => variance = 0 => the function must return an error.
        let result = getis_ord_gi_star(&values.view(), &w.view());
        assert!(result.is_err());
    }

    #[test]
    fn test_getis_ord_gi_star_p_values() {
        let values = array![100.0, 100.0, 100.0, 1.0, 1.0, 1.0, 1.0, 1.0];
        let w = build_chain_weights(8);

        let result = getis_ord_gi_star(&values.view(), &w.view()).expect("gi_star");

        // P-values must always lie in [0, 1].
        for &p in result.p_values.iter() {
            assert!((0.0..=1.0).contains(&p), "p-value {} out of range", p);
        }

        // The hotspot location should have a small-ish p-value.
        assert!(
            result.p_values[0] < 0.5,
            "p-value at hotspot should be < 0.5"
        );
    }
}