use crate::error::{AnalyticsError, Result};
use crate::hotspot::SpatialWeights;
use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2};
#[derive(Debug, Clone)]
pub struct GetisOrdResult {
pub z_scores: Array1<f64>,
pub p_values: Array1<f64>,
pub classifications: Array1<HotspotClass>,
pub confidence: f64,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HotspotClass {
HotSpot,
ColdSpot,
NotSignificant,
}
pub struct GetisOrdGiStar {
confidence: f64,
}
impl GetisOrdGiStar {
pub fn new(confidence: f64) -> Self {
Self { confidence }
}
pub fn calculate(
&self,
values: &ArrayView1<f64>,
weights: &SpatialWeights,
) -> Result<GetisOrdResult> {
let n = values.len();
if n != weights.weights.nrows() {
return Err(AnalyticsError::dimension_mismatch(
format!("{}", n),
format!("{}", weights.weights.nrows()),
));
}
if n < 3 {
return Err(AnalyticsError::insufficient_data(
"Need at least 3 observations for Gi* statistic",
));
}
let mean = values.sum() / (n as f64);
let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n as f64);
let std_dev = variance.sqrt();
if std_dev < f64::EPSILON {
return Err(AnalyticsError::numerical_instability(
"Standard deviation is too small",
));
}
let mut z_scores = Array1::zeros(n);
let mut p_values = Array1::zeros(n);
let mut classifications = Array1::from_elem(n, HotspotClass::NotSignificant);
for i in 0..n {
let mut weighted_sum = 0.0;
let mut weight_sum = 0.0;
let mut weight_sq_sum = 0.0;
for j in 0..n {
let w_ij = weights.weights[[i, j]];
weighted_sum += w_ij * values[j];
weight_sum += w_ij;
weight_sq_sum += w_ij * w_ij;
}
let numerator = weighted_sum - mean * weight_sum;
let denominator = std_dev
* ((n as f64 * weight_sq_sum - weight_sum * weight_sum) / ((n - 1) as f64)).sqrt();
if denominator.abs() < f64::EPSILON {
z_scores[i] = 0.0;
} else {
z_scores[i] = numerator / denominator;
}
p_values[i] = 2.0 * (1.0 - standard_normal_cdf(z_scores[i].abs()));
if p_values[i] < self.confidence {
classifications[i] = if z_scores[i] > 0.0 {
HotspotClass::HotSpot
} else {
HotspotClass::ColdSpot
};
}
}
Ok(GetisOrdResult {
z_scores,
p_values,
classifications,
confidence: self.confidence,
})
}
pub fn calculate_grid(
&self,
values: &ArrayView2<f64>,
neighborhood_size: usize,
) -> Result<GetisOrdResult> {
let (nrows, ncols) = values.dim();
let _n = nrows * ncols;
let flat_values = values.iter().copied().collect::<Vec<f64>>();
let flat_array = Array1::from_vec(flat_values);
let weights = self.build_grid_weights(nrows, ncols, neighborhood_size)?;
self.calculate(&flat_array.view(), &weights)
}
fn build_grid_weights(
&self,
nrows: usize,
ncols: usize,
neighborhood_size: usize,
) -> Result<SpatialWeights> {
let n = nrows * ncols;
let mut weights = scirs2_core::ndarray::Array2::zeros((n, n));
for row in 0..nrows {
for col in 0..ncols {
let i = row * ncols + col;
weights[[i, i]] = 1.0;
for dr in 0..=(2 * neighborhood_size) {
for dc in 0..=(2 * neighborhood_size) {
if dr == neighborhood_size && dc == neighborhood_size {
continue; }
let nr = row as isize + dr as isize - neighborhood_size as isize;
let nc = col as isize + dc as isize - neighborhood_size as isize;
if nr >= 0 && nr < nrows as isize && nc >= 0 && nc < ncols as isize {
let j = (nr as usize) * ncols + (nc as usize);
weights[[i, j]] = 1.0;
}
}
}
}
}
SpatialWeights::from_adjacency(weights)
}
}
fn standard_normal_cdf(x: f64) -> f64 {
0.5 * (1.0 + erf(x / 2_f64.sqrt()))
}
fn erf(x: f64) -> f64 {
let sign = x.signum();
let x = x.abs();
let a1 = 0.254_829_592;
let a2 = -0.284_496_736;
let a3 = 1.421_413_741;
let a4 = -1.453_152_027;
let a5 = 1.061_405_429;
let p = 0.327_591_100;
let t = 1.0 / (1.0 + p * x);
let t2 = t * t;
let t3 = t2 * t;
let t4 = t3 * t;
let t5 = t4 * t;
let result = 1.0 - (a1 * t + a2 * t2 + a3 * t3 + a4 * t4 + a5 * t5) * (-x * x).exp();
sign * result
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_getis_ord_hotspot() {
let values = array![1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 1.0, 1.0, 1.0];
let mut weights_matrix = scirs2_core::ndarray::Array2::zeros((9, 9));
for i in 0..9 {
weights_matrix[[i, i]] = 1.0;
if i > 0 {
weights_matrix[[i, i - 1]] = 1.0;
}
if i < 8 {
weights_matrix[[i, i + 1]] = 1.0;
}
}
let weights = SpatialWeights::from_adjacency(weights_matrix)
.expect("Failed to create spatial weights for hotspot test");
let gi_star = GetisOrdGiStar::new(0.05);
let result = gi_star
.calculate(&values.view(), &weights)
.expect("Failed to calculate Gi* statistic for hotspot test");
assert_eq!(result.z_scores.len(), 9);
assert!(result.z_scores[3] > 0.0);
assert!(result.z_scores[4] > 0.0);
assert!(result.z_scores[5] > 0.0);
}
#[test]
fn test_getis_ord_coldspot() {
let values = array![10.0, 10.0, 1.0, 1.0, 10.0, 10.0];
let mut weights_matrix = scirs2_core::ndarray::Array2::zeros((6, 6));
for i in 0..6 {
weights_matrix[[i, i]] = 1.0;
if i > 0 {
weights_matrix[[i, i - 1]] = 1.0;
}
if i < 5 {
weights_matrix[[i, i + 1]] = 1.0;
}
}
let weights = SpatialWeights::from_adjacency(weights_matrix)
.expect("Failed to create spatial weights for coldspot test");
let gi_star = GetisOrdGiStar::new(0.05);
let result = gi_star
.calculate(&values.view(), &weights)
.expect("Failed to calculate Gi* statistic for coldspot test");
assert!(result.z_scores[2] < 0.0);
assert!(result.z_scores[3] < 0.0);
}
#[test]
fn test_getis_ord_grid() {
let values = array![
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 10.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 1.0],
[1.0, 1.0, 1.0, 1.0, 1.0]
];
let gi_star = GetisOrdGiStar::new(0.05);
let result = gi_star
.calculate_grid(&values.view(), 1)
.expect("Failed to calculate Gi* statistic for grid test");
assert_eq!(result.z_scores.len(), 25);
assert!(
result.z_scores[12] > 0.0,
"Center hotspot should have positive z-score, got: {}",
result.z_scores[12]
);
}
}