use std::num::NonZeroUsize;
use crate::Real;
use crate::error::KrigingError;
use crate::spacetime::coord::temporal_distance;
use crate::spacetime::dataset::SpaceTimeDataset;
use crate::spacetime::metric::SpatialMetric;
use crate::variogram::empirical::{EmpiricalEstimator, PositiveReal};
#[derive(Debug, Clone)]
pub struct EmpiricalSpaceTimeVariogram {
pub n_spatial_bins: usize,
pub n_temporal_bins: usize,
pub spatial_lags: Vec<Real>,
pub temporal_lags: Vec<Real>,
pub semivariances: Vec<Real>,
pub n_pairs: Vec<usize>,
}
impl EmpiricalSpaceTimeVariogram {
#[inline]
pub fn index(&self, i: usize, j: usize) -> usize {
i * self.n_temporal_bins + j
}
pub fn total_pairs(&self) -> usize {
self.n_pairs.iter().sum()
}
}
#[derive(Debug, Clone)]
pub struct SpaceTimeVariogramConfig {
pub max_spatial_distance: Option<PositiveReal>,
pub max_temporal_lag: Option<PositiveReal>,
pub n_spatial_bins: NonZeroUsize,
pub n_temporal_bins: NonZeroUsize,
pub estimator: EmpiricalEstimator,
}
impl Default for SpaceTimeVariogramConfig {
fn default() -> Self {
Self {
max_spatial_distance: None,
max_temporal_lag: None,
n_spatial_bins: NonZeroUsize::new(10).expect("10 != 0"),
n_temporal_bins: NonZeroUsize::new(8).expect("8 != 0"),
estimator: EmpiricalEstimator::Classical,
}
}
}
pub fn compute_empirical_spacetime_variogram<M: SpatialMetric>(
metric: &M,
dataset: &SpaceTimeDataset<M::Coord>,
config: &SpaceTimeVariogramConfig,
) -> Result<EmpiricalSpaceTimeVariogram, KrigingError> {
let coords = dataset.coords();
let values = dataset.values();
let n = coords.len();
if n < 2 {
return Err(KrigingError::InsufficientData(2));
}
let prepared: Vec<M::Prepared> = coords.iter().map(|c| metric.prepare(c.spatial)).collect();
let (max_spatial, max_temporal) = resolve_max_lags(
&prepared,
coords,
metric,
config.max_spatial_distance,
config.max_temporal_lag,
)?;
let n_s = config.n_spatial_bins.get();
let n_t = config.n_temporal_bins.get();
let bin_s = max_spatial / n_s as Real;
let bin_t = max_temporal / n_t as Real;
let len = n_s * n_t;
let mut dist_s_sum = vec![0.0 as Real; len];
let mut dist_t_sum = vec![0.0 as Real; len];
let mut value_sums = vec![0.0 as Real; len];
let mut counts = vec![0usize; len];
let robust = matches!(config.estimator, EmpiricalEstimator::CressieHawkins);
for i in 0..n {
for j in (i + 1)..n {
let hs = metric.distance(prepared[i], prepared[j]);
if hs > max_spatial {
continue;
}
let ht = temporal_distance(coords[i].time, coords[j].time);
if ht > max_temporal {
continue;
}
let bi = clamp_bin(hs, bin_s, n_s);
let bj = clamp_bin(ht, bin_t, n_t);
let idx = bi * n_t + bj;
let dz = (values[i] - values[j]).abs();
let g = if robust { dz.sqrt() } else { 0.5 * dz * dz };
dist_s_sum[idx] += hs;
dist_t_sum[idx] += ht;
value_sums[idx] += g;
counts[idx] += 1;
}
}
let mut spatial_lags = vec![0.0 as Real; len];
let mut temporal_lags = vec![0.0 as Real; len];
let mut semivariances = vec![0.0 as Real; len];
for idx in 0..len {
if counts[idx] == 0 {
continue;
}
let n_i = counts[idx] as Real;
let g = if robust {
let mean_sqrt = value_sums[idx] / n_i;
let numer = mean_sqrt.powi(4);
let denom = 0.457 + 0.494 / n_i + 0.045 / (n_i * n_i);
0.5 * numer / denom
} else {
value_sums[idx] / n_i
};
spatial_lags[idx] = dist_s_sum[idx] / n_i;
temporal_lags[idx] = dist_t_sum[idx] / n_i;
semivariances[idx] = g;
}
if counts.iter().all(|c| *c == 0) {
return Err(KrigingError::FittingError(
"no pairs in selected space-time lag range".to_string(),
));
}
Ok(EmpiricalSpaceTimeVariogram {
n_spatial_bins: n_s,
n_temporal_bins: n_t,
spatial_lags,
temporal_lags,
semivariances,
n_pairs: counts,
})
}
fn clamp_bin(value: Real, bin_width: Real, n_bins: usize) -> usize {
let mut bin = (value / bin_width.max(Real::MIN_POSITIVE)).floor() as usize;
if bin >= n_bins {
bin = n_bins - 1;
}
bin
}
fn resolve_max_lags<M: SpatialMetric>(
prepared: &[M::Prepared],
coords: &[crate::spacetime::SpaceTimeCoord<M::Coord>],
metric: &M,
supplied_spatial: Option<PositiveReal>,
supplied_temporal: Option<PositiveReal>,
) -> Result<(Real, Real), KrigingError> {
let mut max_spatial = supplied_spatial.map(|v| v.get()).unwrap_or(0.0);
let mut max_temporal = supplied_temporal.map(|v| v.get()).unwrap_or(0.0);
if supplied_spatial.is_none() || supplied_temporal.is_none() {
for i in 0..prepared.len() {
for j in (i + 1)..prepared.len() {
if supplied_spatial.is_none() {
let hs = metric.distance(prepared[i], prepared[j]);
if hs > max_spatial {
max_spatial = hs;
}
}
if supplied_temporal.is_none() {
let ht = temporal_distance(coords[i].time, coords[j].time);
if ht > max_temporal {
max_temporal = ht;
}
}
}
}
}
if max_spatial <= 0.0 {
return Err(KrigingError::FittingError(
"max spatial distance must be positive".to_string(),
));
}
if max_temporal <= 0.0 {
return Err(KrigingError::FittingError(
"max temporal lag must be positive".to_string(),
));
}
Ok((max_spatial, max_temporal))
}
pub fn spatial_marginal(
empirical: &EmpiricalSpaceTimeVariogram,
) -> crate::variogram::empirical::EmpiricalVariogram {
let mut distances = Vec::new();
let mut semivariances = Vec::new();
let mut n_pairs = Vec::new();
for i in 0..empirical.n_spatial_bins {
let idx = empirical.index(i, 0);
if empirical.n_pairs[idx] == 0 {
continue;
}
distances.push(empirical.spatial_lags[idx]);
semivariances.push(empirical.semivariances[idx]);
n_pairs.push(empirical.n_pairs[idx]);
}
crate::variogram::empirical::EmpiricalVariogram {
distances,
semivariances,
n_pairs,
}
}
pub fn temporal_marginal(
empirical: &EmpiricalSpaceTimeVariogram,
) -> crate::variogram::empirical::EmpiricalVariogram {
let mut distances = Vec::new();
let mut semivariances = Vec::new();
let mut n_pairs = Vec::new();
for j in 0..empirical.n_temporal_bins {
let idx = empirical.index(0, j);
if empirical.n_pairs[idx] == 0 {
continue;
}
distances.push(empirical.temporal_lags[idx]);
semivariances.push(empirical.semivariances[idx]);
n_pairs.push(empirical.n_pairs[idx]);
}
crate::variogram::empirical::EmpiricalVariogram {
distances,
semivariances,
n_pairs,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::distance::GeoCoord;
use crate::spacetime::SpaceTimeCoord;
use crate::spacetime::metric::GeoMetric;
fn make_dataset() -> SpaceTimeDataset<GeoCoord> {
let mut coords = Vec::new();
let mut values = Vec::new();
for i in 0..5 {
for t in 0..4 {
coords.push(SpaceTimeCoord::new(
GeoCoord::try_new(i as Real * 0.1, 0.0).unwrap(),
t as Real,
));
values.push((i as Real) + 0.5 * (t as Real));
}
}
SpaceTimeDataset::new(coords, values).unwrap()
}
#[test]
fn default_config_produces_non_empty_variogram() {
let ds = make_dataset();
let config = SpaceTimeVariogramConfig::default();
let emp = compute_empirical_spacetime_variogram(&GeoMetric, &ds, &config).unwrap();
assert_eq!(emp.n_spatial_bins, 10);
assert_eq!(emp.n_temporal_bins, 8);
assert!(emp.total_pairs() > 0);
}
#[test]
fn pair_accounting_is_consistent_with_explicit_loop() {
let ds = make_dataset();
let max_s = PositiveReal::try_new(100.0).unwrap();
let max_t = PositiveReal::try_new(10.0).unwrap();
let config = SpaceTimeVariogramConfig {
max_spatial_distance: Some(max_s),
max_temporal_lag: Some(max_t),
n_spatial_bins: NonZeroUsize::new(4).unwrap(),
n_temporal_bins: NonZeroUsize::new(4).unwrap(),
estimator: EmpiricalEstimator::Classical,
};
let emp = compute_empirical_spacetime_variogram(&GeoMetric, &ds, &config).unwrap();
let n = ds.len();
let mut expected = 0usize;
for i in 0..n {
for j in (i + 1)..n {
let hs = crate::distance::haversine_distance(
ds.coords()[i].spatial,
ds.coords()[j].spatial,
);
let ht = (ds.coords()[i].time - ds.coords()[j].time).abs();
if hs <= max_s.get() && ht <= max_t.get() {
expected += 1;
}
}
}
assert_eq!(emp.total_pairs(), expected);
}
#[test]
fn cressie_hawkins_is_robust_to_outlier() {
let mut coords = Vec::new();
let mut values = Vec::new();
for i in 0..6 {
for t in 0..4 {
coords.push(SpaceTimeCoord::new(
GeoCoord::try_new(i as Real * 0.1, 0.0).unwrap(),
t as Real,
));
values.push((i + t) as Real);
}
}
*values.last_mut().unwrap() = 1000.0;
let ds = SpaceTimeDataset::new(coords, values).unwrap();
let config_classical = SpaceTimeVariogramConfig {
estimator: EmpiricalEstimator::Classical,
n_spatial_bins: NonZeroUsize::new(5).unwrap(),
n_temporal_bins: NonZeroUsize::new(4).unwrap(),
..Default::default()
};
let config_robust = SpaceTimeVariogramConfig {
estimator: EmpiricalEstimator::CressieHawkins,
..config_classical.clone()
};
let classical =
compute_empirical_spacetime_variogram(&GeoMetric, &ds, &config_classical).unwrap();
let robust =
compute_empirical_spacetime_variogram(&GeoMetric, &ds, &config_robust).unwrap();
let max_classical = classical
.semivariances
.iter()
.copied()
.fold(0.0 as Real, Real::max);
let max_robust = robust
.semivariances
.iter()
.copied()
.fold(0.0 as Real, Real::max);
assert!(
max_robust < max_classical * 0.8,
"robust max {} should be notably smaller than classical max {}",
max_robust,
max_classical
);
}
#[test]
fn spatial_marginal_monotonic_for_linear_in_space_field() {
let ds = make_dataset();
let config = SpaceTimeVariogramConfig {
n_spatial_bins: NonZeroUsize::new(4).unwrap(),
n_temporal_bins: NonZeroUsize::new(4).unwrap(),
..Default::default()
};
let emp = compute_empirical_spacetime_variogram(&GeoMetric, &ds, &config).unwrap();
let marginal = spatial_marginal(&emp);
assert!(!marginal.distances.is_empty());
let g: Vec<_> = marginal.semivariances.to_vec();
assert!(
g.windows(2).all(|w| w[1] >= w[0] - 1e-6),
"spatial marginal should be non-decreasing, got {:?}",
g
);
}
}