Skip to main content

oxigdal_temporal/gap_filling/
mod.rs

1//! Gap Filling Module
2//!
3//! Implements gap filling methods for temporal data including interpolation,
4//! harmonic regression, and other techniques to fill missing values in time series.
5
6use crate::error::{Result, TemporalError};
7use crate::timeseries::TimeSeriesRaster;
8use scirs2_core::ndarray::Array3;
9use serde::{Deserialize, Serialize};
10use std::f64::consts::PI;
11use tracing::info;
12
13pub mod harmonic;
14pub mod interpolation;
15
16/// Gap filling method
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
18pub enum GapFillMethod {
19    /// Linear interpolation
20    LinearInterpolation,
21    /// Spline interpolation
22    SplineInterpolation,
23    /// Nearest neighbor
24    NearestNeighbor,
25    /// Harmonic regression
26    HarmonicRegression,
27    /// Moving average
28    MovingAverage,
29    /// Forward fill (propagate last valid value)
30    ForwardFill,
31    /// Backward fill (propagate next valid value)
32    BackwardFill,
33}
34
35/// Gap filling result
36#[derive(Debug, Clone)]
37pub struct GapFillResult {
38    /// Filled data
39    pub data: Array3<f64>,
40    /// Filled count per pixel
41    pub filled_count: Array3<usize>,
42    /// Quality/confidence of fill
43    pub quality: Option<Array3<f64>>,
44}
45
46impl GapFillResult {
47    /// Create new gap fill result
48    #[must_use]
49    pub fn new(data: Array3<f64>, filled_count: Array3<usize>) -> Self {
50        Self {
51            data,
52            filled_count,
53            quality: None,
54        }
55    }
56
57    /// Add quality scores
58    #[must_use]
59    pub fn with_quality(mut self, quality: Array3<f64>) -> Self {
60        self.quality = Some(quality);
61        self
62    }
63}
64
65/// Gap filler
66pub struct GapFiller;
67
68impl GapFiller {
69    /// Fill gaps in time series
70    ///
71    /// # Errors
72    /// Returns error if gap filling fails
73    pub fn fill_gaps(
74        ts: &TimeSeriesRaster,
75        method: GapFillMethod,
76        params: Option<GapFillParams>,
77    ) -> Result<TimeSeriesRaster> {
78        match method {
79            GapFillMethod::LinearInterpolation => Self::linear_interpolation(ts),
80            GapFillMethod::SplineInterpolation => Self::spline_interpolation(ts),
81            GapFillMethod::NearestNeighbor => Self::nearest_neighbor(ts),
82            GapFillMethod::HarmonicRegression => {
83                let period = params.map_or(12, |p| p.harmonic_period);
84                Self::harmonic_regression(ts, period)
85            }
86            GapFillMethod::MovingAverage => {
87                let window = params.map_or(3, |p| p.window_size);
88                Self::moving_average(ts, window)
89            }
90            GapFillMethod::ForwardFill => Self::forward_fill(ts),
91            GapFillMethod::BackwardFill => Self::backward_fill(ts),
92        }
93    }
94
95    /// Linear interpolation gap filling
96    fn linear_interpolation(ts: &TimeSeriesRaster) -> Result<TimeSeriesRaster> {
97        if ts.len() < 2 {
98            return Err(TemporalError::insufficient_data(
99                "Need at least 2 observations",
100            ));
101        }
102
103        let (height, width, n_bands) = ts
104            .expected_shape()
105            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
106
107        let mut filled_ts = ts.clone();
108
109        for i in 0..height {
110            for j in 0..width {
111                for k in 0..n_bands {
112                    let values = ts.extract_pixel_timeseries(i, j, k)?;
113                    let filled = Self::interpolate_linear(&values);
114
115                    // Update time series with filled values
116                    for (t, entry) in filled_ts.entries_mut().values_mut().enumerate() {
117                        if let Some(data) = &mut entry.data {
118                            data[[i, j, k]] = filled[t];
119                        }
120                    }
121                }
122            }
123        }
124
125        info!("Completed linear interpolation gap filling");
126        Ok(filled_ts)
127    }
128
129    /// Interpolate gaps linearly
130    fn interpolate_linear(values: &[f64]) -> Vec<f64> {
131        let mut result = values.to_vec();
132
133        for i in 0..result.len() {
134            if result[i].is_nan() {
135                // Find previous valid value
136                let mut prev_idx = None;
137                for j in (0..i).rev() {
138                    if !result[j].is_nan() {
139                        prev_idx = Some(j);
140                        break;
141                    }
142                }
143
144                // Find next valid value
145                let next_idx = result[(i + 1)..]
146                    .iter()
147                    .position(|&v| !v.is_nan())
148                    .map(|idx| idx + i + 1);
149
150                // Interpolate
151                if let (Some(prev), Some(next)) = (prev_idx, next_idx) {
152                    let prev_val = result[prev];
153                    let next_val = result[next];
154                    let weight = (i - prev) as f64 / (next - prev) as f64;
155                    result[i] = prev_val + weight * (next_val - prev_val);
156                }
157            }
158        }
159
160        result
161    }
162
163    /// Spline interpolation gap filling
164    fn spline_interpolation(ts: &TimeSeriesRaster) -> Result<TimeSeriesRaster> {
165        // Use linear interpolation as approximation
166        info!("Using linear interpolation approximation for spline");
167        Self::linear_interpolation(ts)
168    }
169
170    /// Nearest neighbor gap filling
171    fn nearest_neighbor(ts: &TimeSeriesRaster) -> Result<TimeSeriesRaster> {
172        if ts.is_empty() {
173            return Err(TemporalError::insufficient_data("Empty time series"));
174        }
175
176        let (height, width, n_bands) = ts
177            .expected_shape()
178            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
179
180        let mut filled_ts = ts.clone();
181
182        for i in 0..height {
183            for j in 0..width {
184                for k in 0..n_bands {
185                    let values = ts.extract_pixel_timeseries(i, j, k)?;
186                    let filled = Self::fill_nearest(&values);
187
188                    for (t, entry) in filled_ts.entries_mut().values_mut().enumerate() {
189                        if let Some(data) = &mut entry.data {
190                            data[[i, j, k]] = filled[t];
191                        }
192                    }
193                }
194            }
195        }
196
197        info!("Completed nearest neighbor gap filling");
198        Ok(filled_ts)
199    }
200
201    /// Fill with nearest valid value
202    fn fill_nearest(values: &[f64]) -> Vec<f64> {
203        let mut result = values.to_vec();
204
205        for i in 0..result.len() {
206            if result[i].is_nan() {
207                let nearest_val = result
208                    .iter()
209                    .enumerate()
210                    .filter(|(_, v)| !v.is_nan())
211                    .min_by_key(|(j, _)| i.abs_diff(*j))
212                    .map(|(_, v)| *v)
213                    .unwrap_or(f64::NAN);
214
215                result[i] = nearest_val;
216            }
217        }
218
219        result
220    }
221
222    /// Harmonic regression gap filling
223    fn harmonic_regression(ts: &TimeSeriesRaster, period: usize) -> Result<TimeSeriesRaster> {
224        if ts.len() < period {
225            return Err(TemporalError::insufficient_data(format!(
226                "Need at least {} observations for period {}",
227                period, period
228            )));
229        }
230
231        let (height, width, n_bands) = ts
232            .expected_shape()
233            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
234
235        let mut filled_ts = ts.clone();
236
237        for i in 0..height {
238            for j in 0..width {
239                for k in 0..n_bands {
240                    let values = ts.extract_pixel_timeseries(i, j, k)?;
241                    let filled = Self::fit_harmonic(&values, period);
242
243                    for (t, entry) in filled_ts.entries_mut().values_mut().enumerate() {
244                        if let Some(data) = &mut entry.data {
245                            data[[i, j, k]] = filled[t];
246                        }
247                    }
248                }
249            }
250        }
251
252        info!("Completed harmonic regression gap filling");
253        Ok(filled_ts)
254    }
255
256    /// Fit harmonic function to data
257    fn fit_harmonic(values: &[f64], period: usize) -> Vec<f64> {
258        let n = values.len();
259
260        // Simple harmonic model: y = a + b*sin(2*pi*t/P) + c*cos(2*pi*t/P)
261        let valid_data: Vec<(usize, f64)> = values
262            .iter()
263            .enumerate()
264            .filter(|(_, v)| !v.is_nan())
265            .map(|(i, &v)| (i, v))
266            .collect();
267
268        if valid_data.is_empty() {
269            return values.to_vec();
270        }
271
272        // Least squares fit
273        let mut sum_y = 0.0;
274        let mut sum_sin = 0.0;
275        let mut sum_cos = 0.0;
276        let mut sum_sin2 = 0.0;
277        let mut sum_cos2 = 0.0;
278        let mut sum_y_sin = 0.0;
279        let mut sum_y_cos = 0.0;
280
281        for &(t, y) in &valid_data {
282            let phase = 2.0 * PI * (t as f64) / (period as f64);
283            let sin_val = phase.sin();
284            let cos_val = phase.cos();
285
286            sum_y += y;
287            sum_sin += sin_val;
288            sum_cos += cos_val;
289            sum_sin2 += sin_val * sin_val;
290            sum_cos2 += cos_val * cos_val;
291            sum_y_sin += y * sin_val;
292            sum_y_cos += y * cos_val;
293        }
294
295        let n_valid = valid_data.len() as f64;
296        let a = sum_y / n_valid;
297        let b = (sum_y_sin - sum_sin * sum_y / n_valid) / (sum_sin2 - sum_sin * sum_sin / n_valid);
298        let c = (sum_y_cos - sum_cos * sum_y / n_valid) / (sum_cos2 - sum_cos * sum_cos / n_valid);
299
300        // Generate fitted values
301        values
302            .iter()
303            .enumerate()
304            .take(n)
305            .map(|(t, val)| {
306                let phase = 2.0 * PI * (t as f64) / (period as f64);
307                let fitted = a + b * phase.sin() + c * phase.cos();
308                if val.is_nan() { fitted } else { *val }
309            })
310            .collect()
311    }
312
313    /// Moving average gap filling
314    fn moving_average(ts: &TimeSeriesRaster, window: usize) -> Result<TimeSeriesRaster> {
315        if ts.len() < window {
316            return Err(TemporalError::insufficient_data(format!(
317                "Need at least {} observations",
318                window
319            )));
320        }
321
322        let (height, width, n_bands) = ts
323            .expected_shape()
324            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
325
326        let mut filled_ts = ts.clone();
327
328        for i in 0..height {
329            for j in 0..width {
330                for k in 0..n_bands {
331                    let values = ts.extract_pixel_timeseries(i, j, k)?;
332                    let filled = Self::fill_moving_average(&values, window);
333
334                    for (t, entry) in filled_ts.entries_mut().values_mut().enumerate() {
335                        if let Some(data) = &mut entry.data {
336                            data[[i, j, k]] = filled[t];
337                        }
338                    }
339                }
340            }
341        }
342
343        info!("Completed moving average gap filling");
344        Ok(filled_ts)
345    }
346
347    /// Fill with moving average
348    fn fill_moving_average(values: &[f64], window: usize) -> Vec<f64> {
349        let mut result = values.to_vec();
350        let half_window = window / 2;
351
352        for i in 0..result.len() {
353            if result[i].is_nan() {
354                let start = i.saturating_sub(half_window);
355                let end = (i + half_window + 1).min(result.len());
356
357                let valid_values: Vec<f64> = result[start..end]
358                    .iter()
359                    .filter(|v| !v.is_nan())
360                    .copied()
361                    .collect();
362
363                if !valid_values.is_empty() {
364                    result[i] = valid_values.iter().sum::<f64>() / valid_values.len() as f64;
365                }
366            }
367        }
368
369        result
370    }
371
372    /// Forward fill
373    fn forward_fill(ts: &TimeSeriesRaster) -> Result<TimeSeriesRaster> {
374        if ts.is_empty() {
375            return Err(TemporalError::insufficient_data("Empty time series"));
376        }
377
378        let (height, width, n_bands) = ts
379            .expected_shape()
380            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
381
382        let mut filled_ts = ts.clone();
383
384        for i in 0..height {
385            for j in 0..width {
386                for k in 0..n_bands {
387                    let values = ts.extract_pixel_timeseries(i, j, k)?;
388                    let mut last_valid = f64::NAN;
389                    let mut filled = Vec::with_capacity(values.len());
390
391                    for &value in &values {
392                        let v: f64 = value;
393                        if !v.is_nan() && v.is_finite() {
394                            last_valid = v;
395                            filled.push(v);
396                        } else {
397                            filled.push(last_valid);
398                        }
399                    }
400
401                    for (t, entry) in filled_ts.entries_mut().values_mut().enumerate() {
402                        if let Some(data) = &mut entry.data {
403                            data[[i, j, k]] = filled[t];
404                        }
405                    }
406                }
407            }
408        }
409
410        info!("Completed forward fill");
411        Ok(filled_ts)
412    }
413
414    /// Backward fill
415    fn backward_fill(ts: &TimeSeriesRaster) -> Result<TimeSeriesRaster> {
416        if ts.is_empty() {
417            return Err(TemporalError::insufficient_data("Empty time series"));
418        }
419
420        let (height, width, n_bands) = ts
421            .expected_shape()
422            .ok_or_else(|| TemporalError::insufficient_data("No shape information"))?;
423
424        let mut filled_ts = ts.clone();
425
426        for i in 0..height {
427            for j in 0..width {
428                for k in 0..n_bands {
429                    let values = ts.extract_pixel_timeseries(i, j, k)?;
430                    let mut filled = values.clone();
431                    let mut next_valid = f64::NAN;
432
433                    for t in (0..values.len()).rev() {
434                        if !values[t].is_nan() {
435                            next_valid = values[t];
436                        } else {
437                            filled[t] = next_valid;
438                        }
439                    }
440
441                    for (t, entry) in filled_ts.entries_mut().values_mut().enumerate() {
442                        if let Some(data) = &mut entry.data {
443                            data[[i, j, k]] = filled[t];
444                        }
445                    }
446                }
447            }
448        }
449
450        info!("Completed backward fill");
451        Ok(filled_ts)
452    }
453}
454
455/// Gap filling parameters
456#[derive(Debug, Clone, Copy)]
457pub struct GapFillParams {
458    /// Window size for moving average
459    pub window_size: usize,
460    /// Period for harmonic regression
461    pub harmonic_period: usize,
462    /// Maximum gap size to fill
463    pub max_gap_size: Option<usize>,
464}
465
466impl Default for GapFillParams {
467    fn default() -> Self {
468        Self {
469            window_size: 3,
470            harmonic_period: 12,
471            max_gap_size: None,
472        }
473    }
474}