Skip to main content

oxigdal_temporal/timeseries/
mod.rs

1//! Time Series Raster Module
2//!
3//! This module provides time-indexed raster collections with temporal metadata,
4//! efficient storage with lazy loading, temporal querying, and gap detection.
5
6use crate::error::{Result, TemporalError};
7use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
8use scirs2_core::ndarray::{Array2, Array3, s};
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11use tracing::{debug, info, warn};
12
13pub mod collection;
14pub mod datacube;
15
16pub use collection::*;
17pub use datacube::*;
18
19/// Temporal resolution for time series
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
21pub enum TemporalResolution {
22    /// Daily resolution
23    Daily,
24    /// Weekly resolution
25    Weekly,
26    /// Monthly resolution
27    Monthly,
28    /// Yearly resolution
29    Yearly,
30    /// Custom interval in seconds
31    Custom(i64),
32}
33
34impl TemporalResolution {
35    /// Get the duration in seconds
36    #[must_use]
37    pub fn as_seconds(&self) -> i64 {
38        match self {
39            Self::Daily => 86400,
40            Self::Weekly => 604800,
41            Self::Monthly => 2592000, // Approximate 30 days
42            Self::Yearly => 31536000, // 365 days
43            Self::Custom(secs) => *secs,
44        }
45    }
46
47    /// Get the duration in days
48    #[must_use]
49    pub fn as_days(&self) -> f64 {
50        self.as_seconds() as f64 / 86400.0
51    }
52}
53
54/// Temporal metadata for a single raster
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct TemporalMetadata {
57    /// Timestamp (UTC)
58    pub timestamp: DateTime<Utc>,
59    /// Acquisition date
60    pub acquisition_date: NaiveDate,
61    /// Sensor/satellite identifier
62    pub sensor: Option<String>,
63    /// Scene identifier
64    pub scene_id: Option<String>,
65    /// Cloud cover percentage (0-100)
66    pub cloud_cover: Option<f32>,
67    /// Quality score (0-1)
68    pub quality_score: Option<f32>,
69    /// Processing level
70    pub processing_level: Option<String>,
71    /// Custom metadata
72    pub custom: BTreeMap<String, String>,
73}
74
75impl TemporalMetadata {
76    /// Create new temporal metadata
77    #[must_use]
78    pub fn new(timestamp: DateTime<Utc>, acquisition_date: NaiveDate) -> Self {
79        Self {
80            timestamp,
81            acquisition_date,
82            sensor: None,
83            scene_id: None,
84            cloud_cover: None,
85            quality_score: None,
86            processing_level: None,
87            custom: BTreeMap::new(),
88        }
89    }
90
91    /// Create from naive datetime
92    pub fn from_naive_datetime(dt: NaiveDateTime) -> Result<Self> {
93        let timestamp = DateTime::from_naive_utc_and_offset(dt, Utc);
94        let acquisition_date = dt.date();
95        Ok(Self::new(timestamp, acquisition_date))
96    }
97
98    /// Set sensor information
99    #[must_use]
100    pub fn with_sensor(mut self, sensor: impl Into<String>) -> Self {
101        self.sensor = Some(sensor.into());
102        self
103    }
104
105    /// Set scene ID
106    #[must_use]
107    pub fn with_scene_id(mut self, scene_id: impl Into<String>) -> Self {
108        self.scene_id = Some(scene_id.into());
109        self
110    }
111
112    /// Set cloud cover percentage
113    #[must_use]
114    pub fn with_cloud_cover(mut self, cloud_cover: f32) -> Self {
115        self.cloud_cover = Some(cloud_cover.clamp(0.0, 100.0));
116        self
117    }
118
119    /// Set quality score
120    #[must_use]
121    pub fn with_quality_score(mut self, quality_score: f32) -> Self {
122        self.quality_score = Some(quality_score.clamp(0.0, 1.0));
123        self
124    }
125
126    /// Set processing level
127    #[must_use]
128    pub fn with_processing_level(mut self, level: impl Into<String>) -> Self {
129        self.processing_level = Some(level.into());
130        self
131    }
132
133    /// Add custom metadata field
134    #[must_use]
135    pub fn with_custom(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
136        self.custom.insert(key.into(), value.into());
137        self
138    }
139}
140
141/// Time-indexed raster entry
142#[derive(Debug, Clone)]
143pub struct TemporalRasterEntry {
144    /// Temporal metadata
145    pub metadata: TemporalMetadata,
146    /// Raster data (lazy-loaded)
147    pub data: Option<Array3<f64>>,
148    /// Data source path (for lazy loading)
149    pub source_path: Option<String>,
150}
151
152impl TemporalRasterEntry {
153    /// Create new entry with in-memory data
154    #[must_use]
155    pub fn new_loaded(metadata: TemporalMetadata, data: Array3<f64>) -> Self {
156        Self {
157            metadata,
158            data: Some(data),
159            source_path: None,
160        }
161    }
162
163    /// Create new entry with lazy loading
164    #[must_use]
165    pub fn new_lazy(metadata: TemporalMetadata, source_path: String) -> Self {
166        Self {
167            metadata,
168            data: None,
169            source_path: Some(source_path),
170        }
171    }
172
173    /// Check if data is loaded
174    #[must_use]
175    pub fn is_loaded(&self) -> bool {
176        self.data.is_some()
177    }
178
179    /// Get data dimensions (if loaded)
180    #[must_use]
181    pub fn shape(&self) -> Option<(usize, usize, usize)> {
182        self.data
183            .as_ref()
184            .map(|d| (d.shape()[0], d.shape()[1], d.shape()[2]))
185    }
186}
187
188/// Time series raster collection
189///
190/// This struct represents a collection of rasters indexed by time,
191/// with support for temporal queries, gap detection, and lazy loading.
192#[derive(Debug, Clone)]
193pub struct TimeSeriesRaster {
194    /// Time-indexed rasters (sorted by timestamp)
195    entries: BTreeMap<i64, TemporalRasterEntry>,
196    /// Temporal resolution
197    resolution: Option<TemporalResolution>,
198    /// Expected spatial dimensions (height, width, bands)
199    expected_shape: Option<(usize, usize, usize)>,
200}
201
202impl TimeSeriesRaster {
203    /// Create a new empty time series raster collection
204    #[must_use]
205    pub fn new() -> Self {
206        Self {
207            entries: BTreeMap::new(),
208            resolution: None,
209            expected_shape: None,
210        }
211    }
212
213    /// Create with expected resolution
214    #[must_use]
215    pub fn with_resolution(resolution: TemporalResolution) -> Self {
216        Self {
217            entries: BTreeMap::new(),
218            resolution: Some(resolution),
219            expected_shape: None,
220        }
221    }
222
223    /// Create with expected shape
224    #[must_use]
225    pub fn with_shape(height: usize, width: usize, bands: usize) -> Self {
226        Self {
227            entries: BTreeMap::new(),
228            resolution: None,
229            expected_shape: Some((height, width, bands)),
230        }
231    }
232
233    /// Add a raster with timestamp
234    ///
235    /// # Errors
236    /// Returns error if shape doesn't match expected dimensions
237    pub fn add_raster(&mut self, metadata: TemporalMetadata, data: Array3<f64>) -> Result<()> {
238        // Validate shape if expected shape is set
239        if let Some((exp_h, exp_w, exp_b)) = self.expected_shape {
240            let shape = data.shape();
241            if shape[0] != exp_h || shape[1] != exp_w || shape[2] != exp_b {
242                return Err(TemporalError::dimension_mismatch(
243                    format!("{}x{}x{}", exp_h, exp_w, exp_b),
244                    format!("{}x{}x{}", shape[0], shape[1], shape[2]),
245                ));
246            }
247        } else {
248            // Set expected shape from first raster
249            let shape = data.shape();
250            self.expected_shape = Some((shape[0], shape[1], shape[2]));
251        }
252
253        let timestamp_key = metadata.timestamp.timestamp();
254        let entry = TemporalRasterEntry::new_loaded(metadata, data);
255        self.entries.insert(timestamp_key, entry);
256
257        debug!("Added raster at timestamp {}", timestamp_key);
258        Ok(())
259    }
260
261    /// Add a raster with lazy loading
262    ///
263    /// # Errors
264    /// Returns error if metadata is invalid
265    pub fn add_raster_lazy(
266        &mut self,
267        metadata: TemporalMetadata,
268        source_path: String,
269    ) -> Result<()> {
270        let timestamp_key = metadata.timestamp.timestamp();
271        let entry = TemporalRasterEntry::new_lazy(metadata, source_path);
272        self.entries.insert(timestamp_key, entry);
273
274        debug!("Added lazy raster at timestamp {}", timestamp_key);
275        Ok(())
276    }
277
278    /// Get the number of rasters in the time series
279    #[must_use]
280    pub fn len(&self) -> usize {
281        self.entries.len()
282    }
283
284    /// Check if time series is empty
285    #[must_use]
286    pub fn is_empty(&self) -> bool {
287        self.entries.is_empty()
288    }
289
290    /// Get temporal resolution
291    #[must_use]
292    pub fn resolution(&self) -> Option<TemporalResolution> {
293        self.resolution
294    }
295
296    /// Set temporal resolution
297    pub fn set_resolution(&mut self, resolution: TemporalResolution) {
298        self.resolution = Some(resolution);
299    }
300
301    /// Get expected shape
302    #[must_use]
303    pub fn expected_shape(&self) -> Option<(usize, usize, usize)> {
304        self.expected_shape
305    }
306
307    /// Get time range (start, end)
308    #[must_use]
309    pub fn time_range(&self) -> Option<(DateTime<Utc>, DateTime<Utc>)> {
310        if self.is_empty() {
311            return None;
312        }
313
314        let start_ts = self.entries.keys().next()?;
315        let end_ts = self.entries.keys().next_back()?;
316
317        let start = DateTime::from_timestamp(*start_ts, 0)?;
318        let end = DateTime::from_timestamp(*end_ts, 0)?;
319
320        Some((start, end))
321    }
322
323    /// Get all timestamps
324    #[must_use]
325    pub fn timestamps(&self) -> Vec<DateTime<Utc>> {
326        self.entries
327            .keys()
328            .filter_map(|&ts| DateTime::from_timestamp(ts, 0))
329            .collect()
330    }
331
332    /// Get raster at specific timestamp
333    #[must_use]
334    pub fn get_at_timestamp(&self, timestamp: &DateTime<Utc>) -> Option<&TemporalRasterEntry> {
335        self.entries.get(&timestamp.timestamp())
336    }
337
338    /// Get mutable raster at specific timestamp
339    pub fn get_at_timestamp_mut(
340        &mut self,
341        timestamp: &DateTime<Utc>,
342    ) -> Option<&mut TemporalRasterEntry> {
343        self.entries.get_mut(&timestamp.timestamp())
344    }
345
346    /// Get raster by index
347    ///
348    /// # Errors
349    /// Returns error if index is out of bounds
350    pub fn get_by_index(&self, index: usize) -> Result<&TemporalRasterEntry> {
351        self.entries
352            .values()
353            .nth(index)
354            .ok_or_else(|| TemporalError::time_index_out_of_bounds(index, 0, self.len()))
355    }
356
357    /// Query rasters within a time range
358    #[must_use]
359    pub fn query_range(
360        &self,
361        start: &DateTime<Utc>,
362        end: &DateTime<Utc>,
363    ) -> Vec<&TemporalRasterEntry> {
364        let start_ts = start.timestamp();
365        let end_ts = end.timestamp();
366
367        self.entries
368            .range(start_ts..=end_ts)
369            .map(|(_, entry)| entry)
370            .collect()
371    }
372
373    /// Detect gaps in the time series based on expected resolution
374    ///
375    /// # Errors
376    /// Returns error if resolution is not set
377    pub fn detect_gaps(&self) -> Result<Vec<(DateTime<Utc>, DateTime<Utc>)>> {
378        let resolution = self
379            .resolution
380            .ok_or_else(|| TemporalError::invalid_input("Temporal resolution not set"))?;
381
382        let mut gaps = Vec::new();
383        let timestamps: Vec<i64> = self.entries.keys().copied().collect();
384
385        if timestamps.len() < 2 {
386            return Ok(gaps);
387        }
388
389        let expected_interval = resolution.as_seconds();
390
391        for i in 0..timestamps.len() - 1 {
392            let current = timestamps[i];
393            let next = timestamps[i + 1];
394            let actual_interval = next - current;
395
396            // Allow 10% tolerance
397            let tolerance = (expected_interval as f64 * 0.1) as i64;
398            if actual_interval > expected_interval + tolerance {
399                if let (Some(gap_start), Some(gap_end)) = (
400                    DateTime::from_timestamp(current, 0),
401                    DateTime::from_timestamp(next, 0),
402                ) {
403                    gaps.push((gap_start, gap_end));
404                    warn!(
405                        "Gap detected between {} and {} ({} seconds)",
406                        gap_start, gap_end, actual_interval
407                    );
408                }
409            }
410        }
411
412        info!("Detected {} gaps in time series", gaps.len());
413        Ok(gaps)
414    }
415
416    /// Get statistics about the time series
417    #[must_use]
418    pub fn stats(&self) -> TimeSeriesStats {
419        let count = self.len();
420        let loaded_count = self.entries.values().filter(|e| e.is_loaded()).count();
421        let lazy_count = count - loaded_count;
422
423        let (time_range_start, time_range_end) = self
424            .time_range()
425            .map(|(s, e)| (Some(s), Some(e)))
426            .unwrap_or((None, None));
427
428        let avg_cloud_cover = if count > 0 {
429            let sum: f32 = self
430                .entries
431                .values()
432                .filter_map(|e| e.metadata.cloud_cover)
433                .sum();
434            let cloud_count = self
435                .entries
436                .values()
437                .filter(|e| e.metadata.cloud_cover.is_some())
438                .count();
439            if cloud_count > 0 {
440                Some(sum / cloud_count as f32)
441            } else {
442                None
443            }
444        } else {
445            None
446        };
447
448        TimeSeriesStats {
449            count,
450            loaded_count,
451            lazy_count,
452            time_range_start,
453            time_range_end,
454            resolution: self.resolution,
455            avg_cloud_cover,
456        }
457    }
458
459    /// Filter rasters by cloud cover threshold
460    ///
461    /// # Errors
462    /// Returns error if cloud cover threshold is invalid
463    pub fn filter_by_cloud_cover(&mut self, max_cloud_cover: f32) -> Result<usize> {
464        if !(0.0..=100.0).contains(&max_cloud_cover) {
465            return Err(TemporalError::invalid_parameter(
466                "max_cloud_cover",
467                "must be between 0 and 100",
468            ));
469        }
470
471        let original_count = self.len();
472        self.entries.retain(|_, entry| {
473            entry
474                .metadata
475                .cloud_cover
476                .is_none_or(|cc| cc <= max_cloud_cover)
477        });
478
479        let removed = original_count - self.len();
480        info!(
481            "Filtered {} rasters with cloud cover > {}%",
482            removed, max_cloud_cover
483        );
484        Ok(removed)
485    }
486
487    /// Filter rasters by quality score threshold
488    ///
489    /// # Errors
490    /// Returns error if quality threshold is invalid
491    pub fn filter_by_quality(&mut self, min_quality: f32) -> Result<usize> {
492        if !(0.0..=1.0).contains(&min_quality) {
493            return Err(TemporalError::invalid_parameter(
494                "min_quality",
495                "must be between 0 and 1",
496            ));
497        }
498
499        let original_count = self.len();
500        self.entries.retain(|_, entry| {
501            entry
502                .metadata
503                .quality_score
504                .is_none_or(|qs| qs >= min_quality)
505        });
506
507        let removed = original_count - self.len();
508        info!(
509            "Filtered {} rasters with quality < {}",
510            removed, min_quality
511        );
512        Ok(removed)
513    }
514
515    /// Extract pixel time series at specific location
516    ///
517    /// # Errors
518    /// Returns error if coordinates are out of bounds or data not loaded
519    pub fn extract_pixel_timeseries(
520        &self,
521        row: usize,
522        col: usize,
523        band: usize,
524    ) -> Result<Vec<f64>> {
525        let mut values = Vec::with_capacity(self.len());
526
527        for entry in self.entries.values() {
528            let data = entry.data.as_ref().ok_or_else(|| {
529                TemporalError::invalid_input("Data not loaded. Call load_data() first")
530            })?;
531
532            // Validate bounds
533            let shape = data.shape();
534            if row >= shape[0] || col >= shape[1] || band >= shape[2] {
535                return Err(TemporalError::invalid_parameter(
536                    "coordinates",
537                    format!(
538                        "({},{},{}) out of bounds for shape ({},{},{})",
539                        row, col, band, shape[0], shape[1], shape[2]
540                    ),
541                ));
542            }
543
544            values.push(data[[row, col, band]]);
545        }
546
547        Ok(values)
548    }
549
550    /// Extract spatial slice at specific time
551    ///
552    /// # Errors
553    /// Returns error if timestamp not found or data not loaded
554    pub fn extract_spatial_slice(&self, timestamp: &DateTime<Utc>) -> Result<Array3<f64>> {
555        let entry = self.get_at_timestamp(timestamp).ok_or_else(|| {
556            TemporalError::invalid_input(format!("Timestamp {} not found", timestamp))
557        })?;
558
559        entry
560            .data
561            .as_ref()
562            .cloned()
563            .ok_or_else(|| TemporalError::invalid_input("Data not loaded"))
564    }
565
566    /// Extract temporal slice for specific band
567    ///
568    /// # Errors
569    /// Returns error if band is out of bounds or data not loaded
570    pub fn extract_temporal_slice(&self, band: usize) -> Result<Vec<Array2<f64>>> {
571        let mut slices = Vec::with_capacity(self.len());
572
573        for entry in self.entries.values() {
574            let data = entry
575                .data
576                .as_ref()
577                .ok_or_else(|| TemporalError::invalid_input("Data not loaded"))?;
578
579            if band >= data.shape()[2] {
580                return Err(TemporalError::invalid_parameter(
581                    "band",
582                    format!("band {} out of bounds (max: {})", band, data.shape()[2] - 1),
583                ));
584            }
585
586            // Extract 2D slice for this band
587            let slice = data.slice(s![.., .., band]).to_owned();
588            slices.push(slice);
589        }
590
591        Ok(slices)
592    }
593
594    /// Calculate temporal statistics at each pixel
595    ///
596    /// # Errors
597    /// Returns error if data not loaded or insufficient data
598    pub fn pixel_statistics(&self) -> Result<PixelStatistics> {
599        if self.is_empty() {
600            return Err(TemporalError::insufficient_data(
601                "No rasters in time series",
602            ));
603        }
604
605        // Get shape from first entry
606        let first_entry = self
607            .entries
608            .values()
609            .next()
610            .ok_or_else(|| TemporalError::insufficient_data("No entries"))?;
611        let data = first_entry
612            .data
613            .as_ref()
614            .ok_or_else(|| TemporalError::invalid_input("Data not loaded"))?;
615        let (height, width, bands) = (data.shape()[0], data.shape()[1], data.shape()[2]);
616
617        let mut mean = Array3::zeros((height, width, bands));
618        let mut min = Array3::from_elem((height, width, bands), f64::INFINITY);
619        let mut max = Array3::from_elem((height, width, bands), f64::NEG_INFINITY);
620        let mut std_dev = Array3::zeros((height, width, bands));
621
622        let count = self.len() as f64;
623
624        // First pass: calculate mean, min, max
625        for entry in self.entries.values() {
626            let data = entry
627                .data
628                .as_ref()
629                .ok_or_else(|| TemporalError::invalid_input("Data not loaded"))?;
630
631            for i in 0..height {
632                for j in 0..width {
633                    for k in 0..bands {
634                        let val = data[[i, j, k]];
635                        mean[[i, j, k]] += val / count;
636                        if val < min[[i, j, k]] {
637                            min[[i, j, k]] = val;
638                        }
639                        if val > max[[i, j, k]] {
640                            max[[i, j, k]] = val;
641                        }
642                    }
643                }
644            }
645        }
646
647        // Second pass: calculate standard deviation
648        for entry in self.entries.values() {
649            let data = entry
650                .data
651                .as_ref()
652                .ok_or_else(|| TemporalError::invalid_input("Data not loaded"))?;
653
654            for i in 0..height {
655                for j in 0..width {
656                    for k in 0..bands {
657                        let diff = data[[i, j, k]] - mean[[i, j, k]];
658                        std_dev[[i, j, k]] += diff * diff / count;
659                    }
660                }
661            }
662        }
663
664        // Take square root for std_dev
665        std_dev.mapv_inplace(f64::sqrt);
666
667        Ok(PixelStatistics {
668            mean,
669            min,
670            max,
671            std_dev,
672        })
673    }
674
675    /// Get all entries
676    #[must_use]
677    pub fn entries(&self) -> &BTreeMap<i64, TemporalRasterEntry> {
678        &self.entries
679    }
680
681    /// Get mutable entries
682    pub fn entries_mut(&mut self) -> &mut BTreeMap<i64, TemporalRasterEntry> {
683        &mut self.entries
684    }
685
686    /// Iterate over entries in chronological order
687    pub fn iter(&self) -> impl Iterator<Item = (&i64, &TemporalRasterEntry)> {
688        self.entries.iter()
689    }
690}
691
692impl Default for TimeSeriesRaster {
693    fn default() -> Self {
694        Self::new()
695    }
696}
697
698/// Statistics computed across temporal dimension for each pixel
699#[derive(Debug, Clone)]
700pub struct PixelStatistics {
701    /// Mean value over time for each pixel
702    pub mean: Array3<f64>,
703    /// Minimum value over time for each pixel
704    pub min: Array3<f64>,
705    /// Maximum value over time for each pixel
706    pub max: Array3<f64>,
707    /// Standard deviation over time for each pixel
708    pub std_dev: Array3<f64>,
709}
710
711/// Time series statistics
712#[derive(Debug, Clone, Serialize, Deserialize)]
713pub struct TimeSeriesStats {
714    /// Total number of rasters
715    pub count: usize,
716    /// Number of loaded rasters
717    pub loaded_count: usize,
718    /// Number of lazy-loaded rasters
719    pub lazy_count: usize,
720    /// Start of time range
721    pub time_range_start: Option<DateTime<Utc>>,
722    /// End of time range
723    pub time_range_end: Option<DateTime<Utc>>,
724    /// Temporal resolution
725    pub resolution: Option<TemporalResolution>,
726    /// Average cloud cover
727    pub avg_cloud_cover: Option<f32>,
728}