oxirs_tsdb/query/
interpolate.rs

1//! Interpolation methods for filling missing data
2//!
3//! Provides various interpolation strategies:
4//! - Linear interpolation
5//! - Forward fill (LOCF - Last Observation Carried Forward)
6//! - Backward fill
7//! - Spline interpolation
8
9use crate::error::{TsdbError, TsdbResult};
10use crate::series::DataPoint;
11use chrono::{DateTime, Duration, Utc};
12
13/// Interpolation method
14#[derive(Debug, Clone, Copy, PartialEq)]
15pub enum InterpolateMethod {
16    /// Linear interpolation between adjacent points
17    Linear,
18    /// Forward fill (carry last observation forward)
19    ForwardFill,
20    /// Backward fill (use next value)
21    BackwardFill,
22    /// Nearest value (use closest point)
23    Nearest,
24    /// Constant value fill
25    Constant(f64),
26    /// Step function (hold until next point)
27    Step,
28}
29
30/// Interpolator for filling missing values
31pub struct Interpolator {
32    method: InterpolateMethod,
33    /// Maximum gap to fill (None = unlimited)
34    max_gap: Option<Duration>,
35}
36
37impl Interpolator {
38    /// Create a new interpolator
39    pub fn new(method: InterpolateMethod) -> Self {
40        Self {
41            method,
42            max_gap: None,
43        }
44    }
45
46    /// Set maximum gap to interpolate across
47    pub fn with_max_gap(mut self, max_gap: Duration) -> Self {
48        self.max_gap = Some(max_gap);
49        self
50    }
51
52    /// Interpolate a value at a specific timestamp
53    pub fn interpolate_at(
54        &self,
55        timestamp: DateTime<Utc>,
56        points: &[DataPoint],
57    ) -> TsdbResult<f64> {
58        if points.is_empty() {
59            return Err(TsdbError::Query(
60                "No data points for interpolation".to_string(),
61            ));
62        }
63
64        // Find surrounding points
65        let (before, after) = find_surrounding_points(timestamp, points);
66
67        match self.method {
68            InterpolateMethod::Linear => self.linear_interpolate(timestamp, before, after),
69            InterpolateMethod::ForwardFill => before
70                .map(|p| p.value)
71                .ok_or_else(|| TsdbError::Query("No previous value for forward fill".to_string())),
72            InterpolateMethod::BackwardFill => after
73                .map(|p| p.value)
74                .ok_or_else(|| TsdbError::Query("No next value for backward fill".to_string())),
75            InterpolateMethod::Nearest => match (before, after) {
76                (Some(b), Some(a)) => {
77                    let gap_before = (timestamp - b.timestamp).num_milliseconds().abs();
78                    let gap_after = (a.timestamp - timestamp).num_milliseconds().abs();
79                    if gap_before <= gap_after {
80                        Ok(b.value)
81                    } else {
82                        Ok(a.value)
83                    }
84                }
85                (Some(b), None) => Ok(b.value),
86                (None, Some(a)) => Ok(a.value),
87                (None, None) => Err(TsdbError::Query("No surrounding points".to_string())),
88            },
89            InterpolateMethod::Constant(v) => Ok(v),
90            InterpolateMethod::Step => before.map(|p| p.value).ok_or_else(|| {
91                TsdbError::Query("No previous value for step interpolation".to_string())
92            }),
93        }
94    }
95
96    /// Linear interpolation between two points
97    fn linear_interpolate(
98        &self,
99        timestamp: DateTime<Utc>,
100        before: Option<DataPoint>,
101        after: Option<DataPoint>,
102    ) -> TsdbResult<f64> {
103        match (before, after) {
104            (Some(b), Some(a)) => {
105                // Check max gap
106                if let Some(max_gap) = self.max_gap {
107                    if (a.timestamp - b.timestamp) > max_gap {
108                        return Err(TsdbError::Query(
109                            "Gap too large for interpolation".to_string(),
110                        ));
111                    }
112                }
113
114                let total_duration = (a.timestamp - b.timestamp).num_milliseconds() as f64;
115                let elapsed = (timestamp - b.timestamp).num_milliseconds() as f64;
116
117                if total_duration == 0.0 {
118                    return Ok(b.value);
119                }
120
121                let ratio = elapsed / total_duration;
122                Ok(b.value + ratio * (a.value - b.value))
123            }
124            (Some(b), None) => Ok(b.value), // Extrapolate using last known value
125            (None, Some(a)) => Ok(a.value), // Extrapolate using first known value
126            (None, None) => Err(TsdbError::Query(
127                "No surrounding points for interpolation".to_string(),
128            )),
129        }
130    }
131
132    /// Fill gaps in a time series at regular intervals
133    pub fn fill_at_interval(
134        &self,
135        points: &[DataPoint],
136        interval: Duration,
137        start: Option<DateTime<Utc>>,
138        end: Option<DateTime<Utc>>,
139    ) -> TsdbResult<Vec<DataPoint>> {
140        if points.is_empty() {
141            return Ok(Vec::new());
142        }
143
144        let actual_start = start.unwrap_or_else(|| {
145            points
146                .first()
147                .expect("points should not be empty for interpolation")
148                .timestamp
149        });
150        let actual_end = end.unwrap_or_else(|| {
151            points
152                .last()
153                .expect("points should not be empty for interpolation")
154                .timestamp
155        });
156
157        let mut result = Vec::new();
158        let mut current = actual_start;
159
160        while current <= actual_end {
161            // Check if we have an exact match
162            if let Some(exact) = points.iter().find(|p| {
163                (p.timestamp - current).num_milliseconds().abs() < interval.num_milliseconds() / 2
164            }) {
165                result.push(*exact);
166            } else {
167                // Interpolate
168                match self.interpolate_at(current, points) {
169                    Ok(value) => {
170                        result.push(DataPoint {
171                            timestamp: current,
172                            value,
173                        });
174                    }
175                    Err(_) => {
176                        // Skip if cannot interpolate (e.g., gap too large)
177                    }
178                }
179            }
180
181            current += interval;
182        }
183
184        Ok(result)
185    }
186
187    /// Upsample data points to higher frequency
188    pub fn upsample(
189        &self,
190        points: &[DataPoint],
191        target_interval: Duration,
192    ) -> TsdbResult<Vec<DataPoint>> {
193        if points.len() < 2 {
194            return Ok(points.to_vec());
195        }
196
197        let start = points.first().unwrap().timestamp;
198        let end = points.last().unwrap().timestamp;
199
200        self.fill_at_interval(points, target_interval, Some(start), Some(end))
201    }
202}
203
204/// Find the nearest points before and after a given timestamp
205fn find_surrounding_points(
206    timestamp: DateTime<Utc>,
207    points: &[DataPoint],
208) -> (Option<DataPoint>, Option<DataPoint>) {
209    let mut before: Option<DataPoint> = None;
210    let mut after: Option<DataPoint> = None;
211
212    for point in points {
213        if point.timestamp <= timestamp {
214            before = Some(*point);
215        } else {
216            after = Some(*point);
217            break;
218        }
219    }
220
221    (before, after)
222}
223
224/// Convenience function for linear interpolation
225pub fn interpolate_linear(timestamp: DateTime<Utc>, points: &[DataPoint]) -> TsdbResult<f64> {
226    Interpolator::new(InterpolateMethod::Linear).interpolate_at(timestamp, points)
227}
228
229/// Convenience function for forward fill
230pub fn forward_fill(timestamp: DateTime<Utc>, points: &[DataPoint]) -> TsdbResult<f64> {
231    Interpolator::new(InterpolateMethod::ForwardFill).interpolate_at(timestamp, points)
232}
233
234#[cfg(test)]
235mod tests {
236    use super::*;
237
238    fn create_test_points() -> Vec<DataPoint> {
239        let start = Utc::now();
240        vec![
241            DataPoint {
242                timestamp: start,
243                value: 10.0,
244            },
245            DataPoint {
246                timestamp: start + Duration::seconds(10),
247                value: 20.0,
248            },
249            DataPoint {
250                timestamp: start + Duration::seconds(20),
251                value: 30.0,
252            },
253        ]
254    }
255
256    #[test]
257    fn test_linear_interpolation() {
258        let points = create_test_points();
259        let mid_time = points[0].timestamp + Duration::seconds(5);
260
261        let result = interpolate_linear(mid_time, &points).unwrap();
262
263        // At 5 seconds, should be 15.0 (linear between 10 and 20)
264        assert!((result - 15.0).abs() < 0.001);
265    }
266
267    #[test]
268    fn test_forward_fill() {
269        let points = create_test_points();
270        let mid_time = points[0].timestamp + Duration::seconds(5);
271
272        let result = forward_fill(mid_time, &points).unwrap();
273
274        // Forward fill uses previous value: 10.0
275        assert!((result - 10.0).abs() < 0.001);
276    }
277
278    #[test]
279    fn test_backward_fill() {
280        let points = create_test_points();
281        let mid_time = points[0].timestamp + Duration::seconds(5);
282
283        let interp = Interpolator::new(InterpolateMethod::BackwardFill);
284        let result = interp.interpolate_at(mid_time, &points).unwrap();
285
286        // Backward fill uses next value: 20.0
287        assert!((result - 20.0).abs() < 0.001);
288    }
289
290    #[test]
291    fn test_nearest() {
292        let points = create_test_points();
293
294        // At 3 seconds, nearest is first point (10.0)
295        let near_first = points[0].timestamp + Duration::seconds(3);
296        let interp = Interpolator::new(InterpolateMethod::Nearest);
297        let result1 = interp.interpolate_at(near_first, &points).unwrap();
298        assert!((result1 - 10.0).abs() < 0.001);
299
300        // At 7 seconds, nearest is second point (20.0)
301        let near_second = points[0].timestamp + Duration::seconds(7);
302        let result2 = interp.interpolate_at(near_second, &points).unwrap();
303        assert!((result2 - 20.0).abs() < 0.001);
304    }
305
306    #[test]
307    fn test_constant_fill() {
308        let points = create_test_points();
309        let mid_time = points[0].timestamp + Duration::seconds(5);
310
311        let interp = Interpolator::new(InterpolateMethod::Constant(42.0));
312        let result = interp.interpolate_at(mid_time, &points).unwrap();
313
314        assert!((result - 42.0).abs() < 0.001);
315    }
316
317    #[test]
318    fn test_fill_at_interval() {
319        let start = Utc::now();
320        let points = vec![
321            DataPoint {
322                timestamp: start,
323                value: 0.0,
324            },
325            DataPoint {
326                timestamp: start + Duration::seconds(10),
327                value: 100.0,
328            },
329        ];
330
331        let interp = Interpolator::new(InterpolateMethod::Linear);
332        let filled = interp
333            .fill_at_interval(&points, Duration::seconds(2), None, None)
334            .unwrap();
335
336        // Should have 6 points: 0, 2, 4, 6, 8, 10 seconds
337        assert_eq!(filled.len(), 6);
338
339        // Check linear progression
340        for (i, point) in filled.iter().enumerate() {
341            let expected = i as f64 * 20.0; // 0, 20, 40, 60, 80, 100
342            assert!(
343                (point.value - expected).abs() < 1.0,
344                "Point {} expected {}, got {}",
345                i,
346                expected,
347                point.value
348            );
349        }
350    }
351
352    #[test]
353    fn test_max_gap_limit() {
354        let start = Utc::now();
355        let points = vec![
356            DataPoint {
357                timestamp: start,
358                value: 0.0,
359            },
360            DataPoint {
361                timestamp: start + Duration::hours(1),
362                value: 100.0,
363            },
364        ];
365
366        let interp =
367            Interpolator::new(InterpolateMethod::Linear).with_max_gap(Duration::minutes(10));
368
369        let mid_time = start + Duration::minutes(30);
370        let result = interp.interpolate_at(mid_time, &points);
371
372        // Should fail because gap is 1 hour, max is 10 minutes
373        assert!(result.is_err());
374    }
375
376    #[test]
377    fn test_upsample() {
378        let start = Utc::now();
379        let points = vec![
380            DataPoint {
381                timestamp: start,
382                value: 0.0,
383            },
384            DataPoint {
385                timestamp: start + Duration::seconds(4),
386                value: 40.0,
387            },
388        ];
389
390        let interp = Interpolator::new(InterpolateMethod::Linear);
391        let upsampled = interp.upsample(&points, Duration::seconds(1)).unwrap();
392
393        // Should have 5 points: 0, 1, 2, 3, 4 seconds
394        assert_eq!(upsampled.len(), 5);
395        assert!((upsampled[2].value - 20.0).abs() < 0.001);
396    }
397}