downsample_oxide/
lib.rs

1// MIT License
2
3// Copyright (c) 2017 Jerome Froelich
4
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23// ------------------------------------------------------ \\
24// ADAPTED FROM https://github.com/jeromefroe/lttb-rs
25// ------------------------------------------------------ //
26
27use rust_decimal::Decimal;
28//
29#[cfg(all(not(feature = "time"), feature = "chrono"))]
30use chrono_crate as chrono;
31#[cfg(all(feature = "time", not(feature = "chrono")))]
32use time_crate as time;
33
34/// DataPoint
35///
36/// Struct used to represent a single datapoint in a time series.
37#[derive(Debug, PartialEq, Clone, Copy)]
38pub struct DataPoint {
39    x: Decimal,
40    y: Decimal,
41}
42
43impl DataPoint {
44    pub fn new(x: impl Into<std::time::SystemTime>, y: Decimal) -> Self {
45        DataPoint {
46            // convert from anything that impls SystemTime to UNIX epoch as seconds, then into Decimal for arithmetic reasons
47            x: x.into()
48                .duration_since(std::time::SystemTime::UNIX_EPOCH)
49                .unwrap()
50                .as_secs()
51                .into(),
52            y,
53        }
54    }
55}
56
57#[derive(Debug, PartialEq, Clone, Copy)]
58pub struct DataOutput {
59    #[cfg(all(not(feature = "time"), feature = "chrono"))]
60    pub x: chrono::DateTime<chrono::Utc>,
61    #[cfg(all(feature = "time", not(feature = "chrono")))]
62    pub x: time::OffsetDateTime,
63    #[cfg(all(not(feature = "time"), not(feature = "chrono")))]
64    pub x: std::time::SystemTime,
65    pub y: Decimal,
66}
67impl From<DataPoint> for DataOutput {
68    fn from(value: DataPoint) -> Self {
69        #[allow(unused_variables)]
70        let systemtime = std::time::UNIX_EPOCH + std::time::Duration::from_secs(value.x.try_into().unwrap());
71        Self {
72            #[cfg(all(not(feature = "time"), feature = "chrono"))]
73            x: systemtime.try_into().unwrap(),
74            #[cfg(all(feature = "time", not(feature = "chrono")))]
75            x: systemtime.try_into().unwrap(),
76            #[cfg(all(not(feature = "time"), not(feature = "chrono")))]
77            x: systemtime,
78            y: value.y,
79        }
80    }
81}
82
83pub trait Lttb {
84    fn downsample(self, threshold: usize) -> Vec<DataOutput>;
85}
86impl Lttb for Vec<DataPoint> {
87    fn downsample(self, threshold: usize) -> Vec<DataOutput> {
88        if threshold >= self.len() || threshold == 0 {
89            // Nothing to do.
90            return self.into_iter().map(Into::into).collect();
91        }
92
93        let mut sampled = Vec::with_capacity(threshold);
94
95        // Bucket size. Leave room for start and end data points.
96        let every = Decimal::from(self.len() - 2) / (Decimal::from(threshold - 2));
97
98        // Initially a is the first point in the triangle.
99        let mut a = 0;
100
101        // Always add the first point.
102        sampled.push(self[a]);
103
104        for i in 0..threshold - 2 {
105            // Calculate point average for next bucket (containing c).
106            let mut avg_x = Decimal::from(0);
107            let mut avg_y = Decimal::from(0);
108
109            let avg_range_start = (i + 1) * (usize::try_from(every).unwrap()) + 1;
110
111            let mut end = ((i + 2) * usize::try_from(every).unwrap()) + 1;
112            if end >= self.len() {
113                end = self.len();
114            }
115            let avg_range_end = end;
116
117            let avg_range_length = avg_range_end - avg_range_start;
118
119            for i in 0..(avg_range_end - avg_range_start) {
120                let idx = avg_range_start + i;
121                avg_x += self[idx].x;
122                avg_y += self[idx].y;
123            }
124            avg_x /= Decimal::from(avg_range_length);
125            avg_y /= Decimal::from(avg_range_length);
126
127            // Get the range for this bucket.
128            let range_offs: usize = ((i) * usize::try_from(every).unwrap()) + 1;
129            let range_to: usize = ((i + 1) * usize::try_from(every).unwrap()) + 1;
130
131            // Point a.
132            let point_a_x = self[a].x;
133            let point_a_y = self[a].y;
134
135            let mut max_area = Decimal::from(-1);
136            let mut next_a = range_offs;
137            for i in 0..(range_to - range_offs) {
138                let idx = range_offs + i;
139
140                // Calculate triangle area over three buckets.
141                let area = ((point_a_x - avg_x) * (self[idx].y - point_a_y)
142                    - (point_a_x - self[idx].x) * (avg_y - point_a_y))
143                    .abs()
144                    * Decimal::try_from(0.5).unwrap();
145                if area > max_area {
146                    max_area = area;
147                    next_a = idx; // Next a is this b.
148                }
149            }
150
151            sampled.push(self[next_a]); // Pick this point from the bucket.
152            a = next_a; // This a is the next a (chosen b).
153        }
154
155        // Always add the last point.
156        sampled.push(self[self.len() - 1]);
157
158        sampled.into_iter().map(Into::into).collect()
159    }
160}
161
#[cfg(test)]
mod tests {
    use chrono_crate::*;

    use super::*;

    /// First day of the given month of 2022 at `NaiveTime::default()`, in UTC.
    fn first_day_of_month(month_num: u32) -> DateTime<Utc> {
        let date = NaiveDate::from_ymd_opt(2022, month_num, 1).unwrap();
        date.and_time(NaiveTime::default())
            .and_local_timezone(Utc)
            .unwrap()
    }

    /// Builds one data point per `(month, value)` pair, in the given order.
    fn monthly_series(samples: &[(u32, i32)]) -> Vec<DataPoint> {
        samples
            .iter()
            .map(|&(month, value)| DataPoint::new(first_day_of_month(month), Decimal::from(value)))
            .collect()
    }

    /// Same as `monthly_series`, converted into the output representation.
    fn monthly_output(samples: &[(u32, i32)]) -> Vec<DataOutput> {
        monthly_series(samples).into_iter().map(Into::into).collect()
    }

    #[test]
    fn lttb_test_5() {
        let dps = monthly_series(&[(1, 10), (2, 12), (3, 8), (4, 10), (5, 12)]);

        let expected = monthly_output(&[(1, 10), (3, 8), (5, 12)]);

        assert_eq!(expected, dps.downsample(3));
    }

    #[test]
    fn lttb_test_12() {
        let dps = monthly_series(&[
            (1, 10),
            (2, 12),
            (3, 8),
            (4, 10),
            (5, 12),
            (6, 10),
            (7, 12),
            (8, 8),
            (9, 10),
            (10, 12),
            (11, 12),
            (12, 12),
        ]);

        let expected = monthly_output(&[(1, 10), (3, 8), (7, 12), (12, 12)]);

        assert_eq!(expected, dps.downsample(4));
    }
}