light_curve_feature/features/
bins.rs

1use crate::evaluator::*;
2use crate::extractor::FeatureExtractor;
3
4use itertools::Itertools;
5use unzip3::Unzip3;
6
7macro_const! {
8    const DOC: &str = r"
9Sampled time series meta-feature
10
11Binning time series to bins with width $\mathrm{window}$ with respect to some $\mathrm{offset}$.
12$j-th$ bin interval is
13$[j \cdot \mathrm{window} + \mathrm{offset}; (j + 1) \cdot \mathrm{window} + \mathrm{offset})$.
14Binned time series is defined by
15$$
16t_j^* = (j + \frac12) \cdot \mathrm{window} + \mathrm{offset},
17$$
18$$
19m_j^* = \frac{\sum{m_i / \delta_i^2}}{\sum{\delta_i^{-2}}},
20$$
21$$
22\delta_j^* = \frac{N_j}{\sum{\delta_i^{-2}}},
23$$
24where $N_j$ is a number of sampling observations and all sums are over observations inside
25considering bin. Bins takes any other feature evaluators to extract features from sample time series
26
27- Depends on: **time**, **magnitude**, **magnitude error**
28- Minimum number of observations: as required by sub-features, but at least **1**
29- Number of features: as provided by sub-features
30";
31}
32
33#[doc = DOC!()]
34#[derive(Clone, Debug, Serialize, Deserialize)]
35#[serde(
36    into = "BinsParameters<T, F>",
37    from = "BinsParameters<T, F>",
38    bound = "T: Float, F: FeatureEvaluator<T>"
39)]
40pub struct Bins<T, F>
41where
42    T: Float,
43    F: FeatureEvaluator<T>,
44{
45    window: T,
46    offset: T,
47    feature_extractor: FeatureExtractor<T, F>,
48    properties: Box<EvaluatorProperties>,
49}
50
51impl<T, F> Bins<T, F>
52where
53    T: Float,
54    F: FeatureEvaluator<T>,
55{
56    pub fn new(window: T, offset: T) -> Self {
57        assert!(window.is_sign_positive(), "window must be positive");
58        let info = EvaluatorInfo {
59            size: 0,
60            min_ts_length: 1,
61            t_required: true,
62            m_required: true,
63            w_required: true,
64            sorting_required: true,
65        };
66        Self {
67            properties: EvaluatorProperties {
68                info,
69                names: vec![],
70                descriptions: vec![],
71            }
72            .into(),
73            window,
74            offset,
75            feature_extractor: FeatureExtractor::new(vec![]),
76        }
77    }
78
79    pub fn set_window(&mut self, window: T) -> &mut Self {
80        assert!(window.is_sign_positive(), "window must be positive");
81        self.window = window;
82        self
83    }
84
85    pub fn set_offset(&mut self, offset: T) -> &mut Self {
86        self.offset = offset;
87        self
88    }
89
90    /// Extend a feature to extract from binned time series
91    pub fn add_feature(&mut self, feature: F) -> &mut Self {
92        let window = self.window;
93        let offset = self.offset;
94        self.properties.info.size += feature.size_hint();
95        self.properties.info.min_ts_length =
96            usize::max(self.properties.info.min_ts_length, feature.min_ts_length());
97        self.properties.names.extend(
98            feature
99                .get_names()
100                .iter()
101                .map(|name| format!("bins_window{:.1}_offset{:.1}_{}", window, offset, name)),
102        );
103        self.properties
104            .descriptions
105            .extend(feature.get_descriptions().iter().map(|desc| {
106                format!(
107                    "{desc} for binned time-series with window {window} and offset {offset}",
108                    desc = desc,
109                    window = window,
110                    offset = offset,
111                )
112            }));
113        self.feature_extractor.add_feature(feature);
114        self
115    }
116
117    #[inline]
118    pub fn default_window() -> T {
119        T::one()
120    }
121
122    #[inline]
123    pub fn default_offset() -> T {
124        T::zero()
125    }
126}
127
128impl<T, F> Bins<T, F>
129where
130    T: Float,
131    F: FeatureEvaluator<T>,
132{
133    pub const fn doc() -> &'static str {
134        DOC
135    }
136
137    fn transform_ts(&self, ts: &mut TimeSeries<T>) -> Result<TmwArrays<T>, EvaluatorError> {
138        self.check_ts_length(ts)?;
139        let (t, m, w): (Vec<_>, Vec<_>, Vec<_>) =
140            ts.t.as_slice()
141                .iter()
142                .copied()
143                .zip(ts.m.as_slice().iter().copied())
144                .zip(ts.w.as_slice().iter().copied())
145                .map(|((t, m), w)| (t, m, w))
146                .chunk_by(|(t, _, _)| ((*t - self.offset) / self.window).floor())
147                .into_iter()
148                .map(|(x, chunk)| {
149                    let bin_t = (x + T::half()) * self.window;
150                    let (n, bin_m, norm) = chunk
151                        .fold((T::zero(), T::zero(), T::zero()), |acc, (_, m, w)| {
152                            (acc.0 + T::one(), acc.1 + m * w, acc.2 + w)
153                        });
154                    let bin_m = bin_m / norm;
155                    let bin_w = norm / n;
156                    (bin_t, bin_m, bin_w)
157                })
158                .unzip3();
159        Ok(TmwArrays {
160            t: t.into(),
161            m: m.into(),
162            w: w.into(),
163        })
164    }
165}
166
167impl<T, F> Default for Bins<T, F>
168where
169    T: Float,
170    F: FeatureEvaluator<T>,
171{
172    fn default() -> Self {
173        Self::new(Self::default_window(), Self::default_offset())
174    }
175}
176
177impl<T, F> EvaluatorInfoTrait for Bins<T, F>
178where
179    T: Float,
180    F: FeatureEvaluator<T>,
181{
182    fn get_info(&self) -> &EvaluatorInfo {
183        &self.properties.info
184    }
185}
186impl<T, F> FeatureNamesDescriptionsTrait for Bins<T, F>
187where
188    T: Float,
189    F: FeatureEvaluator<T>,
190{
191    fn get_names(&self) -> Vec<&str> {
192        self.properties.names.iter().map(String::as_str).collect()
193    }
194
195    fn get_descriptions(&self) -> Vec<&str> {
196        self.properties
197            .descriptions
198            .iter()
199            .map(String::as_str)
200            .collect()
201    }
202}
203
204impl<T, F> FeatureEvaluator<T> for Bins<T, F>
205where
206    T: Float,
207    F: FeatureEvaluator<T>,
208{
209    transformer_eval!();
210}
211
212#[derive(Serialize, Deserialize, JsonSchema)]
213#[serde(rename = "Bins", bound = "T: Float, F: FeatureEvaluator<T>")]
214struct BinsParameters<T, F>
215where
216    T: Float,
217    F: FeatureEvaluator<T>,
218{
219    window: T,
220    offset: T,
221    feature_extractor: FeatureExtractor<T, F>,
222}
223
224impl<T, F> From<Bins<T, F>> for BinsParameters<T, F>
225where
226    T: Float,
227    F: FeatureEvaluator<T>,
228{
229    fn from(f: Bins<T, F>) -> Self {
230        Self {
231            window: f.window,
232            offset: f.offset,
233            feature_extractor: f.feature_extractor,
234        }
235    }
236}
237
238impl<T, F> From<BinsParameters<T, F>> for Bins<T, F>
239where
240    T: Float,
241    F: FeatureEvaluator<T>,
242{
243    fn from(p: BinsParameters<T, F>) -> Self {
244        let mut bins = Self::new(p.window, p.offset);
245        p.feature_extractor
246            .get_features()
247            .iter()
248            .cloned()
249            .for_each(|feature| {
250                bins.add_feature(feature);
251            });
252        bins
253    }
254}
255
256impl<T, F> JsonSchema for Bins<T, F>
257where
258    T: Float,
259    F: FeatureEvaluator<T>,
260{
261    json_schema!(BinsParameters<T, F>, false);
262}
263
#[cfg(test)]
#[allow(clippy::unreadable_literal)]
#[allow(clippy::excessive_precision)]
mod tests {
    use super::*;
    use crate::features::{Amplitude, EtaE, LinearFit};
    use crate::tests::*;

    serialization_name_test!(Bins<f64, Feature<f64>>);

    serde_json_test!(
        bins_ser_json_de,
        Bins<f64, Feature<f64>>,
        {
            let mut bins = Bins::default();
            bins.add_feature(Amplitude::default().into());
            bins
        },
    );

    eval_info_test!(bins_with_amplitude_info, {
        let mut bins = Bins::default();
        bins.add_feature(Amplitude::default().into());
        bins
    });

    #[test]
    fn bins_with_eta_e_info() {
        // The window is tiny, so every observation ends up in its own bin and
        // the binned series has as many points as the input.
        let mut bins = Bins::new(1e-100, 0.0);
        bins.add_feature(EtaE::default().into());
        let eval = bins.into();

        eval_info_tests(
            eval,  // feature
            true,  // test_min_ts_length. No observations are merged, so min_ts_length must be exact
            true,  // test_t_required. Wrong times must give wrong answers
            true,  // m_required. EtaE needs magnitudes
            false, // test_w_required. Nothing is merged, so weights don't affect the result and the test would fail
            true,  // test_sorting_required. Sorting is essential for binning
        );
    }

    #[test]
    fn bins_with_linear_fit_info() {
        // The window is comparable with the sampling, so observations are
        // actually merged into bins.
        let mut bins = Bins::new(1.0, 0.0);
        bins.add_feature(LinearFit::default().into());
        let eval = bins.into();

        eval_info_tests(
            eval,  // feature
            false, // test_min_ts_length. Observations are merged, so min_ts_length is only a lower limit
            true,  // test_t_required. Wrong times must give wrong answers
            true,  // m_required. Magnitudes are binned and passed to the sub-feature
            true,  // test_w_required. Observations are merged, so weights enter the binned magnitudes
            true,  // test_sorting_required. Sorting is essential for binning
        );
    }

    check_doc_static_method!(bins_doc_static_method, Bins<f64, Feature<f64>>);

    check_finite!(check_values_finite, {
        let mut bins: Bins<_, Feature<_>> = Bins::default();
        bins.add_feature(Amplitude::default().into());
        bins.add_feature(EtaE::default().into());
        bins
    });

    #[test]
    fn bins() {
        let t = [0.0_f32, 1.0, 1.1, 1.2, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 5.0];
        let m = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let w = [10.0, 5.0, 10.0, 5.0, 10.0, 5.0, 10.0, 5.0, 10.0, 5.0, 10.0];
        let mut ts = TimeSeries::new(&t, &m, &w);

        // Unit-width bins with zero offset: [0, 1), [1, 2), [2, 3) and [5, 6).
        let expected_t = [0.5, 1.5, 2.5, 5.5];
        let expected_m = [0.0, 2.0, 6.333333333333333, 10.0];
        let expected_w = [10.0, 6.666666666666667, 7.5, 10.0];

        let feature: Bins<_, Feature<_>> = Bins::new(1.0, 0.0);
        let binned = feature.transform_ts(&mut ts).unwrap();

        assert_eq!(binned.t.len(), binned.m.len());
        assert_eq!(binned.t.len(), binned.w.len());
        all_close(binned.t.as_slice().unwrap(), &expected_t, 1e-6);
        all_close(binned.m.as_slice().unwrap(), &expected_m, 1e-6);
        all_close(binned.w.as_slice().unwrap(), &expected_w, 1e-6);
    }

    #[test]
    fn bins_windows_and_offsets() {
        let t = [0.0_f32, 1.0, 1.1, 1.2, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 5.0];
        let m = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
        let mut ts = TimeSeries::new_without_weight(&t, &m);

        // Number of non-empty bins produced for the given window and offset.
        let mut bin_count = |window, offset| {
            let binned = Bins::<_, Feature<_>>::new(window, offset)
                .transform_ts(&mut ts)
                .unwrap();
            assert_eq!(binned.t.len(), binned.m.len());
            assert_eq!(binned.m.len(), binned.w.len());
            binned.t.len()
        };

        assert_eq!(bin_count(2.0, 0.0), 3);
        assert_eq!(bin_count(3.0, 0.0), 2);
        assert_eq!(bin_count(10.0, 0.0), 1);
        assert_eq!(bin_count(1.0, 0.1), 5);
        assert_eq!(bin_count(1.0, 0.5), 5);
        assert_eq!(bin_count(2.0, 1.0), 3);
    }

    // TODO: add more Bins::get_info() tests for non-trivial cases
}