Skip to main content

augurs_forecaster/
transforms.rs

/*!
Data transformations.
*/
4
// Note: the individual transforms are implemented in the submodules
// of this module (e.g. `power` and `scale`).
7
8mod error;
9mod exp;
10pub mod interpolate;
11mod power;
12mod scale;
13
14use std::fmt;
15
16use augurs_core::Forecast;
17
18pub use error::Error;
19pub use exp::{Log, Logit};
20pub use interpolate::{InterpolateExt, LinearInterpolator};
21pub use power::{BoxCox, YeoJohnson};
22pub use scale::{MinMaxScaler, StandardScaleParams, StandardScaler};
23
24/// A transformation pipeline.
25///
26/// A `Pipeline` is a collection of heterogeneous [`Transformer`] instances
27/// that can be applied to a time series. Calling [`Pipeline::fit`] or [`Pipeline::fit_transform`]
28/// will fit each transformation to the output of the previous one in turn
29/// starting by passing the input to the first transformation. The
30/// [`Pipeline::inverse_transform`] can then be used to back-transform data
31/// to the original scale.
32///
33/// The default `Pipeline` contains no transformers and the fit/transform methods
34/// are noops.
35#[derive(Debug, Default)]
36pub struct Pipeline {
37    transformers: Vec<Box<dyn Transformer>>,
38    is_fitted: bool,
39}
40
41impl Pipeline {
42    /// Create a new `Pipeline` with the given transformers.
43    pub fn new(transformers: Vec<Box<dyn Transformer>>) -> Self {
44        Self {
45            transformers,
46            is_fitted: false,
47        }
48    }
49
50    /// Return `true` if the pipeline has been fitted.
51    pub fn is_fitted(&self) -> bool {
52        self.is_fitted
53    }
54
55    // Helper function for actually doing the fit then transform steps.
56    fn fit_transform_inner(&mut self, input: &mut [f64]) -> Result<(), Error> {
57        for t in self.transformers.iter_mut() {
58            t.fit_transform(input)?;
59        }
60        self.is_fitted = true;
61        Ok(())
62    }
63
64    /// Apply the inverse transformations to the given forecast.
65    ///
66    /// # Errors
67    ///
68    /// This function will return an error if the pipeline has not been fitted.
69    pub(crate) fn inverse_transform_forecast(&self, forecast: &mut Forecast) -> Result<(), Error> {
70        for t in self.transformers.iter().rev() {
71            t.inverse_transform(&mut forecast.point)?;
72            if let Some(intervals) = forecast.intervals.as_mut() {
73                t.inverse_transform(&mut intervals.lower)?;
74                t.inverse_transform(&mut intervals.upper)?;
75            }
76        }
77        Ok(())
78    }
79}
80
81impl Transformer for Pipeline {
82    /// Fit the transformations to the given time series.
83    ///
84    /// Prefer `fit_transform` if possible, as it avoids copying the input.
85    fn fit(&mut self, input: &[f64]) -> Result<(), Error> {
86        if self.transformers.is_empty() {
87            return Ok(());
88        }
89        // Copy the input to avoid mutating the original.
90        // We need to do this so we can call `fit_transform` on each
91        // transformation in the pipeline without mutating the input.
92        // This is required because each transformation needs to be
93        // fit after previous transformations have been applied.
94        let mut input = input.to_vec();
95        // Reuse `fit_transform_inner`, and just discard the result.
96        self.fit_transform_inner(&mut input)?;
97        Ok(())
98    }
99
100    /// Fit and transform the given time series.
101    ///
102    /// This is equivalent to calling `fit` and then `transform` on the pipeline,
103    /// but is more efficient because it avoids copying the input.
104    fn fit_transform(&mut self, input: &mut [f64]) -> Result<(), Error> {
105        self.fit_transform_inner(input)?;
106        Ok(())
107    }
108
109    /// Apply the fitted transformations to the given time series.
110    ///
111    /// # Errors
112    ///
113    /// This function will return an error if the pipeline has not been fitted.
114    fn transform(&self, input: &mut [f64]) -> Result<(), Error> {
115        for t in self.transformers.iter() {
116            t.transform(input)?;
117        }
118        Ok(())
119    }
120
121    /// Apply the inverse transformations to the given time series.
122    ///
123    /// # Errors
124    ///
125    /// This function will return an error if the pipeline has not been fitted.
126    fn inverse_transform(&self, input: &mut [f64]) -> Result<(), Error> {
127        for t in self.transformers.iter().rev() {
128            t.inverse_transform(input)?;
129        }
130        Ok(())
131    }
132}
133
134/// A transformation that can be applied to a time series.
135pub trait Transformer: fmt::Debug + Sync + Send {
136    /// Fit the transformation to the given time series.
137    ///
138    /// For example, for a min-max scaler, this would find
139    /// the min and max of the provided data and store it on the
140    /// scaler ready for use in transforming and back-transforming.
141    fn fit(&mut self, data: &[f64]) -> Result<(), Error>;
142
143    /// Apply the transformation to the given time series.
144    ///
145    /// # Errors
146    ///
147    /// This function should return an error if the transform has not been fitted,
148    /// and may return other errors specific to the implementation.
149    fn transform(&self, data: &mut [f64]) -> Result<(), Error>;
150
151    /// Apply the inverse transformation to the given time series.
152    ///
153    /// # Errors
154    ///
155    /// This function should return an error if the transform has not been fitted,
156    /// and may return other errors specific to the implementation.
157    fn inverse_transform(&self, data: &mut [f64]) -> Result<(), Error>;
158
159    /// Fit the transformation to the given time series and then apply it.
160    ///
161    /// The default implementation just calls [`Self::fit`] then [`Self::transform`]
162    /// but it can be overridden to be more efficient if desired.
163    fn fit_transform(&mut self, data: &mut [f64]) -> Result<(), Error> {
164        self.fit(data)?;
165        self.transform(data)?;
166        Ok(())
167    }
168
169    /// Create a boxed version of the transformation.
170    ///
171    /// This is useful for creating a `Transform` instance that can be used as
172    /// part of a [`Pipeline`].
173    fn boxed(self) -> Box<dyn Transformer>
174    where
175        Self: Sized + 'static,
176    {
177        Box::new(self)
178    }
179}