Skip to main content

scirs2/
series.rs

1//! Python bindings for scirs2-series using PyO3
2//!
3//! This module provides Python bindings for seamless integration with pandas,
4//! statsmodels, and other Python time series analysis libraries.
5
6use pyo3::prelude::*;
7use pyo3::types::{PyAny, PyDict, PyType};
8
9// NumPy types for Python array interface (scirs2-numpy with native ndarray 0.17)
10use scirs2_numpy::{IntoPyArray, PyArray1, PyReadonlyArray1, ToPyArray};
11
12// ndarray types from scirs2-core
13use scirs2_core::Array1;
14
15// Direct imports from scirs2-series (native ndarray 0.17 support)
16use scirs2_series::arima_models::ArimaModel;
17use scirs2_series::decomposition::stl::stl_decomposition;
18use scirs2_series::transformations::{adf_test, box_cox_transform, inverse_box_cox_transform};
19use scirs2_series::utils::{difference_series, seasonal_difference_series};
20
21use std::collections::HashMap;
22
/// Python wrapper for time series data.
///
/// Bundles the observed values with optional timestamps and an optional
/// frequency, and is exposed to Python through `#[pyclass]`.
///
/// NOTE(review): `from_py_object` presumably opts this `Clone` type into
/// by-value extraction from Python objects — confirm against the PyO3 version
/// in use.
#[pyclass(from_py_object)]
#[derive(Clone, Debug)]
pub struct PyTimeSeries {
    /// Observed values of the series.
    pub(crate) values: Array1<f64>,
    /// Optional timestamps; `None` when the series was built without them.
    /// Nothing in this type checks that the length matches `values`.
    pub(crate) timestamps: Option<Array1<f64>>,
    /// Optional frequency; starts as `None` and is filled in by `set_frequency`.
    pub(crate) frequency: Option<f64>,
}
31
32impl PyTimeSeries {
33    /// Create a new time series from Rust-owned arrays (crate-internal)
34    pub(crate) fn from_arrays(values: Array1<f64>, timestamps: Option<Array1<f64>>) -> Self {
35        PyTimeSeries {
36            values,
37            timestamps,
38            frequency: None,
39        }
40    }
41
42    /// Get values as an owned array (crate-internal)
43    pub(crate) fn values_owned(&self) -> Array1<f64> {
44        self.values.clone()
45    }
46
47    /// Get timestamps as an owned array (crate-internal)
48    pub(crate) fn timestamps_owned(&self) -> Option<Array1<f64>> {
49        self.timestamps.clone()
50    }
51}
52
53#[pymethods]
54impl PyTimeSeries {
55    /// Create a new time series from Python list or numpy array
56    #[new]
57    fn new(
58        values: PyReadonlyArray1<f64>,
59        timestamps: Option<PyReadonlyArray1<f64>>,
60    ) -> PyResult<Self> {
61        let values_array = values.as_array().to_owned();
62        let timestamps_array = timestamps.map(|ts| ts.as_array().to_owned());
63
64        Ok(PyTimeSeries {
65            values: values_array,
66            timestamps: timestamps_array,
67            frequency: None,
68        })
69    }
70
71    /// Set the frequency of the time series
72    fn set_frequency(&mut self, frequency: f64) {
73        self.frequency = Some(frequency);
74    }
75
76    /// Get the length of the time series
77    fn __len__(&self) -> usize {
78        self.values.len()
79    }
80
81    /// Get values as numpy array
82    fn get_values<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
83        Ok(self.values.clone().into_pyarray(py).unbind())
84    }
85
86    /// Get timestamps as numpy array (if available)
87    fn get_timestamps<'py>(&self, py: Python<'py>) -> PyResult<Option<Py<PyArray1<f64>>>> {
88        Ok(self
89            .timestamps
90            .as_ref()
91            .map(|ts| ts.clone().into_pyarray(py).unbind()))
92    }
93
94    /// Convert to pandas-compatible dictionary
95    fn to_dict(&self, py: Python) -> PyResult<Py<PyAny>> {
96        let dict = PyDict::new(py);
97        dict.set_item("values", self.values.clone().into_pyarray(py).unbind())?;
98
99        if let Some(ref timestamps) = self.timestamps {
100            dict.set_item("timestamps", timestamps.clone().into_pyarray(py).unbind())?;
101        }
102
103        if let Some(freq) = self.frequency {
104            dict.set_item("frequency", freq)?;
105        }
106
107        Ok(dict.into())
108    }
109
110    /// Create from pandas Series
111    #[classmethod]
112    fn from_pandas(_cls: &Bound<'_, PyType>, series: &Bound<'_, PyAny>) -> PyResult<Self> {
113        // Extract values from pandas Series
114        let values = series.getattr("values")?;
115        let values_array: PyReadonlyArray1<f64> = values.extract()?;
116
117        // Try to extract index (timestamps) if available
118        let index = series.getattr("index")?;
119        let timestamps = if index.hasattr("values")? {
120            index
121                .getattr("values")?
122                .extract::<PyReadonlyArray1<f64>>()
123                .ok()
124        } else {
125            None
126        };
127
128        Self::new(values_array, timestamps)
129    }
130
131    /// Statistical summary
132    fn describe(&self) -> PyResult<HashMap<String, f64>> {
133        let mut stats = HashMap::new();
134        let values = &self.values;
135
136        let n = values.len() as f64;
137        let mean = values.iter().sum::<f64>() / n;
138        let variance = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
139        let std = variance.sqrt();
140        let min = values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
141        let max = values.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
142
143        stats.insert("count".to_string(), n);
144        stats.insert("mean".to_string(), mean);
145        stats.insert("std".to_string(), std);
146        stats.insert("min".to_string(), min);
147        stats.insert("max".to_string(), max);
148
149        // Calculate quantiles
150        let mut sorted_values = values.to_vec();
151        sorted_values.sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
152        let len = sorted_values.len();
153
154        stats.insert("25%".to_string(), sorted_values[len / 4]);
155        stats.insert("50%".to_string(), sorted_values[len / 2]);
156        stats.insert("75%".to_string(), sorted_values[3 * len / 4]);
157
158        Ok(stats)
159    }
160}
161
/// Python wrapper for ARIMA models.
///
/// Stores the requested orders and, once `fit` has succeeded, the fitted model
/// together with a copy of the training values that `forecast` hands back to
/// the model.
#[pyclass]
pub struct PyARIMA {
    /// First order passed to `ArimaModel::new`; by the usual ARIMA(p, d, q)
    /// convention this is the autoregressive order.
    p: usize,
    /// Second order passed to `ArimaModel::new`; by the usual convention the
    /// degree of differencing.
    d: usize,
    /// Third order passed to `ArimaModel::new`; by the usual convention the
    /// moving-average order.
    q: usize,
    /// Fitted model; `None` until `fit` succeeds.
    model: Option<ArimaModel<f64>>,
    /// Copy of the values used for fitting; `None` until `fit` succeeds.
    data: Option<Array1<f64>>,
}
171
172#[pymethods]
173impl PyARIMA {
174    /// Create a new ARIMA model
175    #[new]
176    fn new(p: usize, d: usize, q: usize) -> Self {
177        PyARIMA {
178            p,
179            d,
180            q,
181            model: None,
182            data: None,
183        }
184    }
185
186    /// Fit the ARIMA model
187    fn fit(&mut self, data: &PyTimeSeries) -> PyResult<()> {
188        let mut model = ArimaModel::new(self.p, self.d, self.q)
189            .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
190        model
191            .fit(&data.values)
192            .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
193
194        self.model = Some(model);
195        self.data = Some(data.values.clone());
196        Ok(())
197    }
198
199    /// Generate forecasts
200    fn forecast(&self, py: Python, steps: usize) -> PyResult<Py<PyArray1<f64>>> {
201        match (&self.model, &self.data) {
202            (Some(model), Some(data)) => {
203                let forecasts = model
204                    .forecast(steps, data)
205                    .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
206                Ok(forecasts.into_pyarray(py).unbind())
207            }
208            _ => Err(pyo3::exceptions::PyRuntimeError::new_err(
209                "Model not fitted. Call fit() first.",
210            )),
211        }
212    }
213
214    /// Get model parameters
215    fn get_params(&self) -> PyResult<HashMap<String, f64>> {
216        let mut params = HashMap::new();
217        params.insert("p".to_string(), self.p as f64);
218        params.insert("d".to_string(), self.d as f64);
219        params.insert("q".to_string(), self.q as f64);
220
221        if let Some(ref model) = self.model {
222            params.insert("aic".to_string(), model.aic());
223            params.insert("bic".to_string(), model.bic());
224        }
225
226        Ok(params)
227    }
228
229    /// Get AR coefficients
230    fn get_ar_coefficients(&self, py: Python) -> PyResult<Py<PyArray1<f64>>> {
231        match &self.model {
232            Some(model) => Ok(model.ar_coeffs.to_pyarray(py).unbind()),
233            None => Err(pyo3::exceptions::PyRuntimeError::new_err(
234                "Model not fitted. Call fit() first.",
235            )),
236        }
237    }
238
239    /// Get MA coefficients
240    fn get_ma_coefficients(&self, py: Python) -> PyResult<Py<PyArray1<f64>>> {
241        match &self.model {
242            Some(model) => Ok(model.ma_coeffs.to_pyarray(py).unbind()),
243            None => Err(pyo3::exceptions::PyRuntimeError::new_err(
244                "Model not fitted. Call fit() first.",
245            )),
246        }
247    }
248
249    /// Get model summary (similar to statsmodels)
250    fn summary(&self) -> PyResult<String> {
251        match &self.model {
252            Some(model) => {
253                let mut summary =
254                    format!("ARIMA({},{},{}) Model Results\n", self.p, self.d, self.q);
255                summary.push_str("=====================================\n");
256                summary.push_str(&format!("AIC:                  {:10.4}\n", model.aic()));
257                summary.push_str(&format!("BIC:                  {:10.4}\n", model.bic()));
258
259                let ar_coeffs = &model.ar_coeffs;
260                if !ar_coeffs.is_empty() {
261                    summary.push_str("\nAR Coefficients:\n");
262                    for (i, coef) in ar_coeffs.iter().enumerate() {
263                        summary.push_str(&format!("  ar.L{}: {:10.4}\n", i + 1, coef));
264                    }
265                }
266
267                let ma_coeffs = &model.ma_coeffs;
268                if !ma_coeffs.is_empty() {
269                    summary.push_str("\nMA Coefficients:\n");
270                    for (i, coef) in ma_coeffs.iter().enumerate() {
271                        summary.push_str(&format!("  ma.L{}: {:10.4}\n", i + 1, coef));
272                    }
273                }
274
275                Ok(summary)
276            }
277            None => Err(pyo3::exceptions::PyRuntimeError::new_err(
278                "Model not fitted. Call fit() first.",
279            )),
280        }
281    }
282}
283
284/// Apply differencing to a time series
285#[pyfunction]
286fn apply_differencing(
287    py: Python,
288    data: &PyTimeSeries,
289    periods: usize,
290) -> PyResult<Py<PyArray1<f64>>> {
291    let result = difference_series(&data.values, periods)
292        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
293    Ok(result.into_pyarray(py).unbind())
294}
295
296/// Apply seasonal differencing to a time series
297#[pyfunction]
298fn apply_seasonal_differencing(
299    py: Python,
300    data: &PyTimeSeries,
301    periods: usize,
302) -> PyResult<Py<PyArray1<f64>>> {
303    let result = seasonal_difference_series(&data.values, periods)
304        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
305    Ok(result.into_pyarray(py).unbind())
306}
307
308/// Perform STL decomposition
309#[pyfunction]
310fn stl_decomposition_py(py: Python, data: &PyTimeSeries, period: usize) -> PyResult<Py<PyAny>> {
311    use scirs2_series::decomposition::stl::STLOptions;
312
313    let options = STLOptions::default();
314    let result = stl_decomposition(&data.values, period, &options)
315        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
316
317    let dict = PyDict::new(py);
318    dict.set_item("trend", result.trend.into_pyarray(py).unbind())?;
319    dict.set_item("seasonal", result.seasonal.into_pyarray(py).unbind())?;
320    dict.set_item("residual", result.residual.into_pyarray(py).unbind())?;
321
322    Ok(dict.into())
323}
324
325/// Perform Augmented Dickey-Fuller test for stationarity
326#[pyfunction]
327#[pyo3(signature = (data, max_lags=None, regression="c"))]
328fn adf_test_py(
329    data: &PyTimeSeries,
330    max_lags: Option<usize>,
331    regression: &str,
332) -> PyResult<HashMap<String, f64>> {
333    let result = adf_test(&data.values, max_lags, regression)
334        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
335
336    let mut output = HashMap::new();
337    output.insert("statistic".to_string(), result.statistic);
338    output.insert("p_value".to_string(), result.p_value);
339    output.insert(
340        "is_stationary".to_string(),
341        if result.is_stationary { 1.0 } else { 0.0 },
342    );
343
344    Ok(output)
345}
346
347/// Apply Box-Cox transformation
348#[pyfunction]
349fn boxcox_transform_py(
350    py: Python,
351    data: &PyTimeSeries,
352    lambda: Option<f64>,
353) -> PyResult<Py<PyAny>> {
354    let (transformed, transform_info) = box_cox_transform(&data.values, lambda)
355        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
356
357    let dict = PyDict::new(py);
358    dict.set_item("transformed", transformed.into_pyarray(py).unbind())?;
359    dict.set_item("lambda", transform_info.lambda)?;
360
361    Ok(dict.into())
362}
363
364/// Apply inverse Box-Cox transformation
365#[pyfunction]
366fn boxcox_inverse_py(
367    py: Python,
368    data: PyReadonlyArray1<f64>,
369    lambda: f64,
370) -> PyResult<Py<PyArray1<f64>>> {
371    let data_array = data.as_array();
372    // Create BoxCoxTransform struct with the lambda parameter
373    use scirs2_series::transformations::BoxCoxTransform;
374    let transform = BoxCoxTransform {
375        lambda,
376        lambda_estimated: false,
377        min_adjustment: 0.0,
378    };
379    let result = inverse_box_cox_transform(&data_array, &transform)
380        .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("{e}")))?;
381    Ok(result.into_pyarray(py).unbind())
382}
383
384/// Python module registration
385pub fn register_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
386    m.add_class::<PyTimeSeries>()?;
387    m.add_class::<PyARIMA>()?;
388
389    m.add_function(wrap_pyfunction!(apply_differencing, m)?)?;
390    m.add_function(wrap_pyfunction!(apply_seasonal_differencing, m)?)?;
391    m.add_function(wrap_pyfunction!(stl_decomposition_py, m)?)?;
392    m.add_function(wrap_pyfunction!(adf_test_py, m)?)?;
393    m.add_function(wrap_pyfunction!(boxcox_transform_py, m)?)?;
394    m.add_function(wrap_pyfunction!(boxcox_inverse_py, m)?)?;
395
396    Ok(())
397}
398
399// Helper functions for pandas integration
400
401/// Creates a pandas DataFrame from a HashMap of Array1<f64> data
402#[allow(dead_code)]
403pub fn create_pandas_dataframe(
404    py: Python,
405    data: HashMap<String, Array1<f64>>,
406) -> PyResult<Py<PyAny>> {
407    let pandas = py.import("pandas")?;
408    let dict = PyDict::new(py);
409
410    for (key, values) in data {
411        dict.set_item(key, values.into_pyarray(py).unbind())?;
412    }
413
414    let df = pandas.call_method1("DataFrame", (dict,))?;
415    Ok(df.into())
416}
417
418/// Creates a pandas Series from a Rust Array1<f64>
419#[allow(dead_code)]
420pub fn create_pandas_series(
421    py: Python,
422    data: Array1<f64>,
423    name: Option<&str>,
424) -> PyResult<Py<PyAny>> {
425    let pandas = py.import("pandas")?;
426    let args = (data.into_pyarray(py).unbind(),);
427    let kwargs = PyDict::new(py);
428
429    if let Some(name) = name {
430        kwargs.set_item("name", name)?;
431    }
432
433    let series = pandas.call_method("Series", args, Some(&kwargs))?;
434    Ok(series.into())
435}