Skip to main content

scirs2/
pandas_compat.rs

//! Pandas DataFrame and Series integration
//!
//! This module provides utilities for converting between pandas DataFrames/Series
//! and scirs2 data structures with zero-copy where possible.
//!
//! # Example (Python)
//! ```python
//! import pandas as pd
//! import scirs2
//! import numpy as np
//!
//! # Create pandas Series
//! s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
//!
//! # Convert to TimeSeries
//! ts = scirs2.pandas_to_timeseries(s)
//!
//! # Perform operations
//! arima = scirs2.PyARIMA(1, 1, 0)
//! arima.fit(ts)
//! forecast = arima.forecast(5)
//!
//! # Convert back to pandas
//! forecast_series = pd.Series(forecast)
//! ```
26
27use crate::error::SciRS2Error;
28#[cfg(feature = "series")]
29use crate::series::PyTimeSeries;
30use pyo3::prelude::*;
31use pyo3::types::PyDict;
32#[cfg(feature = "series")]
33use pyo3::types::PyList;
34use scirs2_numpy::{IntoPyArray, PyArray1, PyArray2, PyArrayMethods};
35
/// Convert a pandas Series to a `PyTimeSeries`.
///
/// The Series values are coerced to `float64` (so integer or boolean Series
/// are accepted) and copied into an owned array. Timestamps are derived from
/// the Series index:
///
/// * an index whose dtype kind is `"M"` (datetime-like) is converted with
///   `astype("int64")` and divided by `1e9` to obtain seconds;
/// * an index whose `values` is already a 1-D `float64` array is used as-is;
/// * any other index (for example the default `RangeIndex`, or a string
///   index) produces no timestamps.
///
/// # Arguments
/// * `series` - A pandas Series object
///
/// # Returns
/// A PyTimeSeries object with values and, when available, timestamps
///
/// # Errors
/// Returns a Python exception if `series` has no `values`/`index` attribute,
/// if the values cannot be converted to a 1-D `float64` array, or if a
/// datetime-like index cannot be converted to `int64`.
#[cfg(feature = "series")]
#[pyfunction]
pub fn pandas_to_timeseries(py: Python, series: Py<PyAny>) -> PyResult<PyTimeSeries> {
    // Coerce to float64 so non-float Series are accepted; `copy=False` lets
    // numpy hand back the same array when it is already float64.
    let astype_kwargs = PyDict::new(py);
    astype_kwargs.set_item("copy", false)?;
    let values_obj = series.getattr(py, "values")?.call_method(
        py,
        "astype",
        ("float64",),
        Some(&astype_kwargs),
    )?;
    let values_array = values_obj.cast_bound::<PyArray1<f64>>(py)?;

    let index = series.getattr(py, "index")?;

    // Every pandas index exposes `to_numpy`, so probing for that attribute
    // cannot distinguish a datetime index from a numeric one. Inspect the
    // dtype kind instead; "M" is the datetime64 kind. Detection is
    // best-effort: any failure here simply means "not datetime-like".
    let is_datetime_index = index
        .getattr(py, "dtype")
        .and_then(|dtype| dtype.getattr(py, "kind"))
        .ok()
        .and_then(|kind| kind.extract::<String>(py).ok())
        .is_some_and(|kind| kind == "M");

    let timestamps = if is_datetime_index {
        // NOTE(review): the division by 1e9 assumes the int64 view is in
        // nanoseconds (pandas' default datetime resolution) — confirm if
        // indexes with other resolutions must be supported.
        let ticks_index = index.call_method1(py, "astype", ("int64",))?;
        let ticks_obj = ticks_index.getattr(py, "values")?;
        let ticks_array = ticks_obj.cast_bound::<PyArray1<i64>>(py)?;

        let ticks_binding = ticks_array.readonly();
        let ticks = ticks_binding.as_array();
        let seconds: Vec<f64> = ticks.iter().map(|&ns| ns as f64 / 1e9).collect();

        Some(scirs2_core::Array1::from_vec(seconds))
    } else if let Ok(index_values) = index.getattr(py, "values") {
        if let Ok(index_array) = index_values.cast_bound::<PyArray1<f64>>(py) {
            // Already a float index: use it directly as the time axis.
            let index_binding = index_array.readonly();
            let index_view = index_binding.as_array();
            Some(index_view.to_owned())
        } else {
            None
        }
    } else {
        None
    };

    // Create PyTimeSeries using the crate-internal constructor.
    let values_binding = values_array.readonly();
    let values_view = values_binding.as_array();
    let values_owned = values_view.to_owned();

    Ok(PyTimeSeries::from_arrays(values_owned, timestamps))
}
91
/// Build a pandas Series from a `PyTimeSeries`.
///
/// The values are moved into a numpy array. When the time series carries
/// timestamps, each one is multiplied by `1e9`, cast to `i64`, and the
/// resulting list is passed to `pandas.DatetimeIndex`, which becomes the
/// index of the Series. Without timestamps the Series is created with
/// pandas' default index.
///
/// # Arguments
/// * `ts` - A PyTimeSeries object
///
/// # Returns
/// A pandas Series object
///
/// # Errors
/// Returns a Python exception if pandas cannot be imported or if building
/// the index or the Series fails.
#[cfg(feature = "series")]
#[pyfunction]
pub fn timeseries_to_pandas(py: Python, ts: &PyTimeSeries) -> PyResult<Py<PyAny>> {
    let pandas = py.import("pandas")?;

    // Hand the owned values over to numpy.
    let values = ts.values_owned().into_pyarray(py).unbind();

    let series = match ts.timestamps_owned() {
        Some(timestamps) => {
            // Scale the stored timestamps by 1e9 and truncate to integers
            // before handing them to DatetimeIndex.
            let scaled: Vec<i64> = timestamps
                .iter()
                .map(|&secs| (secs * 1e9) as i64)
                .collect();
            let scaled_list = PyList::new(py, &scaled)?;
            let datetime_index = pandas.getattr("DatetimeIndex")?.call1((scaled_list,))?;

            let series_kwargs = PyDict::new(py);
            series_kwargs.set_item("index", datetime_index)?;
            pandas
                .getattr("Series")?
                .call((values,), Some(&series_kwargs))?
        }
        // No timestamps: let pandas assign its default index.
        None => pandas.getattr("Series")?.call1((values,))?,
    };

    Ok(series.unbind())
}
128
129/// Convert pandas DataFrame to numpy array (2D)
130///
131/// This is a convenience function for extracting numeric data from DataFrames
132/// for use with scirs2 functions.
133///
134/// # Arguments
135/// * `df` - A pandas DataFrame
136///
137/// # Returns
138/// A 2D numpy array
139#[pyfunction]
140pub fn dataframe_to_array(py: Python, df: Py<PyAny>) -> PyResult<Py<PyArray2<f64>>> {
141    // Get values as numpy array
142    let values = df.getattr(py, "values")?;
143    let array = values.cast_bound::<PyArray2<f64>>(py)?;
144
145    Ok(array.to_owned().unbind())
146}
147
148/// Convert numpy array to pandas DataFrame
149///
150/// # Arguments
151/// * `array` - A 2D numpy array
152/// * `columns` - Optional column names (list of strings)
153/// * `index` - Optional index values
154///
155/// # Returns
156/// A pandas DataFrame
157#[pyfunction]
158#[pyo3(signature = (array, columns=None, index=None))]
159pub fn array_to_dataframe(
160    py: Python,
161    array: &Bound<'_, PyArray2<f64>>,
162    columns: Option<Vec<String>>,
163    index: Option<Py<PyAny>>,
164) -> PyResult<Py<PyAny>> {
165    // Import pandas
166    let pandas = py.import("pandas")?;
167
168    // Create DataFrame
169    let kwargs = PyDict::new(py);
170    if let Some(cols) = columns {
171        kwargs.set_item("columns", cols)?;
172    }
173    if let Some(idx) = index {
174        kwargs.set_item("index", idx)?;
175    }
176
177    let df = pandas.getattr("DataFrame")?.call((array,), Some(&kwargs))?;
178
179    Ok(df.into())
180}
181
182/// Apply a scirs2 function to each column of a DataFrame
183///
184/// # Example (Python)
185/// ```python
186/// import pandas as pd
187/// import scirs2
188///
189/// df = pd.DataFrame({
190///     'A': [1, 2, 3, 4, 5],
191///     'B': [2, 4, 6, 8, 10],
192///     'C': [1, 3, 5, 7, 9]
193/// })
194///
195/// # Calculate mean of each column
196/// means = scirs2.apply_to_dataframe(df, scirs2.mean_py)
197/// ```
198#[pyfunction]
199pub fn apply_to_dataframe(py: Python, df: Py<PyAny>, func: Py<PyAny>) -> PyResult<Py<PyAny>> {
200    // Import pandas
201    let pandas = py.import("pandas")?;
202
203    // Get column names
204    let columns = df.getattr(py, "columns")?;
205    let col_list: Vec<String> = columns.extract(py)?;
206
207    // Apply function to each column
208    let results = PyDict::new(py);
209    for col_name in col_list {
210        let column = df.call_method1(py, "__getitem__", (&col_name,))?;
211        let values = column.getattr(py, "values")?;
212        let result = func.call1(py, (values,))?;
213        results.set_item(&col_name, result)?;
214    }
215
216    // Convert results dict to pandas Series
217    let series = pandas.getattr("Series")?.call1((results,))?;
218    Ok(series.into())
219}
220
221/// Apply a scirs2 function row-wise or column-wise to a DataFrame
222///
223/// # Arguments
224/// * `df` - A pandas DataFrame
225/// * `func` - A scirs2 function that takes a 1D array
226/// * `axis` - 0 for column-wise (default), 1 for row-wise
227///
228/// # Returns
229/// A pandas Series with results
230#[pyfunction]
231#[pyo3(signature = (df, func, axis=0))]
232pub fn apply_along_axis(
233    py: Python,
234    df: Py<PyAny>,
235    func: Py<PyAny>,
236    axis: usize,
237) -> PyResult<Py<PyAny>> {
238    let pandas = py.import("pandas")?;
239
240    if axis == 0 {
241        // Column-wise (same as apply_to_dataframe)
242        apply_to_dataframe(py, df, func)
243    } else {
244        // Row-wise
245        let values = df.getattr(py, "values")?;
246        let array = values.cast_bound::<PyArray2<f64>>(py)?;
247        let binding = array.readonly();
248        let arr = binding.as_array();
249
250        let results: Vec<f64> = arr
251            .rows()
252            .into_iter()
253            .map(|row| {
254                let row_array = row.to_owned().into_pyarray(py);
255                func.call1(py, (row_array,))
256                    .and_then(|r| r.extract::<f64>(py))
257            })
258            .collect::<Result<Vec<_>, _>>()?;
259
260        let series = pandas.getattr("Series")?.call1((results,))?;
261        Ok(series.into())
262    }
263}
264
265/// Rolling window operations on pandas Series with scirs2 functions
266///
267/// # Example (Python)
268/// ```python
269/// import pandas as pd
270/// import scirs2
271///
272/// s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
273///
274/// # Calculate rolling mean with window size 3
275/// rolling_mean = scirs2.rolling_apply(s, 3, scirs2.mean_py)
276/// ```
277#[pyfunction]
278pub fn rolling_apply(
279    py: Python,
280    series: Py<PyAny>,
281    window: usize,
282    func: Py<PyAny>,
283) -> PyResult<Py<PyAny>> {
284    let pandas = py.import("pandas")?;
285
286    // Get values
287    let values = series.getattr(py, "values")?;
288    let array = values.cast_bound::<PyArray1<f64>>(py)?;
289    let binding = array.readonly();
290    let arr = binding.as_array();
291
292    if arr.len() < window {
293        return Err(SciRS2Error::ValueError(format!(
294            "Window size {} is larger than array length {}",
295            window,
296            arr.len()
297        ))
298        .into());
299    }
300
301    // Calculate rolling statistics
302    let mut results = Vec::with_capacity(arr.len() - window + 1);
303    for i in 0..=(arr.len() - window) {
304        let window_slice = arr.slice(ndarray::s![i..i + window]);
305        let window_array = window_slice.to_owned().into_pyarray(py);
306        let result: f64 = func.call1(py, (window_array,))?.extract(py)?;
307        results.push(result);
308    }
309
310    // Pad with NaN at the beginning
311    let mut padded = vec![f64::NAN; window - 1];
312    padded.extend(results);
313
314    // Create pandas Series
315    let series_result = pandas.getattr("Series")?.call1((padded,))?;
316    Ok(series_result.into())
317}
318
319/// Register pandas compatibility functions with Python module
320pub fn register_pandas_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
321    #[cfg(feature = "series")]
322    m.add_function(wrap_pyfunction!(pandas_to_timeseries, m)?)?;
323    #[cfg(feature = "series")]
324    m.add_function(wrap_pyfunction!(timeseries_to_pandas, m)?)?;
325    m.add_function(wrap_pyfunction!(dataframe_to_array, m)?)?;
326    m.add_function(wrap_pyfunction!(array_to_dataframe, m)?)?;
327    m.add_function(wrap_pyfunction!(apply_to_dataframe, m)?)?;
328    m.add_function(wrap_pyfunction!(apply_along_axis, m)?)?;
329    m.add_function(wrap_pyfunction!(rolling_apply, m)?)?;
330    Ok(())
331}