Skip to main content

scirs2/
pandas_compat.rs

1//! Pandas DataFrame and Series integration
2//!
3//! This module provides utilities for converting between pandas DataFrames/Series
4//! and scirs2 data structures with zero-copy where possible.
5//!
6//! # Example (Python)
7//! ```python
8//! import pandas as pd
9//! import scirs2
10//! import numpy as np
11//!
12//! # Create pandas Series
13//! s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
14//!
15//! # Convert to TimeSeries
16//! ts = scirs2.pandas_to_timeseries(s)
17//!
18//! # Perform operations
19//! arima = scirs2.PyARIMA(1, 1, 0)
20//! arima.fit(ts)
21//! forecast = arima.forecast(5)
22//!
23//! # Convert back to pandas
24//! forecast_series = pd.Series(forecast)
25//! ```
26
27use crate::error::SciRS2Error;
28use crate::series::PyTimeSeries;
29use pyo3::prelude::*;
30use pyo3::types::{PyDict, PyList};
31use scirs2_numpy::{IntoPyArray, PyArray1, PyArray2, PyArrayMethods};
32
33/// Convert pandas Series to PyTimeSeries
34///
35/// This function extracts the values and index from a pandas Series
36/// and creates a PyTimeSeries object.
37///
38/// # Arguments
39/// * `series` - A pandas Series object
40///
41/// # Returns
42/// A PyTimeSeries object with values and timestamps
43#[pyfunction]
44pub fn pandas_to_timeseries(py: Python, series: Py<PyAny>) -> PyResult<PyTimeSeries> {
45    // Get values as numpy array
46    let values_obj = series.getattr(py, "values")?;
47    let values_array = values_obj.cast_bound::<PyArray1<f64>>(py)?;
48
49    // Get index as numpy array (if datetime-like, convert to float timestamps)
50    let index = series.getattr(py, "index")?;
51
52    // Try to convert index to numpy array
53    let timestamps = if let Ok(index_values) = index.getattr(py, "values") {
54        // Check if it's a DatetimeIndex by trying to_numpy method
55        if index.getattr(py, "to_numpy").is_ok() {
56            // Convert datetime to timestamp (seconds since epoch)
57            let timestamp_method = index.getattr(py, "astype")?;
58            let timestamps_ns = timestamp_method.call1(py, ("int64",))?;
59            let ts_values = timestamps_ns.getattr(py, "values")?;
60            let timestamps_array = ts_values.cast_bound::<PyArray1<i64>>(py)?;
61
62            // Convert from nanoseconds to seconds
63            let binding = timestamps_array.readonly();
64            let ts_arr = binding.as_array();
65            let ts_vec: Vec<f64> = ts_arr.iter().map(|&ns| ns as f64 / 1e9).collect();
66
67            Some(scirs2_core::Array1::from_vec(ts_vec))
68        } else if let Ok(index_array) = index_values.cast_bound::<PyArray1<f64>>(py) {
69            // Already numeric
70            let binding = index_array.readonly();
71            let idx_arr = binding.as_array();
72            Some(idx_arr.to_owned())
73        } else {
74            None
75        }
76    } else {
77        None
78    };
79
80    // Create PyTimeSeries using crate-internal constructor
81    let binding = values_array.readonly();
82    let values_arr = binding.as_array();
83    let values_owned = values_arr.to_owned();
84
85    Ok(PyTimeSeries::from_arrays(values_owned, timestamps))
86}
87
88/// Convert PyTimeSeries to pandas Series
89///
90/// # Arguments
91/// * `ts` - A PyTimeSeries object
92///
93/// # Returns
94/// A pandas Series object
95#[pyfunction]
96pub fn timeseries_to_pandas(py: Python, ts: &PyTimeSeries) -> PyResult<Py<PyAny>> {
97    // Import pandas
98    let pandas = py.import("pandas")?;
99
100    // Get values using crate-internal accessor
101    let values = ts.values_owned().into_pyarray(py).unbind();
102
103    // Create Series
104    let series = if let Some(timestamps) = ts.timestamps_owned() {
105        // Create DatetimeIndex from timestamps
106        let timestamps_ns: Vec<i64> = timestamps.iter().map(|&s| (s * 1e9) as i64).collect();
107
108        let datetime_index = pandas
109            .getattr("DatetimeIndex")?
110            .call1((PyList::new(py, &timestamps_ns)?,))?;
111
112        // Create Series with datetime index
113        let kwargs = PyDict::new(py);
114        kwargs.set_item("index", datetime_index)?;
115        pandas.getattr("Series")?.call((values,), Some(&kwargs))
116    } else {
117        // Create Series without index
118        pandas.getattr("Series")?.call1((values,))
119    }?;
120
121    Ok(series.into())
122}
123
124/// Convert pandas DataFrame to numpy array (2D)
125///
126/// This is a convenience function for extracting numeric data from DataFrames
127/// for use with scirs2 functions.
128///
129/// # Arguments
130/// * `df` - A pandas DataFrame
131///
132/// # Returns
133/// A 2D numpy array
134#[pyfunction]
135pub fn dataframe_to_array(py: Python, df: Py<PyAny>) -> PyResult<Py<PyArray2<f64>>> {
136    // Get values as numpy array
137    let values = df.getattr(py, "values")?;
138    let array = values.cast_bound::<PyArray2<f64>>(py)?;
139
140    Ok(array.to_owned().unbind())
141}
142
143/// Convert numpy array to pandas DataFrame
144///
145/// # Arguments
146/// * `array` - A 2D numpy array
147/// * `columns` - Optional column names (list of strings)
148/// * `index` - Optional index values
149///
150/// # Returns
151/// A pandas DataFrame
152#[pyfunction]
153#[pyo3(signature = (array, columns=None, index=None))]
154pub fn array_to_dataframe(
155    py: Python,
156    array: &Bound<'_, PyArray2<f64>>,
157    columns: Option<Vec<String>>,
158    index: Option<Py<PyAny>>,
159) -> PyResult<Py<PyAny>> {
160    // Import pandas
161    let pandas = py.import("pandas")?;
162
163    // Create DataFrame
164    let kwargs = PyDict::new(py);
165    if let Some(cols) = columns {
166        kwargs.set_item("columns", cols)?;
167    }
168    if let Some(idx) = index {
169        kwargs.set_item("index", idx)?;
170    }
171
172    let df = pandas.getattr("DataFrame")?.call((array,), Some(&kwargs))?;
173
174    Ok(df.into())
175}
176
177/// Apply a scirs2 function to each column of a DataFrame
178///
179/// # Example (Python)
180/// ```python
181/// import pandas as pd
182/// import scirs2
183///
184/// df = pd.DataFrame({
185///     'A': [1, 2, 3, 4, 5],
186///     'B': [2, 4, 6, 8, 10],
187///     'C': [1, 3, 5, 7, 9]
188/// })
189///
190/// # Calculate mean of each column
191/// means = scirs2.apply_to_dataframe(df, scirs2.mean_py)
192/// ```
193#[pyfunction]
194pub fn apply_to_dataframe(py: Python, df: Py<PyAny>, func: Py<PyAny>) -> PyResult<Py<PyAny>> {
195    // Import pandas
196    let pandas = py.import("pandas")?;
197
198    // Get column names
199    let columns = df.getattr(py, "columns")?;
200    let col_list: Vec<String> = columns.extract(py)?;
201
202    // Apply function to each column
203    let results = PyDict::new(py);
204    for col_name in col_list {
205        let column = df.call_method1(py, "__getitem__", (&col_name,))?;
206        let values = column.getattr(py, "values")?;
207        let result = func.call1(py, (values,))?;
208        results.set_item(&col_name, result)?;
209    }
210
211    // Convert results dict to pandas Series
212    let series = pandas.getattr("Series")?.call1((results,))?;
213    Ok(series.into())
214}
215
216/// Apply a scirs2 function row-wise or column-wise to a DataFrame
217///
218/// # Arguments
219/// * `df` - A pandas DataFrame
220/// * `func` - A scirs2 function that takes a 1D array
221/// * `axis` - 0 for column-wise (default), 1 for row-wise
222///
223/// # Returns
224/// A pandas Series with results
225#[pyfunction]
226#[pyo3(signature = (df, func, axis=0))]
227pub fn apply_along_axis(
228    py: Python,
229    df: Py<PyAny>,
230    func: Py<PyAny>,
231    axis: usize,
232) -> PyResult<Py<PyAny>> {
233    let pandas = py.import("pandas")?;
234
235    if axis == 0 {
236        // Column-wise (same as apply_to_dataframe)
237        apply_to_dataframe(py, df, func)
238    } else {
239        // Row-wise
240        let values = df.getattr(py, "values")?;
241        let array = values.cast_bound::<PyArray2<f64>>(py)?;
242        let binding = array.readonly();
243        let arr = binding.as_array();
244
245        let results: Vec<f64> = arr
246            .rows()
247            .into_iter()
248            .map(|row| {
249                let row_array = row.to_owned().into_pyarray(py);
250                func.call1(py, (row_array,))
251                    .and_then(|r| r.extract::<f64>(py))
252            })
253            .collect::<Result<Vec<_>, _>>()?;
254
255        let series = pandas.getattr("Series")?.call1((results,))?;
256        Ok(series.into())
257    }
258}
259
260/// Rolling window operations on pandas Series with scirs2 functions
261///
262/// # Example (Python)
263/// ```python
264/// import pandas as pd
265/// import scirs2
266///
267/// s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
268///
269/// # Calculate rolling mean with window size 3
270/// rolling_mean = scirs2.rolling_apply(s, 3, scirs2.mean_py)
271/// ```
272#[pyfunction]
273pub fn rolling_apply(
274    py: Python,
275    series: Py<PyAny>,
276    window: usize,
277    func: Py<PyAny>,
278) -> PyResult<Py<PyAny>> {
279    let pandas = py.import("pandas")?;
280
281    // Get values
282    let values = series.getattr(py, "values")?;
283    let array = values.cast_bound::<PyArray1<f64>>(py)?;
284    let binding = array.readonly();
285    let arr = binding.as_array();
286
287    if arr.len() < window {
288        return Err(SciRS2Error::ValueError(format!(
289            "Window size {} is larger than array length {}",
290            window,
291            arr.len()
292        ))
293        .into());
294    }
295
296    // Calculate rolling statistics
297    let mut results = Vec::with_capacity(arr.len() - window + 1);
298    for i in 0..=(arr.len() - window) {
299        let window_slice = arr.slice(ndarray::s![i..i + window]);
300        let window_array = window_slice.to_owned().into_pyarray(py);
301        let result: f64 = func.call1(py, (window_array,))?.extract(py)?;
302        results.push(result);
303    }
304
305    // Pad with NaN at the beginning
306    let mut padded = vec![f64::NAN; window - 1];
307    padded.extend(results);
308
309    // Create pandas Series
310    let series_result = pandas.getattr("Series")?.call1((padded,))?;
311    Ok(series_result.into())
312}
313
314/// Register pandas compatibility functions with Python module
315pub fn register_pandas_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
316    m.add_function(wrap_pyfunction!(pandas_to_timeseries, m)?)?;
317    m.add_function(wrap_pyfunction!(timeseries_to_pandas, m)?)?;
318    m.add_function(wrap_pyfunction!(dataframe_to_array, m)?)?;
319    m.add_function(wrap_pyfunction!(array_to_dataframe, m)?)?;
320    m.add_function(wrap_pyfunction!(apply_to_dataframe, m)?)?;
321    m.add_function(wrap_pyfunction!(apply_along_axis, m)?)?;
322    m.add_function(wrap_pyfunction!(rolling_apply, m)?)?;
323    Ok(())
324}