sklears_python/linear/
common.rs

1//! Common functionality for linear model Python bindings
2//!
3//! This module contains shared imports, types, and utilities used
4//! across all linear model implementations.
5
6// Re-export commonly used types and traits - Using SciRS2-Core for improved performance
7use numpy::Element;
8pub use numpy::{PyArray1, PyArray2, PyReadonlyArray1, PyReadonlyArray2};
9pub use pyo3::exceptions::PyValueError;
10pub use pyo3::prelude::*;
11pub use scirs2_core::ndarray::{Array1, Array2};
12
13// Performance optimization imports
14#[cfg(feature = "parallel")]
15pub use rayon::prelude::*;
16
/// Result alias for linear model operations whose error type is `PyValueError`.
///
/// NOTE(review): the fallible functions visible in this module return `PyResult<T>` or
/// `Result<T, SklearsPythonError>` instead of this alias; confirm whether callers outside
/// this file still depend on it.
pub type LinearModelResult<T> = Result<T, PyValueError>;
19
/// Error categories raised by the sklears-python bindings.
///
/// Every variant carries a human-readable message. The `Display` implementation prefixes
/// that message with a category label, and the `From<SklearsPythonError> for PyErr`
/// conversion in this module picks the Python exception class for each variant.
#[derive(Debug)]
pub enum SklearsPythonError {
    /// The caller supplied input that failed validation.
    ValidationError(String),
    /// Fitting the model failed.
    FittingError(String),
    /// Producing predictions failed.
    PredictionError(String),
    /// A memory-related problem was detected.
    MemoryError(String),
    /// A numerical computation problem was detected (for example non-finite values).
    NumericalError(String),
    /// The configuration was invalid.
    ConfigurationError(String),
}

impl std::fmt::Display for SklearsPythonError {
    /// Writes `"<category label>: <message>"` for the variant held by `self`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ValidationError(detail) => {
                f.write_fmt(format_args!("Validation Error: {}", detail))
            }
            Self::FittingError(detail) => {
                f.write_fmt(format_args!("Model Fitting Error: {}", detail))
            }
            Self::PredictionError(detail) => {
                f.write_fmt(format_args!("Prediction Error: {}", detail))
            }
            Self::MemoryError(detail) => f.write_fmt(format_args!("Memory Error: {}", detail)),
            Self::NumericalError(detail) => {
                f.write_fmt(format_args!("Numerical Error: {}", detail))
            }
            Self::ConfigurationError(detail) => {
                f.write_fmt(format_args!("Configuration Error: {}", detail))
            }
        }
    }
}

// No `source()` override: each variant stores only a message, never a nested error.
impl std::error::Error for SklearsPythonError {}
53
54impl From<SklearsPythonError> for PyErr {
55    fn from(err: SklearsPythonError) -> Self {
56        match err {
57            SklearsPythonError::ValidationError(msg) => PyValueError::new_err(msg),
58            SklearsPythonError::FittingError(msg) => PyRuntimeError::new_err(msg),
59            SklearsPythonError::PredictionError(msg) => PyRuntimeError::new_err(msg),
60            SklearsPythonError::MemoryError(msg) => {
61                use pyo3::exceptions::PyMemoryError;
62                PyMemoryError::new_err(msg)
63            }
64            SklearsPythonError::NumericalError(msg) => PyArithmeticError::new_err(msg),
65            SklearsPythonError::ConfigurationError(msg) => PyValueError::new_err(msg),
66        }
67    }
68}
69
70// Import additional exception types
71use pyo3::exceptions::{PyArithmeticError, PyRuntimeError};
72
73/// Calculate R² score with optimized array operations
74pub fn calculate_r2_score(y_true: &Array1<f64>, y_pred: &Array1<f64>) -> f64 {
75    let y_mean = y_true.mean().unwrap_or(0.0);
76
77    // Use optimized array operations
78    let y_centered: Array1<f64> = y_true.mapv(|y| y - y_mean);
79    let residuals: Array1<f64> = y_true - y_pred;
80    let ss_tot = y_centered.dot(&y_centered);
81    let ss_res = residuals.dot(&residuals);
82
83    1.0 - (ss_res / ss_tot)
84}
85
86/// Validate input arrays for model fitting
87pub fn validate_fit_arrays(x: &Array2<f64>, y: &Array1<f64>) -> PyResult<()> {
88    if x.nrows() != y.len() {
89        return Err(PyValueError::new_err(format!(
90            "X and y have incompatible shapes: X has {} samples, y has {} samples",
91            x.nrows(),
92            y.len()
93        )));
94    }
95
96    if x.nrows() == 0 {
97        return Err(PyValueError::new_err("X and y must not be empty"));
98    }
99
100    if x.ncols() == 0 {
101        return Err(PyValueError::new_err("X must have at least one feature"));
102    }
103
104    Ok(())
105}
106
107/// Validate input arrays for prediction
108pub fn validate_predict_array(x: &Array2<f64>) -> PyResult<()> {
109    if x.nrows() == 0 {
110        return Err(SklearsPythonError::ValidationError("X must not be empty".to_string()).into());
111    }
112
113    if x.ncols() == 0 {
114        return Err(SklearsPythonError::ValidationError(
115            "X must have at least one feature".to_string(),
116        )
117        .into());
118    }
119
120    // Check for invalid values
121    validate_finite_values(x)?;
122
123    Ok(())
124}
125
126/// Enhanced validation functions with better error handling
127pub fn validate_fit_arrays_enhanced(
128    x: &Array2<f64>,
129    y: &Array1<f64>,
130) -> Result<(), SklearsPythonError> {
131    if x.nrows() != y.len() {
132        return Err(SklearsPythonError::ValidationError(format!(
133            "X and y have incompatible shapes: X has {} samples, y has {} samples",
134            x.nrows(),
135            y.len()
136        )));
137    }
138
139    if x.nrows() == 0 {
140        return Err(SklearsPythonError::ValidationError(
141            "X and y must not be empty".to_string(),
142        ));
143    }
144
145    if x.ncols() == 0 {
146        return Err(SklearsPythonError::ValidationError(
147            "X must have at least one feature".to_string(),
148        ));
149    }
150
151    // Check for infinite or NaN values
152    validate_finite_values(x)?;
153    validate_finite_values_1d(y)?;
154
155    // Memory usage validation (warn if arrays are very large)
156    check_memory_usage(x, y)?;
157
158    Ok(())
159}
160
161/// Validate that array contains only finite values
162pub fn validate_finite_values(arr: &Array2<f64>) -> Result<(), SklearsPythonError> {
163    for value in arr.iter() {
164        if !value.is_finite() {
165            return Err(SklearsPythonError::NumericalError(
166                "Input array contains non-finite values (NaN or infinite)".to_string(),
167            ));
168        }
169    }
170    Ok(())
171}
172
173/// Validate that 1D array contains only finite values
174pub fn validate_finite_values_1d(arr: &Array1<f64>) -> Result<(), SklearsPythonError> {
175    for value in arr.iter() {
176        if !value.is_finite() {
177            return Err(SklearsPythonError::NumericalError(
178                "Target array contains non-finite values (NaN or infinite)".to_string(),
179            ));
180        }
181    }
182    Ok(())
183}
184
185/// Check memory usage and warn if arrays are very large
186pub fn check_memory_usage(x: &Array2<f64>, y: &Array1<f64>) -> Result<(), SklearsPythonError> {
187    let x_memory_mb = (x.len() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
188    let y_memory_mb = (y.len() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
189    let total_memory_mb = x_memory_mb + y_memory_mb;
190
191    // Warn if using more than 1GB of memory
192    if total_memory_mb > 1024.0 {
193        eprintln!("Warning: Large dataset detected ({:.2} MB). Consider using batch processing or data preprocessing to reduce memory usage.", total_memory_mb);
194    }
195
196    // Error if using more than 4GB (likely will cause issues)
197    if total_memory_mb > 4096.0 {
198        return Err(SklearsPythonError::MemoryError(format!(
199            "Dataset is too large ({:.2} MB). Consider using data preprocessing to reduce memory usage.",
200            total_memory_mb
201        )));
202    }
203
204    Ok(())
205}
206
/// Report the amount of memory assumed to be available, in MB.
///
/// This is a placeholder: it does not query the operating system and always returns a
/// fixed default of 8 GB (8192 MB).
pub fn get_available_memory_mb() -> f64 {
    // Fixed assumption until a real system query is implemented.
    const ASSUMED_AVAILABLE_MB: f64 = 8.0 * 1024.0;
    ASSUMED_AVAILABLE_MB
}
214
215/// Convert a read-only NumPy array view into an owned SciRS2 ndarray Array1
216pub fn pyarray_to_core_array1<T>(py_array: PyReadonlyArray1<T>) -> PyResult<Array1<T>>
217where
218    T: Clone + Element,
219{
220    let array_view = py_array.as_array();
221    Ok(Array1::from_vec(array_view.iter().cloned().collect()))
222}
223
224/// Convert a read-only NumPy array view into an owned SciRS2 ndarray Array2
225pub fn pyarray_to_core_array2<T>(py_array: PyReadonlyArray2<T>) -> PyResult<Array2<T>>
226where
227    T: Clone + Element,
228{
229    let array_view = py_array.as_array();
230    let shape = array_view.shape();
231    if shape.len() != 2 {
232        return Err(PyValueError::new_err("Expected a 2D array"));
233    }
234    let rows = shape[0];
235    let cols = shape[1];
236    Array2::from_shape_vec((rows, cols), array_view.iter().cloned().collect())
237        .map_err(|_| PyValueError::new_err("Failed to convert NumPy array to ndarray"))
238}
239
240/// Convert an ndarray Array1 into a Python-owned NumPy array object
241pub fn core_array1_to_py<'py, T>(py: Python<'py>, array: &Array1<T>) -> Py<PyArray1<T>>
242where
243    T: Clone + Element,
244{
245    let numpy_array = numpy::ndarray::Array1::from_vec(array.to_vec());
246    PyArray1::from_owned_array(py, numpy_array).into()
247}
248
249/// Convert an ndarray Array2 into a Python-owned NumPy array object
250pub fn core_array2_to_py<'py, T>(py: Python<'py>, array: &Array2<T>) -> PyResult<Py<PyArray2<T>>>
251where
252    T: Clone + Element,
253{
254    let (rows, cols) = array.dim();
255    let data: Vec<T> = array.iter().cloned().collect();
256    let numpy_array = numpy::ndarray::Array2::from_shape_vec((rows, cols), data)
257        .map_err(|_| PyValueError::new_err("Failed to convert ndarray to NumPy array"))?;
258    Ok(PyArray2::from_owned_array(py, numpy_array).into())
259}
260
/// Performance monitoring structure.
///
/// A plain data holder: the derived `Default` yields `None` for every optional
/// measurement and `0` for both counters. Nothing in the code visible in this module
/// reads or writes these fields.
#[derive(Debug, Clone, Default)]
pub struct PerformanceStats {
    /// Training time in milliseconds, or `None` if not recorded.
    pub training_time_ms: Option<f64>,
    /// Prediction time in milliseconds, or `None` if not recorded.
    pub prediction_time_ms: Option<f64>,
    /// Memory usage in MB, or `None` if not recorded.
    // NOTE(review): how this is measured is not shown here; confirm with the code that sets it.
    pub memory_usage_mb: Option<f64>,
    /// Count of cache hits.
    // NOTE(review): which cache this refers to is not visible in this file; confirm with callers.
    pub cache_hits: usize,
    /// Count of cache misses.
    pub cache_misses: usize,
}