sklears_python/linear/
common.rs1use numpy::Element;
8pub use numpy::{PyArray1, PyArray2, PyReadonlyArray1, PyReadonlyArray2};
9pub use pyo3::exceptions::PyValueError;
10pub use pyo3::prelude::*;
11pub use scirs2_core::ndarray::{Array1, Array2};
12
13#[cfg(feature = "parallel")]
15pub use rayon::prelude::*;
16
17pub type LinearModelResult<T> = Result<T, PyValueError>;
19
/// Error categories produced by the Python-facing linear-model helpers.
///
/// Every variant carries a human-readable message. The type derives
/// `Clone`, `PartialEq` and `Eq` in addition to `Debug` so that errors can be
/// duplicated and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SklearsPythonError {
    /// An input failed a validation check (shape, emptiness, ...).
    ValidationError(String),
    /// Something went wrong while fitting a model.
    FittingError(String),
    /// Something went wrong while producing predictions.
    PredictionError(String),
    /// The data set was rejected because of its memory footprint.
    MemoryError(String),
    /// A numerical problem was detected, e.g. non-finite input values.
    NumericalError(String),
    /// The configuration supplied by the caller is invalid.
    ConfigurationError(String),
}
36
37impl std::fmt::Display for SklearsPythonError {
38 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39 match self {
40 SklearsPythonError::ValidationError(msg) => write!(f, "Validation Error: {}", msg),
41 SklearsPythonError::FittingError(msg) => write!(f, "Model Fitting Error: {}", msg),
42 SklearsPythonError::PredictionError(msg) => write!(f, "Prediction Error: {}", msg),
43 SklearsPythonError::MemoryError(msg) => write!(f, "Memory Error: {}", msg),
44 SklearsPythonError::NumericalError(msg) => write!(f, "Numerical Error: {}", msg),
45 SklearsPythonError::ConfigurationError(msg) => {
46 write!(f, "Configuration Error: {}", msg)
47 }
48 }
49 }
50}
51
52impl std::error::Error for SklearsPythonError {}
53
54impl From<SklearsPythonError> for PyErr {
55 fn from(err: SklearsPythonError) -> Self {
56 match err {
57 SklearsPythonError::ValidationError(msg) => PyValueError::new_err(msg),
58 SklearsPythonError::FittingError(msg) => PyRuntimeError::new_err(msg),
59 SklearsPythonError::PredictionError(msg) => PyRuntimeError::new_err(msg),
60 SklearsPythonError::MemoryError(msg) => {
61 use pyo3::exceptions::PyMemoryError;
62 PyMemoryError::new_err(msg)
63 }
64 SklearsPythonError::NumericalError(msg) => PyArithmeticError::new_err(msg),
65 SklearsPythonError::ConfigurationError(msg) => PyValueError::new_err(msg),
66 }
67 }
68}
69
70use pyo3::exceptions::{PyArithmeticError, PyRuntimeError};
72
73pub fn calculate_r2_score(y_true: &Array1<f64>, y_pred: &Array1<f64>) -> f64 {
75 let y_mean = y_true.mean().unwrap_or(0.0);
76
77 let y_centered: Array1<f64> = y_true.mapv(|y| y - y_mean);
79 let residuals: Array1<f64> = y_true - y_pred;
80 let ss_tot = y_centered.dot(&y_centered);
81 let ss_res = residuals.dot(&residuals);
82
83 1.0 - (ss_res / ss_tot)
84}
85
86pub fn validate_fit_arrays(x: &Array2<f64>, y: &Array1<f64>) -> PyResult<()> {
88 if x.nrows() != y.len() {
89 return Err(PyValueError::new_err(format!(
90 "X and y have incompatible shapes: X has {} samples, y has {} samples",
91 x.nrows(),
92 y.len()
93 )));
94 }
95
96 if x.nrows() == 0 {
97 return Err(PyValueError::new_err("X and y must not be empty"));
98 }
99
100 if x.ncols() == 0 {
101 return Err(PyValueError::new_err("X must have at least one feature"));
102 }
103
104 Ok(())
105}
106
107pub fn validate_predict_array(x: &Array2<f64>) -> PyResult<()> {
109 if x.nrows() == 0 {
110 return Err(SklearsPythonError::ValidationError("X must not be empty".to_string()).into());
111 }
112
113 if x.ncols() == 0 {
114 return Err(SklearsPythonError::ValidationError(
115 "X must have at least one feature".to_string(),
116 )
117 .into());
118 }
119
120 validate_finite_values(x)?;
122
123 Ok(())
124}
125
126pub fn validate_fit_arrays_enhanced(
128 x: &Array2<f64>,
129 y: &Array1<f64>,
130) -> Result<(), SklearsPythonError> {
131 if x.nrows() != y.len() {
132 return Err(SklearsPythonError::ValidationError(format!(
133 "X and y have incompatible shapes: X has {} samples, y has {} samples",
134 x.nrows(),
135 y.len()
136 )));
137 }
138
139 if x.nrows() == 0 {
140 return Err(SklearsPythonError::ValidationError(
141 "X and y must not be empty".to_string(),
142 ));
143 }
144
145 if x.ncols() == 0 {
146 return Err(SklearsPythonError::ValidationError(
147 "X must have at least one feature".to_string(),
148 ));
149 }
150
151 validate_finite_values(x)?;
153 validate_finite_values_1d(y)?;
154
155 check_memory_usage(x, y)?;
157
158 Ok(())
159}
160
161pub fn validate_finite_values(arr: &Array2<f64>) -> Result<(), SklearsPythonError> {
163 for value in arr.iter() {
164 if !value.is_finite() {
165 return Err(SklearsPythonError::NumericalError(
166 "Input array contains non-finite values (NaN or infinite)".to_string(),
167 ));
168 }
169 }
170 Ok(())
171}
172
173pub fn validate_finite_values_1d(arr: &Array1<f64>) -> Result<(), SklearsPythonError> {
175 for value in arr.iter() {
176 if !value.is_finite() {
177 return Err(SklearsPythonError::NumericalError(
178 "Target array contains non-finite values (NaN or infinite)".to_string(),
179 ));
180 }
181 }
182 Ok(())
183}
184
185pub fn check_memory_usage(x: &Array2<f64>, y: &Array1<f64>) -> Result<(), SklearsPythonError> {
187 let x_memory_mb = (x.len() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
188 let y_memory_mb = (y.len() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
189 let total_memory_mb = x_memory_mb + y_memory_mb;
190
191 if total_memory_mb > 1024.0 {
193 eprintln!("Warning: Large dataset detected ({:.2} MB). Consider using batch processing or data preprocessing to reduce memory usage.", total_memory_mb);
194 }
195
196 if total_memory_mb > 4096.0 {
198 return Err(SklearsPythonError::MemoryError(format!(
199 "Dataset is too large ({:.2} MB). Consider using data preprocessing to reduce memory usage.",
200 total_memory_mb
201 )));
202 }
203
204 Ok(())
205}
206
/// Returns the amount of memory, in MB, assumed to be available.
///
/// This is a fixed figure of 8192 MB; the function does not query the
/// operating system.
pub fn get_available_memory_mb() -> f64 {
    const ASSUMED_AVAILABLE_MB: f64 = 8192.0;
    ASSUMED_AVAILABLE_MB
}
214
215pub fn pyarray_to_core_array1<T>(py_array: PyReadonlyArray1<T>) -> PyResult<Array1<T>>
217where
218 T: Clone + Element,
219{
220 let array_view = py_array.as_array();
221 Ok(Array1::from_vec(array_view.iter().cloned().collect()))
222}
223
224pub fn pyarray_to_core_array2<T>(py_array: PyReadonlyArray2<T>) -> PyResult<Array2<T>>
226where
227 T: Clone + Element,
228{
229 let array_view = py_array.as_array();
230 let shape = array_view.shape();
231 if shape.len() != 2 {
232 return Err(PyValueError::new_err("Expected a 2D array"));
233 }
234 let rows = shape[0];
235 let cols = shape[1];
236 Array2::from_shape_vec((rows, cols), array_view.iter().cloned().collect())
237 .map_err(|_| PyValueError::new_err("Failed to convert NumPy array to ndarray"))
238}
239
240pub fn core_array1_to_py<'py, T>(py: Python<'py>, array: &Array1<T>) -> Py<PyArray1<T>>
242where
243 T: Clone + Element,
244{
245 let numpy_array = numpy::ndarray::Array1::from_vec(array.to_vec());
246 PyArray1::from_owned_array(py, numpy_array).into()
247}
248
249pub fn core_array2_to_py<'py, T>(py: Python<'py>, array: &Array2<T>) -> PyResult<Py<PyArray2<T>>>
251where
252 T: Clone + Element,
253{
254 let (rows, cols) = array.dim();
255 let data: Vec<T> = array.iter().cloned().collect();
256 let numpy_array = numpy::ndarray::Array2::from_shape_vec((rows, cols), data)
257 .map_err(|_| PyValueError::new_err("Failed to convert ndarray to NumPy array"))?;
258 Ok(PyArray2::from_owned_array(py, numpy_array).into())
259}
260
/// Plain container for performance measurements.
///
/// `Default` yields `None` for every optional measurement and zero for both
/// cache counters.
#[derive(Clone, Debug, Default)]
pub struct PerformanceStats {
    /// Training time (milliseconds, per the field name); `None` until set.
    pub training_time_ms: Option<f64>,
    /// Prediction time (milliseconds, per the field name); `None` until set.
    pub prediction_time_ms: Option<f64>,
    /// Memory usage (MB, per the field name); `None` until set.
    pub memory_usage_mb: Option<f64>,
    /// Count of cache hits.
    pub cache_hits: usize,
    /// Count of cache misses.
    pub cache_misses: usize,
}