use numpy::Element;
pub use numpy::{PyArray1, PyArray2, PyReadonlyArray1, PyReadonlyArray2};
pub use pyo3::exceptions::PyValueError;
pub use pyo3::prelude::*;
pub use scirs2_core::ndarray::{Array1, Array2};
#[cfg(feature = "parallel")]
pub use rayon::prelude::*;
pub type LinearModelResult<T> = Result<T, PyValueError>;
#[derive(Debug)]
pub enum SklearsPythonError {
ValidationError(String),
FittingError(String),
PredictionError(String),
MemoryError(String),
NumericalError(String),
ConfigurationError(String),
}
impl std::fmt::Display for SklearsPythonError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SklearsPythonError::ValidationError(msg) => write!(f, "Validation Error: {}", msg),
SklearsPythonError::FittingError(msg) => write!(f, "Model Fitting Error: {}", msg),
SklearsPythonError::PredictionError(msg) => write!(f, "Prediction Error: {}", msg),
SklearsPythonError::MemoryError(msg) => write!(f, "Memory Error: {}", msg),
SklearsPythonError::NumericalError(msg) => write!(f, "Numerical Error: {}", msg),
SklearsPythonError::ConfigurationError(msg) => {
write!(f, "Configuration Error: {}", msg)
}
}
}
}
impl std::error::Error for SklearsPythonError {}
impl From<SklearsPythonError> for PyErr {
fn from(err: SklearsPythonError) -> Self {
match err {
SklearsPythonError::ValidationError(msg) => PyValueError::new_err(msg),
SklearsPythonError::FittingError(msg) => PyRuntimeError::new_err(msg),
SklearsPythonError::PredictionError(msg) => PyRuntimeError::new_err(msg),
SklearsPythonError::MemoryError(msg) => {
use pyo3::exceptions::PyMemoryError;
PyMemoryError::new_err(msg)
}
SklearsPythonError::NumericalError(msg) => PyArithmeticError::new_err(msg),
SklearsPythonError::ConfigurationError(msg) => PyValueError::new_err(msg),
}
}
}
use pyo3::exceptions::{PyArithmeticError, PyRuntimeError};
pub fn calculate_r2_score(y_true: &Array1<f64>, y_pred: &Array1<f64>) -> f64 {
let y_mean = y_true.mean().unwrap_or(0.0);
let y_centered: Array1<f64> = y_true.mapv(|y| y - y_mean);
let residuals: Array1<f64> = y_true - y_pred;
let ss_tot = y_centered.dot(&y_centered);
let ss_res = residuals.dot(&residuals);
1.0 - (ss_res / ss_tot)
}
pub fn validate_fit_arrays(x: &Array2<f64>, y: &Array1<f64>) -> PyResult<()> {
if x.nrows() != y.len() {
return Err(PyValueError::new_err(format!(
"X and y have incompatible shapes: X has {} samples, y has {} samples",
x.nrows(),
y.len()
)));
}
if x.nrows() == 0 {
return Err(PyValueError::new_err("X and y must not be empty"));
}
if x.ncols() == 0 {
return Err(PyValueError::new_err("X must have at least one feature"));
}
Ok(())
}
pub fn validate_predict_array(x: &Array2<f64>) -> PyResult<()> {
if x.nrows() == 0 {
return Err(SklearsPythonError::ValidationError("X must not be empty".to_string()).into());
}
if x.ncols() == 0 {
return Err(SklearsPythonError::ValidationError(
"X must have at least one feature".to_string(),
)
.into());
}
validate_finite_values(x)?;
Ok(())
}
pub fn validate_fit_arrays_enhanced(
x: &Array2<f64>,
y: &Array1<f64>,
) -> Result<(), SklearsPythonError> {
if x.nrows() != y.len() {
return Err(SklearsPythonError::ValidationError(format!(
"X and y have incompatible shapes: X has {} samples, y has {} samples",
x.nrows(),
y.len()
)));
}
if x.nrows() == 0 {
return Err(SklearsPythonError::ValidationError(
"X and y must not be empty".to_string(),
));
}
if x.ncols() == 0 {
return Err(SklearsPythonError::ValidationError(
"X must have at least one feature".to_string(),
));
}
validate_finite_values(x)?;
validate_finite_values_1d(y)?;
check_memory_usage(x, y)?;
Ok(())
}
pub fn validate_finite_values(arr: &Array2<f64>) -> Result<(), SklearsPythonError> {
for value in arr.iter() {
if !value.is_finite() {
return Err(SklearsPythonError::NumericalError(
"Input array contains non-finite values (NaN or infinite)".to_string(),
));
}
}
Ok(())
}
pub fn validate_finite_values_1d(arr: &Array1<f64>) -> Result<(), SklearsPythonError> {
for value in arr.iter() {
if !value.is_finite() {
return Err(SklearsPythonError::NumericalError(
"Target array contains non-finite values (NaN or infinite)".to_string(),
));
}
}
Ok(())
}
pub fn check_memory_usage(x: &Array2<f64>, y: &Array1<f64>) -> Result<(), SklearsPythonError> {
let x_memory_mb = (x.len() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
let y_memory_mb = (y.len() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
let total_memory_mb = x_memory_mb + y_memory_mb;
if total_memory_mb > 1024.0 {
eprintln!("Warning: Large dataset detected ({:.2} MB). Consider using batch processing or data preprocessing to reduce memory usage.", total_memory_mb);
}
if total_memory_mb > 4096.0 {
return Err(SklearsPythonError::MemoryError(format!(
"Dataset is too large ({:.2} MB). Consider using data preprocessing to reduce memory usage.",
total_memory_mb
)));
}
Ok(())
}
pub fn get_available_memory_mb() -> f64 {
8192.0
}
pub fn pyarray_to_core_array1<T>(py_array: PyReadonlyArray1<T>) -> PyResult<Array1<T>>
where
T: Clone + Element,
{
let array_view = py_array.as_array();
Ok(Array1::from_vec(array_view.iter().cloned().collect()))
}
pub fn pyarray_to_core_array2<T>(py_array: PyReadonlyArray2<T>) -> PyResult<Array2<T>>
where
T: Clone + Element,
{
let array_view = py_array.as_array();
let shape = array_view.shape();
if shape.len() != 2 {
return Err(PyValueError::new_err("Expected a 2D array"));
}
let rows = shape[0];
let cols = shape[1];
Array2::from_shape_vec((rows, cols), array_view.iter().cloned().collect())
.map_err(|_| PyValueError::new_err("Failed to convert NumPy array to ndarray"))
}
pub fn core_array1_to_py<'py, T>(py: Python<'py>, array: &Array1<T>) -> Py<PyArray1<T>>
where
T: Clone + Element,
{
let numpy_array = numpy::ndarray::Array1::from_vec(array.to_vec());
PyArray1::from_owned_array(py, numpy_array).into()
}
pub fn core_array2_to_py<'py, T>(py: Python<'py>, array: &Array2<T>) -> PyResult<Py<PyArray2<T>>>
where
T: Clone + Element,
{
let (rows, cols) = array.dim();
let data: Vec<T> = array.iter().cloned().collect();
let numpy_array = numpy::ndarray::Array2::from_shape_vec((rows, cols), data)
.map_err(|_| PyValueError::new_err("Failed to convert ndarray to NumPy array"))?;
Ok(PyArray2::from_owned_array(py, numpy_array).into())
}
#[derive(Debug, Clone, Default)]
pub struct PerformanceStats {
pub training_time_ms: Option<f64>,
pub prediction_time_ms: Option<f64>,
pub memory_usage_mb: Option<f64>,
pub cache_hits: usize,
pub cache_misses: usize,
}