use std::fmt::{Display, Formatter};
use std::marker::PhantomData;
use std::mem;
use std::time::Instant;
use super::supervised_train::SupervisedTrain;
use crate::model::{ComparisonEntry, supervised::Algorithm};
use crate::settings::{RegressionSettings, SVRParameters, SettingsError, XGRegressorParameters};
use crate::utils::distance::{Distance, KNNRegressorDistance};
use crate::utils::kernels::SmartcoreKernel;
use smartcore::api::SupervisedEstimator;
use smartcore::error::{Failed, FailedError};
use smartcore::linalg::basic::arrays::{Array1, Array2, MutArrayView1, MutArrayView2};
use smartcore::linalg::traits::cholesky::CholeskyDecomposable;
use smartcore::linalg::traits::evd::EVDDecomposable;
use smartcore::linalg::traits::qr::QRDecomposable;
use smartcore::linalg::traits::svd::SVDDecomposable;
use smartcore::model_selection::{BaseKFold, CrossValidationResult};
use smartcore::numbers::floatnum::FloatNumber;
use smartcore::numbers::realnum::RealNumber;
use smartcore::svm::svr::{SVR as SmartcoreSVR, SVRParameters as SmartcoreSVRParameters};
use smartcore::xgboost::xgb_regressor::{
XGRegressor as SmartcoreXGRegressor, XGRegressorParameters as SmartcoreXGRegressorParameters,
};
/// Validated support-vector-regressor hyper-parameters held in the `INPUT` numeric type.
///
/// Instances are built by `PreparedSVRParameters::new` and turned into smartcore
/// parameters by `PreparedSVRParameters::to_parameters`.
#[derive(Clone)]
struct PreparedSVRParameters<INPUT>
where
INPUT: RealNumber + FloatNumber,
{
/// Epsilon value; `new` only accepts finite, non-negative settings here.
eps: INPUT,
/// `C` value; `new` only accepts finite, strictly positive settings here.
c: INPUT,
/// Tolerance; `new` only accepts finite, strictly positive settings here.
tol: INPUT,
/// Kernel obtained from the settings; cloned into every parameter set produced.
kernel_template: smartcore::svm::Kernels,
}
impl<INPUT> PreparedSVRParameters<INPUT>
where
    INPUT: RealNumber + FloatNumber,
{
    /// Validates the user-facing SVR settings and converts them to `INPUT`.
    ///
    /// # Errors
    /// Fails when the kernel cannot be converted, or when `eps`, `c` or `tol`
    /// is rejected by the scalar conversion helpers.
    fn new(settings: &SVRParameters) -> Result<Self, Failed> {
        // The kernel is resolved first so its error takes precedence over scalar errors.
        let SmartcoreKernel {
            kernel: kernel_template,
            ..
        } = settings.kernel.to_smartcore()?;
        // Struct fields are evaluated in the order written: eps, then c, then tol.
        Ok(Self {
            eps: convert_nonnegative_scalar::<INPUT>(
                settings.eps,
                "support vector regressor epsilon",
            )?,
            c: convert_positive_scalar::<INPUT>(settings.c, "support vector regressor C")?,
            tol: convert_positive_scalar::<INPUT>(
                settings.tol,
                "support vector regressor tolerance",
            )?,
            kernel_template,
        })
    }

    /// Produces a fresh smartcore parameter set carrying a clone of the kernel.
    fn to_parameters(&self) -> SmartcoreSVRParameters<INPUT> {
        let Self {
            eps,
            c,
            tol,
            kernel_template,
        } = self;
        SmartcoreSVRParameters {
            eps: *eps,
            c: *c,
            tol: *tol,
            kernel: Some(kernel_template.clone()),
        }
    }
}
/// A fitted smartcore SVR bundled with the parameter set it borrows from.
///
/// The model's lifetime parameter is `'static` only nominally: it actually
/// refers to the heap allocation owned by `_parameters`.
///
/// Field order is significant. Rust drops struct fields in declaration order,
/// so `model` (the borrower) is declared first and is dropped before
/// `_parameters` (the allocation it refers to). Do not reorder these fields.
pub struct OwnedSupportVectorRegressor<INPUT, OUTPUT, InputArray, OutputArray>
where
    INPUT: RealNumber + FloatNumber + 'static,
    OUTPUT: FloatNumber + 'static,
    InputArray: MutArrayView2<INPUT>
        + Sized
        + Clone
        + Array2<INPUT>
        + QRDecomposable<INPUT>
        + SVDDecomposable<INPUT>
        + EVDDecomposable<INPUT>
        + CholeskyDecomposable<INPUT>
        + 'static,
    OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
    /// Fitted model; must be dropped before the parameters it references.
    model: SmartcoreSVR<'static, INPUT, InputArray, Vec<INPUT>>,
    /// Owner of the parameters referenced by `model`; kept alive for the model's sake.
    _parameters: Box<SmartcoreSVRParameters<INPUT>>,
    /// Ties the otherwise unused `OUTPUT` / `OutputArray` type parameters to the struct.
    _marker: PhantomData<(OUTPUT, OutputArray)>,
}
impl<INPUT, OUTPUT, InputArray, OutputArray>
OwnedSupportVectorRegressor<INPUT, OUTPUT, InputArray, OutputArray>
where
INPUT: RealNumber + FloatNumber + 'static,
OUTPUT: FloatNumber + 'static,
InputArray: MutArrayView2<INPUT>
+ Sized
+ Clone
+ Array2<INPUT>
+ QRDecomposable<INPUT>
+ SVDDecomposable<INPUT>
+ EVDDecomposable<INPUT>
+ CholeskyDecomposable<INPUT>
+ 'static,
OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
/// Fits a smartcore SVR on `x` / `targets` and bundles it with its parameters.
///
/// The parameters are moved into a `Box`, the model is fitted against a
/// reference into that box, and the model's lifetime parameter is then
/// rewritten to `'static` so model and parameters can be stored together.
///
/// # Errors
/// Propagates the `Failed` returned by `SmartcoreSVR::fit`.
fn fit_with_parameters(
x: &InputArray,
targets: &Vec<INPUT>,
params: SmartcoreSVRParameters<INPUT>,
) -> Result<Self, Failed> {
// Boxing gives the parameters a heap address that does not change when the box is moved.
let boxed_params = Box::new(params);
let params_ref: &SmartcoreSVRParameters<INPUT> = boxed_params.as_ref();
let model = SmartcoreSVR::fit(x, targets, params_ref)?;
// SAFETY: only the lifetime parameter of the model type is changed. The
// referenced parameters live in `boxed_params`, which is stored in the
// returned struct together with the model, so the allocation is not freed
// before the owning struct is dropped.
// NOTE(review): this relies on the model not reading its parameters while
// it is being dropped, and on moving the `Box` not invalidating the
// reference derived from it — confirm against smartcore and Miri.
let model = unsafe {
mem::transmute::<
SmartcoreSVR<'_, INPUT, InputArray, Vec<INPUT>>,
SmartcoreSVR<'static, INPUT, InputArray, Vec<INPUT>>,
>(model)
};
Ok(Self {
_parameters: boxed_params,
model,
_marker: PhantomData,
})
}
/// Predicts with the wrapped model and converts the results to `OutputArray`.
///
/// # Errors
/// Propagates prediction failures and failures converting predictions to `OUTPUT`.
fn predict_array(&self, x: &InputArray) -> Result<OutputArray, Failed> {
let predictions = self.model.predict(x)?;
convert_input_predictions_to_output_array::<INPUT, OUTPUT, OutputArray>(predictions)
}
}
/// Converts an `f32` setting into `INPUT`, accepting only finite values `>= 0`.
///
/// `name` is used as the prefix of every error message.
///
/// # Errors
/// Returns a `ParametersError` when the value is not finite, is negative, or
/// cannot be represented by `INPUT`.
fn convert_nonnegative_scalar<INPUT>(value: f32, name: &str) -> Result<INPUT, Failed>
where
    INPUT: RealNumber + FloatNumber,
{
    let reject = |message: String| Failed::because(FailedError::ParametersError, &message);
    // Guards are tried top to bottom: the finiteness check wins over the sign check.
    match f64::from(value) {
        as_f64 if !as_f64.is_finite() => Err(reject(format!("{name} must be finite"))),
        as_f64 if as_f64 < 0.0 => Err(reject(format!("{name} must be non-negative"))),
        as_f64 => INPUT::from_f64(as_f64).ok_or_else(|| {
            reject(format!(
                "{name} value {as_f64} cannot be represented by the input type"
            ))
        }),
    }
}
/// Converts an `f32` setting into `INPUT`, accepting only finite values `> 0`.
///
/// `name` is used as the prefix of every error message.
///
/// # Errors
/// Returns a `ParametersError` when the value is not finite, is zero or
/// negative, or cannot be represented by `INPUT`.
fn convert_positive_scalar<INPUT>(value: f32, name: &str) -> Result<INPUT, Failed>
where
    INPUT: RealNumber + FloatNumber,
{
    let reject = |message: String| Failed::because(FailedError::ParametersError, &message);
    // Guards are tried top to bottom: the finiteness check wins over the sign check.
    match f64::from(value) {
        as_f64 if !as_f64.is_finite() => Err(reject(format!("{name} must be finite"))),
        as_f64 if as_f64 <= 0.0 => Err(reject(format!("{name} must be positive"))),
        as_f64 => INPUT::from_f64(as_f64).ok_or_else(|| {
            reject(format!(
                "{name} value {as_f64} cannot be represented by the input type"
            ))
        }),
    }
}
/// Converts every target in `targets` from `OUTPUT` to `INPUT`.
///
/// # Errors
/// Stops at, and returns, the first element that
/// `convert_output_value_to_input` rejects.
fn convert_targets_to_input<INPUT, OUTPUT, OutputArray>(
    targets: &OutputArray,
) -> Result<Vec<INPUT>, Failed>
where
    INPUT: RealNumber + FloatNumber,
    OUTPUT: FloatNumber,
    OutputArray: Array1<OUTPUT>,
{
    // Collecting into `Result` short-circuits on the first failed conversion.
    targets
        .iterator(0)
        .map(|target| convert_output_value_to_input::<INPUT, OUTPUT>(*target))
        .collect()
}
/// Converts a single target value from `OUTPUT` to `INPUT` via `f64`.
///
/// # Errors
/// Returns a `ParametersError` when the value has no `f64` representation, is
/// not finite, or cannot be represented by `INPUT`.
fn convert_output_value_to_input<INPUT, OUTPUT>(value: OUTPUT) -> Result<INPUT, Failed>
where
    INPUT: RealNumber + FloatNumber,
    OUTPUT: FloatNumber,
{
    let reject = |message: &str| Failed::because(FailedError::ParametersError, message);
    let Some(as_f64) = value.to_f64() else {
        return Err(reject("target value not representable as f64"));
    };
    if as_f64.is_finite() {
        INPUT::from_f64(as_f64).ok_or_else(|| {
            reject(&format!(
                "support vector regressor target {as_f64} cannot be represented by the input type"
            ))
        })
    } else {
        Err(reject("target value must be finite"))
    }
}
/// Converts `INPUT` predictions to `OUTPUT` and packs them into an `OutputArray`.
///
/// # Errors
/// Propagates the first conversion failure reported by
/// `convert_input_predictions_to_output_vec`.
fn convert_input_predictions_to_output_array<INPUT, OUTPUT, OutputArray>(
    predictions: Vec<INPUT>,
) -> Result<OutputArray, Failed>
where
    INPUT: RealNumber + FloatNumber,
    OUTPUT: FloatNumber,
    OutputArray: Array1<OUTPUT>,
{
    convert_input_predictions_to_output_vec::<INPUT, OUTPUT>(predictions)
        .map(|values| <OutputArray as Array1<OUTPUT>>::from_vec_slice(&values))
}
fn convert_input_predictions_to_output_vec<INPUT, OUTPUT>(
predictions: Vec<INPUT>,
) -> Result<Vec<OUTPUT>, Failed>
where
INPUT: RealNumber + FloatNumber,
OUTPUT: FloatNumber,
{
let mut converted = Vec::with_capacity(predictions.len());
for value in predictions {
converted.push(convert_input_value_to_output::<INPUT, OUTPUT>(value)?);
}
Ok(converted)
}
/// Converts a single prediction from `INPUT` to `OUTPUT` via `f64`.
///
/// # Errors
/// Returns a prediction failure when the value has no `f64` representation,
/// is not finite, or cannot be represented by `OUTPUT`.
fn convert_input_value_to_output<INPUT, OUTPUT>(value: INPUT) -> Result<OUTPUT, Failed>
where
    INPUT: RealNumber + FloatNumber,
    OUTPUT: FloatNumber,
{
    let Some(as_f64) = value.to_f64() else {
        return Err(Failed::predict("prediction value not representable as f64"));
    };
    if as_f64.is_finite() {
        OUTPUT::from_f64(as_f64).ok_or_else(|| {
            Failed::predict(&format!(
                "support vector regressor prediction {as_f64} cannot be represented in the output type"
            ))
        })
    } else {
        Err(Failed::predict(
            "support vector regressor produced a non-finite prediction",
        ))
    }
}
/// Clones the XGBoost settings and rejects values outside the supported ranges.
///
/// # Errors
/// Returns a `ParametersError` describing the first violated rule; rules are
/// checked in the order listed in the table below.
fn sanitize_xgboost_parameters(
    params: &XGRegressorParameters,
) -> Result<SmartcoreXGRegressorParameters, Failed> {
    let candidate: SmartcoreXGRegressorParameters = params.clone();
    // (violated?, message) pairs; order defines which error is reported first.
    let rules = [
        (
            candidate.n_estimators == 0,
            "xgboost number of estimators must be positive",
        ),
        (
            candidate.max_depth == 0,
            "xgboost maximum depth must be positive",
        ),
        (
            !candidate.learning_rate.is_finite(),
            "xgboost learning rate must be finite",
        ),
        (
            candidate.learning_rate <= 0.0,
            "xgboost learning rate must be greater than zero",
        ),
        (
            candidate.min_child_weight == 0,
            "xgboost minimum child weight must be positive",
        ),
        (
            !candidate.lambda.is_finite(),
            "xgboost lambda must be finite",
        ),
        (
            candidate.lambda < 0.0,
            "xgboost lambda must be non-negative",
        ),
        (!candidate.gamma.is_finite(), "xgboost gamma must be finite"),
        (candidate.gamma < 0.0, "xgboost gamma must be non-negative"),
        (
            !candidate.base_score.is_finite(),
            "xgboost base score must be finite",
        ),
        (
            !candidate.subsample.is_finite(),
            "xgboost subsample ratio must be finite",
        ),
        (
            !(0.0 < candidate.subsample && candidate.subsample <= 1.0),
            "xgboost subsample ratio must be in (0, 1]",
        ),
    ];
    if let Some((_, reason)) = rules.iter().find(|(violated, _)| *violated) {
        return Err(Failed::because(FailedError::ParametersError, *reason));
    }
    Ok(candidate)
}
/// The regression models this module can train, cross-validate and predict with.
///
/// Each variant wraps the corresponding model. The support vector and XGBoost
/// variants hold an `Option` that is `None` until a model has been fitted.
pub enum RegressionAlgorithm<INPUT, OUTPUT, InputArray, OutputArray>
where
INPUT: RealNumber + FloatNumber + 'static,
OUTPUT: FloatNumber + 'static,
InputArray: MutArrayView2<INPUT>
+ Sized
+ Clone
+ Array2<INPUT>
+ QRDecomposable<INPUT>
+ SVDDecomposable<INPUT>
+ EVDDecomposable<INPUT>
+ CholeskyDecomposable<INPUT>
+ 'static,
OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
/// Smartcore decision tree regressor.
DecisionTreeRegressor(
smartcore::tree::decision_tree_regressor::DecisionTreeRegressor<
INPUT,
OUTPUT,
InputArray,
OutputArray,
>,
),
/// Smartcore random forest regressor.
RandomForestRegressor(
smartcore::ensemble::random_forest_regressor::RandomForestRegressor<
INPUT,
OUTPUT,
InputArray,
OutputArray,
>,
),
/// Smartcore extra trees regressor.
ExtraTreesRegressor(
smartcore::ensemble::extra_trees_regressor::ExtraTreesRegressor<
INPUT,
OUTPUT,
InputArray,
OutputArray,
>,
),
/// Smartcore linear regression.
Linear(
smartcore::linear::linear_regression::LinearRegression<
INPUT,
OUTPUT,
InputArray,
OutputArray,
>,
),
/// Smartcore ridge regression.
Ridge(
smartcore::linear::ridge_regression::RidgeRegression<
INPUT,
OUTPUT,
InputArray,
OutputArray,
>,
),
/// Smartcore LASSO regression.
Lasso(smartcore::linear::lasso::Lasso<INPUT, OUTPUT, InputArray, OutputArray>),
/// Smartcore elastic net regression.
ElasticNet(smartcore::linear::elastic_net::ElasticNet<INPUT, OUTPUT, InputArray, OutputArray>),
/// Smartcore k-nearest-neighbours regressor using the crate's distance wrapper.
KNNRegressor(
smartcore::neighbors::knn_regressor::KNNRegressor<
INPUT,
OUTPUT,
InputArray,
OutputArray,
KNNRegressorDistance<INPUT>,
>,
),
/// Support vector regressor together with its owned parameters; `None` until fitted.
SupportVectorRegressor(
Option<OwnedSupportVectorRegressor<INPUT, OUTPUT, InputArray, OutputArray>>,
),
/// Smartcore XGBoost regressor; `None` until fitted.
XGBoostRegressor(Option<SmartcoreXGRegressor<INPUT, OUTPUT, InputArray, OutputArray>>),
}
impl<INPUT, OUTPUT, InputArray, OutputArray>
SupervisedTrain<
INPUT,
OUTPUT,
InputArray,
OutputArray,
RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
> for RegressionAlgorithm<INPUT, OUTPUT, InputArray, OutputArray>
where
INPUT: RealNumber + FloatNumber + 'static,
OUTPUT: FloatNumber + 'static,
InputArray: MutArrayView2<INPUT>
+ Sized
+ Clone
+ Array2<INPUT>
+ QRDecomposable<INPUT>
+ SVDDecomposable<INPUT>
+ EVDDecomposable<INPUT>
+ CholeskyDecomposable<INPUT>
+ 'static,
OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
#[allow(clippy::too_many_lines)]
fn fit_inner(
self,
x: &InputArray,
y: &OutputArray,
settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
) -> Result<Self, Failed> {
Ok(match self {
Self::Linear(_) => {
Self::Linear(smartcore::linear::linear_regression::LinearRegression::fit(
x,
y,
settings.linear_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"linear regression settings not provided",
)
})?,
)?)
}
Self::Lasso(_) => Self::Lasso(smartcore::linear::lasso::Lasso::fit(
x,
y,
settings.lasso_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"lasso regression settings not provided",
)
})?,
)?),
Self::Ridge(_) => {
Self::Ridge(smartcore::linear::ridge_regression::RidgeRegression::fit(
x,
y,
settings.ridge_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"ridge regression settings not provided",
)
})?,
)?)
}
Self::ElasticNet(_) => {
Self::ElasticNet(smartcore::linear::elastic_net::ElasticNet::fit(
x,
y,
settings.elastic_net_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"elastic net regression settings not provided",
)
})?,
)?)
}
Self::RandomForestRegressor(_) => Self::RandomForestRegressor(
smartcore::ensemble::random_forest_regressor::RandomForestRegressor::fit(
x,
y,
settings
.random_forest_regressor_settings
.clone()
.ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"random forest regressor settings not provided",
)
})?,
)?,
),
Self::ExtraTreesRegressor(_) => Self::ExtraTreesRegressor(
smartcore::ensemble::extra_trees_regressor::ExtraTreesRegressor::fit(
x,
y,
settings.extra_trees_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"extra trees regressor settings not provided",
)
})?,
)?,
),
Self::DecisionTreeRegressor(_) => Self::DecisionTreeRegressor(
smartcore::tree::decision_tree_regressor::DecisionTreeRegressor::fit(
x,
y,
settings
.decision_tree_regressor_settings
.clone()
.ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"decision tree regressor settings not provided",
)
})?,
)?,
),
Self::KNNRegressor(_) => {
let knn_settings = settings.knn_regressor_settings.as_ref().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"KNN regressor settings not provided",
)
})?;
let params = knn_settings
.to_regressor_params::<INPUT>()
.map_err(|e| Failed::because(FailedError::ParametersError, &e.to_string()))?;
Self::KNNRegressor(smartcore::neighbors::knn_regressor::KNNRegressor::fit(
x, y, params,
)?)
}
Self::SupportVectorRegressor(_) => {
let svr_settings = settings.svr_settings.as_ref().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"support vector regressor settings not provided",
)
})?;
let prepared = PreparedSVRParameters::<INPUT>::new(svr_settings)?;
let params = prepared.to_parameters();
let targets = convert_targets_to_input::<INPUT, OUTPUT, OutputArray>(y)?;
let model = OwnedSupportVectorRegressor::fit_with_parameters(x, &targets, params)?;
Self::SupportVectorRegressor(Some(model))
}
Self::XGBoostRegressor(_) => {
let params = settings.xgboost_settings.as_ref().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"xgboost regressor settings not provided",
)
})?;
let sanitized = sanitize_xgboost_parameters(params)?;
let model = SmartcoreXGRegressor::fit(x, y, sanitized)?;
Self::XGBoostRegressor(Some(model))
}
})
}
#[allow(clippy::too_many_lines)]
#[allow(clippy::type_complexity)]
fn cv(
self,
x: &InputArray,
y: &OutputArray,
settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
) -> Result<(CrossValidationResult, Self), Failed> {
let metric = Self::metric(settings)
.map_err(|e| Failed::because(FailedError::ParametersError, &e.to_string()))?;
match self {
RegressionAlgorithm::Linear(_) => Self::cross_validate_with(
self,
smartcore::linear::linear_regression::LinearRegression::new(),
settings.linear_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"linear regression settings not provided",
)
})?,
x,
y,
settings,
&settings.get_kfolds(),
metric,
),
RegressionAlgorithm::Ridge(_) => Self::cross_validate_with(
self,
smartcore::linear::ridge_regression::RidgeRegression::new(),
settings.ridge_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"ridge regression settings not provided",
)
})?,
x,
y,
settings,
&settings.get_kfolds(),
metric,
),
RegressionAlgorithm::Lasso(_) => Self::cross_validate_with(
self,
smartcore::linear::lasso::Lasso::new(),
settings.lasso_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"lasso regression settings not provided",
)
})?,
x,
y,
settings,
&settings.get_kfolds(),
metric,
),
RegressionAlgorithm::ElasticNet(_) => Self::cross_validate_with(
self,
smartcore::linear::elastic_net::ElasticNet::new(),
settings.elastic_net_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"elastic net regression settings not provided",
)
})?,
x,
y,
settings,
&settings.get_kfolds(),
metric,
),
RegressionAlgorithm::RandomForestRegressor(_) => Self::cross_validate_with(
self,
smartcore::ensemble::random_forest_regressor::RandomForestRegressor::new(),
settings
.random_forest_regressor_settings
.clone()
.ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"random forest regressor settings not provided",
)
})?,
x,
y,
settings,
&settings.get_kfolds(),
metric,
),
RegressionAlgorithm::ExtraTreesRegressor(_) => Self::cross_validate_with(
self,
smartcore::ensemble::extra_trees_regressor::ExtraTreesRegressor::new(),
settings.extra_trees_settings.clone().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"extra trees regressor settings not provided",
)
})?,
x,
y,
settings,
&settings.get_kfolds(),
metric,
),
RegressionAlgorithm::DecisionTreeRegressor(_) => Self::cross_validate_with(
self,
smartcore::tree::decision_tree_regressor::DecisionTreeRegressor::new(),
settings
.decision_tree_regressor_settings
.clone()
.ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"decision tree regressor settings not provided",
)
})?,
x,
y,
settings,
&settings.get_kfolds(),
metric,
),
RegressionAlgorithm::KNNRegressor(_) => {
let knn_settings = settings.knn_regressor_settings.as_ref().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"KNN regressor settings not provided",
)
})?;
let params = knn_settings
.to_regressor_params::<INPUT>()
.map_err(|e| Failed::because(FailedError::ParametersError, &e.to_string()))?;
Self::cross_validate_with(
self,
smartcore::neighbors::knn_regressor::KNNRegressor::new(),
params,
x,
y,
settings,
&settings.get_kfolds(),
metric,
)
}
RegressionAlgorithm::SupportVectorRegressor(_) => {
let svr_settings = settings.svr_settings.as_ref().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"support vector regressor settings not provided",
)
})?;
let prepared = PreparedSVRParameters::<INPUT>::new(svr_settings)?;
let kfold = settings.get_kfolds();
let mut test_scores: Vec<f64> = Vec::with_capacity(kfold.n_splits);
let mut train_scores: Vec<f64> = Vec::with_capacity(kfold.n_splits);
for (train_idx, test_idx) in kfold.split(x) {
let train_x = x.take(&train_idx, 0);
let train_y = y.take(&train_idx);
let test_x = x.take(&test_idx, 0);
let test_y = y.take(&test_idx);
let train_targets =
convert_targets_to_input::<INPUT, OUTPUT, OutputArray>(&train_y)?;
let params = prepared.to_parameters();
let fold_model = OwnedSupportVectorRegressor::fit_with_parameters(
&train_x,
&train_targets,
params,
)?;
let train_pred = fold_model.predict_array(&train_x)?;
let test_pred = fold_model.predict_array(&test_x)?;
train_scores.push(metric(&train_y, &train_pred));
test_scores.push(metric(&test_y, &test_pred));
}
let result = CrossValidationResult {
test_score: test_scores,
train_score: train_scores,
};
let final_params = prepared.to_parameters();
let final_targets = convert_targets_to_input::<INPUT, OUTPUT, OutputArray>(y)?;
let final_model = OwnedSupportVectorRegressor::fit_with_parameters(
x,
&final_targets,
final_params,
)?;
Ok((result, Self::SupportVectorRegressor(Some(final_model))))
}
RegressionAlgorithm::XGBoostRegressor(_) => {
let params = settings.xgboost_settings.as_ref().ok_or_else(|| {
Failed::because(
FailedError::ParametersError,
"xgboost regressor settings not provided",
)
})?;
let sanitized = sanitize_xgboost_parameters(params)?;
let kfold = settings.get_kfolds();
let mut test_scores: Vec<f64> = Vec::with_capacity(kfold.n_splits);
let mut train_scores: Vec<f64> = Vec::with_capacity(kfold.n_splits);
for (train_idx, test_idx) in kfold.split(x) {
let train_x = x.take(&train_idx, 0);
let train_y = y.take(&train_idx);
let test_x = x.take(&test_idx, 0);
let test_y = y.take(&test_idx);
let fold_model =
SmartcoreXGRegressor::fit(&train_x, &train_y, sanitized.clone())?;
let train_pred =
convert_input_predictions_to_output_array::<INPUT, OUTPUT, OutputArray>(
fold_model.predict(&train_x)?,
)?;
let test_pred =
convert_input_predictions_to_output_array::<INPUT, OUTPUT, OutputArray>(
fold_model.predict(&test_x)?,
)?;
train_scores.push(metric(&train_y, &train_pred));
test_scores.push(metric(&test_y, &test_pred));
}
let result = CrossValidationResult {
test_score: test_scores,
train_score: train_scores,
};
let final_model = SmartcoreXGRegressor::fit(x, y, sanitized)?;
Ok((result, Self::XGBoostRegressor(Some(final_model))))
}
}
}
fn metric(
settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
) -> Result<fn(&OutputArray, &OutputArray) -> f64, SettingsError> {
settings.get_metric()
}
}
/// Convenience constructors and inherent wrappers around the trait-provided operations.
impl<INPUT, OUTPUT, InputArray, OutputArray>
RegressionAlgorithm<INPUT, OUTPUT, InputArray, OutputArray>
where
INPUT: RealNumber + FloatNumber + 'static,
OUTPUT: FloatNumber + 'static,
InputArray: MutArrayView2<INPUT>
+ Sized
+ Clone
+ Array2<INPUT>
+ QRDecomposable<INPUT>
+ SVDDecomposable<INPUT>
+ EVDDecomposable<INPUT>
+ CholeskyDecomposable<INPUT>
+ 'static,
OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
/// Creates the `Linear` variant wrapping `LinearRegression::new()`.
#[must_use]
pub fn default_linear() -> Self {
Self::Linear(smartcore::linear::linear_regression::LinearRegression::new())
}
/// Creates the `Ridge` variant wrapping `RidgeRegression::new()`.
#[must_use]
pub fn default_ridge() -> Self {
Self::Ridge(smartcore::linear::ridge_regression::RidgeRegression::new())
}
/// Creates the `Lasso` variant wrapping `Lasso::new()`.
#[must_use]
pub fn default_lasso() -> Self {
Self::Lasso(smartcore::linear::lasso::Lasso::new())
}
/// Creates the `ElasticNet` variant wrapping `ElasticNet::new()`.
#[must_use]
pub fn default_elastic_net() -> Self {
Self::ElasticNet(smartcore::linear::elastic_net::ElasticNet::new())
}
/// Creates the `RandomForestRegressor` variant wrapping `RandomForestRegressor::new()`.
#[must_use]
pub fn default_random_forest() -> Self {
Self::RandomForestRegressor(
smartcore::ensemble::random_forest_regressor::RandomForestRegressor::new(),
)
}
/// Creates the `ExtraTreesRegressor` variant wrapping `ExtraTreesRegressor::new()`.
#[must_use]
pub fn default_extra_trees_regressor() -> Self {
Self::ExtraTreesRegressor(
smartcore::ensemble::extra_trees_regressor::ExtraTreesRegressor::new(),
)
}
/// Creates the `DecisionTreeRegressor` variant wrapping `DecisionTreeRegressor::new()`.
#[must_use]
pub fn default_decision_tree() -> Self {
Self::DecisionTreeRegressor(
smartcore::tree::decision_tree_regressor::DecisionTreeRegressor::new(),
)
}
/// Creates the `KNNRegressor` variant wrapping `KNNRegressor::new()`.
#[must_use]
pub fn default_knn_regressor() -> Self {
Self::KNNRegressor(smartcore::neighbors::knn_regressor::KNNRegressor::new())
}
/// Creates the `SupportVectorRegressor` variant with no fitted model (`None`).
#[must_use]
pub fn default_support_vector_regressor() -> Self {
Self::SupportVectorRegressor(None)
}
/// Creates the `XGBoostRegressor` variant with no fitted model (`None`).
#[must_use]
pub fn default_xgboost_regressor() -> Self {
Self::XGBoostRegressor(None)
}
/// Lists the algorithms enabled by `settings`.
///
/// Delegates to the `Algorithm` trait implementation for this type.
pub fn all_algorithms(
settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
) -> Vec<Self> {
<Self as Algorithm<RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>>>::all_algorithms(
settings,
)
}
/// Trains this algorithm on `x` / `y` by delegating to `SupervisedTrain::fit`.
///
/// # Errors
/// Returns whatever `Failed` the `SupervisedTrain::fit` call reports.
#[allow(clippy::missing_errors_doc)]
pub fn fit(
self,
x: &InputArray,
y: &OutputArray,
settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
) -> Result<Self, Failed> {
<Self as SupervisedTrain<
INPUT,
OUTPUT,
InputArray,
OutputArray,
RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
>>::fit(self, x, y, settings)
}
/// Cross-validates this algorithm by delegating to `SupervisedTrain::cv`.
///
/// # Errors
/// Returns whatever `Failed` the `SupervisedTrain::cv` call reports.
#[allow(clippy::missing_errors_doc)]
pub fn cv(
self,
x: &InputArray,
y: &OutputArray,
settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
) -> Result<(CrossValidationResult, Self), Failed> {
<Self as SupervisedTrain<
INPUT,
OUTPUT,
InputArray,
OutputArray,
RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
>>::cv(self, x, y, settings)
}
}
impl<INPUT, OUTPUT, InputArray, OutputArray>
    Algorithm<RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>>
    for RegressionAlgorithm<INPUT, OUTPUT, InputArray, OutputArray>
where
    INPUT: RealNumber + FloatNumber + 'static,
    OUTPUT: FloatNumber + 'static,
    InputArray: MutArrayView2<INPUT>
        + Sized
        + Clone
        + Array2<INPUT>
        + QRDecomposable<INPUT>
        + SVDDecomposable<INPUT>
        + EVDDecomposable<INPUT>
        + CholeskyDecomposable<INPUT>
        + 'static,
    OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
    type Input = INPUT;
    type Output = OUTPUT;
    type InputArray = InputArray;
    type OutputArray = OutputArray;

    /// Predicts targets for `x` with the wrapped model.
    ///
    /// # Errors
    /// Fails when the support vector or XGBoost variant holds no fitted model,
    /// and propagates any failure from the underlying prediction or conversion.
    fn predict(&self, x: &Self::InputArray) -> Result<Self::OutputArray, Failed> {
        match self {
            Self::DecisionTreeRegressor(regressor) => regressor.predict(x),
            Self::RandomForestRegressor(regressor) => regressor.predict(x),
            Self::ExtraTreesRegressor(regressor) => regressor.predict(x),
            Self::Linear(regressor) => regressor.predict(x),
            Self::Ridge(regressor) => regressor.predict(x),
            Self::Lasso(regressor) => regressor.predict(x),
            Self::ElasticNet(regressor) => regressor.predict(x),
            Self::KNNRegressor(regressor) => regressor.predict(x),
            Self::SupportVectorRegressor(Some(regressor)) => regressor.predict_array(x),
            Self::SupportVectorRegressor(None) => Err(Failed::predict(
                "support vector regressor is not trained",
            )),
            Self::XGBoostRegressor(Some(regressor)) => {
                convert_input_predictions_to_output_array::<INPUT, OUTPUT, OutputArray>(
                    regressor.predict(x)?,
                )
            }
            Self::XGBoostRegressor(None) => {
                Err(Failed::predict("xgboost regressor is not trained"))
            }
        }
    }

    /// Runs cross-validation and records the scores, the model and the elapsed time.
    ///
    /// # Errors
    /// Propagates any failure reported by `cv`.
    fn cross_validate_model(
        self,
        x: &Self::InputArray,
        y: &Self::OutputArray,
        settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
    ) -> Result<ComparisonEntry<Self>, Failed> {
        let started = Instant::now();
        let (result, algorithm) = self.cv(x, y, settings)?;
        let duration = started.elapsed();
        Ok(ComparisonEntry {
            result,
            algorithm,
            duration,
        })
    }

    /// Lists the algorithms to compare for the given settings.
    ///
    /// Six algorithms are always candidates. Extra trees, KNN (unless its
    /// distance is Mahalanobis), SVR and XGBoost are added only when their
    /// settings are present. Anything on `settings.skiplist` is then removed.
    fn all_algorithms(
        settings: &RegressionSettings<INPUT, OUTPUT, InputArray, OutputArray>,
    ) -> Vec<Self> {
        let mut candidates = vec![
            Self::default_linear(),
            Self::default_ridge(),
            Self::default_lasso(),
            Self::default_elastic_net(),
            Self::default_random_forest(),
            Self::default_decision_tree(),
        ];

        if settings.extra_trees_settings.is_some() {
            candidates.push(Self::default_extra_trees_regressor());
        }

        let knn_enabled = settings
            .knn_regressor_settings
            .as_ref()
            .is_some_and(|knn| !matches!(knn.distance, Distance::Mahalanobis));
        if knn_enabled {
            candidates.push(Self::default_knn_regressor());
        }

        if settings.svr_settings.is_some() {
            candidates.push(Self::default_support_vector_regressor());
        }

        if settings.xgboost_settings.is_some() {
            candidates.push(Self::default_xgboost_regressor());
        }

        // Keep a candidate only when no skiplist entry compares equal to it.
        candidates
            .retain(|candidate| settings.skiplist.iter().all(|skipped| skipped != candidate));
        candidates
    }
}
impl<INPUT, OUTPUT, InputArray, OutputArray> PartialEq
for RegressionAlgorithm<INPUT, OUTPUT, InputArray, OutputArray>
where
INPUT: RealNumber + FloatNumber + 'static,
OUTPUT: FloatNumber + 'static,
InputArray: MutArrayView2<INPUT>
+ Sized
+ Clone
+ Array2<INPUT>
+ QRDecomposable<INPUT>
+ SVDDecomposable<INPUT>
+ EVDDecomposable<INPUT>
+ CholeskyDecomposable<INPUT>
+ 'static,
OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
fn eq(&self, other: &Self) -> bool {
matches!(
(self, other),
(
Self::DecisionTreeRegressor(_),
Self::DecisionTreeRegressor(_)
) | (
Self::RandomForestRegressor(_),
Self::RandomForestRegressor(_)
) | (Self::Linear(_), Self::Linear(_))
| (Self::Ridge(_), Self::Ridge(_))
| (Self::Lasso(_), Self::Lasso(_))
| (Self::ElasticNet(_), Self::ElasticNet(_))
| (Self::ExtraTreesRegressor(_), Self::ExtraTreesRegressor(_))
| (Self::KNNRegressor(_), Self::KNNRegressor(_))
| (
Self::SupportVectorRegressor(_),
Self::SupportVectorRegressor(_)
)
| (Self::XGBoostRegressor(_), Self::XGBoostRegressor(_))
)
}
}
impl<INPUT, OUTPUT, InputArray, OutputArray> Default
    for RegressionAlgorithm<INPUT, OUTPUT, InputArray, OutputArray>
where
    INPUT: RealNumber + FloatNumber + 'static,
    OUTPUT: FloatNumber + 'static,
    InputArray: MutArrayView2<INPUT>
        + Sized
        + Clone
        + Array2<INPUT>
        + QRDecomposable<INPUT>
        + SVDDecomposable<INPUT>
        + EVDDecomposable<INPUT>
        + CholeskyDecomposable<INPUT>
        + 'static,
    OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT> + 'static,
{
    /// The default algorithm is the `Linear` variant, as built by `default_linear`.
    fn default() -> Self {
        Self::default_linear()
    }
}
impl<INPUT, OUTPUT, InputArray, OutputArray> Display
    for RegressionAlgorithm<INPUT, OUTPUT, InputArray, OutputArray>
where
    INPUT: RealNumber + FloatNumber,
    OUTPUT: FloatNumber,
    InputArray: MutArrayView2<INPUT>
        + Sized
        + Clone
        + Array2<INPUT>
        + QRDecomposable<INPUT>
        + SVDDecomposable<INPUT>
        + EVDDecomposable<INPUT>
        + CholeskyDecomposable<INPUT>,
    OutputArray: MutArrayView1<OUTPUT> + Sized + Clone + Array1<OUTPUT>,
{
    /// Writes the human-readable name of the algorithm variant.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::DecisionTreeRegressor(_) => "Decision Tree Regressor",
            Self::RandomForestRegressor(_) => "Random Forest Regressor",
            Self::ExtraTreesRegressor(_) => "Extra Trees Regressor",
            Self::Linear(_) => "Linear Regressor",
            Self::Ridge(_) => "Ridge Regressor",
            Self::Lasso(_) => "LASSO Regressor",
            Self::ElasticNet(_) => "Elastic Net Regressor",
            Self::KNNRegressor(_) => "KNN Regressor",
            Self::SupportVectorRegressor(_) => "Support Vector Regressor",
            Self::XGBoostRegressor(_) => "XGBoost Regressor",
        };
        f.write_str(label)
    }
}
#[cfg(test)]
mod tests {
    use super::{RegressionAlgorithm, RegressionSettings};
    use crate::DenseMatrix;
    use smartcore::error::FailedError;

    /// Fitting the KNN regressor without KNN settings must report a parameters error.
    #[test]
    #[allow(clippy::field_reassign_with_default)]
    fn knn_regressor_requires_settings() {
        type Matrix = DenseMatrix<f64>;

        let features: Matrix = DenseMatrix::from_2d_array(&[&[0.0_f64], &[1.0_f64]]).unwrap();
        let targets: Vec<f64> = vec![0.0, 1.0];

        let mut settings: RegressionSettings<f64, f64, Matrix, Vec<f64>> =
            RegressionSettings::default();
        settings.knn_regressor_settings = None;

        let knn: RegressionAlgorithm<f64, f64, Matrix, Vec<f64>> =
            RegressionAlgorithm::default_knn_regressor();
        let Err(failure) = knn.fit(&features, &targets, &settings) else {
            panic!("expected training to fail");
        };
        assert_eq!(failure.error(), FailedError::ParametersError);
    }
}