import numpy as np
import pytest
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import cross_val_score
from ferrolearn import (
DecisionTreeClassifier,
ElasticNet,
GaussianNB,
KMeans,
KNeighborsClassifier,
Lasso,
LinearRegression,
LogisticRegression,
PCA,
RandomForestClassifier,
Ridge,
StandardScaler,
)
@pytest.fixture
def regression_data():
    """Provide a small synthetic regression problem as an ``(X, y)`` pair.

    The data comes from ``make_regression`` with 100 samples, 5 features,
    a noise level of 0.1 and a fixed ``random_state`` of 42.
    """
    return make_regression(
        n_samples=100,
        n_features=5,
        noise=0.1,
        random_state=42,
    )
@pytest.fixture
def classification_data():
    """Provide a small synthetic classification problem as an ``(X, y)`` pair.

    The data comes from ``make_classification`` with 100 samples, 5 features
    (3 of them informative) and a fixed ``random_state`` of 42.
    """
    return make_classification(
        n_samples=100,
        n_features=5,
        n_informative=3,
        random_state=42,
    )
@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(LinearRegression(), id="LinearRegression"),
        pytest.param(Ridge(), id="Ridge"),
        pytest.param(Lasso(), id="Lasso"),
        pytest.param(ElasticNet(), id="ElasticNet"),
    ],
)
def test_regressor_cross_val_score(estimator, regression_data):
    """Run each regressor through ``cross_val_score`` with R^2 scoring.

    Verifies that 3-fold cross-validation yields three float scores whose
    mean is strictly positive.
    """
    features, targets = regression_data
    scores = cross_val_score(estimator, features, targets, cv=3, scoring="r2")

    # One score per fold.
    assert len(scores) == 3
    for fold_score in scores:
        assert isinstance(fold_score, float)

    mean_r2 = np.mean(scores)
    assert mean_r2 > 0.0
@pytest.mark.parametrize(
    "estimator",
    [
        pytest.param(LogisticRegression(), id="LogisticRegression"),
        pytest.param(DecisionTreeClassifier(), id="DecisionTreeClassifier"),
        pytest.param(
            RandomForestClassifier(n_estimators=5, random_state=42),
            id="RandomForestClassifier",
        ),
        pytest.param(
            KNeighborsClassifier(n_neighbors=3),
            id="KNeighborsClassifier",
        ),
        pytest.param(GaussianNB(), id="GaussianNB"),
    ],
)
def test_classifier_cross_val_score(estimator, classification_data):
    """Run each classifier through ``cross_val_score`` with accuracy scoring.

    Verifies that 3-fold cross-validation yields three float scores whose
    mean exceeds 0.5.
    """
    features, labels = classification_data
    scores = cross_val_score(
        estimator, features, labels, cv=3, scoring="accuracy"
    )

    # One score per fold.
    assert len(scores) == 3
    for fold_score in scores:
        assert isinstance(fold_score, float)

    mean_accuracy = np.mean(scores)
    assert mean_accuracy > 0.5