import numpy as np
import pytest
import scirs2
class TestNormalization:
    """Tests for ``normalize_array_py``, ``normalize_vector_py`` and ``Normalizer``."""

    def test_normalize_array_zscore(self):
        """Column-wise z-score output has zero mean and roughly unit sample std."""
        samples = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        result = scirs2.normalize_array_py(samples, method="zscore", axis=0)

        assert np.allclose(result.mean(axis=0), [0, 0], atol=1e-10)
        # The spread is only checked to within 0.1 of one.
        assert np.allclose(result.std(axis=0, ddof=1), [1, 1], atol=0.1)

    def test_normalize_array_minmax(self):
        """Column-wise min-max output lies in [0, 1] and touches both ends."""
        samples = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        result = scirs2.normalize_array_py(samples, method="minmax", axis=0)

        within_bounds = (result >= 0) & (result <= 1)
        assert np.all(within_bounds)
        assert np.allclose(result.min(axis=0), [0, 0])
        assert np.allclose(result.max(axis=0), [1, 1])

    def test_normalize_array_l2(self):
        """Row-wise L2 normalization yields rows of unit Euclidean norm."""
        samples = np.array([[3.0, 4.0], [6.0, 8.0]])
        result = scirs2.normalize_array_py(samples, method="l2", axis=1)

        lengths = np.linalg.norm(result, axis=1)
        assert np.allclose(lengths, [1, 1], atol=1e-10)

    def test_normalize_vector_zscore(self):
        """Z-scoring a 1-D vector gives zero mean and roughly unit sample std."""
        vector = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        result = scirs2.normalize_vector_py(vector, method="zscore")

        assert np.allclose(result.mean(), 0, atol=1e-10)
        assert np.allclose(result.std(ddof=1), 1, atol=0.1)

    def test_normalizer_fit_transform(self):
        """A fitted ``Normalizer`` centers its training data and keeps test shape."""
        train = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        held_out = np.array([[2.0, 3.0], [4.0, 5.0]])

        scaler = scirs2.Normalizer(method="zscore", axis=0)
        scaler.fit(train)
        train_out = scaler.transform(train)
        held_out_out = scaler.transform(held_out)

        assert np.allclose(train_out.mean(axis=0), [0, 0], atol=1e-10)
        assert held_out_out.shape == (2, 2)
class TestPCA:
    """Tests for ``scirs2.PCA`` fit/transform behavior on seeded random data."""

    def test_pca_basic(self):
        """Projecting 5-D data onto 2 components yields an (n_samples, 2) array."""
        np.random.seed(42)
        data = np.random.randn(100, 5)
        pca = scirs2.PCA(n_components=2)
        pca.fit(data)
        transformed = pca.transform(data)
        assert transformed.shape == (100, 2)

    def test_pca_variance_explained(self):
        """Explained-variance ratios sum to a value in (0, 1] and start in descending order."""
        np.random.seed(42)
        data = np.random.randn(50, 10)
        pca = scirs2.PCA(n_components=5)
        pca.fit(data)
        explained_variance = pca.explained_variance_ratio()
        assert 0 < sum(explained_variance) <= 1.0
        assert explained_variance[0] >= explained_variance[1]

    def test_pca_inverse_transform(self):
        """``inverse_transform`` maps the projection back to the input shape."""
        np.random.seed(42)
        data = np.random.randn(20, 4)
        pca = scirs2.PCA(n_components=2)
        pca.fit(data)
        transformed = pca.transform(data)
        reconstructed = pca.inverse_transform(transformed)
        assert reconstructed.shape == data.shape

    def test_pca_full_components(self):
        """Keeping as many components as features preserves the data shape."""
        # Seed like the sibling tests so the input (and any failure) is reproducible.
        np.random.seed(42)
        data = np.random.randn(30, 5)
        pca = scirs2.PCA(n_components=5)
        pca.fit(data)
        transformed = pca.transform(data)
        assert transformed.shape == (30, 5)
class TestTSNE:
    """Shape checks for ``scirs2.TSNE.fit_transform``."""

    def test_tsne_basic(self):
        """Two shifted 10-D clusters embed into an (n_samples, 2) array."""
        np.random.seed(42)
        # Draw the clusters in the same order as a single vstack literal would.
        near_origin = np.random.randn(20, 10) + 0
        shifted = np.random.randn(20, 10) + 5
        points = np.vstack([near_origin, shifted])

        embedding = scirs2.TSNE(
            n_components=2, perplexity=5.0, max_iter=100
        ).fit_transform(points)
        assert embedding.shape == (40, 2)

    def test_tsne_3d(self):
        """Requesting three components yields an (n_samples, 3) embedding."""
        np.random.seed(42)
        points = np.random.randn(30, 8)

        model = scirs2.TSNE(n_components=3, max_iter=50)
        embedding = model.fit_transform(points)
        assert embedding.shape == (30, 3)

    def test_tsne_with_parameters(self):
        """Explicit perplexity, learning rate and iteration count are accepted."""
        np.random.seed(42)
        points = np.random.randn(25, 5)

        options = {
            "n_components": 2,
            "perplexity": 3.0,
            "learning_rate": 100.0,
            "max_iter": 200,
        }
        model = scirs2.TSNE(**options)
        embedding = model.fit_transform(points)
        assert embedding.shape == (25, 2)
class TestUMAP:
    """Shape checks for ``scirs2.UMAP.fit_transform``."""

    def test_umap_basic(self):
        """A 2-component embedding of 50 samples has shape (50, 2)."""
        np.random.seed(42)
        points = np.random.randn(50, 10)

        reducer = scirs2.UMAP(n_components=2, n_neighbors=5)
        embedding = reducer.fit_transform(points)
        assert embedding.shape == (50, 2)

    def test_umap_3d(self):
        """A 3-component embedding of 40 samples has shape (40, 3)."""
        np.random.seed(42)
        points = np.random.randn(40, 8)

        reducer = scirs2.UMAP(n_components=3, n_neighbors=5)
        embedding = reducer.fit_transform(points)
        assert embedding.shape == (40, 3)

    def test_umap_with_parameters(self):
        """Explicit ``min_dist`` and ``metric`` arguments are accepted."""
        np.random.seed(42)
        points = np.random.randn(30, 6)

        options = {
            "n_components": 2,
            "n_neighbors": 10,
            "min_dist": 0.1,
            "metric": "euclidean",
        }
        embedding = scirs2.UMAP(**options).fit_transform(points)
        assert embedding.shape == (30, 2)
class TestFeatureEngineering:
    """Tests for polynomial expansion, binarization, log/power transforms and binning."""

    def test_polynomial_features(self):
        """Degree-2 expansion never produces fewer columns than the input."""
        data = np.array([[1.0, 2.0], [3.0, 4.0]])
        poly = scirs2.PolynomialFeatures(degree=2)
        poly_features = poly.transform(data)
        assert poly_features.shape[1] >= data.shape[1]

    def test_polynomial_features_degree_3(self):
        """Degree-3 expansion of two features produces more than two columns."""
        data = np.array([[1.0, 2.0]])
        poly = scirs2.PolynomialFeatures(degree=3)
        poly_features = poly.transform(data)
        assert poly_features.shape[1] > 2

    def test_binarize(self):
        """Values below the 3.5 threshold map to 0 and values above it to 1."""
        data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        binarized = scirs2.binarize_py(data, threshold=3.5)
        # One assertion per statement: several asserts fused onto a single
        # line are a SyntaxError and stop the whole module from importing.
        assert binarized[0, 0] == 0
        assert binarized[0, 1] == 0
        assert binarized[1, 0] == 1
        assert binarized[1, 2] == 1

    def test_log_transform(self):
        """The transform maps 1 to 0 and grows with the input."""
        data = np.array([[1.0, 10.0, 100.0]])
        log_data = scirs2.log_transform_py(data)
        assert np.allclose(log_data[0, 0], 0, atol=1e-10)
        # Lower bounds are consistent with a natural log
        # (ln 10 ~= 2.30, ln 100 ~= 4.61).
        assert log_data[0, 1] > 2.0
        assert log_data[0, 2] > 4.0

    def test_power_transform(self):
        """A fitted ``PowerTransformer`` preserves the input shape."""
        data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        power_transformer = scirs2.PowerTransformer()
        power_transformer.fit(data)
        transformed = power_transformer.transform(data)
        assert transformed.shape == data.shape

    def test_discretize_equal_width(self):
        """Equal-width binning into 5 bins yields at most 5 distinct labels."""
        data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
        discretized = scirs2.discretize_equal_width_py(data, n_bins=5)
        assert len(np.unique(discretized)) <= 5

    def test_discretize_equal_frequency(self):
        """Equal-frequency binning into 4 bins yields at most 4 distinct labels."""
        data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
        discretized = scirs2.discretize_equal_frequency_py(data, n_bins=4)
        assert len(np.unique(discretized)) <= 4
class TestEncoding:
    """Tests for ``scirs2.OneHotEncoder`` and ``scirs2.OrdinalEncoder``."""

    def test_one_hot_encoder(self):
        """Three distinct categories give three columns with one hot entry per row."""
        categories = np.array([[0], [1], [2], [1], [0]])

        one_hot = scirs2.OneHotEncoder()
        one_hot.fit(categories)
        result = one_hot.transform(categories)

        assert result.shape == (5, 3)
        row_totals = result.sum(axis=1)
        assert np.allclose(row_totals, [1, 1, 1, 1, 1])

    def test_ordinal_encoder(self):
        """String categories encode to a single numeric column."""
        colours = [["red"], ["blue"], ["green"], ["red"], ["blue"]]

        ordinal = scirs2.OrdinalEncoder()
        ordinal.fit(colours)
        result = ordinal.transform(colours)

        assert result.shape == (5, 1)
        accepted_dtypes = [np.int32, np.int64, np.float64]
        assert result.dtype in accepted_dtypes

    def test_one_hot_encoder_multiple_categories(self):
        """Five distinct categories give a 5x5 one-hot matrix."""
        categories = np.array([[0], [1], [2], [3], [4]])

        one_hot = scirs2.OneHotEncoder()
        one_hot.fit(categories)
        result = one_hot.transform(categories)

        assert result.shape == (5, 5)
class TestImputation:
    """Tests for ``scirs2.SimpleImputer`` strategies and ``scirs2.KNNImputer``."""

    def test_simple_imputer_mean(self):
        """The missing entry is replaced by the mean of its column (1 and 7 -> 4)."""
        table = np.array([[1.0, 2.0], [np.nan, 3.0], [7.0, 6.0]])

        filler = scirs2.SimpleImputer(strategy="mean")
        filler.fit(table)
        filled = filler.transform(table)

        assert not np.isnan(filled).any()
        assert np.allclose(filled[1, 0], 4.0)

    def test_simple_imputer_median(self):
        """Median imputation leaves no NaN values behind."""
        table = np.array([[1.0], [np.nan], [3.0], [5.0]])

        filler = scirs2.SimpleImputer(strategy="median")
        filler.fit(table)
        filled = filler.transform(table)

        assert not np.isnan(filled).any()

    def test_simple_imputer_constant(self):
        """Constant imputation writes ``fill_value`` into every missing cell."""
        table = np.array([[1.0, np.nan], [np.nan, 3.0]])

        filler = scirs2.SimpleImputer(strategy="constant", fill_value=0.0)
        filler.fit(table)
        filled = filler.transform(table)

        assert not np.isnan(filled).any()
        assert filled[0, 1] == 0.0
        assert filled[1, 0] == 0.0

    def test_knn_imputer(self):
        """The KNN estimate for the gap lies between its column neighbours (2 and 8)."""
        table = np.array([
            [1.0, 2.0, 3.0],
            [4.0, np.nan, 6.0],
            [7.0, 8.0, 9.0],
        ])

        filler = scirs2.KNNImputer(n_neighbors=2)
        filler.fit(table)
        filled = filler.transform(table)

        assert not np.isnan(filled).any()
        estimate = filled[1, 1]
        assert 2.0 <= estimate <= 8.0
class TestScaling:
    """Tests for ``scirs2.MaxAbsScaler`` and ``scirs2.QuantileTransformer``."""

    def test_maxabs_scaler(self):
        """Scaled values lie in [-1, 1] and each column's largest magnitude is 1."""
        samples = np.array([[-2.0, 4.0], [-1.0, 2.0], [0.0, 0.0], [1.0, -2.0]])

        max_abs = scirs2.MaxAbsScaler()
        max_abs.fit(samples)
        result = max_abs.transform(samples)

        within_bounds = (result >= -1) & (result <= 1)
        assert np.all(within_bounds)
        column_peaks = np.abs(result).max(axis=0)
        assert np.allclose(column_peaks, [1, 1])

    def test_quantile_transformer(self):
        """A fitted ``QuantileTransformer`` preserves the input shape."""
        # Construct the transformer before seeding, as the statement order matters
        # only for readability here; the random draw below is still seeded.
        quantiles = scirs2.QuantileTransformer(n_quantiles=10)
        np.random.seed(42)
        samples = np.random.randn(50, 2)

        quantiles.fit(samples)
        result = quantiles.transform(samples)
        assert result.shape == samples.shape
class TestEdgeCases:
    """Degenerate-input tests: the library may either handle the input or raise.

    For the "may raise" cases the library call is wrapped in ``try`` and any
    exception from it counts as an acceptable rejection. The assertions run
    outside the ``try`` block: ``AssertionError`` is a subclass of
    ``Exception``, so asserting inside ``try``/``except Exception: pass``
    would silently swallow failures and make the test unable to fail.
    """

    def test_normalize_single_value(self):
        """Min-max normalizing a 1x1 array keeps its shape."""
        data = np.array([[5.0]])
        normalized = scirs2.normalize_array_py(data, method="minmax", axis=0)
        assert normalized.shape == (1, 1)

    def test_pca_more_components_than_features(self):
        """Asking for more components than features either raises or is capped."""
        # Seeded so that a failure is reproducible.
        np.random.seed(42)
        data = np.random.randn(50, 5)
        pca = scirs2.PCA(n_components=10)
        try:
            pca.fit(data)
            transformed = pca.transform(data)
        except Exception:
            # The exception type raised by the bindings is not known here;
            # any rejection of the oversized request is accepted.
            return
        assert transformed.shape[1] <= 5

    def test_imputation_all_missing(self):
        """An all-NaN column either raises or is returned with unchanged shape."""
        imputer = scirs2.SimpleImputer(strategy="mean")
        data = np.array([[np.nan], [np.nan], [np.nan]])
        try:
            imputer.fit(data)
            imputed = imputer.transform(data)
        except Exception:
            # Rejecting a column with no observed values is acceptable.
            return
        assert imputed.shape == data.shape

    def test_empty_data(self):
        """A (0, 2) array either raises or comes back as a (0, 2) array."""
        data = np.array([]).reshape(0, 2)
        try:
            normalized = scirs2.normalize_array_py(data, method="zscore", axis=0)
        except Exception:
            # Rejecting empty input is acceptable.
            return
        assert normalized.shape == (0, 2)
if __name__ == "__main__":
    # Run this file's tests verbosely and exit with pytest's return code, so
    # that running the file directly reports failure to the shell or CI.
    raise SystemExit(pytest.main([__file__, "-v"]))