import numpy as np
import pytest
import scirs2
class TestKMeans:
    """Exercise ``scirs2.KMeans`` on small, hand-built 2-D datasets."""

    @staticmethod
    def _two_blobs():
        """Return six 2-D points: three near (1, 1) followed by three near (5, 5)."""
        low_group = [[1.0, 1.0], [1.2, 0.8], [0.8, 1.2]]
        high_group = [[5.0, 5.0], [5.2, 4.8], [4.8, 5.2]]
        return np.array(low_group + high_group)

    def test_kmeans_basic(self):
        """Two well-separated groups must be assigned to two distinct clusters."""
        model = scirs2.KMeans(n_clusters=2)
        model.fit(self._two_blobs())
        assigned = model.labels

        # One label per sample, and exactly two distinct labels overall.
        assert len(assigned) == 6
        assert len(set(assigned)) == 2

        # Each group of three shares one label; the two groups differ.
        low_label, high_label = assigned[0], assigned[3]
        assert low_label == assigned[1] == assigned[2]
        assert high_label == assigned[4] == assigned[5]
        assert low_label != high_label

    def test_kmeans_inertia(self):
        """``inertia_`` must be strictly positive and below 1.0 on the two-blob data."""
        model = scirs2.KMeans(n_clusters=2)
        model.fit(self._two_blobs())
        assert 0 < model.inertia_ < 1.0

    def test_kmeans_single_cluster(self):
        """With ``n_clusters=1`` every sample must receive the same label."""
        points = np.array(
            [
                [1.0, 1.0],
                [1.1, 0.9],
                [0.9, 1.1],
                [1.2, 0.8],
                [0.8, 1.2],
            ]
        )
        model = scirs2.KMeans(n_clusters=1)
        model.fit(points)
        assigned = model.labels

        # Compare every label against the first one.
        for label in assigned:
            assert label == assigned[0]
class TestClusterMetrics:
    """Check the scirs2 clustering-quality metrics on a clearly separated dataset."""

    @staticmethod
    def _labelled_blobs():
        """Return ``(points, assignment)``: six 2-D points and their int32 labels.

        The first three points sit near (1, 1) and carry label 0; the last
        three sit near (5, 5) and carry label 1.
        """
        points = np.array(
            [
                [1.0, 1.0],
                [1.2, 0.8],
                [0.8, 1.2],
                [5.0, 5.0],
                [5.2, 4.8],
                [4.8, 5.2],
            ]
        )
        assignment = np.array([0, 0, 0, 1, 1, 1], dtype=np.int32)
        return points, assignment

    def test_silhouette_score(self):
        """Silhouette score must lie in [-1, 1] and exceed 0.8 for this data."""
        points, assignment = self._labelled_blobs()
        value = scirs2.silhouette_score_py(points, assignment)

        # Range check first, then the tighter bound this test expects.
        assert value >= -1.0
        assert value <= 1.0
        assert value > 0.8

    def test_davies_bouldin_score(self):
        """Davies-Bouldin score must be non-negative and below 1.0 for this data."""
        points, assignment = self._labelled_blobs()
        value = scirs2.davies_bouldin_score_py(points, assignment)
        assert 0 <= value < 1.0

    def test_calinski_harabasz_score(self):
        """Calinski-Harabasz score must be strictly positive for this data."""
        points, assignment = self._labelled_blobs()
        value = scirs2.calinski_harabasz_score_py(points, assignment)
        assert value > 0
class TestPreprocessing:
    """Check the scirs2 preprocessing helpers on tiny hand-computed inputs."""

    def test_standardize(self):
        """Standardized columns must have zero mean and entries below 2 in magnitude."""
        raw = np.array(
            [
                [1.0, 100.0],
                [2.0, 200.0],
                [3.0, 300.0],
            ]
        )
        # NOTE(review): the meaning of the positional ``True`` is not visible
        # here; presumably it enables scaling by the standard deviation. Confirm
        # against the scirs2 signature.
        scaled = scirs2.standardize_py(raw, True)

        per_column_mean = scaled.mean(axis=0)
        assert np.allclose(per_column_mean, np.zeros(2), atol=1e-10)

        largest_magnitude = np.abs(scaled).max()
        assert largest_magnitude < 2.0

    def test_normalize_l2(self):
        """After ``"l2"`` normalization every row must have unit Euclidean norm."""
        raw = np.array(
            [
                [3.0, 4.0],
                [6.0, 8.0],
            ]
        )
        unit_rows = scirs2.normalize_py(raw, "l2")

        lengths = np.linalg.norm(unit_rows, axis=1)
        assert np.allclose(lengths, np.ones(2), atol=1e-10)
class TestEdgeCases:
    """Run ``scirs2.KMeans`` on larger, randomly generated inputs.

    The data comes from a seeded ``numpy.random.Generator`` rather than the
    global unseeded ``np.random`` state, so every run sees the same samples
    and any failure can be reproduced exactly.
    """

    def test_kmeans_many_clusters(self):
        """Fitting 10 clusters on 100 samples yields at most 10 distinct labels."""
        rng = np.random.default_rng(0)  # fixed seed keeps the test deterministic
        data = rng.standard_normal((100, 2))

        kmeans = scirs2.KMeans(n_clusters=10)
        kmeans.fit(data)
        labels = kmeans.labels

        # One label per sample, never more distinct labels than requested.
        assert len(labels) == 100
        assert len(set(labels)) <= 10

    def test_high_dimensional_data(self):
        """KMeans accepts 10-dimensional input and labels every sample."""
        rng = np.random.default_rng(1)  # fixed seed keeps the test deterministic
        data = rng.standard_normal((50, 10))

        kmeans = scirs2.KMeans(n_clusters=3)
        kmeans.fit(data)
        labels = kmeans.labels

        # One label per sample, never more distinct labels than requested.
        assert len(labels) == 50
        assert len(set(labels)) <= 3
if __name__ == "__main__":
    # Allow running this file directly. Propagate pytest's exit code so that
    # a failing run exits non-zero instead of silently reporting success.
    raise SystemExit(pytest.main([__file__, "-v"]))