scirs2-python 0.4.3

Python bindings for SciRS2 - A comprehensive scientific computing library in Rust (SciPy alternative)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
"""Tests for scirs2 transformation module."""

import numpy as np
import pytest
import scirs2


class TestNormalization:
    """Checks for the normalization helpers and the ``Normalizer`` class."""

    def test_normalize_array_zscore(self):
        """Column-wise z-score output has ~zero mean and ~unit sample std."""
        samples = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

        result = scirs2.normalize_array_py(samples, method="zscore", axis=0)

        # Per-column statistics of the normalized output.
        means = result.mean(axis=0)
        stds = result.std(axis=0, ddof=1)

        assert np.allclose(means, [0, 0], atol=1e-10)
        # Loose tolerance on the sample standard deviation.
        assert np.allclose(stds, [1, 1], atol=0.1)

    def test_normalize_array_minmax(self):
        """Column-wise min-max output spans exactly [0, 1]."""
        samples = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

        result = scirs2.normalize_array_py(samples, method="minmax", axis=0)

        # No element may fall outside the unit interval.
        inside_unit_interval = (result >= 0) & (result <= 1)
        assert np.all(inside_unit_interval)
        # Each column must touch both ends of the interval.
        assert np.allclose(result.min(axis=0), [0, 0])
        assert np.allclose(result.max(axis=0), [1, 1])

    def test_normalize_array_l2(self):
        """Row-wise L2 normalization yields rows of unit Euclidean norm."""
        samples = np.array([[3.0, 4.0], [6.0, 8.0]])

        result = scirs2.normalize_array_py(samples, method="l2", axis=1)

        norms_per_row = np.linalg.norm(result, axis=1)
        assert np.allclose(norms_per_row, [1, 1], atol=1e-10)

    def test_normalize_vector_zscore(self):
        """Z-score normalization of a 1-D vector."""
        vector = np.array([1.0, 2.0, 3.0, 4.0, 5.0])

        result = scirs2.normalize_vector_py(vector, method="zscore")

        # Zero mean, and sample std close to one.
        assert np.allclose(result.mean(), 0, atol=1e-10)
        assert np.allclose(result.std(ddof=1), 1, atol=0.1)

    def test_normalizer_fit_transform(self):
        """Fit a ``Normalizer`` on one array, then transform two arrays."""
        fit_rows = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        held_out_rows = np.array([[2.0, 3.0], [4.0, 5.0]])

        scaler = scirs2.Normalizer(method="zscore", axis=0)
        scaler.fit(fit_rows)
        fit_result = scaler.transform(fit_rows)
        held_out_result = scaler.transform(held_out_rows)

        # The data used for fitting comes back centred per column.
        assert np.allclose(fit_result.mean(axis=0), [0, 0], atol=1e-10)

        # For the held-out rows only the output shape is checked.
        assert held_out_result.shape == (2, 2)


class TestPCA:
    """Tests for the ``scirs2.PCA`` fit / transform / inverse_transform API.

    Every test seeds NumPy's global random generator before drawing its
    input, so the data handed to PCA is identical from run to run.
    """

    def test_pca_basic(self):
        """Reducing 5 features to 2 components yields a (100, 2) array."""
        np.random.seed(42)
        data = np.random.randn(100, 5)

        pca = scirs2.PCA(n_components=2)
        pca.fit(data)
        transformed = pca.transform(data)

        assert transformed.shape == (100, 2)

    def test_pca_variance_explained(self):
        """Explained-variance ratios are positive in total and at most 1."""
        np.random.seed(42)
        data = np.random.randn(50, 10)

        pca = scirs2.PCA(n_components=5)
        pca.fit(data)

        explained_variance = pca.explained_variance_ratio()

        # Ratios of the retained components cannot exceed the total variance.
        assert 0 < sum(explained_variance) <= 1.0
        # The first component must explain at least as much as the second.
        assert explained_variance[0] >= explained_variance[1]

    def test_pca_inverse_transform(self):
        """``inverse_transform`` maps back to the original feature space."""
        np.random.seed(42)
        data = np.random.randn(20, 4)

        pca = scirs2.PCA(n_components=2)
        pca.fit(data)
        transformed = pca.transform(data)
        reconstructed = pca.inverse_transform(transformed)

        # Only the shape is checked; with 2 of 4 components kept the
        # reconstruction is not asserted to match the original values.
        assert reconstructed.shape == data.shape

    def test_pca_full_components(self):
        """Keeping all 5 components preserves the input shape."""
        # Seed like the sibling tests so the input is reproducible.
        np.random.seed(42)
        data = np.random.randn(30, 5)

        pca = scirs2.PCA(n_components=5)
        pca.fit(data)
        transformed = pca.transform(data)

        assert transformed.shape == (30, 5)


class TestTSNE:
    """Shape checks for ``scirs2.TSNE.fit_transform``."""

    def test_tsne_basic(self):
        """Embed two shifted Gaussian clusters into 2 dimensions."""
        np.random.seed(42)
        # Two groups of 20 points in 10-D, the second offset by 5.
        first_cluster = np.random.randn(20, 10) + 0
        second_cluster = np.random.randn(20, 10) + 5
        points = np.vstack([first_cluster, second_cluster])

        model = scirs2.TSNE(n_components=2, perplexity=5.0, max_iter=100)
        embedding = model.fit_transform(points)

        assert embedding.shape == (40, 2)

    def test_tsne_3d(self):
        """Embed 30 random 8-D points into 3 dimensions."""
        np.random.seed(42)
        points = np.random.randn(30, 8)

        model = scirs2.TSNE(n_components=3, max_iter=50)
        embedding = model.fit_transform(points)

        assert embedding.shape == (30, 3)

    def test_tsne_with_parameters(self):
        """Run t-SNE with explicit perplexity, learning rate and iterations."""
        np.random.seed(42)
        points = np.random.randn(25, 5)

        settings = {
            "n_components": 2,
            "perplexity": 3.0,
            "learning_rate": 100.0,
            "max_iter": 200,
        }
        model = scirs2.TSNE(**settings)
        embedding = model.fit_transform(points)

        assert embedding.shape == (25, 2)


class TestUMAP:
    """Shape checks for ``scirs2.UMAP.fit_transform``."""

    def test_umap_basic(self):
        """Embed 50 random 10-D points into 2 dimensions."""
        np.random.seed(42)
        points = np.random.randn(50, 10)

        reducer = scirs2.UMAP(n_components=2, n_neighbors=5)
        embedding = reducer.fit_transform(points)

        assert embedding.shape == (50, 2)

    def test_umap_3d(self):
        """Embed 40 random 8-D points into 3 dimensions."""
        np.random.seed(42)
        points = np.random.randn(40, 8)

        reducer = scirs2.UMAP(n_components=3, n_neighbors=5)
        embedding = reducer.fit_transform(points)

        assert embedding.shape == (40, 3)

    def test_umap_with_parameters(self):
        """Run UMAP with explicit neighbours, minimum distance and metric."""
        np.random.seed(42)
        points = np.random.randn(30, 6)

        settings = {
            "n_components": 2,
            "n_neighbors": 10,
            "min_dist": 0.1,
            "metric": "euclidean",
        }
        reducer = scirs2.UMAP(**settings)
        embedding = reducer.fit_transform(points)

        assert embedding.shape == (30, 2)


class TestFeatureEngineering:
    """Checks for polynomial expansion, binarization, log/power transforms
    and discretization."""

    def test_polynomial_features(self):
        """Degree-2 expansion does not reduce the number of columns."""
        inputs = np.array([[1.0, 2.0], [3.0, 4.0]])

        expander = scirs2.PolynomialFeatures(degree=2)
        expanded = expander.transform(inputs)

        # A full degree-2 expansion of two inputs would be
        # [1, a, b, a^2, ab, b^2]; only a lower bound is asserted here.
        n_input_columns = inputs.shape[1]
        assert expanded.shape[1] >= n_input_columns

    def test_polynomial_features_degree_3(self):
        """Degree-3 expansion of two inputs yields more than two columns."""
        inputs = np.array([[1.0, 2.0]])

        expander = scirs2.PolynomialFeatures(degree=3)
        expanded = expander.transform(inputs)

        assert expanded.shape[1] > 2

    def test_binarize(self):
        """Values at or below the threshold map to 0, values above to 1."""
        inputs = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

        flags = scirs2.binarize_py(inputs, threshold=3.5)

        # (row, column, expected flag) for a threshold of 3.5.
        expectations = [
            (0, 0, 0),  # 1.0 <= 3.5
            (0, 1, 0),  # 2.0 <= 3.5
            (1, 0, 1),  # 4.0 > 3.5
            (1, 2, 1),  # 6.0 > 3.5
        ]
        for row, col, expected in expectations:
            assert flags[row, col] == expected

    def test_log_transform(self):
        """Log transform maps 1 to 0 and grows with the input."""
        inputs = np.array([[1.0, 10.0, 100.0]])

        logged = scirs2.log_transform_py(inputs)

        # Natural logs: log(1) = 0, log(10) ~ 2.3, log(100) ~ 4.6.
        assert np.allclose(logged[0, 0], 0, atol=1e-10)
        assert logged[0, 1] > 2.0
        assert logged[0, 2] > 4.0

    def test_power_transform(self):
        """A fitted ``PowerTransformer`` preserves the input shape."""
        inputs = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

        transformer = scirs2.PowerTransformer()
        transformer.fit(inputs)
        outputs = transformer.transform(inputs)

        assert outputs.shape == inputs.shape

    def test_discretize_equal_width(self):
        """Equal-width binning into 5 bins yields at most 5 distinct labels."""
        values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])

        bin_labels = scirs2.discretize_equal_width_py(values, n_bins=5)

        distinct_labels = np.unique(bin_labels)
        assert len(distinct_labels) <= 5

    def test_discretize_equal_frequency(self):
        """Equal-frequency binning into 4 bins yields at most 4 labels."""
        values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])

        bin_labels = scirs2.discretize_equal_frequency_py(values, n_bins=4)

        # Only the number of distinct labels is checked, not bin counts.
        distinct_labels = np.unique(bin_labels)
        assert len(distinct_labels) <= 4


class TestEncoding:
    """Checks for the one-hot and ordinal categorical encoders."""

    def test_one_hot_encoder(self):
        """Three distinct categories produce three indicator columns."""
        # A single integer-coded categorical column.
        categories = np.array([[0], [1], [2], [1], [0]])

        one_hot = scirs2.OneHotEncoder()
        one_hot.fit(categories)
        indicators = one_hot.transform(categories)

        # One column per distinct value (0, 1, 2).
        assert indicators.shape == (5, 3)
        # Exactly one active indicator per row.
        row_totals = indicators.sum(axis=1)
        assert np.allclose(row_totals, np.ones(5))

    def test_ordinal_encoder(self):
        """String categories are encoded into a numeric column."""
        # A single string-valued categorical column.
        colours = [["red"], ["blue"], ["green"], ["red"], ["blue"]]

        ordinal = scirs2.OrdinalEncoder()
        ordinal.fit(colours)
        codes = ordinal.transform(colours)

        assert codes.shape == (5, 1)
        # Integer or float output is accepted.
        accepted_dtypes = (np.int32, np.int64, np.float64)
        assert codes.dtype in accepted_dtypes

    def test_one_hot_encoder_multiple_categories(self):
        """Five distinct categories produce a 5x5 indicator matrix."""
        categories = np.array([[0], [1], [2], [3], [4]])

        one_hot = scirs2.OneHotEncoder()
        one_hot.fit(categories)
        indicators = one_hot.transform(categories)

        assert indicators.shape == (5, 5)


class TestImputation:
    """Checks for ``SimpleImputer`` and ``KNNImputer``."""

    def test_simple_imputer_mean(self):
        """A missing entry is filled with its column mean."""
        with_gaps = np.array([[1.0, 2.0], [np.nan, 3.0], [7.0, 6.0]])

        filler = scirs2.SimpleImputer(strategy="mean")
        filler.fit(with_gaps)
        filled = filler.transform(with_gaps)

        # No gaps may remain.
        assert not np.any(np.isnan(filled))
        # The first column's observed values are 1 and 7, whose mean is 4.
        assert np.allclose(filled[1, 0], 4.0)

    def test_simple_imputer_median(self):
        """Median imputation leaves no NaN behind."""
        with_gaps = np.array([[1.0], [np.nan], [3.0], [5.0]])

        filler = scirs2.SimpleImputer(strategy="median")
        filler.fit(with_gaps)
        filled = filler.transform(with_gaps)

        assert not np.any(np.isnan(filled))

    def test_simple_imputer_constant(self):
        """Constant imputation writes ``fill_value`` into every gap."""
        with_gaps = np.array([[1.0, np.nan], [np.nan, 3.0]])

        filler = scirs2.SimpleImputer(strategy="constant", fill_value=0.0)
        filler.fit(with_gaps)
        filled = filler.transform(with_gaps)

        assert not np.any(np.isnan(filled))
        # Both gaps must now hold the fill value 0.
        assert filled[0, 1] == 0.0
        assert filled[1, 0] == 0.0

    def test_knn_imputer(self):
        """KNN imputation fills the gap with a value between its neighbours."""
        with_gaps = np.array([
            [1.0, 2.0, 3.0],
            [4.0, np.nan, 6.0],
            [7.0, 8.0, 9.0],
        ])

        filler = scirs2.KNNImputer(n_neighbors=2)
        filler.fit(with_gaps)
        filled = filler.transform(with_gaps)

        assert not np.any(np.isnan(filled))
        # The other two rows hold 2.0 and 8.0 in that column.
        estimate = filled[1, 1]
        assert 2.0 <= estimate <= 8.0


class TestScaling:
    """Checks for ``MaxAbsScaler`` and ``QuantileTransformer``."""

    def test_maxabs_scaler(self):
        """Max-abs scaling maps each column into [-1, 1] with peak magnitude 1."""
        samples = np.array([[-2.0, 4.0], [-1.0, 2.0], [0.0, 0.0], [1.0, -2.0]])

        max_abs = scirs2.MaxAbsScaler()
        max_abs.fit(samples)
        rescaled = max_abs.transform(samples)

        # Nothing may leave the [-1, 1] interval.
        within_bounds = (rescaled >= -1) & (rescaled <= 1)
        assert np.all(within_bounds)
        # The largest magnitude in each column is exactly 1.
        column_peaks = np.abs(rescaled).max(axis=0)
        assert np.allclose(column_peaks, [1, 1])

    def test_quantile_transformer(self):
        """A fitted ``QuantileTransformer`` preserves the input shape."""
        quantiles = scirs2.QuantileTransformer(n_quantiles=10)

        np.random.seed(42)
        samples = np.random.randn(50, 2)

        quantiles.fit(samples)
        mapped = quantiles.transform(samples)

        # Only the shape is checked, not the output distribution.
        assert mapped.shape == samples.shape


class TestEdgeCases:
    """Test edge cases and error handling.

    Several tests accept either outcome for a degenerate input: the call
    may raise, or it may return a result that is then validated. The
    validating assertions live in the ``else`` branch of the ``try`` so
    that a failing assertion is reported. ``AssertionError`` derives from
    ``Exception``, so an assertion placed inside the ``try`` body would be
    swallowed by ``except Exception`` and the test could never fail.
    """

    def test_normalize_single_value(self):
        """Min-max normalization of a 1x1 array keeps its shape."""
        data = np.array([[5.0]])

        normalized = scirs2.normalize_array_py(data, method="minmax", axis=0)

        # A single value cannot be normalized meaningfully, so only the
        # shape is checked.
        assert normalized.shape == (1, 1)

    def test_pca_more_components_than_features(self):
        """PCA asked for more components than features rejects or clamps."""
        # Seed so the input is reproducible from run to run.
        np.random.seed(42)
        data = np.random.randn(50, 5)

        pca = scirs2.PCA(n_components=10)

        try:
            pca.fit(data)
            transformed = pca.transform(data)
        except Exception:
            # Rejecting the request is acceptable. The exception type
            # raised by the bindings is not pinned down here.
            pass
        else:
            # If accepted, the output cannot exceed the 5 input features.
            assert transformed.shape[1] <= 5

    def test_imputation_all_missing(self):
        """Mean imputation on an all-NaN column raises or keeps the shape."""
        imputer = scirs2.SimpleImputer(strategy="mean")

        data = np.array([[np.nan], [np.nan], [np.nan]])

        try:
            imputer.fit(data)
            imputed = imputer.transform(data)
        except Exception:
            # Refusing to impute from no observed values is acceptable.
            pass
        else:
            # If a fallback is used, the shape must be preserved.
            assert imputed.shape == data.shape

    def test_empty_data(self):
        """Normalizing a (0, 2) array raises or returns a (0, 2) array."""
        data = np.array([]).reshape(0, 2)

        try:
            normalized = scirs2.normalize_array_py(data, method="zscore", axis=0)
        except Exception:
            # Rejecting empty input is acceptable.
            pass
        else:
            assert normalized.shape == (0, 2)


if __name__ == "__main__":
    # Run this file's tests verbosely when executed as a script, and use
    # pytest's return code as the process exit status so that failures
    # produce a non-zero exit code.
    raise SystemExit(pytest.main([__file__, "-v"]))