import numpy as np
import pytest
import scirs2
class TestBoxplotStats:
    """Exercise ``scirs2.boxplot_stats_py`` on small hand-written samples."""

    def test_boxplot_stats_basic(self):
        """All summary keys are present and the summary values are ordered."""
        sample = np.arange(1.0, 11.0)  # 1.0, 2.0, ..., 10.0
        stats = scirs2.boxplot_stats_py(sample)

        for key in ("q1", "median", "q3", "whislo", "whishi", "outliers"):
            assert key in stats

        q1, median, q3 = stats["q1"], stats["median"], stats["q3"]
        assert q1 < median
        assert median < q3
        # The whiskers must enclose the box.
        assert stats["whislo"] <= q1
        assert q3 <= stats["whishi"]

    def test_boxplot_stats_with_outliers(self):
        """A single extreme value (100.0) is expected among the outliers."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 100.0])
        flagged = scirs2.boxplot_stats_py(sample)["outliers"]
        assert len(flagged) > 0
        assert 100.0 in flagged

    def test_boxplot_stats_no_outliers(self):
        """An evenly spaced sample is expected to yield no outliers."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        flagged = scirs2.boxplot_stats_py(sample)["outliers"]
        assert len(flagged) == 0

    def test_boxplot_stats_custom_whis(self):
        """``whis=3.0`` must not report more outliers than ``whis=1.0``."""
        sample = np.array(
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 15.0]
        )
        wide = scirs2.boxplot_stats_py(sample, whis=3.0)
        narrow = scirs2.boxplot_stats_py(sample, whis=1.0)
        assert len(wide["outliers"]) <= len(narrow["outliers"])

    def test_boxplot_stats_identical_values(self):
        """For a constant sample, every quartile is ~5.0 and nothing is flagged."""
        sample = np.full(5, 5.0)
        stats = scirs2.boxplot_stats_py(sample)

        for key in ("q1", "median", "q3"):
            assert abs(stats[key] - 5.0) < 0.01
        assert len(stats["outliers"]) == 0

    def test_boxplot_stats_negative_values(self):
        """A sample symmetric around zero has median 0 and quartiles either side."""
        sample = np.array([-10.0, -5.0, 0.0, 5.0, 10.0])
        stats = scirs2.boxplot_stats_py(sample)

        assert stats["median"] == 0.0
        assert stats["q1"] < 0.0
        assert stats["q3"] > 0.0
class TestQuartiles:
    """Exercise ``scirs2.quartiles_py``, indexing its result as (Q1, Q2, Q3)."""

    def test_quartiles_basic(self):
        """Three values come back and they are strictly increasing."""
        sample = np.arange(1.0, 11.0)  # 1.0, 2.0, ..., 10.0
        quartiles = scirs2.quartiles_py(sample)

        assert len(quartiles) == 3
        lower, middle, upper = quartiles[0], quartiles[1], quartiles[2]
        assert lower < middle
        assert middle < upper

    def test_quartiles_median(self):
        """The second quartile of 1..5 is approximately 3."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        middle = scirs2.quartiles_py(sample)[1]
        assert abs(middle - 3.0) < 0.01

    def test_quartiles_consistency_with_boxplot(self):
        """Quartiles agree with the matching ``boxplot_stats_py`` entries."""
        sample = np.arange(1.0, 11.0)  # 1.0, 2.0, ..., 10.0
        quartiles = scirs2.quartiles_py(sample)
        boxplot = scirs2.boxplot_stats_py(sample)

        for position, key in enumerate(("q1", "median", "q3")):
            assert abs(quartiles[position] - boxplot[key]) < 0.01

    def test_quartiles_odd_length(self):
        """An odd-length sample still gives three increasing quartiles."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
        quartiles = scirs2.quartiles_py(sample)

        assert len(quartiles) == 3
        lower, middle, upper = quartiles[0], quartiles[1], quartiles[2]
        assert lower < middle
        assert middle < upper

    def test_quartiles_even_length(self):
        """An even-length sample still gives three increasing quartiles."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
        quartiles = scirs2.quartiles_py(sample)

        assert len(quartiles) == 3
        lower, middle, upper = quartiles[0], quartiles[1], quartiles[2]
        assert lower < middle
        assert middle < upper
class TestWinsorizedMean:
    """Exercise ``scirs2.winsorized_mean_py`` against NumPy reference values."""

    def test_winsorized_mean_basic(self):
        """For 1..5 with ``limits=0.1`` the result lies strictly in (2.5, 3.5)."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        robust_mean = scirs2.winsorized_mean_py(sample, limits=0.1)
        assert robust_mean > 2.5
        assert robust_mean < 3.5

    def test_winsorized_mean_with_outlier(self):
        """The winsorized mean sits closer to the median than the plain mean."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 100.0])
        plain_mean = np.mean(sample)
        robust_mean = scirs2.winsorized_mean_py(sample, limits=0.2)
        centre = np.median(sample)

        robust_gap = abs(robust_mean - centre)
        plain_gap = abs(plain_mean - centre)
        assert robust_gap < plain_gap

    def test_winsorized_mean_zero_limits(self):
        """With ``limits=0.0`` the result matches ``np.mean`` within 0.01."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        robust_mean = scirs2.winsorized_mean_py(sample, limits=0.0)
        plain_mean = np.mean(sample)
        assert abs(robust_mean - plain_mean) < 0.01

    def test_winsorized_mean_various_limits(self):
        """Both 10% and 20% limits give a mean below the plain mean."""
        sample = np.array(
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 100.0]
        )
        mean_at_10 = scirs2.winsorized_mean_py(sample, limits=0.1)
        mean_at_20 = scirs2.winsorized_mean_py(sample, limits=0.2)
        plain_mean = np.mean(sample)

        assert mean_at_10 < plain_mean
        assert mean_at_20 < plain_mean

    def test_winsorized_mean_symmetric_data(self):
        """On a symmetric sample the result is within 0.5 of the median."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0])
        robust_mean = scirs2.winsorized_mean_py(sample, limits=0.1)
        centre = np.median(sample)
        assert abs(robust_mean - centre) < 0.5
class TestWinsorizedVariance:
    """Exercise ``scirs2.winsorized_variance_py`` against NumPy reference values."""

    def test_winsorized_variance_basic(self):
        """A non-constant sample gives a strictly positive variance."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        robust_var = scirs2.winsorized_variance_py(sample, limits=0.1, ddof=1)
        assert robust_var > 0.0

    def test_winsorized_variance_with_outlier(self):
        """With one extreme value the result is under half of ``np.var``."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 100.0])
        plain_var = np.var(sample, ddof=1)
        robust_var = scirs2.winsorized_variance_py(sample, limits=0.2, ddof=1)

        half_of_plain = plain_var / 2
        assert robust_var < half_of_plain

    def test_winsorized_variance_zero_limits(self):
        """With ``limits=0.0`` the result stays within 2.0 of ``np.var``."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        robust_var = scirs2.winsorized_variance_py(sample, limits=0.0, ddof=1)
        plain_var = np.var(sample, ddof=1)
        # NOTE(review): a tolerance of 2.0 is wide next to this sample's
        # variance (2.5) — confirm whether it can be tightened.
        assert abs(robust_var - plain_var) < 2.0

    def test_winsorized_variance_ddof_effect(self):
        """``ddof=1`` yields a larger variance than ``ddof=0`` on the same data."""
        sample = np.arange(1.0, 11.0)  # 1.0, 2.0, ..., 10.0
        population_var = scirs2.winsorized_variance_py(sample, limits=0.1, ddof=0)
        sample_var = scirs2.winsorized_variance_py(sample, limits=0.1, ddof=1)
        assert sample_var > population_var

    def test_winsorized_variance_identical_values(self):
        """A constant sample has (near) zero variance."""
        sample = np.full(5, 5.0)
        robust_var = scirs2.winsorized_variance_py(sample, limits=0.1, ddof=1)
        assert abs(robust_var) < 0.01
class TestRobustStatisticsComparison:
    """Compare robust statistics with their plain counterparts under outliers."""

    def test_robust_vs_regular_with_outliers(self):
        """Swapping 10.0 for 100.0 moves the winsorized mean less than half as far."""
        clean = np.arange(1.0, 11.0)  # 1.0, 2.0, ..., 10.0
        contaminated = np.array(
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0]
        )

        # Shift of the ordinary mean caused by the outlier.
        plain_shift = abs(np.mean(contaminated) - np.mean(clean))

        # Shift of the winsorized mean caused by the same outlier.
        robust_clean = scirs2.winsorized_mean_py(clean, limits=0.1)
        robust_contaminated = scirs2.winsorized_mean_py(contaminated, limits=0.1)
        robust_shift = abs(robust_contaminated - robust_clean)

        assert robust_shift < plain_shift / 2

    def test_boxplot_outlier_detection(self):
        """With ``whis=1.5`` the value 100.0 is expected among the outliers."""
        sample = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0])
        flagged = scirs2.boxplot_stats_py(sample, whis=1.5)["outliers"]
        assert len(flagged) > 0
        assert 100.0 in flagged
class TestQuantileEdgeCases:
    """Edge-case inputs for the quantile-based helpers in ``scirs2``."""

    def test_small_dataset(self):
        """A three-element sample still yields three quartiles and a median key."""
        data = np.array([1.0, 2.0, 3.0])

        quartiles = scirs2.quartiles_py(data)
        assert len(quartiles) == 3

        boxplot = scirs2.boxplot_stats_py(data)
        assert "median" in boxplot

    def test_large_dataset(self):
        """A 1000-point normal sample yields three quartiles and a positive mean."""
        # Use a local, seeded Generator instead of ``np.random.seed`` so the
        # test stays reproducible without reseeding NumPy's process-wide RNG,
        # which would leak state into every test that runs afterwards.
        rng = np.random.default_rng(42)
        # Standard-normal draws scaled by 10 and shifted by 50.
        data = rng.standard_normal(1000) * 10 + 50

        quartiles = scirs2.quartiles_py(data)
        assert len(quartiles) == 3

        winsorized_mean = scirs2.winsorized_mean_py(data, limits=0.05)
        assert winsorized_mean > 0.0

    def test_negative_and_positive_values(self):
        """A sample symmetric around zero has Q1 < 0 < Q3 and a mean near zero."""
        data = np.array([-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0])

        quartiles = scirs2.quartiles_py(data)
        assert quartiles[0] < 0.0
        assert quartiles[2] > 0.0

        winsorized_mean = scirs2.winsorized_mean_py(data, limits=0.1)
        assert abs(winsorized_mean) < 2.0
class TestRealWorldScenarios:
    """Scenario-style checks using data shaped like everyday measurements."""

    def test_salary_data_robustness(self):
        """One very large salary pulls the plain mean above the winsorized mean."""
        salaries = np.array(
            [30.0, 35.0, 40.0, 42.0, 45.0, 48.0, 50.0, 52.0, 55.0, 60.0, 500.0]
        )
        plain_mean = np.mean(salaries)
        robust_mean = scirs2.winsorized_mean_py(salaries, limits=0.1)
        assert robust_mean < plain_mean

    def test_sensor_data_outlier_detection(self):
        """The 50.0 reading is expected among the boxplot outliers."""
        readings = np.array([20.1, 20.2, 20.0, 19.9, 20.3, 50.0, 20.1, 20.2])
        flagged = scirs2.boxplot_stats_py(readings)["outliers"]
        assert 50.0 in flagged

    def test_grading_with_outliers(self):
        """One very low score pulls the plain mean below the winsorized mean."""
        scores = np.array(
            [85.0, 88.0, 90.0, 92.0, 91.0, 87.0, 95.0, 20.0, 93.0, 89.0]
        )
        plain_mean = np.mean(scores)
        robust_mean = scirs2.winsorized_mean_py(scores, limits=0.1)
        assert robust_mean > plain_mean

    def test_financial_returns_robustness(self):
        """With one large loss the winsorized variance is under half of ``np.var``."""
        returns = np.array(
            [0.5, 0.3, -0.2, 0.4, 0.1, -0.3, 0.6, -15.0, 0.2, 0.4]
        )
        plain_var = np.var(returns, ddof=1)
        robust_var = scirs2.winsorized_variance_py(returns, limits=0.1, ddof=1)

        half_of_plain = plain_var / 2
        assert robust_var < half_of_plain

    def test_quartile_ranges_for_data_quality(self):
        """Tightly clustered measurements have an interquartile range below 1.0."""
        measurements = np.array(
            [99.8, 99.9, 100.0, 100.1, 100.2, 99.7, 100.3, 100.0, 99.9]
        )
        quartiles = scirs2.quartiles_py(measurements)
        spread = quartiles[2] - quartiles[0]
        assert spread < 1.0
if __name__ == "__main__":
    # Propagate pytest's exit status so that running this file directly
    # reports failures to the shell / CI instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v"]))