import numpy as np
import scipy.stats as stats
from scipy import special
import json
import os
# --- Run configuration ------------------------------------------------------
# Fixed seed and per-distribution draw count used for every sample batch.
SEED = 12345
SAMPLE_SIZE = 10000

# The JSON output lives in the same directory as this script.
REFERENCE_DIR = os.path.dirname(os.path.abspath(__file__))
REFERENCE_FILE = os.path.join(REFERENCE_DIR, "distribution_reference_data.json")

# Seed NumPy's global RNG once, before any samples are drawn.
np.random.seed(SEED)

# Collected results: one entry per distribution, keyed by distribution name.
reference_data = {}
def generate_basic_stats(samples) -> dict:
    """Summarize a batch of samples as JSON-serializable reference statistics.

    Args:
        samples: Array-like of numeric draws. A NumPy array or anything
            ``np.asarray`` accepts (for example a plain list).

    Returns:
        A dict with ``mean``, ``variance`` (NumPy's default population
        variance, ``ddof=0``), ``min``, ``max`` and ``median`` as Python
        floats, plus ``first_10``: the first ten samples as a plain list.

    Raises:
        ValueError: If ``samples`` is empty.
    """
    # Coerce once so slicing and ``tolist`` work for lists/tuples as well.
    data = np.asarray(samples)
    if data.size == 0:
        raise ValueError("samples must contain at least one value")

    # ``float(...)`` / ``tolist()`` convert NumPy scalars into native Python
    # values, which is what ``json.dump`` can serialize.
    return {
        "mean": float(np.mean(data)),
        "variance": float(np.var(data)),
        "min": float(np.min(data)),
        "max": float(np.max(data)),
        "median": float(np.median(data)),
        "first_10": data[:10].tolist(),
    }
print("Generating reference data for standard continuous distributions...")

# NOTE: every draw below advances NumPy's global RNG state (scipy.stats
# ``rvs`` calls without ``random_state`` use that same global state), so the
# order of these statements determines the generated values. Keep the sequence
# unchanged if the stored reference numbers must stay reproducible.

# Normal with loc=0, scale=1.
samples = np.random.normal(0, 1, SAMPLE_SIZE)
reference_data["normal"] = generate_basic_stats(samples)

# Beta with a=2, b=5.
samples = np.random.beta(2, 5, SAMPLE_SIZE)
reference_data["beta"] = generate_basic_stats(samples)

# Standard Cauchy (loc=0, scale=1), drawn through SciPy.
samples = stats.cauchy.rvs(loc=0, scale=1, size=SAMPLE_SIZE)
reference_data["cauchy"] = generate_basic_stats(samples)

# Chi-square with 2 degrees of freedom.
samples = np.random.chisquare(2, SAMPLE_SIZE)
reference_data["chisquare"] = generate_basic_stats(samples)

# Exponential with scale=1.
samples = np.random.exponential(1, SAMPLE_SIZE)
reference_data["exponential"] = generate_basic_stats(samples)

# Gamma with shape=2, scale=2.
samples = np.random.gamma(2, 2, SAMPLE_SIZE)
reference_data["gamma"] = generate_basic_stats(samples)

# Gumbel with loc=0, scale=1.
samples = np.random.gumbel(0, 1, SAMPLE_SIZE)
reference_data["gumbel"] = generate_basic_stats(samples)

# Laplace with loc=0, scale=1.
samples = np.random.laplace(0, 1, SAMPLE_SIZE)
reference_data["laplace"] = generate_basic_stats(samples)

# Standard logistic (loc=0, scale=1), drawn through SciPy.
samples = stats.logistic.rvs(loc=0, scale=1, size=SAMPLE_SIZE)
reference_data["logistic"] = generate_basic_stats(samples)

# Log-normal whose underlying normal has mean=0, sigma=1.
samples = np.random.lognormal(0, 1, SAMPLE_SIZE)
reference_data["lognormal"] = generate_basic_stats(samples)

# np.random.pareto samples the Lomax (Pareto II) distribution; adding 1
# shifts it to the classical Pareto with shape 2 and minimum value 1.
samples = np.random.pareto(2, SAMPLE_SIZE) + 1
reference_data["pareto"] = generate_basic_stats(samples)

# Student's t with 5 degrees of freedom.
samples = np.random.standard_t(5, SAMPLE_SIZE)
reference_data["student_t"] = generate_basic_stats(samples)

# Uniform on [0, 1).
samples = np.random.uniform(0, 1, SAMPLE_SIZE)
reference_data["uniform"] = generate_basic_stats(samples)

# Weibull with shape a=1.
samples = np.random.weibull(1, SAMPLE_SIZE)
reference_data["weibull"] = generate_basic_stats(samples)
print("Generating reference data for discrete distributions...")

# (output key, sampler) pairs. The loop below draws them strictly in this
# order, which matters because all draws share NumPy's global RNG state.
# NOTE(review): NumPy's geometric counts the trial of the first success, so
# its samples start at 1 -- confirm the consuming tests use that convention.
discrete_samplers = (
    ("binomial", lambda: np.random.binomial(10, 0.5, SAMPLE_SIZE)),
    ("poisson", lambda: np.random.poisson(5, SAMPLE_SIZE)),
    ("bernoulli", lambda: np.random.binomial(1, 0.5, SAMPLE_SIZE)),
    ("geometric", lambda: np.random.geometric(0.5, SAMPLE_SIZE)),
)
for dist_name, draw in discrete_samplers:
    reference_data[dist_name] = generate_basic_stats(draw())
print("Generating reference data for advanced SciPy distributions...")

# These scipy.stats ``rvs`` calls pass no ``random_state``, so they draw from
# NumPy's global RNG state; keep their order to keep the output reproducible.

# Non-central chi-square: 2 degrees of freedom, non-centrality 1.
samples = stats.ncx2.rvs(df=2, nc=1, size=SAMPLE_SIZE)
reference_data["noncentral_chisquare"] = generate_basic_stats(samples)

# Non-central F: dfn=2, dfd=5, non-centrality 1.
samples = stats.ncf.rvs(dfn=2, dfd=5, nc=1, size=SAMPLE_SIZE)
reference_data["noncentral_f"] = generate_basic_stats(samples)

# Von Mises with concentration kappa=1, centered at 0.
samples = stats.vonmises.rvs(kappa=1, loc=0, size=SAMPLE_SIZE)
reference_data["vonmises"] = generate_basic_stats(samples)

# Maxwell with scale=1.
samples = stats.maxwell.rvs(scale=1, size=SAMPLE_SIZE)
reference_data["maxwell"] = generate_basic_stats(samples)

# Truncated normal: truncnorm's bounds a, b are expressed in standardized
# units; with loc=0 and scale=1 they are also the actual limits, [-2, 2].
a, b = -2, 2
samples = stats.truncnorm.rvs(a, b, loc=0, scale=1, size=SAMPLE_SIZE)
reference_data["truncated_normal"] = generate_basic_stats(samples)
# Bivariate normal: zero mean, unit variances, covariance 0.5 between the
# two components. The summary stores the per-component sample mean, the
# sample covariance matrix and the first ten (x, y) rows.
mvn_mean = [0, 0]
mvn_cov = [[1, 0.5], [0.5, 1]]
samples = np.random.multivariate_normal(mvn_mean, mvn_cov, SAMPLE_SIZE)
reference_data["multivariate_normal"] = dict(
    mean=np.mean(samples, axis=0).tolist(),
    cov=np.cov(samples, rowvar=False).tolist(),
    first_10=samples[:10].tolist(),
)
# Persist every collected summary as pretty-printed JSON at REFERENCE_FILE.
# The encoding is stated explicitly so the result does not depend on the
# platform default (json.dump emits ASCII-only text by default anyway).
with open(REFERENCE_FILE, "w", encoding="utf-8") as f:
    json.dump(reference_data, f, indent=2)

print(f"Reference data saved to {REFERENCE_FILE}")
print("This data can be used by Rust tests to validate distribution implementations.")