import numpy as np
import json
import os
import time
import sys
from datetime import datetime
SMALL_SIZE = 1_000
MEDIUM_SIZE = 10_000
LARGE_SIZE = 100_000
MATRIX_SMALL = (100, 100)
MATRIX_MEDIUM = (1000, 1000)
MATRIX_LARGE = (5000, 5000)
ITERATIONS = 10
BENCH_DIR = os.path.dirname(os.path.abspath(__file__))
RESULTS_FILE = os.path.join(BENCH_DIR, "numpy_benchmark_results.json")
def get_system_info():
import platform
import psutil
try:
cpu_info = {
"processor": platform.processor(),
"python_version": platform.python_version(),
"numpy_version": np.__version__,
"platform": platform.platform(),
"cpu_count": os.cpu_count(),
"physical_memory_gb": round(psutil.virtual_memory().total / (1024**3), 2)
}
return cpu_info
except ImportError:
return {
"processor": platform.processor(),
"python_version": platform.python_version(),
"numpy_version": np.__version__,
"platform": platform.platform(),
"cpu_count": os.cpu_count()
}
def time_function(func, *args, **kwargs):
times = []
func(*args, **kwargs)
for _ in range(ITERATIONS):
start = time.perf_counter()
func(*args, **kwargs)
end = time.perf_counter()
times.append((end - start) * 1000)
return {
"mean": sum(times) / len(times),
"min": min(times),
"max": max(times),
"median": sorted(times)[len(times) // 2],
"iterations": ITERATIONS
}
def benchmark_array_creation():
results = {}
results["zeros_small"] = time_function(np.zeros, SMALL_SIZE)
results["zeros_medium"] = time_function(np.zeros, MEDIUM_SIZE)
results["zeros_large"] = time_function(np.zeros, LARGE_SIZE)
results["ones_small"] = time_function(np.ones, SMALL_SIZE)
results["ones_medium"] = time_function(np.ones, MEDIUM_SIZE)
results["ones_large"] = time_function(np.ones, LARGE_SIZE)
results["random_small"] = time_function(np.random.random, (SMALL_SIZE,))
results["random_medium"] = time_function(np.random.random, (MEDIUM_SIZE,))
results["random_large"] = time_function(np.random.random, (LARGE_SIZE,))
results["zeros_matrix_small"] = time_function(np.zeros, MATRIX_SMALL)
results["zeros_matrix_medium"] = time_function(np.zeros, MATRIX_MEDIUM)
results["ones_matrix_small"] = time_function(np.ones, MATRIX_SMALL)
results["ones_matrix_medium"] = time_function(np.ones, MATRIX_MEDIUM)
results["random_matrix_small"] = time_function(np.random.random, MATRIX_SMALL)
results["random_matrix_medium"] = time_function(np.random.random, MATRIX_MEDIUM)
results["identity_small"] = time_function(np.identity, 100)
results["identity_medium"] = time_function(np.identity, 1000)
results["arange_small"] = time_function(np.arange, SMALL_SIZE)
results["arange_medium"] = time_function(np.arange, MEDIUM_SIZE)
results["arange_large"] = time_function(np.arange, LARGE_SIZE)
return results
def benchmark_array_operations():
results = {}
a_small = np.random.random(SMALL_SIZE)
a_medium = np.random.random(MEDIUM_SIZE)
a_large = np.random.random(LARGE_SIZE)
b_small = np.random.random(SMALL_SIZE)
b_medium = np.random.random(MEDIUM_SIZE)
b_large = np.random.random(LARGE_SIZE)
results["add_small"] = time_function(np.add, a_small, b_small)
results["add_medium"] = time_function(np.add, a_medium, b_medium)
results["add_large"] = time_function(np.add, a_large, b_large)
results["multiply_small"] = time_function(np.multiply, a_small, b_small)
results["multiply_medium"] = time_function(np.multiply, a_medium, b_medium)
results["multiply_large"] = time_function(np.multiply, a_large, b_large)
results["sqrt_small"] = time_function(np.sqrt, a_small)
results["sqrt_medium"] = time_function(np.sqrt, a_medium)
results["sqrt_large"] = time_function(np.sqrt, a_large)
results["exp_small"] = time_function(np.exp, a_small)
results["exp_medium"] = time_function(np.exp, a_medium)
results["exp_large"] = time_function(np.exp, a_large)
results["log_small"] = time_function(np.log, a_small)
results["log_medium"] = time_function(np.log, a_medium)
results["log_large"] = time_function(np.log, a_large)
results["sum_small"] = time_function(np.sum, a_small)
results["sum_medium"] = time_function(np.sum, a_medium)
results["sum_large"] = time_function(np.sum, a_large)
results["mean_small"] = time_function(np.mean, a_small)
results["mean_medium"] = time_function(np.mean, a_medium)
results["mean_large"] = time_function(np.mean, a_large)
results["std_small"] = time_function(np.std, a_small)
results["std_medium"] = time_function(np.std, a_medium)
results["std_large"] = time_function(np.std, a_large)
results["reshape_small"] = time_function(np.reshape, a_small, (SMALL_SIZE // 10, 10))
results["reshape_medium"] = time_function(np.reshape, a_medium, (MEDIUM_SIZE // 100, 100))
results["reshape_large"] = time_function(np.reshape, a_large, (LARGE_SIZE // 1000, 1000))
results["transpose_small"] = time_function(lambda x: x.T, np.reshape(a_small, (SMALL_SIZE // 10, 10)))
results["transpose_medium"] = time_function(lambda x: x.T, np.reshape(a_medium, (MEDIUM_SIZE // 100, 100)))
return results
def benchmark_linear_algebra():
results = {}
A_small = np.random.random(MATRIX_SMALL)
A_medium = np.random.random(MATRIX_MEDIUM)
B_small = np.random.random(MATRIX_SMALL)
B_medium = np.random.random(MATRIX_MEDIUM)
S_small = np.random.random((100, 100))
S_medium = np.random.random((500, 500))
results["matmul_small"] = time_function(np.matmul, A_small, B_small)
results["matmul_medium"] = time_function(np.matmul, A_medium[:100, :100], B_medium[:100, :100])
results["dot_small"] = time_function(np.dot, A_small.flatten()[:100], B_small.flatten()[:100])
results["dot_medium"] = time_function(np.dot, A_medium.flatten()[:1000], B_medium.flatten()[:1000])
results["svd_small"] = time_function(np.linalg.svd, S_small)
results["svd_medium"] = time_function(np.linalg.svd, S_medium)
results["qr_small"] = time_function(np.linalg.qr, S_small)
results["qr_medium"] = time_function(np.linalg.qr, S_medium)
results["cholesky_small"] = time_function(np.linalg.cholesky, S_small @ S_small.T) results["cholesky_medium"] = time_function(np.linalg.cholesky, S_medium @ S_medium.T)
results["det_small"] = time_function(np.linalg.det, S_small)
results["det_medium"] = time_function(np.linalg.det, S_medium)
results["inv_small"] = time_function(np.linalg.inv, S_small)
results["inv_medium"] = time_function(np.linalg.inv, S_medium)
return results
def benchmark_distributions():
results = {}
results["normal_small"] = time_function(np.random.normal, 0, 1, SMALL_SIZE)
results["normal_medium"] = time_function(np.random.normal, 0, 1, MEDIUM_SIZE)
results["normal_large"] = time_function(np.random.normal, 0, 1, LARGE_SIZE)
results["uniform_small"] = time_function(np.random.uniform, 0, 1, SMALL_SIZE)
results["uniform_medium"] = time_function(np.random.uniform, 0, 1, MEDIUM_SIZE)
results["uniform_large"] = time_function(np.random.uniform, 0, 1, LARGE_SIZE)
results["exponential_small"] = time_function(np.random.exponential, 1, SMALL_SIZE)
results["exponential_medium"] = time_function(np.random.exponential, 1, MEDIUM_SIZE)
results["exponential_large"] = time_function(np.random.exponential, 1, LARGE_SIZE)
results["beta_small"] = time_function(np.random.beta, 2, 5, SMALL_SIZE)
results["beta_medium"] = time_function(np.random.beta, 2, 5, MEDIUM_SIZE)
results["beta_large"] = time_function(np.random.beta, 2, 5, LARGE_SIZE)
results["gamma_small"] = time_function(np.random.gamma, 2, 2, SMALL_SIZE)
results["gamma_medium"] = time_function(np.random.gamma, 2, 2, MEDIUM_SIZE)
results["gamma_large"] = time_function(np.random.gamma, 2, 2, LARGE_SIZE)
try:
import scipy.stats as stats
results["noncentral_chisquare_small"] = time_function(stats.ncx2.rvs, 2, 1, size=SMALL_SIZE)
results["noncentral_chisquare_medium"] = time_function(stats.ncx2.rvs, 2, 1, size=MEDIUM_SIZE)
results["noncentral_f_small"] = time_function(stats.ncf.rvs, 2, 5, 1, size=SMALL_SIZE)
results["noncentral_f_medium"] = time_function(stats.ncf.rvs, 2, 5, 1, size=MEDIUM_SIZE)
results["vonmises_small"] = time_function(stats.vonmises.rvs, 1, size=SMALL_SIZE)
results["vonmises_medium"] = time_function(stats.vonmises.rvs, 1, size=MEDIUM_SIZE)
except ImportError:
print("SciPy not available. Skipping advanced distribution benchmarks.")
return results
def run_benchmarks():
results = {}
print("Running NumPy Benchmarks for comparison with NumRS2")
print("-" * 50)
results["system_info"] = get_system_info()
results["timestamp"] = datetime.now().isoformat()
print("Benchmarking array creation...")
results["array_creation"] = benchmark_array_creation()
print("Benchmarking array operations...")
results["array_operations"] = benchmark_array_operations()
print("Benchmarking linear algebra...")
results["linear_algebra"] = benchmark_linear_algebra()
print("Benchmarking distributions...")
results["distributions"] = benchmark_distributions()
with open(RESULTS_FILE, 'w') as f:
json.dump(results, f, indent=2)
print(f"\nBenchmark results saved to {RESULTS_FILE}")
return results
if __name__ == "__main__":
run_benchmarks()
with open(RESULTS_FILE, 'r') as f:
results = json.load(f)
print("\nBenchmark Summary (mean times in ms):")
print("-" * 50)
print("Array Creation:")
print(f" zeros_medium: {results['array_creation']['zeros_medium']['mean']:.2f} ms")
print(f" random_medium: {results['array_creation']['random_medium']['mean']:.2f} ms")
print("\nArray Operations:")
print(f" add_medium: {results['array_operations']['add_medium']['mean']:.2f} ms")
print(f" multiply_medium: {results['array_operations']['multiply_medium']['mean']:.2f} ms")
print(f" sqrt_medium: {results['array_operations']['sqrt_medium']['mean']:.2f} ms")
print("\nLinear Algebra:")
print(f" matmul_small: {results['linear_algebra']['matmul_small']['mean']:.2f} ms")
print(f" svd_small: {results['linear_algebra']['svd_small']['mean']:.2f} ms")
print(f" qr_small: {results['linear_algebra']['qr_small']['mean']:.2f} ms")
print("\nDistributions:")
print(f" normal_medium: {results['distributions']['normal_medium']['mean']:.2f} ms")
print(f" beta_medium: {results['distributions']['beta_medium']['mean']:.2f} ms")
if "noncentral_chisquare_medium" in results["distributions"]:
print(f" noncentral_chisquare_medium: {results['distributions']['noncentral_chisquare_medium']['mean']:.2f} ms")
print("\nRun this benchmark on the same system as NumRS2 benchmarks for valid comparisons.")