numrs2 0.3.0

A Rust implementation inspired by NumPy for numerical computing (NumRS2)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
#!/usr/bin/env python3
"""
NumPy Benchmark Script for NumRS2 Comparison

This script runs the same benchmarks as the Rust numpy_comparison_benchmark.rs
to enable direct performance comparison between NumRS2 and NumPy.

Usage:
    python bench/numpy_benchmark.py

Output:
    JSON file with benchmark results at bench/numpy/numpy_benchmark_results.json
"""

import numpy as np
import time
import json
import os
from typing import Dict, List, Callable
from dataclasses import dataclass, asdict
import statistics

@dataclass
class BenchmarkResult:
    """Results from a single benchmark"""
    name: str
    size: int
    mean_ms: float
    std_ms: float
    min_ms: float
    max_ms: float
    iterations: int

def benchmark(func: Callable, iterations: int = 100, warmup: int = 10) -> tuple:
    """Run a benchmark function multiple times and return statistics"""
    # Warmup
    for _ in range(warmup):
        func()

    # Actual timing
    times = []
    for _ in range(iterations):
        start = time.perf_counter()
        func()
        end = time.perf_counter()
        times.append((end - start) * 1000)  # Convert to ms

    return (
        statistics.mean(times),
        statistics.stdev(times) if len(times) > 1 else 0,
        min(times),
        max(times),
        iterations
    )

# Standard sizes matching Rust benchmarks
SIZES = [1000, 10000, 100000, 1000000]
MATRIX_SIZES = [32, 64, 128, 256]

def run_creation_benchmarks() -> List[BenchmarkResult]:
    """Array creation benchmarks"""
    results = []

    for size in SIZES:
        # np.zeros
        mean, std, min_t, max_t, iters = benchmark(lambda: np.zeros(size))
        results.append(BenchmarkResult("zeros", size, mean, std, min_t, max_t, iters))

        # np.ones
        mean, std, min_t, max_t, iters = benchmark(lambda: np.ones(size))
        results.append(BenchmarkResult("ones", size, mean, std, min_t, max_t, iters))

        # np.full
        mean, std, min_t, max_t, iters = benchmark(lambda: np.full(size, np.pi))
        results.append(BenchmarkResult("full", size, mean, std, min_t, max_t, iters))

        # np.empty
        mean, std, min_t, max_t, iters = benchmark(lambda: np.empty(size))
        results.append(BenchmarkResult("empty", size, mean, std, min_t, max_t, iters))

        if size <= 100000:
            # np.arange
            mean, std, min_t, max_t, iters = benchmark(lambda s=size: np.arange(s, dtype=np.float64))
            results.append(BenchmarkResult("arange", size, mean, std, min_t, max_t, iters))

            # np.linspace
            mean, std, min_t, max_t, iters = benchmark(lambda s=size: np.linspace(0, 1, s))
            results.append(BenchmarkResult("linspace", size, mean, std, min_t, max_t, iters))

    # Matrix creation
    for size in MATRIX_SIZES:
        # np.eye
        mean, std, min_t, max_t, iters = benchmark(lambda s=size: np.eye(s))
        results.append(BenchmarkResult("eye", size, mean, std, min_t, max_t, iters))

        # np.identity
        mean, std, min_t, max_t, iters = benchmark(lambda s=size: np.identity(s))
        results.append(BenchmarkResult("identity", size, mean, std, min_t, max_t, iters))

    return results

def run_arithmetic_benchmarks() -> List[BenchmarkResult]:
    """Arithmetic operation benchmarks"""
    results = []

    for size in SIZES:
        a = np.arange(size, dtype=np.float64) * 0.001 + 1.0
        b = np.arange(size, dtype=np.float64) * 0.002 + 0.5

        # np.add
        mean, std, min_t, max_t, iters = benchmark(lambda: np.add(a, b))
        results.append(BenchmarkResult("add", size, mean, std, min_t, max_t, iters))

        # np.subtract
        mean, std, min_t, max_t, iters = benchmark(lambda: np.subtract(a, b))
        results.append(BenchmarkResult("subtract", size, mean, std, min_t, max_t, iters))

        # np.multiply
        mean, std, min_t, max_t, iters = benchmark(lambda: np.multiply(a, b))
        results.append(BenchmarkResult("multiply", size, mean, std, min_t, max_t, iters))

        # np.divide
        mean, std, min_t, max_t, iters = benchmark(lambda: np.divide(a, b))
        results.append(BenchmarkResult("divide", size, mean, std, min_t, max_t, iters))

        # Scalar add
        mean, std, min_t, max_t, iters = benchmark(lambda: a + 2.5)
        results.append(BenchmarkResult("add_scalar", size, mean, std, min_t, max_t, iters))

        # Scalar mul
        mean, std, min_t, max_t, iters = benchmark(lambda: a * 2.5)
        results.append(BenchmarkResult("mul_scalar", size, mean, std, min_t, max_t, iters))

    return results

def run_ufunc_benchmarks() -> List[BenchmarkResult]:
    """Universal function benchmarks"""
    results = []

    for size in SIZES:
        if size > 100000:
            continue

        pos = np.arange(1, size + 1, dtype=np.float64)
        small = np.arange(size, dtype=np.float64) * 0.0001
        angles = np.arange(size, dtype=np.float64) * 0.001 * np.pi
        mixed = np.arange(size, dtype=np.float64) - size / 2.0

        # np.sqrt
        mean, std, min_t, max_t, iters = benchmark(lambda: np.sqrt(pos))
        results.append(BenchmarkResult("sqrt", size, mean, std, min_t, max_t, iters))

        # np.exp
        mean, std, min_t, max_t, iters = benchmark(lambda: np.exp(small))
        results.append(BenchmarkResult("exp", size, mean, std, min_t, max_t, iters))

        # np.log
        mean, std, min_t, max_t, iters = benchmark(lambda: np.log(pos))
        results.append(BenchmarkResult("log", size, mean, std, min_t, max_t, iters))

        # np.sin
        mean, std, min_t, max_t, iters = benchmark(lambda: np.sin(angles))
        results.append(BenchmarkResult("sin", size, mean, std, min_t, max_t, iters))

        # np.cos
        mean, std, min_t, max_t, iters = benchmark(lambda: np.cos(angles))
        results.append(BenchmarkResult("cos", size, mean, std, min_t, max_t, iters))

        # np.tan
        mean, std, min_t, max_t, iters = benchmark(lambda: np.tan(angles))
        results.append(BenchmarkResult("tan", size, mean, std, min_t, max_t, iters))

        # np.abs
        mean, std, min_t, max_t, iters = benchmark(lambda: np.abs(mixed))
        results.append(BenchmarkResult("abs", size, mean, std, min_t, max_t, iters))

        # np.power
        mean, std, min_t, max_t, iters = benchmark(lambda: np.power(pos, 2.0))
        results.append(BenchmarkResult("power_2", size, mean, std, min_t, max_t, iters))

        # np.square
        mean, std, min_t, max_t, iters = benchmark(lambda: np.square(pos))
        results.append(BenchmarkResult("square", size, mean, std, min_t, max_t, iters))

    return results

def run_reduction_benchmarks() -> List[BenchmarkResult]:
    """Reduction operation benchmarks"""
    results = []

    for size in SIZES:
        a = np.arange(size, dtype=np.float64) * 0.001

        # np.sum
        mean, std, min_t, max_t, iters = benchmark(lambda: np.sum(a))
        results.append(BenchmarkResult("sum", size, mean, std, min_t, max_t, iters))

        if size <= 10000:
            # np.prod
            prod_arr = 1.0 + np.arange(min(size, 1000), dtype=np.float64) * 0.0001
            mean, std, min_t, max_t, iters = benchmark(lambda: np.prod(prod_arr))
            results.append(BenchmarkResult("prod", size, mean, std, min_t, max_t, iters))

        # np.mean
        mean, std, min_t, max_t, iters = benchmark(lambda: np.mean(a))
        results.append(BenchmarkResult("mean", size, mean, std, min_t, max_t, iters))

        # np.std
        mean, std, min_t, max_t, iters = benchmark(lambda: np.std(a))
        results.append(BenchmarkResult("std", size, mean, std, min_t, max_t, iters))

        # np.var
        mean, std, min_t, max_t, iters = benchmark(lambda: np.var(a))
        results.append(BenchmarkResult("var", size, mean, std, min_t, max_t, iters))

        # np.min
        mean, std, min_t, max_t, iters = benchmark(lambda: np.min(a))
        results.append(BenchmarkResult("min", size, mean, std, min_t, max_t, iters))

        # np.max
        mean, std, min_t, max_t, iters = benchmark(lambda: np.max(a))
        results.append(BenchmarkResult("max", size, mean, std, min_t, max_t, iters))

        # np.argmin
        mean, std, min_t, max_t, iters = benchmark(lambda: np.argmin(a))
        results.append(BenchmarkResult("argmin", size, mean, std, min_t, max_t, iters))

        # np.argmax
        mean, std, min_t, max_t, iters = benchmark(lambda: np.argmax(a))
        results.append(BenchmarkResult("argmax", size, mean, std, min_t, max_t, iters))

    return results

def run_linalg_benchmarks() -> List[BenchmarkResult]:
    """Linear algebra benchmarks"""
    results = []

    for size in MATRIX_SIZES:
        a = ((np.arange(size * size, dtype=np.float64).reshape(size, size) * 0.73) % 10.0) + 0.1
        v = np.arange(size, dtype=np.float64) + 1.0

        # matmul
        mean, std, min_t, max_t, iters = benchmark(lambda: np.matmul(a, a), iterations=50)
        results.append(BenchmarkResult("matmul", size, mean, std, min_t, max_t, iters))

        # dot
        mean, std, min_t, max_t, iters = benchmark(lambda: np.dot(v, v))
        results.append(BenchmarkResult("dot", size, mean, std, min_t, max_t, iters))

        if size <= 128:
            # det
            mean, std, min_t, max_t, iters = benchmark(lambda: np.linalg.det(a), iterations=50)
            results.append(BenchmarkResult("det", size, mean, std, min_t, max_t, iters))

            # inv (for SPD matrix)
            spd = a @ a.T + np.eye(size) * size * 0.1
            mean, std, min_t, max_t, iters = benchmark(lambda: np.linalg.inv(spd), iterations=50)
            results.append(BenchmarkResult("inv", size, mean, std, min_t, max_t, iters))

            # qr
            mean, std, min_t, max_t, iters = benchmark(lambda: np.linalg.qr(a), iterations=50)
            results.append(BenchmarkResult("qr", size, mean, std, min_t, max_t, iters))

            # cholesky
            mean, std, min_t, max_t, iters = benchmark(lambda: np.linalg.cholesky(spd), iterations=50)
            results.append(BenchmarkResult("cholesky", size, mean, std, min_t, max_t, iters))

        if size <= 64:
            # svd
            mean, std, min_t, max_t, iters = benchmark(lambda: np.linalg.svd(a), iterations=20)
            results.append(BenchmarkResult("svd", size, mean, std, min_t, max_t, iters))

            # eig
            mean, std, min_t, max_t, iters = benchmark(lambda: np.linalg.eig(a), iterations=20)
            results.append(BenchmarkResult("eig", size, mean, std, min_t, max_t, iters))

        # trace
        mean, std, min_t, max_t, iters = benchmark(lambda: np.trace(a))
        results.append(BenchmarkResult("trace", size, mean, std, min_t, max_t, iters))

    return results

def run_statistics_benchmarks() -> List[BenchmarkResult]:
    """Statistics benchmarks"""
    results = []

    for size in SIZES:
        if size > 100000:
            continue

        a = ((np.arange(size, dtype=np.float64) * 0.73 + 17.0) % 100.0)

        # median
        mean, std, min_t, max_t, iters = benchmark(lambda: np.median(a))
        results.append(BenchmarkResult("median", size, mean, std, min_t, max_t, iters))

        # percentile
        mean, std, min_t, max_t, iters = benchmark(lambda: np.percentile(a, 75))
        results.append(BenchmarkResult("percentile", size, mean, std, min_t, max_t, iters))

        if size <= 10000:
            # histogram
            mean, std, min_t, max_t, iters = benchmark(lambda: np.histogram(a, bins=50))
            results.append(BenchmarkResult("histogram", size, mean, std, min_t, max_t, iters))

        # cumsum
        mean, std, min_t, max_t, iters = benchmark(lambda: np.cumsum(a))
        results.append(BenchmarkResult("cumsum", size, mean, std, min_t, max_t, iters))

        # diff
        mean, std, min_t, max_t, iters = benchmark(lambda: np.diff(a))
        results.append(BenchmarkResult("diff", size, mean, std, min_t, max_t, iters))

        # sort
        a_copy = a.copy()
        mean, std, min_t, max_t, iters = benchmark(lambda: np.sort(a_copy))
        results.append(BenchmarkResult("sort", size, mean, std, min_t, max_t, iters))

        # argsort
        mean, std, min_t, max_t, iters = benchmark(lambda: np.argsort(a_copy))
        results.append(BenchmarkResult("argsort", size, mean, std, min_t, max_t, iters))

    return results

def run_polynomial_benchmarks() -> List[BenchmarkResult]:
    """Polynomial benchmarks"""
    results = []

    degrees = [5, 10, 20, 50]

    for degree in degrees:
        coeffs = np.array([1.0 / (i + 1) for i in range(degree + 1)])

        # polyval single point
        mean, std, min_t, max_t, iters = benchmark(lambda: np.polyval(coeffs, 2.5))
        results.append(BenchmarkResult("polyval_single", degree, mean, std, min_t, max_t, iters))

        # polyder
        mean, std, min_t, max_t, iters = benchmark(lambda: np.polyder(coeffs))
        results.append(BenchmarkResult("polyder", degree, mean, std, min_t, max_t, iters))

        # polyint
        mean, std, min_t, max_t, iters = benchmark(lambda: np.polyint(coeffs))
        results.append(BenchmarkResult("polyint", degree, mean, std, min_t, max_t, iters))

        # polymul
        coeffs2 = np.arange(degree + 1, dtype=np.float64) * 0.5
        mean, std, min_t, max_t, iters = benchmark(lambda: np.polymul(coeffs, coeffs2))
        results.append(BenchmarkResult("polymul", degree, mean, std, min_t, max_t, iters))

        # polyadd
        mean, std, min_t, max_t, iters = benchmark(lambda: np.polyadd(coeffs, coeffs2))
        results.append(BenchmarkResult("polyadd", degree, mean, std, min_t, max_t, iters))

    # polyfit
    fit_sizes = [10, 50, 100, 500]
    for size in fit_sizes:
        x = np.arange(size, dtype=np.float64)
        y = x * x * 0.5 + x * 2.0 + 1.0 + np.sin(x * 0.1)

        mean, std, min_t, max_t, iters = benchmark(lambda: np.polyfit(x, y, 3))
        results.append(BenchmarkResult("polyfit_deg3", size, mean, std, min_t, max_t, iters))

    return results

def main():
    """Run all benchmarks and save results"""
    print("NumRS2 vs NumPy Benchmark Comparison")
    print("=" * 50)
    print(f"NumPy version: {np.__version__}")
    print()

    all_results = {
        "numpy_version": np.__version__,
        "benchmarks": {}
    }

    # Run each benchmark category
    categories = [
        ("creation", run_creation_benchmarks),
        ("arithmetic", run_arithmetic_benchmarks),
        ("ufuncs", run_ufunc_benchmarks),
        ("reductions", run_reduction_benchmarks),
        ("linalg", run_linalg_benchmarks),
        ("statistics", run_statistics_benchmarks),
        ("polynomial", run_polynomial_benchmarks),
    ]

    for category_name, benchmark_func in categories:
        print(f"Running {category_name} benchmarks...")
        results = benchmark_func()
        all_results["benchmarks"][category_name] = [asdict(r) for r in results]

        # Print summary
        for r in results:
            print(f"  {r.name}[{r.size}]: {r.mean_ms:.4f} ms (±{r.std_ms:.4f})")
        print()

    # Save results
    os.makedirs("bench/numpy", exist_ok=True)
    output_path = "bench/numpy/numpy_benchmark_results.json"
    with open(output_path, "w") as f:
        json.dump(all_results, f, indent=2)

    print(f"Results saved to {output_path}")
    print("Done!")

if __name__ == "__main__":
    main()