oxits 0.1.0

Time series classification and transformation library for Rust
Documentation
#!/usr/bin/env python3
"""Compare Python (pyts) vs Rust (oxits) benchmark results.

Run from test_harness/:
    python compare_benchmarks.py

Reads:
    benchmark_results_python.json
    benchmark_results_rust.json
"""

import json
import statistics
import sys
from pathlib import Path

# Ordered list of (key, display_name) pairs.
# `key` is the name looked up in both JSON result files; `display_name` is the
# row label printed in the comparison table. main() prints rows in exactly
# this order, so the section comments below mirror the printed categories.
BENCHMARKS = [
    # Preprocessing
    ("preprocessing_standard_scaler", "StandardScaler (100x500)"),
    ("preprocessing_minmax_scaler", "MinMaxScaler (100x500)"),
    ("preprocessing_discretizer", "KBinsDiscretizer (100x500)"),
    # Approximation
    ("approximation_paa", "PAA output_size=50 (100x500)"),
    ("approximation_sax", "SAX n_bins=4 (100x500)"),
    ("approximation_dft", "DFT n_coefs=20 (100x500)"),
    ("approximation_sfa", "SFA fit+transform (100x500)"),
    # Metrics
    ("metrics_dtw_classic_100", "DTW classic (n=100)"),
    ("metrics_dtw_classic_500", "DTW classic (n=500)"),
    ("metrics_dtw_classic_1000", "DTW classic (n=1000)"),
    ("metrics_dtw_sakoe_chiba", "DTW Sakoe-Chiba (n=500)"),
    ("metrics_dtw_fast", "DTW fast (n=500)"),
    # Bag of Words
    ("bag_of_words", "BagOfWords (50x200)"),
    # Image
    ("image_gasf", "GASF (50x100)"),
    ("image_gadf", "GADF (50x100)"),
    ("image_mtf", "MTF (50x100)"),
    ("image_recurrence_plot", "RecurrencePlot (50x100)"),
    # Decomposition
    ("decomposition_ssa", "SSA window=10 (20x200)"),
    # Transformation
    ("transformation_boss", "BOSS fit+transform (50x300)"),
    ("transformation_rocket", "ROCKET 500 kernels (50x300)"),
    ("transformation_shapelet", "ShapeletTransform (50x300)"),
    ("transformation_bag_of_patterns", "BagOfPatterns (50x300)"),
    # Classification
    ("classification_knn_euclidean", "KNN k=3 Euclidean (50/20x200)"),
    ("classification_bossvs", "BOSSVS (50/20x200)"),
    ("classification_saxvsm", "SAXVSM (50/20x200)"),
    ("classification_tsf", "TimeSeriesForest (50/20x200)"),
    ("classification_tsbf", "TSBF (50/20x200)"),
    ("classification_learning_shapelets", "LearningShapelets (50/20x200)"),
]

# Maps the key of the first benchmark in each category to that category's
# heading. main() prints the heading immediately before the row whose key
# appears here, so each entry must match a key in BENCHMARKS.
CATEGORY_HEADERS = {
    "preprocessing_standard_scaler": "Preprocessing",
    "approximation_paa": "Approximation",
    "metrics_dtw_classic_100": "Metrics (DTW)",
    "bag_of_words": "Bag of Words",
    "image_gasf": "Image",
    "decomposition_ssa": "Decomposition",
    "transformation_boss": "Transformation",
    "classification_knn_euclidean": "Classification",
}


def fmt_ms(seconds):
    """Render a duration given in seconds as a milliseconds string.

    Larger values get fewer decimals: one at 100 ms and above, two from
    10 ms, three from 1 ms, and four below 1 ms. ``None`` (no measurement)
    is rendered as ``"N/A"``.
    """
    if seconds is None:
        return "N/A"

    millis = seconds * 1000
    # (lower bound in ms, decimals), checked from the largest bound down.
    for lower_bound, decimals in ((100, 1), (10, 2), (1, 3)):
        if millis >= lower_bound:
            return f"{millis:.{decimals}f}"
    return f"{millis:.4f}"


def fmt_speedup(py_s, rs_s):
    """Render the Python-over-Rust time ratio as text such as ``"12.3x"``.

    Yields ``"N/A"`` when either timing is ``None`` and ``"inf"`` when the
    Rust timing equals zero (so no division is attempted).
    """
    both_present = py_s is not None and rs_s is not None
    if not both_present:
        return "N/A"
    if rs_s != 0:
        ratio = py_s / rs_s
        return f"{ratio:.1f}x"
    return "inf"


def main():
    """Print a side-by-side table of pyts vs oxits timings plus summary stats.

    Reads ``benchmark_results_python.json`` and ``benchmark_results_rust.json``
    from the directory containing this script. Each file is loaded as JSON and
    queried with ``.get(key)`` for every entry in ``BENCHMARKS``; a missing
    key is shown as ``N/A``. Values are treated as durations in seconds.

    Exits with status 1 (after printing an error) if either file is missing.
    """
    base = Path(__file__).parent

    py_path = base / "benchmark_results_python.json"
    rs_path = base / "benchmark_results_rust.json"

    if not py_path.exists():
        print(f"Error: {py_path} not found. Run benchmark_pyts.py first.")
        sys.exit(1)
    if not rs_path.exists():
        print(f"Error: {rs_path} not found. Run: cargo run --release --example benchmark --features decomposition")
        sys.exit(1)

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(py_path, encoding="utf-8") as f:
        py_results = json.load(f)
    with open(rs_path, encoding="utf-8") as f:
        rs_results = json.load(f)

    # Column widths
    name_w = 38
    py_w = 12
    rs_w = 12
    sp_w = 10

    header = f"{'Algorithm':<{name_w}}  {'Python (ms)':>{py_w}}  {'Rust (ms)':>{rs_w}}  {'Speedup':>{sp_w}}"
    sep = "\u2500" * name_w + "  " + "\u2500" * py_w + "  " + "\u2500" * rs_w + "  " + "\u2500" * sp_w

    print()
    print("  oxits (Rust) vs pyts (Python) — Benchmark Comparison")
    # Blank spacer line between the title and the table header.
    print()
    print(f"  {header}")
    print(f"  {sep}")

    speedups = []
    header_printed = False

    for key, display in BENCHMARKS:
        # Print category header, separated from the previous category by a
        # blank line (but no blank line before the very first header).
        if key in CATEGORY_HEADERS:
            if header_printed:
                print()
            header_printed = True
            cat = CATEGORY_HEADERS[key]
            print(f"  \033[1m{cat}\033[0m")

        py_t = py_results.get(key)
        rs_t = rs_results.get(key)

        py_str = fmt_ms(py_t)
        rs_str = fmt_ms(rs_t)
        sp_str = fmt_speedup(py_t, rs_t)

        # A zero Rust timing is shown as "inf" in the table; it is left out
        # of the summary so the division cannot raise ZeroDivisionError and
        # an infinite ratio does not swamp the aggregate statistics.
        if py_t is not None and rs_t is not None and rs_t != 0:
            speedups.append(py_t / rs_t)

        print(f"  {display:<{name_w}}  {py_str:>{py_w}}  {rs_str:>{rs_w}}  {sp_str:>{sp_w}}")

    print(f"  {sep}")

    if speedups:
        # Geometric mean: n-th root of the product of all ratios.
        geo_mean = 1.0
        for s in speedups:
            geo_mean *= s
        geo_mean = geo_mean ** (1.0 / len(speedups))

        arith_mean = sum(speedups) / len(speedups)
        # statistics.median averages the two middle values for an even count,
        # unlike plain indexing at len // 2 which picks the upper one.
        median_sp = statistics.median(speedups)

        print()
        print(f"  Summary ({len(speedups)} benchmarks with both results):")
        print(f"    Geometric mean speedup:  {geo_mean:.1f}x")
        print(f"    Arithmetic mean speedup: {arith_mean:.1f}x")
        print(f"    Median speedup:          {median_sp:.1f}x")
        print(f"    Min speedup:             {min(speedups):.1f}x")
        print(f"    Max speedup:             {max(speedups):.1f}x")

    print()


if __name__ == "__main__":
    main()