# oxits 0.1.0
# Time series classification and transformation library for Rust
# (Documentation-header residue from extraction — commented out so this file
#  remains valid Python.)
#!/usr/bin/env python3
"""Benchmark pyts algorithms for head-to-head comparison with oxits (Rust).

Run from test_harness/:
    python benchmark_pyts.py

Outputs: benchmark_results_python.json
"""

import json
import sys
import time
import traceback
from pathlib import Path

import numpy as np

N_RUNS = 51
N_WARMUP = 5


def median_time(fn, n_runs=N_RUNS):
    """Time fn over n_runs iterations (after N_WARMUP warmup calls); return P25."""
    for _ in range(N_WARMUP):
        fn()

    samples = []
    for _ in range(n_runs):
        t0 = time.perf_counter()
        fn()
        samples.append(time.perf_counter() - t0)

    # Lower quartile rather than the true median: matches the Rust harness.
    samples.sort()
    return samples[len(samples) // 4]


def generate_data(seed, n_samples, n_timestamps):
    """Deterministic standard-normal data of shape (n_samples, n_timestamps)."""
    return np.random.RandomState(seed).randn(n_samples, n_timestamps)


def generate_labels(n_samples, n_classes=2):
    """Balanced string labels 'A', 'B', ... padded with 'A' up to n_samples."""
    per_class = n_samples // n_classes
    labels = [chr(ord("A") + c) for c in range(n_classes) for _ in range(per_class)]
    # Division remainder: pad with 'A' so the array length is exactly n_samples.
    labels += ["A"] * (n_samples - len(labels))
    return np.array(labels)


def run_bench(name, fn, results):
    """Time one benchmark via median_time; on failure record None and continue."""
    try:
        elapsed = median_time(fn)
    except Exception as e:
        # A single failing benchmark must not abort the whole suite.
        print(f"    {name:45s} FAILED: {e}")
        results[name] = None
    else:
        results[name] = elapsed
        ms = elapsed * 1000
        print(f"    {name:45s} {ms:10.3f} ms")


def run_benchmarks():
    """Run every pyts benchmark and return {name: P25 seconds, or None if N/A}.

    Each pyts module is imported inside its own try/except ImportError so a
    partial pyts installation still yields results for the modules present.
    Benchmark names must match the Rust harness keys for head-to-head joins.
    """
    results = {}

    # ── Preprocessing ──────────────────────────────────────────────
    print("Benchmarking preprocessing...")
    try:
        from pyts.preprocessing import KBinsDiscretizer, MinMaxScaler, StandardScaler

        X_pre = generate_data(42, 100, 500)

        ss = StandardScaler()
        run_bench("preprocessing_standard_scaler", lambda: ss.fit_transform(X_pre), results)

        mm = MinMaxScaler()
        run_bench("preprocessing_minmax_scaler", lambda: mm.fit_transform(X_pre), results)

        disc = KBinsDiscretizer(n_bins=4, strategy="quantile")
        run_bench("preprocessing_discretizer", lambda: disc.fit_transform(X_pre), results)
    except ImportError as e:
        print(f"  Skipping preprocessing: {e}")

    # ── Approximation ─────────────────────────────────────────────
    print("Benchmarking approximation...")
    try:
        from pyts.approximation import (
            DiscreteFourierTransform,
            PiecewiseAggregateApproximation,
            SymbolicAggregateApproximation,
            SymbolicFourierApproximation,
        )

        X_approx = generate_data(42, 100, 500)

        paa = PiecewiseAggregateApproximation(output_size=50)
        run_bench("approximation_paa", lambda: paa.fit_transform(X_approx), results)

        sax = SymbolicAggregateApproximation(n_bins=4, strategy="normal")
        run_bench("approximation_sax", lambda: sax.fit_transform(X_approx), results)

        dft = DiscreteFourierTransform(n_coefs=20)
        run_bench("approximation_dft", lambda: dft.fit_transform(X_approx), results)

        sfa = SymbolicFourierApproximation(n_coefs=20, n_bins=4, strategy="quantile")
        run_bench("approximation_sfa", lambda: sfa.fit_transform(X_approx), results)
    except ImportError as e:
        print(f"  Skipping approximation: {e}")

    # ── Metrics (DTW) ─────────────────────────────────────────────
    print("Benchmarking metrics...")
    try:
        from pyts.metrics import dtw

        for n in [100, 500, 1000]:
            rng = np.random.RandomState(42)
            a = rng.randn(n)
            b = rng.randn(n)
            run_bench(
                f"metrics_dtw_classic_{n}",
                # a=a, b=b: bind as defaults so each lambda captures this
                # iteration's arrays, not the loop variables' final values.
                lambda a=a, b=b: dtw(a, b, method="classic"),
                results,
            )

        # The sakoe-chiba and fast variants reuse the final (n=500-sized
        # regenerated) a/b from just below, sized for a mid-range comparison.
        rng = np.random.RandomState(42)
        a = rng.randn(500)
        b = rng.randn(500)
        run_bench(
            "metrics_dtw_sakoe_chiba",
            lambda: dtw(a, b, method="sakoechiba", options={"window_size": 50}),
            results,
        )

        run_bench(
            "metrics_dtw_fast",
            lambda: dtw(a, b, method="fast", options={"radius": 2}),
            results,
        )
    except ImportError as e:
        print(f"  Skipping metrics: {e}")

    # ── Bag of Words ──────────────────────────────────────────────
    print("Benchmarking bag_of_words...")
    try:
        from pyts.bag_of_words import BagOfWords

        X_bow = generate_data(42, 50, 200)
        bow = BagOfWords(window_size=10, word_size=4, n_bins=4, strategy="normal")
        run_bench("bag_of_words", lambda: bow.fit_transform(X_bow), results)
    except ImportError as e:
        print(f"  Skipping bag_of_words: {e}")

    # ── Image ─────────────────────────────────────────────────────
    print("Benchmarking image...")
    try:
        from pyts.image import GramianAngularField, MarkovTransitionField, RecurrencePlot

        X_img = generate_data(42, 50, 100)

        gasf = GramianAngularField(method="summation")
        run_bench("image_gasf", lambda: gasf.fit_transform(X_img), results)

        gadf = GramianAngularField(method="difference")
        run_bench("image_gadf", lambda: gadf.fit_transform(X_img), results)

        mtf = MarkovTransitionField(n_bins=5)
        run_bench("image_mtf", lambda: mtf.fit_transform(X_img), results)

        rp = RecurrencePlot()
        run_bench("image_recurrence_plot", lambda: rp.fit_transform(X_img), results)
    except ImportError as e:
        print(f"  Skipping image: {e}")

    # ── Decomposition ─────────────────────────────────────────────
    print("Benchmarking decomposition...")
    try:
        from pyts.decomposition import SingularSpectrumAnalysis

        X_ssa = generate_data(42, 20, 200)
        ssa = SingularSpectrumAnalysis(window_size=10)
        run_bench("decomposition_ssa", lambda: ssa.fit_transform(X_ssa), results)
    except ImportError as e:
        print(f"  Skipping decomposition: {e}")

    # ── Transformation ────────────────────────────────────────────
    print("Benchmarking transformation...")
    try:
        from pyts.transformation import BOSS, ROCKET, BagOfPatterns, ShapeletTransform

        X_trans = generate_data(42, 50, 300)
        y_trans = generate_labels(50)

        boss = BOSS(window_size=10, word_size=4, n_bins=4)
        run_bench("transformation_boss", lambda: boss.fit_transform(X_trans), results)

        rocket = ROCKET(n_kernels=500, random_state=42)
        run_bench("transformation_rocket", lambda: rocket.fit_transform(X_trans), results)

        # ShapeletTransform is supervised: fit_transform also needs labels.
        st = ShapeletTransform(n_shapelets=5, window_sizes=[0.1, 0.2], random_state=42)
        run_bench(
            "transformation_shapelet",
            lambda: st.fit_transform(X_trans, y_trans),
            results,
        )

        bop = BagOfPatterns(window_size=10, word_size=4, n_bins=4)
        run_bench(
            "transformation_bag_of_patterns",
            lambda: bop.fit_transform(X_trans),
            results,
        )
    except ImportError as e:
        print(f"  Skipping transformation: {e}")

    # ── Classification ────────────────────────────────────────────
    # Classifier benchmarks time fit + predict together, on a fixed
    # train/test split (separate seed for the test set).
    print("Benchmarking classification...")
    X_train_cls = generate_data(42, 50, 200)
    y_train_cls = generate_labels(50)
    rng_test = np.random.RandomState(99)
    X_test_cls = rng_test.randn(20, 200)

    try:
        from pyts.classification import KNeighborsClassifier

        knn = KNeighborsClassifier(n_neighbors=3, metric="euclidean")
        run_bench(
            "classification_knn_euclidean",
            lambda: (knn.fit(X_train_cls, y_train_cls), knn.predict(X_test_cls)),
            results,
        )
    except ImportError as e:
        print(f"  Skipping KNN: {e}")

    try:
        from pyts.classification import BOSSVS

        bossvs = BOSSVS(window_size=10, word_size=4, n_bins=4)
        run_bench(
            "classification_bossvs",
            lambda: (bossvs.fit(X_train_cls, y_train_cls), bossvs.predict(X_test_cls)),
            results,
        )
    except ImportError as e:
        print(f"  Skipping BOSSVS: {e}")

    try:
        from pyts.classification import SAXVSM

        saxvsm = SAXVSM(window_size=10, word_size=4, n_bins=4)
        run_bench(
            "classification_saxvsm",
            lambda: (saxvsm.fit(X_train_cls, y_train_cls), saxvsm.predict(X_test_cls)),
            results,
        )
    except ImportError as e:
        print(f"  Skipping SAXVSM: {e}")

    try:
        from pyts.classification import TimeSeriesForest

        tsf = TimeSeriesForest(n_estimators=50, random_state=42)
        run_bench(
            "classification_tsf",
            lambda: (tsf.fit(X_train_cls, y_train_cls), tsf.predict(X_test_cls)),
            results,
        )
    except ImportError as e:
        print(f"  Skipping TimeSeriesForest: {e}")

    # TSBF and LearningShapelets are not in pyts — Rust-only benchmarks
    results["classification_tsbf"] = None
    results["classification_learning_shapelets"] = None

    return results


def main():
    """Run all pyts benchmarks and write benchmark_results_python.json."""
    # median_time reports the P25 (lower-quartile) time to match the Rust
    # harness, so label it accurately instead of the misleading "median".
    print(f"Running pyts benchmarks ({N_RUNS} runs each, P25)...\n")
    results = run_benchmarks()

    # Write results next to this script so the Rust comparison tool finds them.
    out_path = Path(__file__).parent / "benchmark_results_python.json"
    with open(out_path, "w") as f:
        json.dump(results, f, indent=2)

    # None entries are benchmarks that failed or are Rust-only (N/A here).
    n_ok = sum(1 for v in results.values() if v is not None)
    n_fail = sum(1 for v in results.values() if v is None)
    print(f"\nWrote {len(results)} benchmarks to {out_path} ({n_ok} ok, {n_fail} N/A)")


# Script entry point: run the full benchmark suite and write the JSON output.
if __name__ == "__main__":
    main()