# ruviz 0.4.1 - Docs.rs

from __future__ import annotations

import csv
from pathlib import Path
from typing import Any


def format_ms(value: float) -> str:
    """Render *value* as fixed-point text with two decimal places."""
    return format(value, ".2f")


def format_speedup(value: float) -> str:
    """Render a speedup ratio as text with an ``x`` suffix.

    Strictly positive ratios below 0.01 are shown as ``"<0.01x"``; every
    other value is printed with two decimal places.
    """
    below_display_floor = 0.0 < value < 0.01
    return "<0.01x" if below_display_floor else format(value, ".2f") + "x"


def format_throughput(value: float) -> str:
    """Render *value* divided by one million, two decimals, suffixed ``M/s``."""
    millions = value / 1e6
    return "{:.2f} M/s".format(millions)


def flatten_results(runtime_payloads: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collect every result from every payload into one flat list.

    Each returned row is a shallow copy of a result dict with its
    ``"runtime"`` key set to the owning payload's ``"runtime"`` value; the
    input result dicts are not modified.
    """
    flattened: list[dict[str, Any]] = []
    for payload in runtime_payloads:
        runtime_name = payload["runtime"]
        flattened.extend(
            {**entry, "runtime": runtime_name} for entry in payload["results"]
        )
    return flattened


def write_consolidated_csv(path: Path, runtime_payloads: list[dict[str, Any]]) -> None:
    """Write all benchmark results from all runtimes to a single CSV file.

    The parent directory of *path* is created if needed and the file is
    overwritten. Output is UTF-8 with ``"\\n"`` line endings: one header row
    followed by one row per flattened result. The nested ``"canvas"`` and
    ``"summary"`` dicts of each result are spread into top-level columns.
    """
    # Each entry is (column name, parent key); a parent of None means the
    # value is read directly from the row, otherwise from row[parent].
    # The list order is the CSV column order.
    layout: list[tuple[str, str | None]] = [
        ("runtime", None),
        ("implementation", None),
        ("scenarioId", None),
        ("plotKind", None),
        ("sizeLabel", None),
        ("boundary", None),
        ("outputTarget", None),
        ("elements", None),
        ("width", "canvas"),
        ("height", "canvas"),
        ("dpi", "canvas"),
        ("byteCount", None),
        ("datasetHash", None),
        ("warmupIterations", None),
        ("measuredIterations", None),
        ("meanMs", "summary"),
        ("medianMs", "summary"),
        ("p95Ms", "summary"),
        ("minMs", "summary"),
        ("maxMs", "summary"),
        ("stdevMs", "summary"),
        ("throughputElementsPerSec", "summary"),
    ]
    header = [column for column, _ in layout]

    records = flatten_results(runtime_payloads)
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8", newline="") as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=header, lineterminator="\n")
        csv_writer.writeheader()
        for record in records:
            csv_writer.writerow(
                {
                    column: record[column] if parent is None else record[parent][column]
                    for column, parent in layout
                }
            )


def _result_index(
    runtime_payloads: list[dict[str, Any]], *, implementation: str | None = None
) -> dict[tuple[str, str, str, str], dict[str, Any]]:
    index: dict[tuple[str, str, str, str], dict[str, Any]] = {}
    for payload in runtime_payloads:
        runtime = payload["runtime"]
        for result in payload["results"]:
            if implementation and result["implementation"] != implementation:
                continue
            key = (runtime, result["scenarioId"], result["sizeLabel"], result["boundary"])
            index[key] = result
    return index


def _unique_cases(runtime_payloads: list[dict[str, Any]]) -> list[tuple[str, str]]:
    cases = {
        (result["scenarioId"], result["sizeLabel"])
        for payload in runtime_payloads
        for result in payload["results"]
    }
    return sorted(cases)


def _table(headers: list[str], rows: list[list[str]]) -> str:
    lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]
    for row in rows:
        lines.append("| " + " | ".join(row) + " |")
    return "\n".join(lines)


def _median_ms(result: dict[str, Any]) -> float:
    """Return the ``summary.medianMs`` value of one benchmark result."""
    return result["summary"]["medianMs"]


def _speedup_rows(
    cases: list[tuple[str, str]],
    ruviz_index: dict[tuple[str, str, str, str], dict[str, Any]],
    baseline_index: dict[tuple[str, str, str, str], dict[str, Any]],
    *,
    runtime: str,
    boundary: str,
) -> list[list[str]]:
    """Build table rows comparing ruviz medians against a baseline implementation.

    A case is skipped unless both indexes hold a result for
    ``(runtime, scenario, size, boundary)``. Each row is
    ``[scenario, size, ruviz median, baseline median, speedup]`` where the
    speedup is ``baseline / ruviz``.
    """
    rows: list[list[str]] = []
    for scenario_id, size_label in cases:
        key = (runtime, scenario_id, size_label, boundary)
        if key not in ruviz_index or key not in baseline_index:
            continue
        ruviz_ms = _median_ms(ruviz_index[key])
        baseline_ms = _median_ms(baseline_index[key])
        # A zero ruviz median would raise ZeroDivisionError and abort the
        # whole report; show the ratio as unavailable instead.
        if ruviz_ms == 0:
            speedup_text = "n/a"
        else:
            speedup_text = format_speedup(baseline_ms / ruviz_ms)
        rows.append(
            [
                scenario_id,
                size_label,
                format_ms(ruviz_ms),
                format_ms(baseline_ms),
                speedup_text,
            ]
        )
    return rows


def _cross_runtime_rows(
    cases: list[tuple[str, str]],
    ruviz_index: dict[tuple[str, str, str, str], dict[str, Any]],
    *,
    boundary: str,
) -> list[list[str]]:
    """Build table rows of ruviz medians across the python, rust and wasm runtimes.

    A case is skipped unless all three runtimes have a ruviz result for
    *boundary*. The last cell is the first 12 characters of the Python
    result's ``datasetHash``.
    """
    rows: list[list[str]] = []
    for scenario_id, size_label in cases:
        keys = [
            (runtime, scenario_id, size_label, boundary)
            for runtime in ("python", "rust", "wasm")
        ]
        if any(key not in ruviz_index for key in keys):
            continue
        python_row, rust_row, wasm_row = (ruviz_index[key] for key in keys)
        rows.append(
            [
                scenario_id,
                size_label,
                format_ms(_median_ms(python_row)),
                format_ms(_median_ms(rust_row)),
                format_ms(_median_ms(wasm_row)),
                python_row["datasetHash"][:12],
            ]
        )
    return rows


def _rust_throughput_rows(
    cases: list[tuple[str, str]],
    ruviz_index: dict[tuple[str, str, str, str], dict[str, Any]],
) -> list[list[str]]:
    """Build table rows of Rust ruviz ``render_only`` throughput per case."""
    rows: list[list[str]] = []
    for scenario_id, size_label in cases:
        key = ("rust", scenario_id, size_label, "render_only")
        if key not in ruviz_index:
            continue
        rows.append(
            [
                scenario_id,
                size_label,
                format_throughput(ruviz_index[key]["summary"]["throughputElementsPerSec"]),
            ]
        )
    return rows


def generate_markdown_report(
    *,
    environment: dict[str, Any],
    runtime_payloads: list[dict[str, Any]],
    raw_link_base: str,
    report_title: str,
) -> str:
    """Render the benchmark run as a Markdown report.

    The report consists of fixed methodology/rationale text, a scenario
    matrix, the captured environment, links to raw artifacts, and comparison
    tables derived from *runtime_payloads*.

    Args:
        environment: Capture metadata. Reads ``manifest`` (``defaults`` and
            ``scenarios``), ``runtimes`` (optional ``wasm.browserVersion``),
            and the host/toolchain fields listed in the Environment section.
        runtime_payloads: Per-runtime payloads, each with ``runtime`` and
            ``results`` keys.
        raw_link_base: URL or path prefix for the raw artifact links.
        report_title: Text of the top-level heading.

    Returns:
        The Markdown document, terminated by a single newline.

    Raises:
        KeyError: If a required key is missing from *environment* or from a
            result used in a table.
    """
    matplotlib_index = _result_index(runtime_payloads, implementation="matplotlib")
    ruviz_index = _result_index(runtime_payloads, implementation="ruviz")
    plotters_index = _result_index(runtime_payloads, implementation="plotters")
    cases = _unique_cases(runtime_payloads)
    manifest = environment["manifest"]
    defaults = manifest["defaults"]
    scenario_rows = [
        [
            scenario["id"],
            scenario["datasetKind"],
            ", ".join(size["label"] for size in scenario["sizes"]),
            f"{scenario['canvas']['width']}x{scenario['canvas']['height']} @ {scenario['canvas']['dpi']} DPI",
        ]
        for scenario in manifest["scenarios"]
    ]
    wasm_environment = environment["runtimes"].get("wasm", {})
    boundaries = ("render_only", "public_api_render")

    sections: list[str] = [
        f"# {report_title}",
        "",
        "This page is generated from the committed large-dataset plotting benchmark reference run.",
        "",
        "## Methodology",
        "",
        "- Output target: in-memory PNG byte generation only",
        "- Dataset generation is excluded from all measured timings",
        "- File I/O is excluded from all measured timings",
        "- Boundaries:",
        "  - `render_only`: reuse a built plot object and measure an uncached render/export call without public-API reconstruction or prepared-frame image reuse",
        "  - `public_api_render`: reuse the input data, rebuild through the normal public API, then render/export",
        "- Plot matrix: `line`, `scatter`, `histogram`, `heatmap`",
        "- Python comparison target: `matplotlib` with the `Agg` backend",
        "- Python `ruviz` benchmark runs use a release-built `maturin develop --release` extension",
        "- Rust comparison target: `plotters` on the `public_api_render` boundary only",
        "- `ruviz` PNG exports now use automatic raster fast paths for eligible cases:",
        "  - large monotonic solid lines without markers/error bars are reduced to a per-column envelope before stroking",
        "  - static histograms reuse prepared bins instead of re-binning on every render-only export",
        "  - nearest, non-annotated heatmaps rasterize directly to the output surface before PNG encoding",
        "- Python host-side `ruviz` rendering now uses a persistent native plot handle and prepared plot instead of rebuilding a Rust plot from JSON on every render",
        "- Python `render_only` timings bypass the prepared-frame image cache so they measure rasterization rather than cached PNG encoding",
        "- Notebook widgets still ship JSON-friendly snapshots to the browser, but that snapshot path is no longer the default host render/export path",
        "- Python full-mode runs cap very slow cases to a 60s per-case budget; the recorded warmup/measured counts reflect the effective counts used",
        "- Rust `plotters` histogram timings reuse pre-binned bars, and the `plotters` heatmap path rasterizes the shared matrix to the fixed output canvas before PNG encoding",
        "- wasm target: Chromium-only browser benchmark via Playwright",
        f"- Full-run warmup / measured iterations: `{defaults['warmupIterations']}` / `{defaults['measuredIterations']}`",
        "",
        "## Why It Got Faster",
        "",
        "The main change in this benchmark update is not a different benchmark harness. It is a different raster renderer path for large PNG exports.",
        "",
        "What changed in `ruviz`:",
        "",
        "- Large monotonic solid line series are reduced to a per-pixel-column envelope before stroking, so a `1M` point line on a `640px` canvas no longer pays to stroke every original segment.",
        "- Static histograms now cache computed `HistogramData`, so `render_only` exports reuse prepared bins instead of re-running histogram binning on every frame.",
        "- Nearest-neighbor, non-annotated heatmaps now render the final output surface directly and blit that image, instead of drawing one anti-aliased rectangle per source cell.",
        "- The parallel line backend now emits a single polyline draw instead of thousands of two-point draw calls.",
        "- Python host rendering now keeps a native Rust plot/prepared-plot handle alive across calls, so `render_png()`, `render_svg()`, `save()`, and `show()` no longer pay a Python JSON serialization + Rust JSON parse + plot reconstruction round-trip on every call.",
        "",
        "Why those changes matter:",
        "",
        "- The old line path scaled with source vertex count even when many samples collapsed onto the same output column.",
        "- The old histogram path repeated statistical preprocessing inside hot render loops.",
        "- The old heatmap path scaled with source cell count rather than output pixel count for raster exports.",
        "- The old Python binding path spent a large share of its time turning Python state into JSON and then rebuilding a fresh Rust `Plot` before rendering anything.",
        "",
        "The result is that current Rust PNG export timings mostly reflect output-resolution work for eligible raster cases, and current Python host-side timings reflect the renderer instead of the snapshot bridge much more closely than before.",
        "",
        "## Scenario Matrix",
        "",
        _table(["Scenario", "Dataset", "Sizes", "Canvas"], scenario_rows),
        "",
        "## Environment",
        "",
        f"- Captured at: `{environment['capturedAt']}`",
        f"- Git commit: `{environment['gitCommit']}`",
        f"- Git branch: `{environment['gitBranch']}`",
        f"- Host OS: `{environment['os']}`",
        f"- Host machine: `{environment['machine']}`",
        f"- Host processor: `{environment['processor']}`",
        f"- CPU count: `{environment['cpuCount']}`",
        f"- Python: `{environment['pythonVersion']}`",
        f"- Rust: `{environment['rustVersion']}`",
        f"- Bun: `{environment['bunVersion']}`",
        f"- Chromium: `{wasm_environment.get('browserVersion', 'unknown')}`",
        "",
        "Raw artifacts:",
        f"- [environment.json]({raw_link_base}/environment.json)",
        f"- [results.csv]({raw_link_base}/results.csv)",
        f"- [python.json]({raw_link_base}/python.json)",
        f"- [rust.json]({raw_link_base}/rust.json)",
        f"- [wasm.json]({raw_link_base}/wasm.json)",
        "",
    ]

    # Python: ruviz against matplotlib, one table per measurement boundary.
    for boundary in boundaries:
        sections.extend(
            [
                f"## Python: ruviz vs matplotlib (`{boundary}`)",
                "",
                _table(
                    ["Plot", "Size", "ruviz median", "matplotlib median", "Speedup"],
                    _speedup_rows(
                        cases,
                        ruviz_index,
                        matplotlib_index,
                        runtime="python",
                        boundary=boundary,
                    ),
                ),
                "",
            ]
        )

    # ruviz across runtimes, one table per measurement boundary.
    for boundary in boundaries:
        sections.extend(
            [
                f"## ruviz cross-runtime medians (`{boundary}`)",
                "",
                _table(
                    ["Plot", "Size", "Python", "Rust", "Wasm", "Dataset hash"],
                    _cross_runtime_rows(cases, ruviz_index, boundary=boundary),
                ),
                "",
            ]
        )

    sections.extend(
        [
            "## Rust: ruviz vs plotters (`public_api_render`)",
            "",
            _table(
                ["Plot", "Size", "ruviz median", "plotters median", "Speedup"],
                _speedup_rows(
                    cases,
                    ruviz_index,
                    plotters_index,
                    runtime="rust",
                    boundary="public_api_render",
                ),
            ),
            "",
        ]
    )

    sections.extend(
        [
            "## Rust render-only throughput",
            "",
            _table(["Plot", "Size", "Throughput"], _rust_throughput_rows(cases, ruviz_index)),
            "",
            "## Notes",
            "",
            "- These numbers are a reference snapshot from one machine and should be treated as comparative, not universal.",
            "- Browser wasm timings include browser-side PNG generation, but not any disk writes or download flows.",
            "- `render_only` is a reused-built-object benchmark, not a cached-frame benchmark: it avoids public-API reconstruction but still bypasses prepared-frame image reuse for `ruviz`.",
            "- The remaining `plotters` gap on histogram and heatmap is partly semantic: `plotters` benchmarks pre-binned histogram bars and output-raster heatmap generation, while `ruviz` still includes its own plot-model setup and colorbar semantics on the public API path.",
        ]
    )

    return "\n".join(sections) + "\n"