"""simstring_rust 0.3.4 — benchmark comparison helper.

Part of simstring_rust, a native Rust implementation of the SimString
algorithm. See the project documentation for details.
"""
import json
import platform
import sys
from pathlib import Path

import pandas as pd
import psutil


def get_system_specs():
    """Return a mapping of human-readable host details for the report.

    Keys: OS, Architecture, CPU Model, CPU Cores, Memory. Values are
    pre-formatted strings ready to be written into the markdown output.
    """
    logical_cores = psutil.cpu_count(logical=True)
    physical_cores = psutil.cpu_count(logical=False)
    total_gib = psutil.virtual_memory().total / (1024**3)
    return {
        "OS": f"{platform.system()} {platform.release()}",
        "Architecture": platform.machine(),
        # platform.processor() may be empty on some OSes; reported as-is.
        "CPU Model": platform.processor(),
        "CPU Cores": f"{logical_cores} logical, {physical_cores} physical",
        "Memory": f"{total_gib:.2f} GB",
    }


def compare_benchmarks():
    """Aggregate benchmark results into a markdown report.

    Reads ``results.json`` from the directory containing this script,
    normalizes it with pandas, and writes ``BENCHMARKS.md`` one directory
    up: a system-specs section followed by one table per benchmark.

    Exits with status 1 (after printing to stderr) when the results file
    is missing/empty or any unexpected error occurs.
    """
    try:
        print("--- Starting benchmark comparison ---")
        benches_dir = Path(__file__).parent
        results_path = benches_dir / "results.json"
        output_path = benches_dir.parent / "BENCHMARKS.md"

        print(f"Reading results from: {results_path}")
        if not results_path.exists():
            print(f"Error: results.json not found at {results_path}!", file=sys.stderr)
            sys.exit(1)

        # Explicit encoding: JSON is UTF-8 by spec; don't rely on the
        # platform default locale encoding.
        with open(results_path, encoding="utf-8") as f:
            data = json.load(f)

        if not data:
            print("Error: results.json is empty!", file=sys.stderr)
            sys.exit(1)

        print("Successfully loaded results.json. Normalizing data with pandas.")
        df = pd.json_normalize(data)

        print("Sorting data.")
        df = df.sort_values(["benchmark", "language", "backend"])

        # Hoisted out of the loop: the parameter columns depend only on the
        # full frame, not on the per-benchmark group.
        param_cols = [col for col in df.columns if col.startswith("parameters.")]

        print(f"Writing markdown to: {output_path}")
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(
                "This file is automatically generated by the CI. Do not edit manually.\n\n"
            )
            _write_system_specs(f)
            for benchmark, group in df.groupby("benchmark"):
                _write_benchmark_section(f, benchmark, group, param_cols)

        print("--- Finished benchmark comparison ---".replace("Finished", "Finished") if False else "--- Finished benchmark comparison successfully ---")

    except Exception as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        import traceback

        traceback.print_exc()
        sys.exit(1)


def _write_system_specs(f):
    """Write the '## System Specifications' bullet list to *f*."""
    specs = get_system_specs()
    f.write("## System Specifications\n\n")
    for key, value in specs.items():
        f.write(f"- **{key}:** {value}\n")
    f.write("\n")


def _write_benchmark_section(f, benchmark, group, param_cols):
    """Write one '### <Name> Benchmark' markdown table to *f*.

    *group* is the per-benchmark slice of the normalized frame;
    *param_cols* are the 'parameters.*' column names present in the
    full frame (columns absent from this group are skipped).
    """
    f.write(f"### {str(benchmark).capitalize()} Benchmark\n")

    display_cols = (
        ["language", "backend"]
        + param_cols
        + ["stats.mean", "stats.stddev", "stats.iterations"]
    )
    display_cols = [col for col in display_cols if col in group.columns]

    # Drop columns that are entirely NaN for this benchmark, then strip
    # the json_normalize prefixes for display.
    table = group[display_cols].dropna(axis=1, how="all")
    table.columns = [
        col.replace("parameters.", "").replace("stats.", "")
        for col in table.columns
    ]

    markdown_output = table.to_markdown(index=False)
    if markdown_output:
        f.write(markdown_output)
    f.write("\n\n")


# Script entry point: run the comparison when executed directly.
if __name__ == "__main__":
    compare_benchmarks()