{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Audio Sample Conversion Benchmark Analysis"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"import os"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parse Benchmark Data\n",
"\n",
"The benchmark data is in text format. Let's parse it to extract the key metrics:\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Multipliers converting each supported time unit to nanoseconds.\n",
"_UNIT_TO_NS = {\n",
"    \"ps\": 0.001,  # picoseconds\n",
"    \"ns\": 1,  # nanoseconds\n",
"    \"\\u00b5s\": 1000,  # microseconds, spelled with the micro sign\n",
"    \"\\u03bcs\": 1000,  # microseconds, spelled with the Greek letter mu\n",
"    \"us\": 1000,  # microseconds, ASCII spelling\n",
"    \"ms\": 1000000,  # milliseconds\n",
"    \"s\": 1000000000,  # seconds\n",
"}\n",
"\n",
"\n",
"def _to_ns(value, unit):\n",
"    \"\"\"Convert a textual measurement and its unit into float nanoseconds.\"\"\"\n",
"    if unit not in _UNIT_TO_NS:\n",
"        # Fail loudly: guessing a multiplier would silently corrupt the results.\n",
"        raise ValueError(f\"Unsupported time unit {unit!r} in benchmark output\")\n",
"    return float(value) * _UNIT_TO_NS[unit]\n",
"\n",
"\n",
"def parse_benchmark_data(file_path):\n",
"    \"\"\"\n",
"    Parse the benchmark data from the given file path into a DataFrame.\n",
"\n",
"    The file is scanned for sections that start with ``Benchmarking <name>``\n",
"    and end with ``time: [<value> <unit> <value> <unit> <value> <unit>]``.\n",
"    Only sections whose name has the form\n",
"    ``Samples conversion <from> to <to> - <N>s - <N>Hz - <N>ch`` are kept;\n",
"    sections with fewer than three bracketed measurements are skipped.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    file_path : str or path-like\n",
"        Location of the benchmark text file; it is read as UTF-8.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per parsed benchmark with the columns ``from_type``,\n",
"        ``to_type``, ``duration_sec``, ``sample_rate_hz``, ``channels``,\n",
"        ``min_time_ns``, ``median_time_ns``, ``max_time_ns`` (the three\n",
"        bracketed values, in order, converted to nanoseconds) and\n",
"        ``time_unit`` (the unit the first bracketed value was reported in).\n",
"        The frame is empty when nothing matches.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If a measurement uses a unit that has no known conversion.\n",
"    \"\"\"\n",
"    # Decode explicitly: the micro sign in the units is non-ASCII, so relying\n",
"    # on the platform default encoding can garble it.\n",
"    with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
"        content = f.read()\n",
"\n",
"    # Find all benchmark sections\n",
"    benchmark_sections = re.findall(\n",
"        r\"Benchmarking (.*?)time:\\s+\\[(.*?)\\]\", content, re.DOTALL\n",
"    )\n",
"\n",
"    benchmark_data = []\n",
"    for section, time_part in benchmark_sections:\n",
"        # The benchmark name is everything before the first \"/\"\n",
"        benchmark_name = section.strip().split(\"/\")[0]\n",
"\n",
"        # Parse the conversion information\n",
"        match = re.match(\n",
"            r\"Samples conversion (\\w+) to (\\w+) - (\\d+)s - (\\d+)Hz - (\\d+)ch\",\n",
"            benchmark_name,\n",
"        )\n",
"        if not match:\n",
"            continue\n",
"        from_type, to_type, duration, sample_rate, channels = match.groups()\n",
"\n",
"        # Parse time measurements; three (value, unit) pairs are required\n",
"        time_values = re.findall(r\"(\\d+\\.\\d+) (\\w+)\", time_part)\n",
"        if len(time_values) < 3:\n",
"            continue\n",
"        (low, low_unit), (mid, mid_unit), (high, high_unit) = time_values[:3]\n",
"\n",
"        # Each value carries its own unit (for example \"999.50 ns 1.0020 us\"),\n",
"        # so every measurement is converted with the unit printed next to it.\n",
"        benchmark_data.append(\n",
"            {\n",
"                \"from_type\": from_type,\n",
"                \"to_type\": to_type,\n",
"                \"duration_sec\": int(duration),\n",
"                \"sample_rate_hz\": int(sample_rate),\n",
"                \"channels\": int(channels),\n",
"                \"min_time_ns\": _to_ns(low, low_unit),\n",
"                \"median_time_ns\": _to_ns(mid, mid_unit),\n",
"                \"max_time_ns\": _to_ns(high, high_unit),\n",
"                \"time_unit\": low_unit,\n",
"            }\n",
"        )\n",
"\n",
"    return pd.DataFrame(benchmark_data)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total number of benchmarks: 180\n"
]
}
],
"source": [
"# Load the benchmark data\n",
"benchmark_df = parse_benchmark_data(\"conversion_benches.bench\")\n",
"print(f\"Total number of benchmarks: {len(benchmark_df)}\")\n",
"\n",
"# Export one GitHub-style Markdown table per (from_type, to_type) pair\n",
"os.makedirs(\"./bench_results\", exist_ok=True)\n",
"\n",
"for (from_type, to_type), group in benchmark_df.groupby([\"from_type\", \"to_type\"]):\n",
"    fp = f\"./bench_results/{from_type}_to_{to_type}.md\"\n",
"    group.to_markdown(fp, tablefmt=\"github\", index=False)\n",
"\n",
"# Show the first few rows. This must be the last expression in the cell:\n",
"# a bare expression anywhere else is evaluated and then discarded.\n",
"benchmark_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "audio_sample_benches",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}