{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Audio Sample Conversion Benchmark Analysis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import re\n",
"import os\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parse Benchmark Data\n",
"\n",
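"The benchmark results are stored as plain text. The function below parses each entry into its conversion parameters (source and target sample type, duration, sample rate, channel count) and its three reported timings, normalised to nanoseconds:\n"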
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def parse_benchmark_data(file_path):\n",
"    \"\"\"\n",
"    Parse a plain-text benchmark report into a DataFrame, one row per benchmark.\n",
"\n",
"    An entry is the text from a 'Benchmarking ' marker up to the next\n",
"    'time: [...]' bracket. Entries whose name does not follow\n",
"    'Samples conversion <from> to <to> - <N>s - <N>Hz - <N>ch', or whose\n",
"    bracket holds fewer than three '<number> <unit>' pairs, are skipped.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    file_path : str or os.PathLike\n",
"        Path of the report file. It is decoded as UTF-8 so the micro sign used\n",
"        in microsecond units is read the same way on every platform.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Columns from_type, to_type, duration_sec, sample_rate_hz, channels,\n",
"        min_time_ns, median_time_ns, max_time_ns and time_unit. The three\n",
"        timings are the first three bracketed values in file order, each\n",
"        converted to nanoseconds using its own unit; time_unit is the unit the\n",
"        first value was reported in. The columns are present even when no\n",
"        entry matched.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If a timing carries a unit that is missing from the conversion table.\n",
"    \"\"\"\n",
"    columns = [\n",
"        'from_type', 'to_type', 'duration_sec', 'sample_rate_hz', 'channels',\n",
"        'min_time_ns', 'median_time_ns', 'max_time_ns', 'time_unit',\n",
"    ]\n",
"\n",
"    # Multipliers that turn a value expressed in the given unit into nanoseconds.\n",
"    ns_per_unit = {\n",
"        'ps': 0.001,\n",
"        'ns': 1,\n",
"        'us': 1000,\n",
"        '\\u00b5s': 1000,  # micro sign\n",
"        '\\u03bcs': 1000,  # Greek small letter mu\n",
"        'ms': 1000000,\n",
"        's': 1000000000,\n",
"    }\n",
"\n",
"    def to_ns(value, unit):\n",
"        # Treating an unknown unit as nanoseconds would silently corrupt the\n",
"        # row, so fail loudly instead.\n",
"        if unit not in ns_per_unit:\n",
"            raise ValueError(f'Unknown time unit {unit!r} in {file_path!r}')\n",
"        return float(value) * ns_per_unit[unit]\n",
"\n",
"    with open(file_path, 'r', encoding='utf-8') as f:\n",
"        content = f.read()\n",
"\n",
"    # Each hit is (text between 'Benchmarking ' and 'time:', bracket contents).\n",
"    benchmark_sections = re.findall(r'Benchmarking (.*?)time:\\s+\\[(.*?)\\]', content, re.DOTALL)\n",
"\n",
"    benchmark_data = []\n",
"    for section, time_part in benchmark_sections:\n",
"        # Keep only the part of the name before the first '/'.\n",
"        benchmark_name = section.strip().split('/')[0]\n",
"\n",
"        match = re.match(r'Samples conversion (\\w+) to (\\w+) - (\\d+)s - (\\d+)Hz - (\\d+)ch', benchmark_name)\n",
"        if not match:\n",
"            continue\n",
"        from_type, to_type, duration, sample_rate, channels = match.groups()\n",
"\n",
"        # '<number> <unit>' pairs; the fractional part of the number is optional.\n",
"        time_values = re.findall(r'(\\d+(?:\\.\\d+)?) (\\w+)', time_part)\n",
"        if len(time_values) < 3:\n",
"            continue\n",
"\n",
"        # Every value keeps its own unit: one bracket can mix e.g. ns and us.\n",
"        (min_time, unit), (median_time, median_unit), (max_time, max_unit) = time_values[:3]\n",
"\n",
"        benchmark_data.append({\n",
"            'from_type': from_type,\n",
"            'to_type': to_type,\n",
"            'duration_sec': int(duration),\n",
"            'sample_rate_hz': int(sample_rate),\n",
"            'channels': int(channels),\n",
"            'min_time_ns': to_ns(min_time, unit),\n",
"            'median_time_ns': to_ns(median_time, median_unit),\n",
"            'max_time_ns': to_ns(max_time, max_unit),\n",
"            'time_unit': unit\n",
"        })\n",
"\n",
"    # Passing the column list keeps the schema stable for an empty result.\n",
"    return pd.DataFrame(benchmark_data, columns=columns)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total number of benchmarks: 180\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>from_type</th>\n",
" <th>to_type</th>\n",
" <th>duration_sec</th>\n",
" <th>sample_rate_hz</th>\n",
" <th>channels</th>\n",
" <th>min_time_ns</th>\n",
" <th>median_time_ns</th>\n",
" <th>max_time_ns</th>\n",
" <th>time_unit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>i16</td>\n",
" <td>i24</td>\n",
" <td>1</td>\n",
" <td>8000</td>\n",
" <td>1</td>\n",
" <td>0.18923</td>\n",
" <td>0.18965</td>\n",
" <td>0.19010</td>\n",
" <td>ps</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>i16</td>\n",
" <td>i24</td>\n",
" <td>1</td>\n",
" <td>16000</td>\n",
" <td>1</td>\n",
" <td>0.19057</td>\n",
" <td>0.19093</td>\n",
" <td>0.19130</td>\n",
" <td>ps</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>i16</td>\n",
" <td>i24</td>\n",
" <td>1</td>\n",
" <td>44100</td>\n",
" <td>1</td>\n",
" <td>0.18862</td>\n",
" <td>0.18908</td>\n",
" <td>0.18961</td>\n",
" <td>ps</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>i16</td>\n",
" <td>i24</td>\n",
" <td>10</td>\n",
" <td>8000</td>\n",
" <td>1</td>\n",
" <td>0.19038</td>\n",
" <td>0.19068</td>\n",
" <td>0.19097</td>\n",
" <td>ps</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>i16</td>\n",
" <td>i24</td>\n",
" <td>10</td>\n",
" <td>16000</td>\n",
" <td>1</td>\n",
" <td>0.18884</td>\n",
" <td>0.18923</td>\n",
" <td>0.18966</td>\n",
" <td>ps</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" from_type to_type duration_sec sample_rate_hz channels min_time_ns \\\n",
"0 i16 i24 1 8000 1 0.18923 \n",
"1 i16 i24 1 16000 1 0.19057 \n",
"2 i16 i24 1 44100 1 0.18862 \n",
"3 i16 i24 10 8000 1 0.19038 \n",
"4 i16 i24 10 16000 1 0.18884 \n",
"\n",
" median_time_ns max_time_ns time_unit \n",
"0 0.18965 0.19010 ps \n",
"1 0.19093 0.19130 ps \n",
"2 0.18908 0.18961 ps \n",
"3 0.19068 0.19097 ps \n",
"4 0.18923 0.18966 ps "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the benchmark data\n",
"benchmark_df = parse_benchmark_data('conversion_benches.bench')\n",
"\n",
"# Write one GitHub-flavoured markdown table per (from_type, to_type) pair\n",
"os.makedirs(\"./bench_results\", exist_ok=True)\n",
"\n",
"for (from_type, to_type), group in benchmark_df.groupby([\"from_type\", \"to_type\"]):\n",
"    fp = f\"./bench_results/{from_type}_to_{to_type}.md\"\n",
"    group.to_markdown(fp, tablefmt=\"github\", index=False)\n",
"\n",
"# Show the row count and the first few rows. The bare expression is kept as the\n",
"# last statement of the cell because Jupyter only renders a trailing expression.\n",
"print(f\"Total number of benchmarks: {len(benchmark_df)}\")\n",
"benchmark_df.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "audio_sample_benches",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}