{
"schema": "polyvoice-der-baseline-v2",
"crate_version": "0.6.3",
"git_sha": "6ab2dce",
"command_line": "cargo run --release --features cli --bin polyvoice-bench -- data/<dataset> --profile balanced --collar 0.25",
"voxconverse_test": {
"files": 232,
"profile": "balanced",
"der_collar_0_25": 13.83,
"der_no_collar": null,
"tolerance": 1.0,
"model_versions": {
"segmenter": "silero_vad",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — filled 2026-05-08 by v0.5.2 legacy pipeline (threshold=0.45)",
"_filled_by": "polyvoice-bench-v0.5.2"
},
"voxconverse_test_10files": {
"files": 10,
"profile": "balanced",
"der_collar_0_25": 17.43,
"der_no_collar": 25.99,
"tolerance": 1.0,
"model_versions": {
"segmenter": "silero_vad",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — 10-file subset, v0.6.0-alpha.7 legacy pipeline (threshold=0.45)",
"_filled_by": "polyvoice-bench-v0.6"
},
"e2e_smoke": {
"files": 1,
"profile": "balanced",
"der_collar_0_25": 6.62,
"der_no_collar": null,
"tolerance": 1.0,
"model_versions": {
"segmenter": "silero_vad",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — bundled 26 s clip, v0.6.0-alpha.3 legacy pipeline (threshold=0.45)",
"_filled_by": "cargo test --all-features --test e2e_smoke_test -- --ignored"
},
"ami_test_single": {
"files": 1,
"profile": "balanced",
"der_collar_0_25": 36.30,
"der_no_collar": 44.73,
"tolerance": 1.0,
"model_versions": {
"segmenter": "silero_vad",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — EN2002a single meeting, v0.6.0-alpha.7 legacy pipeline (threshold=0.45)",
"_filled_by": "polyvoice-bench-v0.6"
},
"v2_e2e_smoke": {
"files": 1,
"profile": "balanced",
"der_collar_0_25": 4.43,
"der_no_collar": null,
"tolerance": 1.0,
"model_versions": {
"segmenter": "powerset_fp32",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — Pipeline v2 (Powerset + ResNet34 + AHC), DER < 5% on 26 s clip",
"_filled_by": "cargo test --test pipeline_v2_integration -- --ignored"
},
"hybrid_e2e_smoke": {
"files": 1,
"profile": "balanced",
"der_collar_0_25": 4.43,
"der_no_collar": null,
"tolerance": 1.0,
"model_versions": {
"segmenter": "powerset_fp32",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — Hybrid pipeline (Powerset VAD + ResNet34 + AHC), API-only",
"_filled_by": "cargo test --test pipeline_v2_hybrid_test -- --ignored"
},
"hybrid_voxconverse_3file": {
"files": 3,
"profile": "balanced",
"der_collar_0_25": 8.27,
"der_no_collar": null,
"tolerance": 2.0,
"model_versions": {
"segmenter": "powerset_fp32",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — Hybrid pipeline 3-file subset (aepyx/aggyz/aiqwk)",
"_filled_by": "cargo test --test pipeline_v2_hybrid_test -- --ignored"
},
"hybrid_voxconverse_10file": {
"files": 10,
"profile": "balanced",
"der_collar_0_25": 15.03,
"der_no_collar": null,
"tolerance": 2.0,
"model_versions": {
"segmenter": "powerset_fp32",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — Hybrid pipeline 10-file subset (threshold=0.40). aorju is a known outlier (47.00% DER, 12 speakers, 23 min, 17% overlap). Excluding aorju, the average DER is ~9.4%.",
"_filled_by": "cargo test --release --test pipeline_v2_hybrid_test -- hybrid_voxconverse_10_file_subset --ignored --nocapture"
},
"hybrid_ami_test_single": {
"files": 1,
"profile": "balanced",
"der_collar_0_25": 24.95,
"der_no_collar": null,
"tolerance": 3.0,
"model_versions": {
"segmenter": "powerset_fp32",
"embedder": "wespeaker_resnet34"
},
"_status": "operational — Hybrid pipeline on AMI EN2002a (35 min, 4 speakers, 78.8% overlap). Legacy DER on same file: 36.30%.",
"_filled_by": "cargo test --release --test pipeline_v2_hybrid_test -- hybrid_ami_test_single --ignored --nocapture"
}
}