polyvoice 0.6.9

Speaker diarization for Rust — who spoke when. ONNX-powered: Silero VAD, WeSpeaker embeddings, Pyannote segmentation, K-means/AHC clustering, overlap detection.
Documentation
{
  "schema": "polyvoice-der-baseline-v2",
  "crate_version": "0.6.3",
  "git_sha": "6ab2dce",
  "command_line": "cargo run --release --features cli --bin polyvoice-bench -- data/<dataset> --profile balanced --collar 0.25",
  "voxconverse_test": {
    "files": 232,
    "profile": "balanced",
    "der_collar_0_25": 13.83,
    "der_no_collar": null,
    "tolerance": 1.0,
    "model_versions": {
      "segmenter": "silero_vad",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — filled 2026-05-08 by v0.5.2 legacy pipeline (threshold=0.45)",
    "_filled_by": "polyvoice-bench-v052"
  },
  "voxconverse_test_10files": {
    "files": 10,
    "profile": "balanced",
    "der_collar_0_25": 17.43,
    "der_no_collar": 25.99,
    "tolerance": 1.0,
    "model_versions": {
      "segmenter": "silero_vad",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — 10-file subset, v0.6.0-alpha.7 legacy pipeline (threshold=0.45)",
    "_filled_by": "polyvoice-bench-v0.6"
  },
  "e2e_smoke": {
    "files": 1,
    "profile": "balanced",
    "der_collar_0_25": 6.62,
    "der_no_collar": null,
    "tolerance": 1.0,
    "model_versions": {
      "segmenter": "silero_vad",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — bundled 26 s clip, v0.6.0-alpha.3 legacy pipeline (threshold=0.45)",
    "_filled_by": "cargo test --all-features --test e2e_smoke_test -- --ignored"
  },
  "ami_test_single": {
    "files": 1,
    "profile": "balanced",
    "der_collar_0_25": 36.30,
    "der_no_collar": 44.73,
    "tolerance": 1.0,
    "model_versions": {
      "segmenter": "silero_vad",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — EN2002a single meeting, v0.6.0-alpha.7 legacy pipeline (threshold=0.45)",
    "_filled_by": "polyvoice-bench-v0.6"
  },
  "v2_e2e_smoke": {
    "files": 1,
    "profile": "balanced",
    "der_collar_0_25": 4.43,
    "der_no_collar": null,
    "tolerance": 1.0,
    "model_versions": {
      "segmenter": "powerset_fp32",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — Pipeline v2 (Powerset + ResNet34 + AHC), DER < 5% on 26 s clip",
    "_filled_by": "cargo test --test pipeline_v2_integration -- --ignored"
  },
  "hybrid_e2e_smoke": {
    "files": 1,
    "profile": "balanced",
    "der_collar_0_25": 4.43,
    "der_no_collar": null,
    "tolerance": 1.0,
    "model_versions": {
      "segmenter": "powerset_fp32",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — Hybrid pipeline (Powerset VAD + ResNet34 + AHC), API-only",
    "_filled_by": "cargo test --test pipeline_v2_hybrid_test -- --ignored"
  },
  "hybrid_voxconverse_3file": {
    "files": 3,
    "profile": "balanced",
    "der_collar_0_25": 8.27,
    "der_no_collar": null,
    "tolerance": 2.0,
    "model_versions": {
      "segmenter": "powerset_fp32",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — Hybrid pipeline 3-file subset (aepyx/aggyz/aiqwk)",
    "_filled_by": "cargo test --test pipeline_v2_hybrid_test -- --ignored"
  },
  "hybrid_voxconverse_10file": {
    "files": 10,
    "profile": "balanced",
    "der_collar_0_25": 15.03,
    "der_no_collar": null,
    "tolerance": 2.0,
    "model_versions": {
      "segmenter": "powerset_fp32",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — Hybrid pipeline 10-file subset (threshold=0.40). aorju is a known outlier (47.00% DER, 12 speakers, 23 min, 17% overlap). Excluding aorju: avg DER ~9.4%.",
    "_filled_by": "cargo test --release --test pipeline_v2_hybrid_test -- hybrid_voxconverse_10_file_subset --ignored --nocapture"
  },
  "hybrid_ami_test_single": {
    "files": 1,
    "profile": "balanced",
    "der_collar_0_25": 24.95,
    "der_no_collar": null,
    "tolerance": 3.0,
    "model_versions": {
      "segmenter": "powerset_fp32",
      "embedder": "wespeaker_resnet34"
    },
    "_status": "operational — Hybrid pipeline on AMI EN2002a (35 min, 4 speakers, 78.8% overlap). Legacy DER on same file: 36.30%.",
    "_filled_by": "cargo test --release --test pipeline_v2_hybrid_test -- hybrid_ami_test_single --ignored --nocapture"
  }
}