use serde::{Deserialize, Serialize};
use std::fmt;
/// Identifier of a speaker, wrapping a raw `u32` index.
///
/// The inner value is public and unvalidated, so every `u32` is a valid id.
/// The `Display` form is `SPEAKER_` followed by the index zero-padded to at
/// least two digits (e.g. `SPEAKER_03`, `SPEAKER_117`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SpeakerId(pub u32);

impl fmt::Display for SpeakerId {
    /// Writes the `SPEAKER_NN` label; indices of 100 and above are written in
    /// full rather than being cut to two digits.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(index) = *self;
        f.write_fmt(format_args!("SPEAKER_{:02}", index))
    }
}
/// Sample rate whose constructor only accepts values in `8000..=192000`
/// (presumably Hz — TODO confirm the unit against callers).
///
/// NOTE(review): the derived `Deserialize` does not go through [`SampleRate::new`],
/// so a deserialized value is not range-checked — confirm that this is intended.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SampleRate(u32);

impl SampleRate {
    /// Wraps `rate` when it lies in the inclusive range `8000..=192000`.
    ///
    /// Returns `None` for any value outside that range.
    pub fn new(rate: u32) -> Option<Self> {
        match rate {
            8_000..=192_000 => Some(Self(rate)),
            _ => None,
        }
    }

    /// Returns the wrapped rate as a plain `u32`.
    pub fn get(&self) -> u32 {
        let Self(rate) = *self;
        rate
    }
}

impl Default for SampleRate {
    /// Defaults to `16000`, a value that `new` would also accept.
    fn default() -> Self {
        SampleRate(16_000)
    }
}
/// Confidence score whose constructor only accepts values in `0.0..=1.0`.
///
/// Only `PartialEq` is derived (no `Eq`/`Hash`) because the payload is an `f32`.
///
/// NOTE(review): the derived `Deserialize` does not go through [`Confidence::new`],
/// so a deserialized value is not range-checked — confirm that this is intended.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct Confidence(f32);

impl Confidence {
    /// Wraps `v` when it lies in the inclusive range `0.0..=1.0`.
    ///
    /// Returns `None` for anything outside the range; NaN is rejected too,
    /// because it fails every range comparison.
    pub fn new(v: f32) -> Option<Self> {
        Some(v)
            .filter(|score| (0.0..=1.0).contains(score))
            .map(Self)
    }

    /// Returns the wrapped score as a plain `f32`.
    pub fn get(&self) -> f32 {
        let Self(score) = *self;
        score
    }
}

impl Default for Confidence {
    /// Defaults to `1.0`, the upper bound of the accepted range.
    fn default() -> Self {
        Confidence(1.0)
    }
}
/// Embedding dimensionality whose constructor rejects zero.
///
/// NOTE(review): the derived `Deserialize` does not go through [`EmbeddingDim::new`],
/// so a deserialized value may be `0` — confirm that this is intended.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct EmbeddingDim(usize);

impl EmbeddingDim {
    /// Wraps `dim` when it is strictly positive; returns `None` for `0`.
    pub fn new(dim: usize) -> Option<Self> {
        match dim {
            0 => None,
            nonzero => Some(Self(nonzero)),
        }
    }

    /// Returns the wrapped dimension as a plain `usize`.
    pub fn get(&self) -> usize {
        let Self(dim) = *self;
        dim
    }
}

impl Default for EmbeddingDim {
    /// Defaults to `256`.
    fn default() -> Self {
        EmbeddingDim(256)
    }
}
/// Non-negative `f32` quantity (named as a duration/offset in seconds).
///
/// NOTE(review): the derived `Deserialize` does not go through [`Seconds::new`],
/// so a deserialized value may be negative or NaN — confirm that this is intended.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct Seconds(f32);

impl Seconds {
    /// Wraps `v` when `v >= 0.0` holds.
    ///
    /// Negative values and NaN yield `None` (NaN fails the comparison).
    /// Positive infinity satisfies the comparison and is therefore accepted.
    pub fn new(v: f32) -> Option<Self> {
        if v >= 0.0 {
            Some(Self(v))
        } else {
            None
        }
    }

    /// Returns the wrapped value as a plain `f32`.
    pub fn get(&self) -> f32 {
        let Self(secs) = *self;
        secs
    }
}

impl Default for Seconds {
    /// Defaults to `0.0`.
    fn default() -> Self {
        Seconds(0.0)
    }
}
/// A span described by two `f64` endpoints (presumably seconds — TODO confirm).
///
/// Both fields are public and nothing here enforces `start <= end`.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct TimeRange {
    /// Beginning of the span.
    pub start: f64,
    /// End of the span.
    pub end: f64,
}

impl TimeRange {
    /// Returns `end - start`.
    ///
    /// The result is negative when `end` precedes `start`, since no ordering
    /// of the endpoints is enforced.
    pub fn duration(&self) -> f64 {
        let Self { start, end } = *self;
        end - start
    }
}
/// A time span with an optional speaker attribution and an optional
/// confidence value.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Segment {
/// Time span covered by this segment.
pub time: TimeRange,
/// Speaker attributed to the segment, or `None` when no speaker is set.
pub speaker: Option<SpeakerId>,
/// Optional score stored as a raw `f32`; it is not wrapped in `Confidence`,
/// so no range is enforced by the type.
pub confidence: Option<f32>,
}
/// A time span attributed to one speaker, optionally carrying text.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SpeakerTurn {
/// Speaker the turn belongs to (always present, unlike `Segment::speaker`).
pub speaker: SpeakerId,
/// Time span covered by the turn.
pub time: TimeRange,
/// Optional text for the turn; the field is left out of serialized output
/// entirely when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<String>,
}
/// A single word paired with a time span, an optional speaker and a score.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WordAlignment {
/// The word text.
pub word: String,
/// Time span associated with the word.
pub time: TimeRange,
/// Speaker attributed to the word, or `None` when no speaker is set.
pub speaker: Option<SpeakerId>,
/// Score stored as a raw `f32`; it is not wrapped in `Confidence`, so no
/// range is enforced by the type.
pub confidence: f32,
}
/// Aggregate output holding segments, speaker turns and a speaker count.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DiarizationResult {
/// Segments, each with an optional speaker.
pub segments: Vec<Segment>,
/// Speaker turns, each tied to exactly one speaker.
pub turns: Vec<SpeakerTurn>,
/// Speaker count. NOTE(review): nothing here ties this value to the
/// speakers that appear in `segments`/`turns` — confirm against the producer.
pub num_speakers: usize,
}
/// Tunable parameters for diarization, plus helpers that convert the
/// second-based settings into sample counts.
///
/// All fields are public and unvalidated.
#[derive(Debug, Clone, Copy)]
pub struct DiarizationConfig {
    /// Decision threshold (exact semantics are defined by the consumer —
    /// TODO confirm).
    pub threshold: f32,
    /// Upper bound on the number of speakers.
    pub max_speakers: usize,
    /// Window length in seconds.
    pub window_secs: f32,
    /// Hop between windows in seconds.
    pub hop_secs: f32,
    /// Minimum speech length in seconds.
    pub min_speech_secs: f32,
    /// Sample rate used by the `*_samples` conversions (a raw `u32`, not a
    /// `SampleRate`, so no range is enforced).
    pub sample_rate: u32,
}

impl Default for DiarizationConfig {
    /// Defaults: threshold 0.5, at most 64 speakers, 1.5 s window, 0.75 s hop,
    /// 0.25 s minimum speech, sample rate 16000.
    fn default() -> Self {
        DiarizationConfig {
            sample_rate: 16_000,
            window_secs: 1.5,
            hop_secs: 0.75,
            min_speech_secs: 0.25,
            threshold: 0.5,
            max_speakers: 64,
        }
    }
}

impl DiarizationConfig {
    /// Converts a length in seconds to a sample count at `self.sample_rate`.
    ///
    /// The product is computed in `f32` and converted with an `as` cast, which
    /// truncates toward zero and saturates: negative and NaN products become 0.
    fn secs_to_samples(&self, secs: f32) -> usize {
        let rate = self.sample_rate as f32;
        (secs * rate) as usize
    }

    /// Window length expressed in samples (`window_secs * sample_rate`, truncated).
    pub fn window_samples(&self) -> usize {
        self.secs_to_samples(self.window_secs)
    }

    /// Hop length expressed in samples (`hop_secs * sample_rate`, truncated).
    pub fn hop_samples(&self) -> usize {
        self.secs_to_samples(self.hop_secs)
    }

    /// Minimum speech length expressed in samples
    /// (`min_speech_secs * sample_rate`, truncated).
    pub fn min_speech_samples(&self) -> usize {
        self.secs_to_samples(self.min_speech_secs)
    }
}