use std::ops::Deref;
use ndarray::{Array2, Array3, s};
use crate::powerset::PowersetMapping;
use super::ChunkLayout;
pub(in crate::pipeline) struct PendingEmbedding<'a> {
pub chunk_idx: usize,
pub speaker_idx: usize,
pub audio: &'a [f32],
pub mask: Vec<f32>,
pub clean_mask: Vec<f32>,
}
pub(in crate::pipeline) struct PendingSplitEmbedding {
pub chunk_idx: usize,
pub speaker_idx: usize,
pub fbank_idx: usize,
pub weights: Vec<f32>,
}
#[derive(Debug, Clone)]
pub struct DecodedSegmentations(pub Array3<f32>);
impl Deref for DecodedSegmentations {
type Target = Array3<f32>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, Clone)]
pub struct ChunkEmbeddings(pub Array3<f32>);
impl Deref for ChunkEmbeddings {
type Target = Array3<f32>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, Clone)]
pub struct SpeakerCountTrack(pub Vec<usize>);
impl Deref for SpeakerCountTrack {
type Target = Vec<usize>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, Clone)]
pub struct ChunkSpeakerClusters(pub Array2<i32>);
impl Deref for ChunkSpeakerClusters {
type Target = Array2<i32>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[derive(Debug, Clone)]
pub struct DiscreteDiarization(pub Array2<f32>);
impl Deref for DiscreteDiarization {
type Target = Array2<f32>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DiscreteDiarization {
pub fn make_exclusive(&mut self) {
crate::reconstruct::make_exclusive(&mut self.0);
}
pub fn to_segments(
&self,
frame_step_seconds: f64,
frame_duration_seconds: f64,
) -> Vec<crate::segment::Segment> {
crate::segment::to_segments(&self.0, frame_step_seconds, frame_duration_seconds)
}
}
#[derive(Debug, Clone)]
pub(crate) struct FrameActivations(pub(crate) Array2<f32>);
impl Deref for FrameActivations {
type Target = Array2<f32>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
pub(in crate::pipeline) struct RawSegmentationWindows(pub Vec<Array2<f32>>);
impl RawSegmentationWindows {
pub(in crate::pipeline) fn decode(self, powerset: &PowersetMapping) -> DecodedSegmentations {
let mut windows = self.0.into_iter();
let Some(first_window) = windows.next() else {
return DecodedSegmentations(Array3::zeros((0, 0, 0)));
};
let num_windows = windows.len() + 1;
let first = powerset.hard_decode(&first_window);
let mut stacked = Array3::<f32>::zeros((num_windows, first.nrows(), first.ncols()));
stacked.slice_mut(s![0, .., ..]).assign(&first);
for (window_idx, window) in windows.enumerate() {
let decoded = powerset.hard_decode(&window);
stacked
.slice_mut(s![window_idx + 1, .., ..])
.assign(&decoded);
}
DecodedSegmentations(stacked)
}
}
pub struct BatchInput<'a> {
pub audio: &'a [f32],
pub file_id: &'a str,
}
pub struct InferenceArtifacts {
pub(in crate::pipeline) layout: ChunkLayout,
pub(in crate::pipeline) segmentations: DecodedSegmentations,
pub(in crate::pipeline) embeddings: ChunkEmbeddings,
}
pub struct DiarizationResult {
pub segmentations: DecodedSegmentations,
pub embeddings: ChunkEmbeddings,
pub speaker_count: SpeakerCountTrack,
pub hard_clusters: ChunkSpeakerClusters,
pub discrete_diarization: DiscreteDiarization,
pub segments: Vec<crate::segment::Segment>,
}
impl DiarizationResult {
pub fn rttm(&self, file_id: &str) -> String {
crate::segment::to_rttm(&self.segments, file_id)
}
}
pub(in crate::pipeline) enum InferencePath {
Sequential,
Concurrent,
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub(in crate::pipeline) enum EmbeddingPath {
Masked,
Split,
MultiMask,
}