use crate::cluster::SpeakerCluster;
use crate::embedding::{DummyExtractor, EmbeddingExtractor};
use crate::types::DiarizationConfig;
use std::ffi::{CString, c_char, c_float};
use std::os::raw::c_int;
use std::ptr;
/// Diarizer state handed to C callers as a raw pointer.
///
/// Instances are heap-allocated by `polyvoice_diarizer_new` and released by
/// `polyvoice_diarizer_free`. The struct is not `#[repr(C)]`, so foreign code
/// can only hold the pointer; it cannot read the fields.
pub struct PolyvoiceDiarizer {
/// Settings the run function reads for window size, hop size and sample rate.
config: DiarizationConfig,
/// Assigns each extracted embedding to a speaker.
cluster: SpeakerCluster,
/// Produces one embedding per audio window.
extractor: DummyExtractor,
}
/// One labelled audio window, laid out for C consumers.
#[repr(C)]
pub struct PolyvoiceTurn {
/// NUL-terminated label of the form `SPEAKER_NN`, created with
/// `CString::into_raw`; it must be released with `CString::from_raw`
/// (which `polyvoice_result_free` does).
pub speaker: *mut c_char,
/// Start of the window: first sample index divided by the configured sample rate.
pub start: c_float,
/// End of the window: (first sample index + window length) divided by the
/// configured sample rate.
pub end: c_float,
}
/// Output of `polyvoice_diarizer_run`, laid out for C consumers.
///
/// Release it with `polyvoice_result_free`, which frees the turn array, every
/// speaker label and the result itself.
#[repr(C)]
pub struct PolyvoiceResult {
/// Pointer to the first element of a boxed slice holding `num_turns` turns.
pub turns: *mut PolyvoiceTurn,
/// Number of elements reachable through `turns`.
pub num_turns: usize,
}
#[unsafe(no_mangle)] pub unsafe extern "C" fn polyvoice_diarizer_new(
threshold: c_float,
max_speakers: c_int,
) -> *mut PolyvoiceDiarizer {
let config = DiarizationConfig {
threshold,
max_speakers: max_speakers as usize,
..Default::default()
};
let diarizer = PolyvoiceDiarizer {
config,
cluster: SpeakerCluster::new(config),
extractor: DummyExtractor::new(256),
};
Box::into_raw(Box::new(diarizer))
}
/// Runs sliding-window diarization over `sample_count` samples.
///
/// The audio is cut into windows of `config.window_samples()` samples whose
/// starts are `config.hop_samples()` samples apart. Each window is embedded,
/// assigned to a speaker, and reported as one [`PolyvoiceTurn`] labelled
/// `SPEAKER_NN`. Windows whose embedding extraction fails are skipped.
///
/// Returns a heap-allocated [`PolyvoiceResult`] that must be released with
/// `polyvoice_result_free`, or null when:
///
/// * `diarizer` or `samples` is null, or `sample_count` is zero;
/// * the configured window or hop size is zero (a zero hop would never
///   advance through the audio);
/// * the audio is shorter than one window;
/// * a speaker label cannot be converted to a C string.
///
/// # Safety
///
/// * `diarizer` must be null or a pointer obtained from
///   `polyvoice_diarizer_new` that has not been freed and is not being used
///   concurrently.
/// * `samples` must be null or point to at least `sample_count` readable,
///   initialised `f32` values that stay valid for the duration of the call.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn polyvoice_diarizer_run(
    diarizer: *mut PolyvoiceDiarizer,
    samples: *const c_float,
    sample_count: usize,
) -> *mut PolyvoiceResult {
    if diarizer.is_null() || samples.is_null() || sample_count == 0 {
        return ptr::null_mut();
    }
    // SAFETY: `diarizer` is non-null and, per the contract above, points to a
    // live diarizer that nothing else is accessing.
    let d = unsafe { &mut *diarizer };
    // SAFETY: `samples` is non-null and the caller guarantees `sample_count`
    // readable elements behind it.
    let audio = unsafe { std::slice::from_raw_parts(samples, sample_count) };

    let window = d.config.window_samples();
    let hop = d.config.hop_samples();
    // A zero hop would loop forever and a zero window yields only empty
    // chunks; both are rejected together with too-short input.
    if window == 0 || hop == 0 || audio.len() < window {
        return ptr::null_mut();
    }

    // Timestamps are computed in f64 and narrowed once, so sample offsets
    // beyond 2^24 are not rounded before the division.
    let sample_rate = d.config.sample_rate.get() as f64;

    // Labels stay owned `CString`s until the whole pass has succeeded, so an
    // early return cannot leak them.
    let mut labelled: Vec<(CString, c_float, c_float)> = Vec::new();
    for (index, chunk) in audio.windows(window).step_by(hop).enumerate() {
        // Best effort: a window that cannot be embedded produces no turn.
        let Ok(emb) = d.extractor.extract(chunk, &d.config) else {
            continue;
        };
        let (speaker, _conf) = d.cluster.assign(&emb);
        let Ok(label) = CString::new(format!("SPEAKER_{:02}", speaker.0)) else {
            return ptr::null_mut();
        };
        let start = index * hop;
        labelled.push((
            label,
            (start as f64 / sample_rate) as c_float,
            ((start + window) as f64 / sample_rate) as c_float,
        ));
    }

    let turns: Box<[PolyvoiceTurn]> = labelled
        .into_iter()
        .map(|(label, start, end)| PolyvoiceTurn {
            speaker: label.into_raw(),
            start,
            end,
        })
        .collect();
    let num_turns = turns.len();
    // `Box::into_raw` hands over ownership without touching the box again
    // after the pointer has been taken (unlike `as_mut_ptr` + `mem::forget`).
    let turns_ptr = Box::into_raw(turns) as *mut PolyvoiceTurn;

    Box::into_raw(Box::new(PolyvoiceResult {
        turns: turns_ptr,
        num_turns,
    }))
}
/// Destroys a diarizer created by `polyvoice_diarizer_new`.
///
/// Passing null is allowed and does nothing.
///
/// # Safety
///
/// `diarizer` must be null or a pointer returned by `polyvoice_diarizer_new`
/// that has not already been freed. The pointer is dangling afterwards.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn polyvoice_diarizer_free(diarizer: *mut PolyvoiceDiarizer) {
    if diarizer.is_null() {
        return;
    }
    // SAFETY: non-null, and the caller guarantees it came from `Box::into_raw`
    // and is released only once.
    drop(unsafe { Box::from_raw(diarizer) });
}
/// Releases a result returned by `polyvoice_diarizer_run`.
///
/// Frees every non-null speaker label, then the turn array, then the result
/// itself. Passing null is allowed and does nothing.
///
/// # Safety
///
/// `result` must be null or a pointer returned by `polyvoice_diarizer_run`
/// that has not already been freed, with `turns`, `num_turns` and the speaker
/// pointers unmodified since it was returned.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn polyvoice_result_free(result: *mut PolyvoiceResult) {
    if result.is_null() {
        return;
    }
    // SAFETY: non-null and allocated through `Box::into_raw`. Declared first
    // so it is dropped last, after the turn array below.
    let owned = unsafe { Box::from_raw(result) };
    if owned.turns.is_null() {
        return;
    }
    // SAFETY: `turns`/`num_turns` describe the boxed slice the run function
    // leaked, so rebuilding the box restores ownership of that allocation.
    let turns = unsafe {
        Box::from_raw(std::ptr::slice_from_raw_parts_mut(
            owned.turns,
            owned.num_turns,
        ))
    };
    for speaker in turns
        .iter()
        .map(|turn| turn.speaker)
        .filter(|label| !label.is_null())
    {
        // SAFETY: each non-null label came from `CString::into_raw`.
        drop(unsafe { CString::from_raw(speaker) });
    }
}
/// Returns the library version as a NUL-terminated C string.
///
/// The pointer refers to static data: it stays valid for the lifetime of the
/// program and must not be freed or written through by the caller.
#[unsafe(no_mangle)]
pub extern "C" fn polyvoice_version() -> *const c_char {
    const VERSION: &std::ffi::CStr = c"0.4.3";
    VERSION.as_ptr()
}