Function pyannote_rs::segment
source · pub fn segment<P: AsRef<Path>>(
samples: &[i16],
sample_rate: u32,
model_path: P,
) -> Result<Vec<Segment>>
Examples found in repository?
examples/infinite.rs (line 22)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/// Example entry point: segments a WAV file and prints a speaker label for
/// every segment.
///
/// The audio path is taken from the first command-line argument. Each segment
/// returned by `pyannote_rs::segment` is turned into an embedding and looked up
/// in an `EmbeddingManager`; one line of the form
/// `start = .., end = .., speaker = ..` is printed per segment.
///
/// # Panics
///
/// Panics if no audio path is given on the command line, or if reading the WAV
/// file, creating the embedding extractor, or running segmentation fails
/// (each of these results is unwrapped).
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let search_threshold = 0.5;
    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    // NOTE(review): the argument is presumably the maximum number of speakers,
    // so `usize::MAX` would mean "no practical limit" — confirm against
    // `EmbeddingManager::new`.
    let mut embedding_manager = EmbeddingManager::new(usize::MAX);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();
    for segment in segments {
        // Compute the embedding for this segment. A failure is reported and the
        // segment is still printed, with an unknown ("?") speaker.
        let embedding_result: Vec<f32> = match embedding_extractor.compute(&segment.samples) {
            Ok(result) => result.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Find the speaker. If the search at `search_threshold` yields nothing,
        // retry with a threshold of 0.0 and use *that* result.
        //
        // `or_else` (not `ok_or_else`) is required here: `ok_or_else` would wrap
        // the fallback in `Err`, and the `"?"` default below would then discard
        // it, so the fallback search could never supply the printed speaker.
        // The clone is needed because the first call consumes the embedding and
        // the fallback needs its own copy.
        let speaker = embedding_manager
            .search_speaker(embedding_result.clone(), search_threshold)
            .or_else(|| embedding_manager.search_speaker(embedding_result, 0.0))
            .map(|r| r.to_string())
            .unwrap_or_else(|| "?".to_string());

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
More examples
examples/max_speakers.rs (line 23)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/// Example entry point: labels each segment of a WAV file with a speaker,
/// using an `EmbeddingManager` constructed with `max_speakers` (6).
///
/// The audio path is the first command-line argument. For every segment an
/// embedding is computed; while the manager reports fewer than `max_speakers`
/// speakers the embedding goes through `search_speaker`, and once the count
/// equals `max_speakers` it goes through `get_best_speaker_match` instead.
/// One `start = .., end = .., speaker = ..` line is printed per segment.
///
/// # Panics
///
/// Panics if the audio path argument is missing, or if reading the WAV file,
/// creating the embedding extractor, or running segmentation fails.
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");

    // Tunables and model locations used by this example.
    let max_speakers = 6;
    let search_threshold = 0.5;
    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(max_speakers);

    for seg in pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap() {
        // Embedding for this segment; on failure, report it, emit the segment
        // with an unknown speaker, and move on.
        let embedding = match embedding_extractor.compute(&seg.samples) {
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    seg.start, seg.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    seg.start, seg.end
                );
                continue;
            }
            Ok(values) => values.collect(),
        };

        // Choose the lookup based on whether the manager already holds
        // `max_speakers` speakers. Each branch converts its own result to a
        // label because the two lookups are not assumed to share a return type.
        let at_capacity = embedding_manager.get_all_speakers().len() == max_speakers;
        let speaker = if at_capacity {
            let best = embedding_manager.get_best_speaker_match(embedding);
            best.map(|id| id.to_string()).unwrap_or(String::from("?"))
        } else {
            let found = embedding_manager.search_speaker(embedding, search_threshold);
            found.map(|id| id.to_string()).unwrap_or(String::from("?"))
        };

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            seg.start, seg.end, speaker
        );
    }
}