Struct pyannote_rs::EmbeddingExtractor
source · pub struct EmbeddingExtractor { /* private fields */ }
Implementations§
source§impl EmbeddingExtractor
impl EmbeddingExtractor
source · pub fn new<P: AsRef<Path>>(model_path: P) -> Result<Self>
pub fn new<P: AsRef<Path>>(model_path: P) -> Result<Self>
Examples found in repository?
examples/infinite.rs (line 19)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/// Diarizes the WAV file named by the first command-line argument and prints one
/// `start / end / speaker` line per detected segment.
///
/// The embedding manager is created with `usize::MAX` (presumably the speaker cap, i.e.
/// effectively unlimited; confirm against `EmbeddingManager::new`). Segments whose
/// embedding cannot be computed are reported with speaker `?` and skipped.
///
/// # Panics
///
/// Panics if no audio path is given, or if reading the WAV file, loading the embedding
/// model, or running segmentation fails.
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(usize::MAX);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Compute the embedding for this segment; on failure report it and move on.
        let embedding_result: Vec<f32> = match embedding_extractor.compute(&segment.samples) {
            Ok(result) => result.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Find the speaker. If nothing clears `search_threshold`, retry with a threshold
        // of 0.0 so a speaker is still returned where possible.
        //
        // `or_else` keeps the value as an `Option`, so the retry's result is actually
        // used. The previous `ok_or_else` wrapped the retry's result in `Err`, which the
        // final fallback then threw away, always yielding "?".
        let speaker = embedding_manager
            .search_speaker(embedding_result.clone(), search_threshold)
            .or_else(|| embedding_manager.search_speaker(embedding_result, 0.0))
            .map(|r| r.to_string())
            .unwrap_or_else(|| "?".into());

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
More examples
examples/max_speakers.rs (line 20)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/// Diarizes the WAV file named by the first command-line argument while limiting the
/// embedding manager to `max_speakers` speakers, printing one line per segment.
///
/// While the manager holds a number of speakers different from `max_speakers`, segments
/// are matched with `search_speaker` and `search_threshold`; once the count equals
/// `max_speakers`, `get_best_speaker_match` is used instead. Segments whose embedding
/// cannot be computed, or for which no speaker is returned, are printed with `?`.
///
/// # Panics
///
/// Panics if no audio path is given, or if reading the WAV file, loading the embedding
/// model, or running segmentation fails.
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");

    // Tunables for this example.
    let max_speakers = 6;
    let search_threshold = 0.5;

    // Model files are looked up relative to the working directory.
    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(max_speakers);
    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Turn the segment's samples into an embedding vector; report and skip on failure.
        let embedding: Vec<f32> = match embedding_extractor.compute(&segment.samples) {
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue;
            }
            Ok(values) => values.collect(),
        };

        // Pick the lookup strategy based on whether the speaker count has hit the cap.
        let at_capacity = embedding_manager.get_all_speakers().len() == max_speakers;
        let speaker = if at_capacity {
            embedding_manager
                .get_best_speaker_match(embedding)
                .map_or(String::from("?"), |id| id.to_string())
        } else {
            embedding_manager
                .search_speaker(embedding, search_threshold)
                .map_or(String::from("?"), |id| id.to_string())
        };

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
source · pub fn compute(&mut self, samples: &[i16]) -> Result<impl Iterator<Item = f32>>
pub fn compute(&mut self, samples: &[i16]) -> Result<impl Iterator<Item = f32>>
Examples found in repository?
examples/infinite.rs (line 26)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/// Diarizes the WAV file named by the first command-line argument and prints one
/// `start / end / speaker` line per detected segment.
///
/// The embedding manager is created with `usize::MAX` (presumably the speaker cap, i.e.
/// effectively unlimited; confirm against `EmbeddingManager::new`). Segments whose
/// embedding cannot be computed are reported with speaker `?` and skipped.
///
/// # Panics
///
/// Panics if no audio path is given, or if reading the WAV file, loading the embedding
/// model, or running segmentation fails.
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(usize::MAX);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Compute the embedding for this segment; on failure report it and move on.
        let embedding_result: Vec<f32> = match embedding_extractor.compute(&segment.samples) {
            Ok(result) => result.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Find the speaker. If nothing clears `search_threshold`, retry with a threshold
        // of 0.0 so a speaker is still returned where possible.
        //
        // `or_else` keeps the value as an `Option`, so the retry's result is actually
        // used. The previous `ok_or_else` wrapped the retry's result in `Err`, which the
        // final fallback then threw away, always yielding "?".
        let speaker = embedding_manager
            .search_speaker(embedding_result.clone(), search_threshold)
            .or_else(|| embedding_manager.search_speaker(embedding_result, 0.0))
            .map(|r| r.to_string())
            .unwrap_or_else(|| "?".into());

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
More examples
examples/max_speakers.rs (line 27)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/// Diarizes the WAV file named by the first command-line argument while limiting the
/// embedding manager to `max_speakers` speakers, printing one line per segment.
///
/// While the manager holds a number of speakers different from `max_speakers`, segments
/// are matched with `search_speaker` and `search_threshold`; once the count equals
/// `max_speakers`, `get_best_speaker_match` is used instead. Segments whose embedding
/// cannot be computed, or for which no speaker is returned, are printed with `?`.
///
/// # Panics
///
/// Panics if no audio path is given, or if reading the WAV file, loading the embedding
/// model, or running segmentation fails.
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");

    // Tunables for this example.
    let max_speakers = 6;
    let search_threshold = 0.5;

    // Model files are looked up relative to the working directory.
    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(max_speakers);
    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Turn the segment's samples into an embedding vector; report and skip on failure.
        let embedding: Vec<f32> = match embedding_extractor.compute(&segment.samples) {
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue;
            }
            Ok(values) => values.collect(),
        };

        // Pick the lookup strategy based on whether the speaker count has hit the cap.
        let at_capacity = embedding_manager.get_all_speakers().len() == max_speakers;
        let speaker = if at_capacity {
            embedding_manager
                .get_best_speaker_match(embedding)
                .map_or(String::from("?"), |id| id.to_string())
        } else {
            embedding_manager
                .search_speaker(embedding, search_threshold)
                .map_or(String::from("?"), |id| id.to_string())
        };

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
Trait Implementations§
Auto Trait Implementations§
impl Freeze for EmbeddingExtractor
impl !RefUnwindSafe for EmbeddingExtractor
impl Send for EmbeddingExtractor
impl Sync for EmbeddingExtractor
impl Unpin for EmbeddingExtractor
impl !UnwindSafe for EmbeddingExtractor
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more