Struct pyannote_rs::EmbeddingManager
source · pub struct EmbeddingManager { /* private fields */ }
Implementations§
source§impl EmbeddingManager
impl EmbeddingManager
sourcepub fn new(max_speakers: usize) -> Self
pub fn new(max_speakers: usize) -> Self
Examples found in repository?
examples/infinite.rs (line 20)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/// Example: unbounded speaker diarization — every segment is always assigned
/// a speaker ID (a new speaker is created whenever no match is found).
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    // usize::MAX effectively disables the speaker cap for this example.
    let mut embedding_manager = EmbeddingManager::new(usize::MAX);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Compute the embedding for this segment; on failure report it and
        // skip to the next segment.
        let embedding_result: Vec<f32> = match embedding_extractor.compute(&segment.samples) {
            Ok(result) => result.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Find the speaker.
        // BUG FIX: the original used `ok_or_else`, which puts the fallback
        // search result into the `Err` variant; `.map(...).unwrap_or("?")`
        // then discarded it, so the zero-threshold fallback never produced a
        // speaker. `or_else` keeps the fallback in the `Option` chain so a
        // speaker is always assigned, as the comment intends.
        let speaker = embedding_manager
            .search_speaker(embedding_result.clone(), search_threshold)
            .or_else(|| embedding_manager.search_speaker(embedding_result, 0.0)) // Ensure always to return speaker
            .map(|r| r.to_string())
            .unwrap_or_else(|| "?".into());

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
More examples
examples/max_speakers.rs (line 21)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/// Example: diarization with a bounded speaker pool. While the pool has room,
/// unmatched segments register new speakers; once full, segments are only
/// matched against the speakers already known.
fn main() {
    // Audio path comes from the first CLI argument.
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let max_speakers = 6;
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(max_speakers);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Extract this segment's embedding; report and move on if it fails.
        let embedding = match embedding_extractor.compute(&segment.samples) {
            Ok(values) => values.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Once the pool is full, stop creating speakers and only match
        // against the existing ones.
        let pool_is_full = embedding_manager.get_all_speakers().len() == max_speakers;
        let speaker = if pool_is_full {
            embedding_manager
                .get_best_speaker_match(embedding)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        } else {
            embedding_manager
                .search_speaker(embedding, search_threshold)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        };

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
sourcepub fn search_speaker(
&mut self,
embedding: Vec<f32>,
threshold: f32,
) -> Option<usize>
pub fn search_speaker( &mut self, embedding: Vec<f32>, threshold: f32, ) -> Option<usize>
Searches for an existing speaker whose embedding matches within the given threshold, or creates a new speaker if none matches.
Examples found in repository?
examples/infinite.rs (line 43)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/// Example: unbounded speaker diarization — every segment is always assigned
/// a speaker ID (a new speaker is created whenever no match is found).
fn main() {
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    // usize::MAX effectively disables the speaker cap for this example.
    let mut embedding_manager = EmbeddingManager::new(usize::MAX);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Compute the embedding for this segment; on failure report it and
        // skip to the next segment.
        let embedding_result: Vec<f32> = match embedding_extractor.compute(&segment.samples) {
            Ok(result) => result.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Find the speaker.
        // BUG FIX: the original used `ok_or_else`, which puts the fallback
        // search result into the `Err` variant; `.map(...).unwrap_or("?")`
        // then discarded it, so the zero-threshold fallback never produced a
        // speaker. `or_else` keeps the fallback in the `Option` chain so a
        // speaker is always assigned, as the comment intends.
        let speaker = embedding_manager
            .search_speaker(embedding_result.clone(), search_threshold)
            .or_else(|| embedding_manager.search_speaker(embedding_result, 0.0)) // Ensure always to return speaker
            .map(|r| r.to_string())
            .unwrap_or_else(|| "?".into());

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
More examples
examples/max_speakers.rs (line 50)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/// Example: diarization with a bounded speaker pool. While the pool has room,
/// unmatched segments register new speakers; once full, segments are only
/// matched against the speakers already known.
fn main() {
    // Audio path comes from the first CLI argument.
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let max_speakers = 6;
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(max_speakers);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Extract this segment's embedding; report and move on if it fails.
        let embedding = match embedding_extractor.compute(&segment.samples) {
            Ok(values) => values.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Once the pool is full, stop creating speakers and only match
        // against the existing ones.
        let pool_is_full = embedding_manager.get_all_speakers().len() == max_speakers;
        let speaker = if pool_is_full {
            embedding_manager
                .get_best_speaker_match(embedding)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        } else {
            embedding_manager
                .search_speaker(embedding, search_threshold)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        };

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
sourcepub fn get_best_speaker_match(&mut self, embedding: Vec<f32>) -> Result<usize>
pub fn get_best_speaker_match(&mut self, embedding: Vec<f32>) -> Result<usize>
Examples found in repository?
examples/max_speakers.rs (line 45)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/// Example: diarization with a bounded speaker pool. While the pool has room,
/// unmatched segments register new speakers; once full, segments are only
/// matched against the speakers already known.
fn main() {
    // Audio path comes from the first CLI argument.
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let max_speakers = 6;
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(max_speakers);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Extract this segment's embedding; report and move on if it fails.
        let embedding = match embedding_extractor.compute(&segment.samples) {
            Ok(values) => values.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Once the pool is full, stop creating speakers and only match
        // against the existing ones.
        let pool_is_full = embedding_manager.get_all_speakers().len() == max_speakers;
        let speaker = if pool_is_full {
            embedding_manager
                .get_best_speaker_match(embedding)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        } else {
            embedding_manager
                .search_speaker(embedding, search_threshold)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        };

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
sourcepub fn get_all_speakers(&self) -> &HashMap<usize, Array1<f32>>
pub fn get_all_speakers(&self) -> &HashMap<usize, Array1<f32>>
Examples found in repository?
examples/max_speakers.rs (line 43)
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/// Example: diarization with a bounded speaker pool. While the pool has room,
/// unmatched segments register new speakers; once full, segments are only
/// matched against the speakers already known.
fn main() {
    // Audio path comes from the first CLI argument.
    let audio_path = std::env::args().nth(1).expect("Please specify audio file");
    let max_speakers = 6;
    let search_threshold = 0.5;

    let embedding_model_path = "wespeaker_en_voxceleb_CAM++.onnx";
    let segmentation_model_path = "segmentation-3.0.onnx";

    let (samples, sample_rate) = pyannote_rs::read_wav(&audio_path).unwrap();
    let mut embedding_extractor = EmbeddingExtractor::new(embedding_model_path).unwrap();
    let mut embedding_manager = EmbeddingManager::new(max_speakers);

    let segments = pyannote_rs::segment(&samples, sample_rate, segmentation_model_path).unwrap();

    for segment in segments {
        // Extract this segment's embedding; report and move on if it fails.
        let embedding = match embedding_extractor.compute(&segment.samples) {
            Ok(values) => values.collect(),
            Err(error) => {
                println!(
                    "Error in {:.2}s: {:.2}s: {:?}",
                    segment.start, segment.end, error
                );
                println!(
                    "start = {:.2}, end = {:.2}, speaker = ?",
                    segment.start, segment.end
                );
                continue; // Skip to the next segment
            }
        };

        // Once the pool is full, stop creating speakers and only match
        // against the existing ones.
        let pool_is_full = embedding_manager.get_all_speakers().len() == max_speakers;
        let speaker = if pool_is_full {
            embedding_manager
                .get_best_speaker_match(embedding)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        } else {
            embedding_manager
                .search_speaker(embedding, search_threshold)
                .map(|id| id.to_string())
                .unwrap_or("?".into())
        };

        println!(
            "start = {:.2}, end = {:.2}, speaker = {}",
            segment.start, segment.end, speaker
        );
    }
}
Trait Implementations§
source§impl Clone for EmbeddingManager
impl Clone for EmbeddingManager
source§fn clone(&self) -> EmbeddingManager
fn clone(&self) -> EmbeddingManager
Returns a copy of the value. Read more
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Auto Trait Implementations§
impl Freeze for EmbeddingManager
impl RefUnwindSafe for EmbeddingManager
impl Send for EmbeddingManager
impl Sync for EmbeddingManager
impl Unpin for EmbeddingManager
impl UnwindSafe for EmbeddingManager
Blanket Implementations§
source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
source§default unsafe fn clone_to_uninit(&self, dst: *mut T)
default unsafe fn clone_to_uninit(&self, dst: *mut T)
🔬This is a nightly-only experimental API. (
clone_to_uninit
)