silero_vad_rust/
lib.rs

1//! Rust port of the Silero Voice Activity Detector with ONNX Runtime helpers.
2//!
3//! The crate re-exports the most common entry points from [`silero_vad::model`]
4//! and [`silero_vad::utils_vad`] so downstream crates can call into the
5//! high-level helpers without navigating the internal module tree.
6//!
7//! # Quick Start
8//!
9//! ## Offline Pass
10//!
11//! ```no_run
12//! use silero_vad_rust::{get_speech_timestamps, load_silero_vad, read_audio};
13//! use silero_vad_rust::silero_vad::utils_vad::VadParameters;
14//!
15//! fn main() -> anyhow::Result<()> {
16//!     let audio = read_audio("samples/test.wav", 16_000)?;
17//!     let mut model = load_silero_vad()?; // defaults to ONNX opset 16
18//!     let params = VadParameters {
19//!         return_seconds: true,
20//!         ..Default::default()
21//!     };
22//!
23//!     let speech = get_speech_timestamps(&audio, &mut model, &params)?;
24//!     println!("Detected segments: {speech:?}");
25//!     Ok(())
26//! }
27//! ```
28//!
29//! ## Streaming Chunks
30//!
31//! ```no_run
32//! use silero_vad_rust::{load_silero_vad, read_audio};
33//!
34//! fn stream_chunks() -> anyhow::Result<()> {
35//!     let audio = read_audio("samples/long.wav", 16_000)?;
36//!     let mut model = load_silero_vad()?;
37//!     let chunk_size = 512; // samples per window at 16 kHz
38//!
39//!     for frame in audio.chunks(chunk_size) {
40//!         let padded = if frame.len() == chunk_size {
41//!             frame.to_vec()
42//!         } else {
43//!             let mut tmp = vec![0.0f32; chunk_size];
44//!             tmp[..frame.len()].copy_from_slice(frame);
45//!             tmp
46//!         };
47//!
48//!         let probs = model.forward_chunk(&padded, 16_000)?;
49//!         println!("frame prob={:.3}", probs[[0, 0]]);
50//!     }
51//!
52//!     Ok(())
53//! }
54//! ```
55//!
56//! ## Segment Trimming & Muting
57//!
58//! ```no_run
59//! use silero_vad_rust::{
60//!     collect_chunks, drop_chunks, get_speech_timestamps, load_silero_vad, read_audio, save_audio,
61//! };
62//! use silero_vad_rust::silero_vad::utils_vad::VadParameters;
63//!
64//! fn trim_audio() -> anyhow::Result<()> {
65//!     let audio = read_audio("samples/raw.wav", 16_000)?;
66//!     let mut model = load_silero_vad()?;
67//!     let params = VadParameters {
68//!         return_seconds: false,
69//!         ..Default::default()
70//!     };
71//!     let speech = get_speech_timestamps(&audio, &mut model, &params)?;
72//!
73//!     let voice_only = collect_chunks(&speech, &audio, false, None)?;
74//!     save_audio("out_voice.wav", &voice_only, 16_000)?;
75//!
76//!     let muted_voice = drop_chunks(&speech, &audio, false, None)?;
77//!     save_audio("out_silence.wav", &muted_voice, 16_000)?;
78//!     Ok(())
79//! }
80//! ```
81//!
82//! ## Event-Driven Iterator
83//!
84//! ```no_run
85//! use silero_vad_rust::{
86//!     load_silero_vad, read_audio,
87//!     silero_vad::utils_vad::{VadEvent, VadIterator, VadIteratorParams},
88//! };
89//!
90//! fn iterate_events() -> anyhow::Result<()> {
91//!     let audio = read_audio("samples/live.wav", 16_000)?;
92//!     let model = load_silero_vad()?;
93//!     let params = VadIteratorParams {
94//!         threshold: 0.55,
95//!         ..Default::default()
96//!     };
97//!     let mut iterator = VadIterator::new(model, params)?;
98//!
99//!     for frame in audio.chunks(512) {
100//!         let event = iterator.process_chunk(frame, true, 1)?;
101//!         if let Some(VadEvent::Start(ts)) = event {
102//!             println!("speech started at {ts}s");
103//!         } else if let Some(VadEvent::End(ts)) = event {
104//!             println!("speech ended at {ts}s");
105//!         }
106//!     }
107//!
108//!     Ok(())
109//! }
110//! ```
111//!
112//! ## Enabling GPU Runtime
113//!
114//! ```no_run
115//! use silero_vad_rust::silero_vad::model::{load_silero_vad_with_options, LoadOptions};
116//!
117//! fn load_gpu_model() -> anyhow::Result<()> {
118//!     let options = LoadOptions {
119//!         opset_version: 15,
120//!         force_onnx_cpu: false, // allow custom providers (GPU, NNAPI, etc.)
121//!         ..Default::default()
122//!     };
123//!     let _model = load_silero_vad_with_options(options)?;
124//!     Ok(())
125//! }
126//! ```
127//!
128//! ## Tuning Parameters
129//!
130//! ```no_run
131//! use silero_vad_rust::{get_speech_timestamps, load_silero_vad, read_audio};
132//! use silero_vad_rust::silero_vad::utils_vad::VadParameters;
133//!
134//! fn compare_thresholds() -> anyhow::Result<()> {
135//!     let audio = read_audio("samples/noisy.wav", 16_000)?;
136//!     let mut model = load_silero_vad()?;
137//!
138//!     let mut strict = VadParameters::default();
139//!     strict.threshold = 0.65;
140//!     strict.min_speech_duration_ms = 400;
141//!
142//!     let mut permissive = VadParameters::default();
143//!     permissive.threshold = 0.4;
144//!     permissive.min_speech_duration_ms = 150;
145//!
146//!     let strict_segments = get_speech_timestamps(&audio, &mut model, &strict)?;
147//!     model.reset_states();
148//!     let permissive_segments = get_speech_timestamps(&audio, &mut model, &permissive)?;
149//!
150//!     println!("strict count: {}", strict_segments.len());
151//!     println!("permissive count: {}", permissive_segments.len());
152//!     Ok(())
153//! }
154//! ```
155
156pub mod silero_vad;
157
158/// Loads the default Silero VAD ONNX model (opset 16, CPU provider).
159pub use silero_vad::model::load_silero_vad;
160/// Convenience re-exports of the streaming iterator and the high-level audio helpers.
161pub use silero_vad::utils_vad::{
162    VadIterator, collect_chunks, drop_chunks, get_speech_timestamps, read_audio, save_audio,
163};