silero_vad_rust/lib.rs
1//! Rust port of the Silero Voice Activity Detector with ONNX Runtime helpers.
2//!
3//! The crate re-exports the most common entry points from [`silero_vad::model`]
4//! and [`silero_vad::utils_vad`] so downstream crates can call into the
5//! high-level helpers without navigating the internal module tree.
6//!
7//! # Quick Start
8//!
9//! ## Offline Pass
10//!
11//! ```no_run
12//! use silero_vad_rust::{get_speech_timestamps, load_silero_vad, read_audio};
13//! use silero_vad_rust::silero_vad::utils_vad::VadParameters;
14//!
15//! fn main() -> anyhow::Result<()> {
16//! let audio = read_audio("samples/test.wav", 16_000)?;
17//! let mut model = load_silero_vad()?; // defaults to ONNX opset 16
18//! let params = VadParameters {
19//! return_seconds: true,
20//! ..Default::default()
21//! };
22//!
23//! let speech = get_speech_timestamps(&audio, &mut model, &params)?;
24//! println!("Detected segments: {speech:?}");
25//! Ok(())
26//! }
27//! ```
28//!
29//! ## Streaming Chunks
30//!
31//! ```no_run
32//! use silero_vad_rust::{load_silero_vad, read_audio};
33//!
34//! fn stream_chunks() -> anyhow::Result<()> {
35//! let audio = read_audio("samples/long.wav", 16_000)?;
36//! let mut model = load_silero_vad()?;
37//! let chunk_size = 512; // samples per window at 16 kHz
38//!
39//! for frame in audio.chunks(chunk_size) {
40//! let padded = if frame.len() == chunk_size {
41//! frame.to_vec()
42//! } else {
43//! let mut tmp = vec![0.0f32; chunk_size];
44//! tmp[..frame.len()].copy_from_slice(frame);
45//! tmp
46//! };
47//!
48//! let probs = model.forward_chunk(&padded, 16_000)?;
49//! println!("frame prob={:.3}", probs[[0, 0]]);
50//! }
51//!
52//! Ok(())
53//! }
54//! ```
55//!
56//! ## Segment Trimming & Muting
57//!
58//! ```no_run
59//! use silero_vad_rust::{
60//! collect_chunks, drop_chunks, get_speech_timestamps, load_silero_vad, read_audio, save_audio,
61//! };
62//! use silero_vad_rust::silero_vad::utils_vad::VadParameters;
63//!
64//! fn trim_audio() -> anyhow::Result<()> {
65//! let audio = read_audio("samples/raw.wav", 16_000)?;
66//! let mut model = load_silero_vad()?;
67//! let params = VadParameters {
68//! return_seconds: false,
69//! ..Default::default()
70//! };
71//! let speech = get_speech_timestamps(&audio, &mut model, &params)?;
72//!
73//! let voice_only = collect_chunks(&speech, &audio, false, None)?;
74//! save_audio("out_voice.wav", &voice_only, 16_000)?;
75//!
76//! let muted_voice = drop_chunks(&speech, &audio, false, None)?;
77//! save_audio("out_silence.wav", &muted_voice, 16_000)?;
78//! Ok(())
79//! }
80//! ```
81//!
82//! ## Event-Driven Iterator
83//!
84//! ```no_run
85//! use silero_vad_rust::{
86//! load_silero_vad, read_audio,
87//! silero_vad::utils_vad::{VadEvent, VadIterator, VadIteratorParams},
88//! };
89//!
90//! fn iterate_events() -> anyhow::Result<()> {
91//! let audio = read_audio("samples/live.wav", 16_000)?;
92//! let model = load_silero_vad()?;
93//! let params = VadIteratorParams {
94//! threshold: 0.55,
95//! ..Default::default()
96//! };
97//! let mut iterator = VadIterator::new(model, params)?;
98//!
99//! for frame in audio.chunks(512) {
100//! let event = iterator.process_chunk(frame, true, 1)?;
101//! if let Some(VadEvent::Start(ts)) = event {
102//! println!("speech started at {ts}s");
103//! } else if let Some(VadEvent::End(ts)) = event {
104//! println!("speech ended at {ts}s");
105//! }
106//! }
107//!
108//! Ok(())
109//! }
110//! ```
111//!
112//! ## Enabling GPU Runtime
113//!
114//! ```no_run
115//! use silero_vad_rust::silero_vad::model::{load_silero_vad_with_options, LoadOptions};
116//!
117//! fn load_gpu_model() -> anyhow::Result<()> {
118//! let options = LoadOptions {
119//! opset_version: 15,
120//! force_onnx_cpu: false, // allow custom providers (GPU, NNAPI, etc.)
121//! ..Default::default()
122//! };
123//! let _model = load_silero_vad_with_options(options)?;
124//! Ok(())
125//! }
126//! ```
127//!
128//! ## Tuning Parameters
129//!
130//! ```no_run
131//! use silero_vad_rust::{get_speech_timestamps, load_silero_vad, read_audio};
132//! use silero_vad_rust::silero_vad::utils_vad::VadParameters;
133//!
134//! fn compare_thresholds() -> anyhow::Result<()> {
135//! let audio = read_audio("samples/noisy.wav", 16_000)?;
136//! let mut model = load_silero_vad()?;
137//!
138//! let mut strict = VadParameters::default();
139//! strict.threshold = 0.65;
140//! strict.min_speech_duration_ms = 400;
141//!
142//! let mut permissive = VadParameters::default();
143//! permissive.threshold = 0.4;
144//! permissive.min_speech_duration_ms = 150;
145//!
146//! let strict_segments = get_speech_timestamps(&audio, &mut model, &strict)?;
147//! model.reset_states();
148//! let permissive_segments = get_speech_timestamps(&audio, &mut model, &permissive)?;
149//!
150//! println!("strict count: {}", strict_segments.len());
151//! println!("permissive count: {}", permissive_segments.len());
152//! Ok(())
153//! }
154//! ```
155
/// Implementation module tree: [`silero_vad::model`] provides the model loaders
/// and [`silero_vad::utils_vad`] provides the audio/VAD helpers re-exported below.
156pub mod silero_vad;
157
158/// Loads the default Silero VAD ONNX model (opset 16, CPU provider).
159pub use silero_vad::model::load_silero_vad;
160/// Convenience re-exports for the high-level audio helpers.
161pub use silero_vad::utils_vad::{
162 VadIterator, collect_chunks, drop_chunks, get_speech_timestamps, read_audio, save_audio,
163};