Skip to main content

clawft_plugin/voice/
vad.rs

//! Voice Activity Detection using Silero VAD.
2
/// Result of running voice activity detection over a chunk of audio.
///
/// Events compare by value (`PartialEq`/`Eq`), so they can be checked with
/// `==` or `assert_eq!`. The enum is `#[non_exhaustive]`: matches outside
/// the defining crate need a wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VadEvent {
    /// Speech started at this sample offset.
    SpeechStart { offset: usize },
    /// Speech ended at this sample offset.
    SpeechEnd { offset: usize },
    /// No speech detected in this frame.
    Silence,
}
14
/// Voice Activity Detector wrapping Silero VAD model.
///
/// Currently a stub -- real sherpa-rs integration after VP.
///
/// Implements `Debug` so a detector can be logged or embedded in other
/// `Debug` types.
#[derive(Debug)]
pub struct VoiceActivityDetector {
    /// Threshold supplied at construction.
    // NOTE(review): presumably the speech-probability cutoff -- confirm once
    // real inference is wired in.
    threshold: f32,
    /// Silence timeout supplied at construction, in milliseconds.
    silence_timeout_ms: u32,
    /// Internal activity flag; starts as `false` and is cleared on reset.
    // NOTE(review): presumably tracks "speech in progress" -- nothing reads
    // it yet in the stub.
    active: bool,
}
23
24impl VoiceActivityDetector {
25    pub fn new(threshold: f32, silence_timeout_ms: u32) -> Self {
26        Self {
27            threshold,
28            silence_timeout_ms,
29            active: false,
30        }
31    }
32
33    /// Process a chunk of audio samples.
34    /// Returns VAD events detected in the chunk.
35    pub fn process(&mut self, _samples: &[f32]) -> Vec<VadEvent> {
36        // Stub: real Silero VAD inference goes here
37        vec![VadEvent::Silence]
38    }
39
40    /// Reset the VAD state.
41    pub fn reset(&mut self) {
42        self.active = false;
43    }
44
45    pub fn threshold(&self) -> f32 {
46        self.threshold
47    }
48
49    pub fn silence_timeout_ms(&self) -> u32 {
50        self.silence_timeout_ms
51    }
52}