Skip to main content

clawft_plugin/voice/
stt.rs

//! Speech-to-text via sherpa-rs streaming recognizer (currently a stub; no recognition is performed yet).
2
/// STT result from processing an audio segment.
///
/// `PartialEq` is derived so results can be compared directly (for example
/// with `assert_eq!` in tests). `Eq` is intentionally not derived because
/// `confidence` is an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub struct SttResult {
    /// Transcribed text.
    pub text: String,
    /// `true` when this is a final result, `false` when it is a partial one.
    pub is_final: bool,
    /// Confidence score, documented as 0.0 - 1.0. The range is not enforced
    /// by this type; it is a plain public `f32`.
    pub confidence: f32,
}
13
14/// Streaming speech-to-text engine.
15///
16/// Currently a stub -- real sherpa-rs integration after VP.
17pub struct SpeechToText {
18    model_path: std::path::PathBuf,
19    language: String,
20}
21
22impl SpeechToText {
23    pub fn new(model_path: std::path::PathBuf, language: String) -> Self {
24        Self { model_path, language }
25    }
26
27    /// Process audio samples and return transcription results.
28    pub fn process(&mut self, _samples: &[f32]) -> Vec<SttResult> {
29        // Stub: real sherpa-rs streaming recognition goes here
30        vec![]
31    }
32
33    /// Finalize the current utterance and get the final result.
34    pub fn finalize(&mut self) -> Option<SttResult> {
35        // Stub
36        None
37    }
38
39    /// Reset the recognizer state for a new utterance.
40    pub fn reset(&mut self) {
41        // Stub
42    }
43
44    pub fn model_path(&self) -> &std::path::Path {
45        &self.model_path
46    }
47
48    pub fn language(&self) -> &str {
49        &self.language
50    }
51}