clawft_plugin/voice/stt.rs
//! Speech-to-text via a sherpa-rs streaming recognizer (currently a stub; no recognizer is wired in yet).
2
/// STT result from processing an audio segment.
///
/// Derives `PartialEq` so results can be compared directly (for example with
/// `assert_eq!`), and `Default` so an empty, non-final, zero-confidence
/// result can be built with `SttResult::default()`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct SttResult {
    /// Transcribed text.
    pub text: String,
    /// `true` when this is a final result, `false` when it is a partial one.
    pub is_final: bool,
    /// Confidence score (0.0 - 1.0). The range is a convention only; this
    /// type does not validate it.
    pub confidence: f32,
}
13
/// Streaming speech-to-text engine.
///
/// Currently a stub -- real sherpa-rs integration after VP. For now the
/// struct only stores its configuration.
#[derive(Debug)]
pub struct SpeechToText {
    /// Location of the recognition model, as supplied by the caller.
    model_path: std::path::PathBuf,
    /// Language identifier supplied by the caller; stored verbatim.
    language: String,
}
21
22impl SpeechToText {
23 pub fn new(model_path: std::path::PathBuf, language: String) -> Self {
24 Self { model_path, language }
25 }
26
27 /// Process audio samples and return transcription results.
28 pub fn process(&mut self, _samples: &[f32]) -> Vec<SttResult> {
29 // Stub: real sherpa-rs streaming recognition goes here
30 vec![]
31 }
32
33 /// Finalize the current utterance and get the final result.
34 pub fn finalize(&mut self) -> Option<SttResult> {
35 // Stub
36 None
37 }
38
39 /// Reset the recognizer state for a new utterance.
40 pub fn reset(&mut self) {
41 // Stub
42 }
43
44 pub fn model_path(&self) -> &std::path::Path {
45 &self.model_path
46 }
47
48 pub fn language(&self) -> &str {
49 &self.language
50 }
51}