pub struct SttEngine { /* private fields */ }
Expand description
Speech-to-text engine optimized for speed and ease of use.
This is the main entry point for transcription. Create an engine, warm it up, and start transcribing audio samples.
§Example
use memo_stt::SttEngine;
// Create engine with default model
let mut engine = SttEngine::new_default(16000)?;
// Warm up GPU (reduces first-transcription latency)
engine.warmup()?;
// Transcribe audio samples (16kHz, mono, i16 PCM)
let samples: Vec<i16> = vec![]; // Your audio data here
let text = engine.transcribe(&samples)?;
println!("Transcribed: {}", text);
§Performance
- First transcription: ~500ms-1s (after warmup)
- Subsequent transcriptions: ~200-500ms
- GPU acceleration is automatic on supported platforms
Implementations§
Source§impl SttEngine
impl SttEngine
Sourcepub fn new_default(input_sample_rate: u32) -> Result<Self>
pub fn new_default(input_sample_rate: u32) -> Result<Self>
Create a new engine with the default model.
The model will be automatically downloaded to the cache directory on first use.
For custom model paths, use `SttEngine::new`.
§Arguments
- `input_sample_rate` - Sample rate of input audio (e.g., 16000, 48000)
§Example
use memo_stt::SttEngine;
let engine = SttEngine::new_default(16000)?;
Examples found in repository?
More examples
13fn main() -> Result<(), Box<dyn std::error::Error>> {
14 let mut engine = SttEngine::new_default(16000)?;
15
16 engine.set_prompt(Some(
17 "Rust programming language, cargo, crates.io, GitHub, \
18 async await, tokio, serde, clippy, rustfmt"
19 .to_string(),
20 ));
21
22 engine.warmup()?;
23
24 println!("Engine ready with custom vocabulary.");
25 println!("Pass audio samples to engine.transcribe(&samples) to use it.");
26
27 Ok(())
28}
13fn main() -> Result<(), Box<dyn std::error::Error>> {
14 println!("Creating STT engine (this may download the model on first run)...");
15 let mut engine = SttEngine::new_default(16000)?;
16
17 println!("Warming up...");
18 engine.warmup()?;
19
20 println!("Engine ready.");
21 println!();
22 println!("To transcribe, pass 16-bit mono PCM samples:");
23 println!(" let samples: Vec<i16> = /* your audio */;");
24 println!(" let text = engine.transcribe(&samples)?;");
25
26 // Example with one second of silence (just to demonstrate the call shape).
27 let samples = vec![0i16; 16_000];
28 match engine.transcribe(&samples) {
29 Ok(text) => println!("Transcribed (silence): {:?}", text),
30 Err(e) => println!("Transcribe error: {}", e),
31 }
32
33 Ok(())
34}
Sourcepub fn new(model_path: impl AsRef<Path>, input_sample_rate: u32) -> Result<Self>
pub fn new(model_path: impl AsRef<Path>, input_sample_rate: u32) -> Result<Self>
Create a new engine with a custom model path.
If the model doesn’t exist, it will attempt to download it automatically (if it’s a known model name). Otherwise, you’ll need to provide the full path to an existing model file.
§Arguments
- `model_path` - Path to a GGML speech model, or model name
- `input_sample_rate` - Sample rate of input audio (e.g., 16000, 48000)
§Example
use memo_stt::SttEngine;
// Use default model (auto-downloads if needed)
let engine = SttEngine::new_default(16000)?;
// Or specify a custom path
let engine = SttEngine::new("models/ggml-small.en-q5_1.bin", 16000)?;
§Recommended Models
- `ggml-small.en-q5_1.bin` (~500MB) - Best balance of speed and accuracy
- `ggml-distil-large-v3-q5_1.bin` (~500MB) - Higher accuracy
- `ggml-distil-large-v3-q8_0.bin` (~800MB) - Highest accuracy
Models are downloaded from: https://huggingface.co/ggerganov/whisper.cpp
Sourcepub fn transcribe(&mut self, samples: &[i16]) -> Result<String>
pub fn transcribe(&mut self, samples: &[i16]) -> Result<String>
Transcribe audio samples to text.
Takes PCM audio samples (16-bit signed integers) and returns transcribed text.
§Arguments
- `samples` - Audio samples as `i16` PCM data at the sample rate specified when creating the engine
§Returns
Transcribed text as a String. Returns empty string if no speech detected.
§Example
use memo_stt::SttEngine;
let mut engine = SttEngine::new_default(16000)?;
engine.warmup()?;
// Your audio samples (16kHz, mono, i16 PCM)
let samples: Vec<i16> = vec![]; // Replace with actual audio
let text = engine.transcribe(&samples)?;
println!("{}", text);
§Audio Format Requirements
- Format: 16-bit signed integer PCM (`i16`)
- Channels: Mono
- Sample rate: Must match the `input_sample_rate` provided to `new()` or `new_default()`
- Minimum length: 1 second (16000 samples at 16kHz)
Examples found in repository?
More examples
13fn main() -> Result<(), Box<dyn std::error::Error>> {
14 println!("Creating STT engine (this may download the model on first run)...");
15 let mut engine = SttEngine::new_default(16000)?;
16
17 println!("Warming up...");
18 engine.warmup()?;
19
20 println!("Engine ready.");
21 println!();
22 println!("To transcribe, pass 16-bit mono PCM samples:");
23 println!(" let samples: Vec<i16> = /* your audio */;");
24 println!(" let text = engine.transcribe(&samples)?;");
25
26 // Example with one second of silence (just to demonstrate the call shape).
27 let samples = vec![0i16; 16_000];
28 match engine.transcribe(&samples) {
29 Ok(text) => println!("Transcribed (silence): {:?}", text),
30 Err(e) => println!("Transcribe error: {}", e),
31 }
32
33 Ok(())
34}
Sourcepub fn set_prompt(&mut self, prompt: Option<String>)
pub fn set_prompt(&mut self, prompt: Option<String>)
Set initial prompt for custom vocabulary or context.
Useful for improving accuracy with domain-specific terms, names, or technical vocabulary.
§Example
use memo_stt::SttEngine;
let mut engine = SttEngine::new_default(16000)?;
engine.set_prompt(Some("Rust programming language, cargo, crates.io".to_string()));
Examples found in repository?
13fn main() -> Result<(), Box<dyn std::error::Error>> {
14 let mut engine = SttEngine::new_default(16000)?;
15
16 engine.set_prompt(Some(
17 "Rust programming language, cargo, crates.io, GitHub, \
18 async await, tokio, serde, clippy, rustfmt"
19 .to_string(),
20 ));
21
22 engine.warmup()?;
23
24 println!("Engine ready with custom vocabulary.");
25 println!("Pass audio samples to engine.transcribe(&samples) to use it.");
26
27 Ok(())
28}
Sourcepub fn warmup(&self) -> Result<()>
pub fn warmup(&self) -> Result<()>
Warm up the GPU to reduce first-transcription latency.
Call this after creating the engine to pre-initialize GPU resources. The first transcription after warmup will be faster.
§Example
use memo_stt::SttEngine;
let mut engine = SttEngine::new_default(16000)?;
engine.warmup()?; // Pre-initialize GPU
// Now transcriptions will be faster
Examples found in repository?
More examples
13fn main() -> Result<(), Box<dyn std::error::Error>> {
14 let mut engine = SttEngine::new_default(16000)?;
15
16 engine.set_prompt(Some(
17 "Rust programming language, cargo, crates.io, GitHub, \
18 async await, tokio, serde, clippy, rustfmt"
19 .to_string(),
20 ));
21
22 engine.warmup()?;
23
24 println!("Engine ready with custom vocabulary.");
25 println!("Pass audio samples to engine.transcribe(&samples) to use it.");
26
27 Ok(())
28}
13fn main() -> Result<(), Box<dyn std::error::Error>> {
14 println!("Creating STT engine (this may download the model on first run)...");
15 let mut engine = SttEngine::new_default(16000)?;
16
17 println!("Warming up...");
18 engine.warmup()?;
19
20 println!("Engine ready.");
21 println!();
22 println!("To transcribe, pass 16-bit mono PCM samples:");
23 println!(" let samples: Vec<i16> = /* your audio */;");
24 println!(" let text = engine.transcribe(&samples)?;");
25
26 // Example with one second of silence (just to demonstrate the call shape).
27 let samples = vec![0i16; 16_000];
28 match engine.transcribe(&samples) {
29 Ok(text) => println!("Transcribed (silence): {:?}", text),
30 Err(e) => println!("Transcribe error: {}", e),
31 }
32
33 Ok(())
34}