murmur_core/transcription/
streaming.rs

1//! Chunked streaming transcription with overlap stitching.
2//!
3//! Audio is captured continuously and transcribed in overlapping chunks
4//! via a subprocess worker. Each chunk includes a configurable overlap
5//! with the previous chunk, and word-level stitching deduplicates the
6//! overlap region before appending new text.
7
8use std::sync::atomic::{AtomicBool, Ordering};
9use std::sync::mpsc;
10use std::sync::{Arc, Mutex};
11
12use super::engine::AsrEngine;
13use crate::audio::capture::TARGET_RATE;
14
15// ── Configuration ──────────────────────────────────────────────────────
16
17/// Minimum new audio (seconds) before re-transcribing.
18const MIN_NEW_AUDIO_SECS: f32 = 2.0;
/// Delay between polls of the sample buffer while waiting for enough new audio or for speech, in milliseconds.
20const POLL_INTERVAL_MS: u64 = 300;
21/// Maximum chunk size sent to Whisper, in seconds.
22const MAX_CHUNK_SECS: f32 = 5.0;
23/// Overlap between consecutive chunks (seconds). This gives whisper
24/// context across chunk boundaries so words aren't lost or split.
25const OVERLAP_SECS: f32 = 1.5;
26
27// ── Public API ─────────────────────────────────────────────────────────
28
/// An event sent from a streaming thread to the consumer of the transcription.
///
/// The common derives make events easy to log, copy and compare in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StreamingEvent {
    /// Replace the last `replace_chars` characters of the text emitted so far
    /// with `text`. If `replace_chars` is 0, `text` is simply appended.
    ///
    /// `replace_chars` counts `char`s (Unicode scalar values), not bytes —
    /// the native streaming loop computes it with `str::chars().count()`.
    PartialText { text: String, replace_chars: usize },
    /// VAD detected speech in the audio stream (heartbeat for silence timeout).
    SpeechDetected,
}
37
/// Handle returned by [`start_streaming`] and [`start_native_streaming`] to
/// control the streaming thread.
///
/// Dropping the handle disconnects the stop channel, which the streaming
/// loops treat as a stop request, but it does **not** block until the thread
/// exits. Call [`StreamingHandle::stop_and_join`] when you need to guarantee
/// the thread has exited before reusing the `Transcriber` (e.g. before
/// starting a final transcription pass).
#[derive(Debug)]
#[must_use = "dropping the handle disconnects the stop channel, which stops the streaming thread"]
pub struct StreamingHandle {
    /// Sender half of the stop channel; a message or a disconnect ends the loop.
    stop_tx: mpsc::Sender<()>,
    /// Flag shared with the streaming thread; set to `true` by `stop_and_join`.
    abort_flag: Arc<AtomicBool>,
    /// Join handle of the streaming thread; taken (left `None`) once joined.
    join_handle: Option<std::thread::JoinHandle<()>>,
}
49
50impl StreamingHandle {
51    /// Signal the streaming thread to stop and block until it exits.
52    ///
53    /// Sets the abort flag first so any in-progress whisper inference
54    /// is cancelled immediately, then sends the channel stop signal
55    /// and joins the thread.
56    pub fn stop_and_join(mut self) {
57        self.abort_flag.store(true, Ordering::Relaxed);
58        let _ = self.stop_tx.send(());
59        if let Some(handle) = self.join_handle.take() {
60            if let Err(e) = handle.join() {
61                log::error!("Streaming thread panicked: {e:?}");
62            }
63        }
64    }
65}
66
67/// Start streaming transcription in a background thread.
68///
69/// Reads from `sample_buffer` (the AudioRecorder's shared buffer), transcribes
70/// overlapping chunks, and sends incremental text via `tx`.
71///
72/// `worker` is a pre-spawned subprocess transcriber. Spawning it ahead of
73/// time avoids the model-loading delay on first recording.
74///
75/// Returns a [`StreamingHandle`] that can stop and join the thread.
76pub fn start_streaming(
77    sample_buffer: Arc<Mutex<Vec<f32>>>,
78    engine: Arc<dyn AsrEngine + Send + Sync>,
79    translate: bool,
80    filler_word_removal: bool,
81    tx: mpsc::Sender<StreamingEvent>,
82    worker: Option<super::subprocess::SubprocessTranscriber>,
83) -> StreamingHandle {
84    let (stop_tx, stop_rx) = mpsc::channel::<()>();
85    let abort_flag = Arc::new(AtomicBool::new(false));
86    let abort_flag_clone = Arc::clone(&abort_flag);
87
88    let join_handle = std::thread::spawn(move || {
89        streaming_loop(
90            sample_buffer,
91            engine,
92            translate,
93            filler_word_removal,
94            tx,
95            stop_rx,
96            abort_flag_clone,
97            worker,
98        );
99    });
100
101    StreamingHandle {
102        stop_tx,
103        abort_flag,
104        join_handle: Some(join_handle),
105    }
106}
107
108/// Start native streaming transcription for engines that support it (e.g. Qwen3-ASR).
109///
110/// Unlike [`start_streaming`], this does **not** use chunked overlap or word
111/// stitching. Instead it periodically reads all accumulated audio and asks
112/// the engine to transcribe the full utterance, sending the complete text as
113/// a replacement each time.
114pub fn start_native_streaming(
115    sample_buffer: Arc<Mutex<Vec<f32>>>,
116    engine: Arc<dyn AsrEngine + Send + Sync>,
117    translate: bool,
118    tx: mpsc::Sender<StreamingEvent>,
119) -> StreamingHandle {
120    let (stop_tx, stop_rx) = mpsc::channel::<()>();
121    let abort_flag = Arc::new(AtomicBool::new(false));
122    let abort_flag_clone = Arc::clone(&abort_flag);
123
124    let join_handle = std::thread::spawn(move || {
125        native_streaming_loop(
126            sample_buffer,
127            engine,
128            translate,
129            tx,
130            stop_rx,
131            abort_flag_clone,
132        );
133    });
134
135    StreamingHandle {
136        stop_tx,
137        abort_flag,
138        join_handle: Some(join_handle),
139    }
140}
141
142/// Native streaming loop: transcribe accumulated audio on each tick.
143fn native_streaming_loop(
144    sample_buffer: Arc<Mutex<Vec<f32>>>,
145    engine: Arc<dyn AsrEngine + Send + Sync>,
146    translate: bool,
147    tx: mpsc::Sender<StreamingEvent>,
148    stop_rx: mpsc::Receiver<()>,
149    abort_flag: Arc<AtomicBool>,
150) {
151    let min_samples = (MIN_NEW_AUDIO_SECS * TARGET_RATE as f32) as usize;
152    let mut prev_len: usize = 0;
153    let mut prev_text = String::new();
154
155    loop {
156        // Check for stop signal
157        match stop_rx.try_recv() {
158            Ok(()) | Err(mpsc::TryRecvError::Disconnected) => break,
159            Err(mpsc::TryRecvError::Empty) => {}
160        }
161
162        let current_len = match sample_buffer.lock() {
163            Ok(b) => b.len(),
164            Err(e) => e.into_inner().len(),
165        };
166
167        // Wait for enough new audio
168        let new_samples = current_len.saturating_sub(prev_len);
169        if new_samples < min_samples {
170            std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
171            continue;
172        }
173
174        // Read all accumulated samples
175        let samples: Vec<f32> = match sample_buffer.lock() {
176            Ok(b) => b.clone(),
177            Err(e) => e.into_inner().clone(),
178        };
179
180        if samples.is_empty() {
181            std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
182            continue;
183        }
184
185        // VAD check on just the new audio
186        let new_start = prev_len.min(samples.len());
187        if !super::vad::contains_speech(&samples[new_start..]) {
188            prev_len = current_len;
189            std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
190            continue;
191        }
192
193        let _ = tx.send(StreamingEvent::SpeechDetected);
194
195        // Transcribe the full utterance
196        match engine.transcribe(&samples, translate) {
197            Ok(result) => {
198                if abort_flag.load(Ordering::Relaxed) {
199                    break;
200                }
201                if !result.text.is_empty() && result.text != prev_text {
202                    // Compute the common prefix to avoid deleting and retyping
203                    // text that hasn't changed.
204                    if result.text.starts_with(&prev_text) {
205                        // New text extends the previous — just append the suffix
206                        let new_suffix = &result.text[prev_text.len()..];
207                        if !new_suffix.is_empty() {
208                            let _ = tx.send(StreamingEvent::PartialText {
209                                text: new_suffix.to_string(),
210                                replace_chars: 0,
211                            });
212                        }
213                    } else {
214                        // Text changed (model revised earlier words) — replace all
215                        let replace_chars = prev_text.chars().count();
216                        let _ = tx.send(StreamingEvent::PartialText {
217                            text: result.text.clone(),
218                            replace_chars,
219                        });
220                    }
221                    prev_text = result.text;
222                }
223            }
224            Err(e) => {
225                log::error!("Native streaming transcription failed: {e}");
226            }
227        }
228
229        prev_len = current_len;
230    }
231}
232
233// ── Internal (chunked Whisper streaming) ───────────────────────────────
234
235#[allow(clippy::too_many_arguments)]
236fn streaming_loop(
237    sample_buffer: Arc<Mutex<Vec<f32>>>,
238    engine: Arc<dyn AsrEngine + Send + Sync>,
239    translate: bool,
240    filler_word_removal: bool,
241    tx: mpsc::Sender<StreamingEvent>,
242    stop_rx: mpsc::Receiver<()>,
243    _abort_flag: Arc<AtomicBool>,
244    mut worker: Option<super::subprocess::SubprocessTranscriber>,
245) {
246    let min_new_samples = (MIN_NEW_AUDIO_SECS * TARGET_RATE as f32) as usize;
247    let max_chunk_samples = (MAX_CHUNK_SECS * TARGET_RATE as f32) as usize;
248    let overlap_samples = (OVERLAP_SECS * TARGET_RATE as f32) as usize;
249
250    // The boundary up to which audio has been consumed (minus overlap).
251    let mut consumed_boundary: usize = 0;
252    // Words emitted so far (for stitching overlap regions).
253    let mut committed_words: Vec<String> = Vec::new();
254    let mut chunk: Vec<f32> = Vec::with_capacity(max_chunk_samples);
255
256    loop {
257        match stop_rx.try_recv() {
258            Ok(()) | Err(mpsc::TryRecvError::Disconnected) => {
259                // Final chunk: transcribe remaining audio with overlap.
260                let total = match sample_buffer.lock() {
261                    Ok(b) => b.len(),
262                    Err(e) => e.into_inner().len(),
263                };
264                let start = consumed_boundary.saturating_sub(overlap_samples);
265                if total > start {
266                    chunk.clear();
267                    {
268                        let buf = sample_buffer.lock().unwrap_or_else(|e| e.into_inner());
269                        chunk.extend_from_slice(&buf[start..total]);
270                    }
271                    emit_chunk(
272                        &mut worker,
273                        &engine,
274                        &chunk,
275                        translate,
276                        filler_word_removal,
277                        &mut committed_words,
278                        &tx,
279                    );
280                }
281                break;
282            }
283            Err(mpsc::TryRecvError::Empty) => {}
284        }
285
286        let total_samples = match sample_buffer.lock() {
287            Ok(b) => b.len(),
288            Err(e) => e.into_inner().len(),
289        };
290
291        if !has_enough_new_audio(total_samples, consumed_boundary, min_new_samples) {
292            std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
293            continue;
294        }
295
296        let (chunk_start, chunk_end) = compute_chunk_bounds(
297            consumed_boundary,
298            overlap_samples,
299            total_samples,
300            max_chunk_samples,
301        );
302
303        chunk.clear();
304        {
305            let buf = sample_buffer.lock().unwrap_or_else(|e| e.into_inner());
306            chunk.extend_from_slice(&buf[chunk_start..chunk_end]);
307        }
308
309        if !super::vad::contains_speech(&chunk) {
310            consumed_boundary = chunk_end;
311            std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
312            continue;
313        }
314
315        let _ = tx.send(StreamingEvent::SpeechDetected);
316
317        emit_chunk(
318            &mut worker,
319            &engine,
320            &chunk,
321            translate,
322            filler_word_removal,
323            &mut committed_words,
324            &tx,
325        );
326
327        // Advance boundary (new audio will overlap by overlap_samples).
328        consumed_boundary = match sample_buffer.lock() {
329            Ok(b) => b.len(),
330            Err(e) => e.into_inner().len(),
331        };
332    }
333}
334
335/// Transcribe a chunk and emit only the new (stitched) words.
336fn emit_chunk(
337    worker: &mut Option<super::subprocess::SubprocessTranscriber>,
338    engine: &Arc<dyn AsrEngine + Send + Sync>,
339    chunk: &[f32],
340    translate: bool,
341    filler_word_removal: bool,
342    committed_words: &mut Vec<String>,
343    tx: &mpsc::Sender<StreamingEvent>,
344) {
345    let text = if let Some(ref mut w) = worker {
346        match w.transcribe(chunk, translate) {
347            Ok(t) => t,
348            Err(e) => {
349                log::error!("Streaming transcription failed: {e}");
350                return;
351            }
352        }
353    } else {
354        match engine.transcribe(chunk, translate) {
355            Ok(result) => result.text,
356            Err(e) => {
357                log::error!("Streaming transcription failed: {e}");
358                return;
359            }
360        }
361    };
362
363    let new_words = compute_new_words(&text, filler_word_removal, committed_words);
364    if let Some(event) = format_partial_event(&new_words) {
365        let _ = tx.send(event);
366        committed_words.extend(new_words);
367    }
368}
369
/// Report whether at least `min_new_samples` samples lie beyond
/// `consumed_boundary`.
///
/// A boundary past `total_samples` counts as zero new samples, so the result
/// is then `true` only for a zero threshold.
pub(crate) fn has_enough_new_audio(
    total_samples: usize,
    consumed_boundary: usize,
    min_new_samples: usize,
) -> bool {
    min_new_samples <= total_samples.saturating_sub(consumed_boundary)
}
379
/// Compute the chunk boundaries for the next transcription window.
///
/// Returns `(chunk_start, chunk_end)` — the sample indices to slice from the buffer.
/// `chunk_start` lies `overlap_samples` before `consumed_boundary` (clamped at 0)
/// so the transcriber sees context from the previous chunk.
/// `chunk_end` is `chunk_start + max_chunk_samples`, capped at `total_samples`.
///
/// Both indices are clamped to `total_samples`, so the result always satisfies
/// `chunk_start <= chunk_end <= total_samples`. A boundary that lies beyond the
/// buffer yields an empty window instead of an arithmetic underflow.
pub(crate) fn compute_chunk_bounds(
    consumed_boundary: usize,
    overlap_samples: usize,
    total_samples: usize,
    max_chunk_samples: usize,
) -> (usize, usize) {
    let chunk_start = consumed_boundary
        .saturating_sub(overlap_samples)
        .min(total_samples);
    // Saturating add guards against a very large `max_chunk_samples`.
    let chunk_end = chunk_start
        .saturating_add(max_chunk_samples)
        .min(total_samples);
    (chunk_start, chunk_end)
}
399
400/// Given raw transcription text, apply postprocessing and stitch against
401/// previously committed words to determine what new words to emit.
402///
403/// Returns the new words to append to `committed_words`, or an empty vec
404/// if nothing novel was produced.
405pub(crate) fn compute_new_words(
406    raw_text: &str,
407    filler_word_removal: bool,
408    committed_words: &[String],
409) -> Vec<String> {
410    if raw_text.is_empty() {
411        return Vec::new();
412    }
413
414    let mut text = raw_text.to_string();
415    if filler_word_removal {
416        text = super::postprocess::remove_filler_words(&text);
417        if text.is_empty() {
418            return Vec::new();
419        }
420    }
421    text = super::postprocess::ensure_space_after_punctuation(&text);
422
423    let chunk_words: Vec<String> = text.split_whitespace().map(String::from).collect();
424    stitch(committed_words, &chunk_words)
425}
426
427/// Format new words into a `StreamingEvent::PartialText`, or `None` if empty.
428pub(crate) fn format_partial_event(new_words: &[String]) -> Option<StreamingEvent> {
429    if new_words.is_empty() {
430        return None;
431    }
432    let new_text = new_words.join(" ");
433    Some(StreamingEvent::PartialText {
434        text: format!(" {new_text}"),
435        replace_chars: 0,
436    })
437}
438
/// Return the length in bytes of the longest prefix shared by `a` and `b`.
///
/// The comparison runs char by char, so the result always falls on a char
/// boundary of both strings.
// Not called by the non-test code visible in this file.
#[allow(dead_code)]
fn common_prefix_len(a: &str, b: &str) -> usize {
    let mut shared_bytes = 0;
    for (left, right) in a.chars().zip(b.chars()) {
        if left != right {
            break;
        }
        shared_bytes += left.len_utf8();
    }
    shared_bytes
}
449
450// ── Stitching ──────────────────────────────────────────────────────────
451
452/// Given previously committed words and a new chunk's words, determine which
453/// words from the chunk are genuinely new (i.e. not already covered by the
454/// committed output).
455///
456/// Uses a suffix-prefix LCS match: we look for the longest suffix of
457/// `committed` that matches a prefix of `chunk_words`, then return
458/// everything in `chunk_words` after that matched prefix.
459pub fn stitch(committed: &[String], chunk_words: &[String]) -> Vec<String> {
460    if committed.is_empty() {
461        return chunk_words.to_vec();
462    }
463    if chunk_words.is_empty() {
464        return Vec::new();
465    }
466
467    // Only compare the tail of committed (at most as many words as the chunk).
468    let tail_len = committed.len().min(chunk_words.len());
469    let tail = &committed[committed.len() - tail_len..];
470
471    // Find the longest prefix of chunk_words that matches a suffix of tail.
472    let best_match = longest_suffix_prefix_match(tail, chunk_words);
473
474    if best_match > 0 {
475        chunk_words[best_match..].to_vec()
476    } else {
477        chunk_words.to_vec()
478    }
479}
480
/// Return `word` without its leading and trailing ASCII punctuation.
///
/// Punctuation inside the word (e.g. the apostrophe in `it'll`) is kept; a
/// word made only of punctuation becomes the empty string.
fn normalize_for_match(word: &str) -> &str {
    fn is_edge_punctuation(ch: char) -> bool {
        ch.is_ascii_punctuation()
    }

    word.trim_start_matches(is_edge_punctuation)
        .trim_end_matches(is_edge_punctuation)
}
485
486/// Find the length of the longest suffix of `a` that equals a prefix of `b`,
487/// using case-insensitive, punctuation-insensitive comparison.
488fn longest_suffix_prefix_match(a: &[String], b: &[String]) -> usize {
489    let max_len = a.len().min(b.len());
490    let mut best = 0;
491
492    for len in 1..=max_len {
493        let suffix = &a[a.len() - len..];
494        let prefix = &b[..len];
495        if suffix.iter().zip(prefix.iter()).all(|(s, p)| {
496            let s_norm = normalize_for_match(s);
497            let p_norm = normalize_for_match(p);
498            !s_norm.is_empty() && !p_norm.is_empty() && s_norm.eq_ignore_ascii_case(p_norm)
499        }) {
500            best = len;
501        }
502    }
503
504    best
505}
506
507// ── Utilities ──────────────────────────────────────────────────────────
508
/// Break `text` into owned words, treating any run of whitespace as a single
/// separator and ignoring leading and trailing whitespace.
// Not called by the non-test code visible in this file.
#[allow(dead_code)]
fn split_words(text: &str) -> Vec<String> {
    text.split_whitespace().map(str::to_owned).collect()
}
514
515// ── Tests ──────────────────────────────────────────────────────────────
516
517#[cfg(test)]
518mod tests {
519    use super::*;
520
521    #[test]
522    fn test_stitch_no_overlap() {
523        let committed: Vec<String> = vec!["hello".into(), "world".into()];
524        let chunk: Vec<String> = vec!["foo".into(), "bar".into()];
525        let result = stitch(&committed, &chunk);
526        assert_eq!(result, vec!["foo", "bar"]);
527    }
528
529    #[test]
530    fn test_stitch_full_overlap() {
531        let committed: Vec<String> = vec!["the".into(), "quick".into(), "brown".into()];
532        let chunk: Vec<String> = vec!["quick".into(), "brown".into(), "fox".into()];
533        let result = stitch(&committed, &chunk);
534        assert_eq!(result, vec!["fox"]);
535    }
536
537    #[test]
538    fn test_stitch_single_word_overlap() {
539        let committed: Vec<String> = vec!["hello".into(), "world".into()];
540        let chunk: Vec<String> = vec!["world".into(), "today".into()];
541        let result = stitch(&committed, &chunk);
542        assert_eq!(result, vec!["today"]);
543    }
544
545    #[test]
546    fn test_stitch_empty_committed() {
547        let committed: Vec<String> = vec![];
548        let chunk: Vec<String> = vec!["hello".into(), "world".into()];
549        let result = stitch(&committed, &chunk);
550        assert_eq!(result, vec!["hello", "world"]);
551    }
552
553    #[test]
554    fn test_stitch_empty_chunk() {
555        let committed: Vec<String> = vec!["hello".into()];
556        let chunk: Vec<String> = vec![];
557        let result = stitch(&committed, &chunk);
558        assert!(result.is_empty());
559    }
560
561    #[test]
562    fn test_stitch_case_insensitive() {
563        let committed: Vec<String> = vec!["Hello".into(), "World".into()];
564        let chunk: Vec<String> = vec!["world".into(), "today".into()];
565        let result = stitch(&committed, &chunk);
566        assert_eq!(result, vec!["today"]);
567    }
568
569    #[test]
570    fn test_stitch_complete_duplicate() {
571        let committed: Vec<String> = vec!["a".into(), "b".into(), "c".into()];
572        let chunk: Vec<String> = vec!["a".into(), "b".into(), "c".into()];
573        let result = stitch(&committed, &chunk);
574        assert!(result.is_empty());
575    }
576
577    #[test]
578    fn test_vad_silence_no_speech() {
579        let samples = vec![0.0f32; 16000];
580        assert!(!crate::transcription::vad::contains_speech(&samples));
581    }
582
583    #[test]
584    fn test_vad_low_noise_no_speech() {
585        let samples = vec![0.001f32; 16000];
586        assert!(!crate::transcription::vad::contains_speech(&samples));
587    }
588
589    #[test]
590    fn test_vad_empty_no_speech() {
591        assert!(!crate::transcription::vad::contains_speech(&[]));
592    }
593
594    #[test]
595    fn test_longest_suffix_prefix_match_basic() {
596        let a: Vec<String> = vec!["x".into(), "y".into(), "z".into()];
597        let b: Vec<String> = vec!["y".into(), "z".into(), "w".into()];
598        assert_eq!(longest_suffix_prefix_match(&a, &b), 2);
599    }
600
601    #[test]
602    fn test_longest_suffix_prefix_match_none() {
603        let a: Vec<String> = vec!["a".into(), "b".into()];
604        let b: Vec<String> = vec!["c".into(), "d".into()];
605        assert_eq!(longest_suffix_prefix_match(&a, &b), 0);
606    }
607
608    #[test]
609    fn test_longest_suffix_prefix_match_full() {
610        let a: Vec<String> = vec!["a".into(), "b".into()];
611        let b: Vec<String> = vec!["a".into(), "b".into()];
612        assert_eq!(longest_suffix_prefix_match(&a, &b), 2);
613    }
614
615    #[test]
616    fn test_split_words_basic() {
617        assert_eq!(split_words("hello world"), vec!["hello", "world"]);
618    }
619
620    #[test]
621    fn test_split_words_extra_whitespace() {
622        assert_eq!(split_words("  hello   world  "), vec!["hello", "world"]);
623    }
624
625    #[test]
626    fn test_split_words_empty() {
627        assert!(split_words("").is_empty());
628        assert!(split_words("   ").is_empty());
629    }
630
631    #[test]
632    fn test_split_words_single() {
633        assert_eq!(split_words("hello"), vec!["hello"]);
634    }
635
636    #[test]
637    fn test_constants() {
638        const { assert!(MIN_NEW_AUDIO_SECS > 0.0) };
639        const { assert!(MAX_CHUNK_SECS > 0.0) };
640        const { assert!(MAX_CHUNK_SECS <= 30.0) };
641        const { assert!(OVERLAP_SECS >= 0.0) };
642        const { assert!(POLL_INTERVAL_MS > 0) };
643        assert_eq!(TARGET_RATE, 16_000);
644    }
645
646    #[test]
647    fn test_streaming_event_partial_text() {
648        let event = StreamingEvent::PartialText {
649            text: "hello".to_string(),
650            replace_chars: 3,
651        };
652        match event {
653            StreamingEvent::PartialText {
654                text,
655                replace_chars,
656            } => {
657                assert_eq!(text, "hello");
658                assert_eq!(replace_chars, 3);
659            }
660            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
661        }
662    }
663
664    #[test]
665    fn test_stitch_long_committed_short_chunk() {
666        let committed: Vec<String> = (0..100).map(|i| format!("w{i}")).collect();
667        let chunk: Vec<String> = vec!["w99".into(), "new1".into()];
668        let result = stitch(&committed, &chunk);
669        assert_eq!(result, vec!["new1"]);
670    }
671
672    #[test]
673    fn test_longest_suffix_prefix_match_case_insensitive() {
674        let a: Vec<String> = vec!["Hello".into(), "WORLD".into()];
675        let b: Vec<String> = vec!["hello".into(), "world".into(), "test".into()];
676        assert_eq!(longest_suffix_prefix_match(&a, &b), 2);
677    }
678
679    #[test]
680    fn test_longest_suffix_prefix_match_punctuation() {
681        // "come." should match "come" (trailing punctuation stripped)
682        let a: Vec<String> = vec!["it'll".into(), "come.".into()];
683        let b: Vec<String> = vec!["come".into(), "pop".into(), "up".into()];
684        assert_eq!(longest_suffix_prefix_match(&a, &b), 1);
685    }
686
687    #[test]
688    fn test_longest_suffix_prefix_match_leading_punctuation() {
689        let a: Vec<String> = vec!["hello".into(), "world".into()];
690        let b: Vec<String> = vec!["\"world".into(), "today".into()];
691        assert_eq!(longest_suffix_prefix_match(&a, &b), 1);
692    }
693
694    #[test]
695    fn test_stitch_punctuation_mismatch() {
696        // Real-world case: chunk N ends with "come." and chunk N+1 starts with "come"
697        let committed: Vec<String> = vec![
698            "keep".into(),
699            "talking".into(),
700            "and".into(),
701            "eventually".into(),
702            "it'll".into(),
703            "come.".into(),
704        ];
705        let chunk: Vec<String> = vec![
706            "eventually".into(),
707            "it'll".into(),
708            "come".into(),
709            "pop".into(),
710            "up".into(),
711        ];
712        let result = stitch(&committed, &chunk);
713        assert_eq!(result, vec!["pop", "up"]);
714    }
715
716    #[test]
717    fn test_normalize_for_match() {
718        assert_eq!(normalize_for_match("hello"), "hello");
719        assert_eq!(normalize_for_match("hello."), "hello");
720        assert_eq!(normalize_for_match("hello,"), "hello");
721        assert_eq!(normalize_for_match("\"hello\""), "hello");
722        assert_eq!(normalize_for_match("..."), "");
723    }
724
725    // ── common_prefix_len tests ──────────────────────────────────────
726
727    #[test]
728    fn test_common_prefix_len_identical() {
729        assert_eq!(common_prefix_len("hello world", "hello world"), 11);
730    }
731
732    #[test]
733    fn test_common_prefix_len_prefix_match() {
734        assert_eq!(common_prefix_len("hello world", "hello world foo"), 11);
735    }
736
737    #[test]
738    fn test_common_prefix_len_diverges() {
739        assert_eq!(common_prefix_len("hello world", "hello earth"), 6);
740    }
741
742    #[test]
743    fn test_common_prefix_len_empty() {
744        assert_eq!(common_prefix_len("", "hello"), 0);
745        assert_eq!(common_prefix_len("hello", ""), 0);
746    }
747
748    #[test]
749    fn test_common_prefix_len_no_common() {
750        assert_eq!(common_prefix_len("abc", "xyz"), 0);
751    }
752
753    #[test]
754    fn test_common_prefix_len_unicode() {
755        // "café " = c(1) + a(1) + f(1) + é(2) + space(1) = 6 bytes
756        assert_eq!(common_prefix_len("café latte", "café mocha"), 6);
757    }
758
759    #[test]
760    fn test_common_prefix_len_revision_scenario() {
761        // Simulates Whisper revising "Frack" to "Freck"
762        let old = "My name is Jacob Frack and I am";
763        let new_text = "My name is Jacob Freck and I am a principal";
764        // "My name is Jacob F" = 18 bytes, then 'r' vs 'r' matches, 'a' vs 'e' diverges
765        // "My name is Jacob Fr" = 19 bytes
766        assert_eq!(common_prefix_len(old, new_text), 19);
767    }
768
769    // ── has_enough_new_audio tests ───────────────────────────────────
770
771    #[test]
772    fn test_has_enough_audio_above_threshold() {
773        assert!(has_enough_new_audio(50_000, 10_000, 32_000));
774    }
775
776    #[test]
777    fn test_has_enough_audio_exactly_at_threshold() {
778        assert!(has_enough_new_audio(42_000, 10_000, 32_000));
779    }
780
781    #[test]
782    fn test_has_enough_audio_below_threshold() {
783        assert!(!has_enough_new_audio(30_000, 10_000, 32_000));
784    }
785
786    #[test]
787    fn test_has_enough_audio_no_new_samples() {
788        assert!(!has_enough_new_audio(10_000, 10_000, 32_000));
789    }
790
791    #[test]
792    fn test_has_enough_audio_consumed_beyond_total() {
793        // saturating_sub handles consumed > total gracefully
794        assert!(!has_enough_new_audio(5_000, 10_000, 32_000));
795    }
796
797    #[test]
798    fn test_has_enough_audio_zero_threshold() {
799        assert!(has_enough_new_audio(100, 100, 0));
800    }
801
802    // ── compute_chunk_bounds tests ──────────────────────────────────
803
804    #[test]
805    fn test_chunk_bounds_basic() {
806        // consumed=10000, overlap=2000, total=20000, max=8000
807        let (start, end) = compute_chunk_bounds(10_000, 2_000, 20_000, 8_000);
808        assert_eq!(start, 8_000); // 10000 - 2000
809        assert_eq!(end, 16_000); // 8000 + 8000 (capped by max)
810    }
811
812    #[test]
813    fn test_chunk_bounds_no_cap() {
814        // total - start <= max, so chunk_end = total
815        let (start, end) = compute_chunk_bounds(10_000, 2_000, 14_000, 80_000);
816        assert_eq!(start, 8_000);
817        assert_eq!(end, 14_000);
818    }
819
820    #[test]
821    fn test_chunk_bounds_overlap_exceeds_consumed() {
822        // overlap > consumed, so saturating_sub clamps to 0
823        let (start, end) = compute_chunk_bounds(1_000, 5_000, 10_000, 80_000);
824        assert_eq!(start, 0);
825        assert_eq!(end, 10_000);
826    }
827
828    #[test]
829    fn test_chunk_bounds_zero_overlap() {
830        let (start, end) = compute_chunk_bounds(5_000, 0, 10_000, 80_000);
831        assert_eq!(start, 5_000);
832        assert_eq!(end, 10_000);
833    }
834
835    #[test]
836    fn test_chunk_bounds_exact_max() {
837        // total - start == max exactly
838        let (start, end) = compute_chunk_bounds(5_000, 1_000, 12_000, 8_000);
839        assert_eq!(start, 4_000);
840        assert_eq!(end, 12_000); // 12000 - 4000 = 8000 == max, so not capped
841    }
842
843    #[test]
844    fn test_chunk_bounds_with_real_constants() {
845        let overlap_samples = (OVERLAP_SECS * TARGET_RATE as f32) as usize;
846        let max_chunk_samples = (MAX_CHUNK_SECS * TARGET_RATE as f32) as usize;
847        // Simulate 10 seconds of audio, consumed up to 5 seconds
848        let consumed = 5 * TARGET_RATE as usize;
849        let total = 10 * TARGET_RATE as usize;
850        let (start, end) =
851            compute_chunk_bounds(consumed, overlap_samples, total, max_chunk_samples);
852        assert!(start < consumed);
853        assert!(end <= total);
854        assert!(end - start <= max_chunk_samples);
855    }
856
857    // ── compute_new_words tests ─────────────────────────────────────
858
859    #[test]
860    fn test_compute_new_words_empty_text() {
861        let committed: Vec<String> = vec!["hello".into()];
862        assert!(compute_new_words("", false, &committed).is_empty());
863    }
864
865    #[test]
866    fn test_compute_new_words_no_committed() {
867        let result = compute_new_words("hello world", false, &[]);
868        assert_eq!(result, vec!["hello", "world"]);
869    }
870
871    #[test]
872    fn test_compute_new_words_with_overlap() {
873        let committed: Vec<String> = vec!["the".into(), "quick".into(), "brown".into()];
874        let result = compute_new_words("quick brown fox", false, &committed);
875        assert_eq!(result, vec!["fox"]);
876    }
877
878    #[test]
879    fn test_compute_new_words_identical_result() {
880        let committed: Vec<String> = vec!["hello".into(), "world".into()];
881        let result = compute_new_words("hello world", false, &committed);
882        assert!(result.is_empty());
883    }
884
885    #[test]
886    fn test_compute_new_words_no_overlap() {
887        let committed: Vec<String> = vec!["alpha".into(), "beta".into()];
888        let result = compute_new_words("gamma delta", false, &committed);
889        assert_eq!(result, vec!["gamma", "delta"]);
890    }
891
892    #[test]
893    fn test_compute_new_words_filler_removal_produces_empty() {
894        // "um" is a filler word; after removal the text may be empty
895        let result = compute_new_words("um", true, &[]);
896        // After filler removal the text might be empty or "um" might not be
897        // in the filler list. Either way, ensure no panic.
898        let _ = result;
899    }
900
901    #[test]
902    fn test_compute_new_words_whitespace_only() {
903        // split_whitespace produces nothing for whitespace-only
904        let result = compute_new_words("   ", false, &[]);
905        assert!(result.is_empty());
906    }
907
908    // ── format_partial_event tests ──────────────────────────────────
909
910    #[test]
911    fn test_format_partial_event_empty() {
912        assert!(format_partial_event(&[]).is_none());
913    }
914
915    #[test]
916    fn test_format_partial_event_single_word() {
917        let words: Vec<String> = vec!["hello".into()];
918        let event = format_partial_event(&words).unwrap();
919        match event {
920            StreamingEvent::PartialText {
921                text,
922                replace_chars,
923            } => {
924                assert_eq!(text, " hello");
925                assert_eq!(replace_chars, 0);
926            }
927            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
928        }
929    }
930
931    #[test]
932    fn test_format_partial_event_multiple_words() {
933        let words: Vec<String> = vec!["hello".into(), "world".into()];
934        let event = format_partial_event(&words).unwrap();
935        match event {
936            StreamingEvent::PartialText {
937                text,
938                replace_chars,
939            } => {
940                assert_eq!(text, " hello world");
941                assert_eq!(replace_chars, 0);
942            }
943            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
944        }
945    }
946
947    #[test]
948    fn test_format_partial_event_leading_space() {
949        // Verify the leading space is always present
950        let words: Vec<String> = vec!["x".into()];
951        let event = format_partial_event(&words).unwrap();
952        match event {
953            StreamingEvent::PartialText { text, .. } => {
954                assert!(text.starts_with(' '));
955            }
956            _ => panic!("unexpected variant"),
957        }
958    }
959
960    // ── Integration: compute_new_words + format_partial_event ───────
961
962    #[test]
963    fn test_emission_pipeline_with_overlap() {
964        let committed: Vec<String> = vec!["the".into(), "quick".into()];
965        let new_words = compute_new_words("the quick brown fox", false, &committed);
966        assert_eq!(new_words, vec!["brown", "fox"]);
967        let event = format_partial_event(&new_words).unwrap();
968        match event {
969            StreamingEvent::PartialText {
970                text,
971                replace_chars,
972            } => {
973                assert_eq!(text, " brown fox");
974                assert_eq!(replace_chars, 0);
975            }
976            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
977        }
978    }
979
980    #[test]
981    fn test_emission_pipeline_no_novelty() {
982        let committed: Vec<String> = vec!["hello".into(), "world".into()];
983        let new_words = compute_new_words("hello world", false, &committed);
984        assert!(new_words.is_empty());
985        assert!(format_partial_event(&new_words).is_none());
986    }
987}
murmur_core/transcription/streaming.rs

murmur_core/transcription/
streaming.rs