1use std::sync::atomic::{AtomicBool, Ordering};
9use std::sync::mpsc;
10use std::sync::{Arc, Mutex};
11
12use super::engine::AsrEngine;
13use crate::audio::capture::TARGET_RATE;
14
/// Minimum amount of not-yet-processed audio, in seconds, that must be in the
/// sample buffer before either loop starts another transcription pass.
const MIN_NEW_AUDIO_SECS: f32 = 2.0;
/// Time, in milliseconds, a loop sleeps before looking at the sample buffer again.
const POLL_INTERVAL_MS: u64 = 300;
/// Upper bound, in seconds, on the chunk length produced by `compute_chunk_bounds`.
const MAX_CHUNK_SECS: f32 = 5.0;
/// How far, in seconds, a chunk reaches back before the consumed boundary.
const OVERLAP_SECS: f32 = 1.5;
26
/// Events a streaming loop sends to its consumer over the `mpsc` channel.
pub enum StreamingEvent {
    /// Newly transcribed text.
    ///
    /// `replace_chars` is `0` when `text` is appended to what was sent before.
    /// The native loop sets it to the `char` count of the previously sent text
    /// when the new transcription is not an extension of that text.
    PartialText { text: String, replace_chars: usize },
    /// Sent after a chunk passed the voice-activity check, right before it is transcribed.
    SpeechDetected,
}
37
/// Handle to a running streaming thread, returned by the `start_*` functions.
pub struct StreamingHandle {
    /// Stop channel; the loops exit when a message arrives or the sender is dropped.
    stop_tx: mpsc::Sender<()>,
    /// Set to `true` by `stop_and_join`; the native loop checks it after each transcription.
    abort_flag: Arc<AtomicBool>,
    /// Join handle of the spawned thread; taken by `stop_and_join`.
    join_handle: Option<std::thread::JoinHandle<()>>,
}
49
50impl StreamingHandle {
51 pub fn stop_and_join(mut self) {
57 self.abort_flag.store(true, Ordering::Relaxed);
58 let _ = self.stop_tx.send(());
59 if let Some(handle) = self.join_handle.take() {
60 if let Err(e) = handle.join() {
61 log::error!("Streaming thread panicked: {e:?}");
62 }
63 }
64 }
65}
66
67pub fn start_streaming(
77 sample_buffer: Arc<Mutex<Vec<f32>>>,
78 engine: Arc<dyn AsrEngine + Send + Sync>,
79 translate: bool,
80 filler_word_removal: bool,
81 tx: mpsc::Sender<StreamingEvent>,
82 worker: Option<super::subprocess::SubprocessTranscriber>,
83) -> StreamingHandle {
84 let (stop_tx, stop_rx) = mpsc::channel::<()>();
85 let abort_flag = Arc::new(AtomicBool::new(false));
86 let abort_flag_clone = Arc::clone(&abort_flag);
87
88 let join_handle = std::thread::spawn(move || {
89 streaming_loop(
90 sample_buffer,
91 engine,
92 translate,
93 filler_word_removal,
94 tx,
95 stop_rx,
96 abort_flag_clone,
97 worker,
98 );
99 });
100
101 StreamingHandle {
102 stop_tx,
103 abort_flag,
104 join_handle: Some(join_handle),
105 }
106}
107
108pub fn start_native_streaming(
115 sample_buffer: Arc<Mutex<Vec<f32>>>,
116 engine: Arc<dyn AsrEngine + Send + Sync>,
117 translate: bool,
118 tx: mpsc::Sender<StreamingEvent>,
119) -> StreamingHandle {
120 let (stop_tx, stop_rx) = mpsc::channel::<()>();
121 let abort_flag = Arc::new(AtomicBool::new(false));
122 let abort_flag_clone = Arc::clone(&abort_flag);
123
124 let join_handle = std::thread::spawn(move || {
125 native_streaming_loop(
126 sample_buffer,
127 engine,
128 translate,
129 tx,
130 stop_rx,
131 abort_flag_clone,
132 );
133 });
134
135 StreamingHandle {
136 stop_tx,
137 abort_flag,
138 join_handle: Some(join_handle),
139 }
140}
141
142fn native_streaming_loop(
144 sample_buffer: Arc<Mutex<Vec<f32>>>,
145 engine: Arc<dyn AsrEngine + Send + Sync>,
146 translate: bool,
147 tx: mpsc::Sender<StreamingEvent>,
148 stop_rx: mpsc::Receiver<()>,
149 abort_flag: Arc<AtomicBool>,
150) {
151 let min_samples = (MIN_NEW_AUDIO_SECS * TARGET_RATE as f32) as usize;
152 let mut prev_len: usize = 0;
153 let mut prev_text = String::new();
154
155 loop {
156 match stop_rx.try_recv() {
158 Ok(()) | Err(mpsc::TryRecvError::Disconnected) => break,
159 Err(mpsc::TryRecvError::Empty) => {}
160 }
161
162 let current_len = match sample_buffer.lock() {
163 Ok(b) => b.len(),
164 Err(e) => e.into_inner().len(),
165 };
166
167 let new_samples = current_len.saturating_sub(prev_len);
169 if new_samples < min_samples {
170 std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
171 continue;
172 }
173
174 let samples: Vec<f32> = match sample_buffer.lock() {
176 Ok(b) => b.clone(),
177 Err(e) => e.into_inner().clone(),
178 };
179
180 if samples.is_empty() {
181 std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
182 continue;
183 }
184
185 let new_start = prev_len.min(samples.len());
187 if !super::vad::contains_speech(&samples[new_start..]) {
188 prev_len = current_len;
189 std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
190 continue;
191 }
192
193 let _ = tx.send(StreamingEvent::SpeechDetected);
194
195 match engine.transcribe(&samples, translate) {
197 Ok(result) => {
198 if abort_flag.load(Ordering::Relaxed) {
199 break;
200 }
201 if !result.text.is_empty() && result.text != prev_text {
202 if result.text.starts_with(&prev_text) {
205 let new_suffix = &result.text[prev_text.len()..];
207 if !new_suffix.is_empty() {
208 let _ = tx.send(StreamingEvent::PartialText {
209 text: new_suffix.to_string(),
210 replace_chars: 0,
211 });
212 }
213 } else {
214 let replace_chars = prev_text.chars().count();
216 let _ = tx.send(StreamingEvent::PartialText {
217 text: result.text.clone(),
218 replace_chars,
219 });
220 }
221 prev_text = result.text;
222 }
223 }
224 Err(e) => {
225 log::error!("Native streaming transcription failed: {e}");
226 }
227 }
228
229 prev_len = current_len;
230 }
231}
232
233#[allow(clippy::too_many_arguments)]
236fn streaming_loop(
237 sample_buffer: Arc<Mutex<Vec<f32>>>,
238 engine: Arc<dyn AsrEngine + Send + Sync>,
239 translate: bool,
240 filler_word_removal: bool,
241 tx: mpsc::Sender<StreamingEvent>,
242 stop_rx: mpsc::Receiver<()>,
243 _abort_flag: Arc<AtomicBool>,
244 mut worker: Option<super::subprocess::SubprocessTranscriber>,
245) {
246 let min_new_samples = (MIN_NEW_AUDIO_SECS * TARGET_RATE as f32) as usize;
247 let max_chunk_samples = (MAX_CHUNK_SECS * TARGET_RATE as f32) as usize;
248 let overlap_samples = (OVERLAP_SECS * TARGET_RATE as f32) as usize;
249
250 let mut consumed_boundary: usize = 0;
252 let mut committed_words: Vec<String> = Vec::new();
254 let mut chunk: Vec<f32> = Vec::with_capacity(max_chunk_samples);
255
256 loop {
257 match stop_rx.try_recv() {
258 Ok(()) | Err(mpsc::TryRecvError::Disconnected) => {
259 let total = match sample_buffer.lock() {
261 Ok(b) => b.len(),
262 Err(e) => e.into_inner().len(),
263 };
264 let start = consumed_boundary.saturating_sub(overlap_samples);
265 if total > start {
266 chunk.clear();
267 {
268 let buf = sample_buffer.lock().unwrap_or_else(|e| e.into_inner());
269 chunk.extend_from_slice(&buf[start..total]);
270 }
271 emit_chunk(
272 &mut worker,
273 &engine,
274 &chunk,
275 translate,
276 filler_word_removal,
277 &mut committed_words,
278 &tx,
279 );
280 }
281 break;
282 }
283 Err(mpsc::TryRecvError::Empty) => {}
284 }
285
286 let total_samples = match sample_buffer.lock() {
287 Ok(b) => b.len(),
288 Err(e) => e.into_inner().len(),
289 };
290
291 if !has_enough_new_audio(total_samples, consumed_boundary, min_new_samples) {
292 std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
293 continue;
294 }
295
296 let (chunk_start, chunk_end) = compute_chunk_bounds(
297 consumed_boundary,
298 overlap_samples,
299 total_samples,
300 max_chunk_samples,
301 );
302
303 chunk.clear();
304 {
305 let buf = sample_buffer.lock().unwrap_or_else(|e| e.into_inner());
306 chunk.extend_from_slice(&buf[chunk_start..chunk_end]);
307 }
308
309 if !super::vad::contains_speech(&chunk) {
310 consumed_boundary = chunk_end;
311 std::thread::sleep(std::time::Duration::from_millis(POLL_INTERVAL_MS));
312 continue;
313 }
314
315 let _ = tx.send(StreamingEvent::SpeechDetected);
316
317 emit_chunk(
318 &mut worker,
319 &engine,
320 &chunk,
321 translate,
322 filler_word_removal,
323 &mut committed_words,
324 &tx,
325 );
326
327 consumed_boundary = match sample_buffer.lock() {
329 Ok(b) => b.len(),
330 Err(e) => e.into_inner().len(),
331 };
332 }
333}
334
335fn emit_chunk(
337 worker: &mut Option<super::subprocess::SubprocessTranscriber>,
338 engine: &Arc<dyn AsrEngine + Send + Sync>,
339 chunk: &[f32],
340 translate: bool,
341 filler_word_removal: bool,
342 committed_words: &mut Vec<String>,
343 tx: &mpsc::Sender<StreamingEvent>,
344) {
345 let text = if let Some(ref mut w) = worker {
346 match w.transcribe(chunk, translate) {
347 Ok(t) => t,
348 Err(e) => {
349 log::error!("Streaming transcription failed: {e}");
350 return;
351 }
352 }
353 } else {
354 match engine.transcribe(chunk, translate) {
355 Ok(result) => result.text,
356 Err(e) => {
357 log::error!("Streaming transcription failed: {e}");
358 return;
359 }
360 }
361 };
362
363 let new_words = compute_new_words(&text, filler_word_removal, committed_words);
364 if let Some(event) = format_partial_event(&new_words) {
365 let _ = tx.send(event);
366 committed_words.extend(new_words);
367 }
368}
369
/// Returns `true` when at least `min_new_samples` samples lie beyond
/// `consumed_boundary`.
///
/// A boundary past `total_samples` counts as zero new samples, so the result
/// is then `true` only for a zero threshold.
pub(crate) fn has_enough_new_audio(
    total_samples: usize,
    consumed_boundary: usize,
    min_new_samples: usize,
) -> bool {
    min_new_samples <= total_samples.saturating_sub(consumed_boundary)
}
379
/// Computes the `[start, end)` sample range of the next chunk.
///
/// The chunk starts `overlap_samples` before `consumed_boundary` (clamped at
/// zero) and covers at most `max_chunk_samples`, never reaching past
/// `total_samples`.
///
/// If the overlapped start would lie beyond `total_samples` (for example
/// because the buffer is shorter than the boundary), the start is clamped to
/// `total_samples` and the returned range is empty. The guarantee
/// `start <= end <= total_samples` therefore always holds, so the result can
/// be used to slice a buffer of length `total_samples` without panicking.
pub(crate) fn compute_chunk_bounds(
    consumed_boundary: usize,
    overlap_samples: usize,
    total_samples: usize,
    max_chunk_samples: usize,
) -> (usize, usize) {
    // Clamping the start keeps the subtraction below from underflowing.
    let chunk_start = consumed_boundary
        .saturating_sub(overlap_samples)
        .min(total_samples);
    let available = total_samples - chunk_start;
    // `chunk_start + min(..)` cannot exceed `total_samples`, so it cannot overflow.
    let chunk_end = chunk_start + available.min(max_chunk_samples);
    (chunk_start, chunk_end)
}
399
400pub(crate) fn compute_new_words(
406 raw_text: &str,
407 filler_word_removal: bool,
408 committed_words: &[String],
409) -> Vec<String> {
410 if raw_text.is_empty() {
411 return Vec::new();
412 }
413
414 let mut text = raw_text.to_string();
415 if filler_word_removal {
416 text = super::postprocess::remove_filler_words(&text);
417 if text.is_empty() {
418 return Vec::new();
419 }
420 }
421 text = super::postprocess::ensure_space_after_punctuation(&text);
422
423 let chunk_words: Vec<String> = text.split_whitespace().map(String::from).collect();
424 stitch(committed_words, &chunk_words)
425}
426
427pub(crate) fn format_partial_event(new_words: &[String]) -> Option<StreamingEvent> {
429 if new_words.is_empty() {
430 return None;
431 }
432 let new_text = new_words.join(" ");
433 Some(StreamingEvent::PartialText {
434 text: format!(" {new_text}"),
435 replace_chars: 0,
436 })
437}
438
// Only the unit tests in this file call this helper.
#[allow(dead_code)]
/// Returns the length in bytes of the longest common prefix of `a` and `b`,
/// compared `char` by `char`.
fn common_prefix_len(a: &str, b: &str) -> usize {
    let mut shared_bytes = 0;
    for (left, right) in a.chars().zip(b.chars()) {
        if left != right {
            break;
        }
        // Equal chars have equal encoded length, so either side can be measured.
        shared_bytes += left.len_utf8();
    }
    shared_bytes
}
449
450pub fn stitch(committed: &[String], chunk_words: &[String]) -> Vec<String> {
460 if committed.is_empty() {
461 return chunk_words.to_vec();
462 }
463 if chunk_words.is_empty() {
464 return Vec::new();
465 }
466
467 let tail_len = committed.len().min(chunk_words.len());
469 let tail = &committed[committed.len() - tail_len..];
470
471 let best_match = longest_suffix_prefix_match(tail, chunk_words);
473
474 if best_match > 0 {
475 chunk_words[best_match..].to_vec()
476 } else {
477 chunk_words.to_vec()
478 }
479}
480
/// Strips leading and trailing ASCII punctuation from `word`.
///
/// Punctuation inside the word is kept. A word made only of punctuation
/// becomes the empty string.
fn normalize_for_match(word: &str) -> &str {
    let is_punct = |c: char| c.is_ascii_punctuation();
    word.trim_start_matches(is_punct).trim_end_matches(is_punct)
}
485
486fn longest_suffix_prefix_match(a: &[String], b: &[String]) -> usize {
489 let max_len = a.len().min(b.len());
490 let mut best = 0;
491
492 for len in 1..=max_len {
493 let suffix = &a[a.len() - len..];
494 let prefix = &b[..len];
495 if suffix.iter().zip(prefix.iter()).all(|(s, p)| {
496 let s_norm = normalize_for_match(s);
497 let p_norm = normalize_for_match(p);
498 !s_norm.is_empty() && !p_norm.is_empty() && s_norm.eq_ignore_ascii_case(p_norm)
499 }) {
500 best = len;
501 }
502 }
503
504 best
505}
506
// Only the unit tests in this file call this helper.
#[allow(dead_code)]
/// Splits `text` on whitespace into owned words; empty or all-whitespace
/// input gives an empty vector.
fn split_words(text: &str) -> Vec<String> {
    text.split_whitespace().map(str::to_owned).collect()
}
514
// Unit tests for the pure helpers of this module (stitching, chunk bounds,
// word extraction, event formatting) plus a few sanity checks of the
// voice-activity detector. The streaming loops themselves are not exercised here.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- stitch: overlap removal between committed words and a new chunk ----

    #[test]
    fn test_stitch_no_overlap() {
        let committed: Vec<String> = vec!["hello".into(), "world".into()];
        let chunk: Vec<String> = vec!["foo".into(), "bar".into()];
        let result = stitch(&committed, &chunk);
        assert_eq!(result, vec!["foo", "bar"]);
    }

    #[test]
    fn test_stitch_full_overlap() {
        let committed: Vec<String> = vec!["the".into(), "quick".into(), "brown".into()];
        let chunk: Vec<String> = vec!["quick".into(), "brown".into(), "fox".into()];
        let result = stitch(&committed, &chunk);
        assert_eq!(result, vec!["fox"]);
    }

    #[test]
    fn test_stitch_single_word_overlap() {
        let committed: Vec<String> = vec!["hello".into(), "world".into()];
        let chunk: Vec<String> = vec!["world".into(), "today".into()];
        let result = stitch(&committed, &chunk);
        assert_eq!(result, vec!["today"]);
    }

    #[test]
    fn test_stitch_empty_committed() {
        let committed: Vec<String> = vec![];
        let chunk: Vec<String> = vec!["hello".into(), "world".into()];
        let result = stitch(&committed, &chunk);
        assert_eq!(result, vec!["hello", "world"]);
    }

    #[test]
    fn test_stitch_empty_chunk() {
        let committed: Vec<String> = vec!["hello".into()];
        let chunk: Vec<String> = vec![];
        let result = stitch(&committed, &chunk);
        assert!(result.is_empty());
    }

    #[test]
    fn test_stitch_case_insensitive() {
        let committed: Vec<String> = vec!["Hello".into(), "World".into()];
        let chunk: Vec<String> = vec!["world".into(), "today".into()];
        let result = stitch(&committed, &chunk);
        assert_eq!(result, vec!["today"]);
    }

    #[test]
    fn test_stitch_complete_duplicate() {
        let committed: Vec<String> = vec!["a".into(), "b".into(), "c".into()];
        let chunk: Vec<String> = vec!["a".into(), "b".into(), "c".into()];
        let result = stitch(&committed, &chunk);
        assert!(result.is_empty());
    }

    // ---- voice-activity detector sanity checks ----

    #[test]
    fn test_vad_silence_no_speech() {
        let samples = vec![0.0f32; 16000];
        assert!(!crate::transcription::vad::contains_speech(&samples));
    }

    #[test]
    fn test_vad_low_noise_no_speech() {
        let samples = vec![0.001f32; 16000];
        assert!(!crate::transcription::vad::contains_speech(&samples));
    }

    #[test]
    fn test_vad_empty_no_speech() {
        assert!(!crate::transcription::vad::contains_speech(&[]));
    }

    // ---- longest_suffix_prefix_match ----

    #[test]
    fn test_longest_suffix_prefix_match_basic() {
        let a: Vec<String> = vec!["x".into(), "y".into(), "z".into()];
        let b: Vec<String> = vec!["y".into(), "z".into(), "w".into()];
        assert_eq!(longest_suffix_prefix_match(&a, &b), 2);
    }

    #[test]
    fn test_longest_suffix_prefix_match_none() {
        let a: Vec<String> = vec!["a".into(), "b".into()];
        let b: Vec<String> = vec!["c".into(), "d".into()];
        assert_eq!(longest_suffix_prefix_match(&a, &b), 0);
    }

    #[test]
    fn test_longest_suffix_prefix_match_full() {
        let a: Vec<String> = vec!["a".into(), "b".into()];
        let b: Vec<String> = vec!["a".into(), "b".into()];
        assert_eq!(longest_suffix_prefix_match(&a, &b), 2);
    }

    // ---- split_words ----

    #[test]
    fn test_split_words_basic() {
        assert_eq!(split_words("hello world"), vec!["hello", "world"]);
    }

    #[test]
    fn test_split_words_extra_whitespace() {
        assert_eq!(split_words(" hello world "), vec!["hello", "world"]);
    }

    #[test]
    fn test_split_words_empty() {
        assert!(split_words("").is_empty());
        assert!(split_words(" ").is_empty());
    }

    #[test]
    fn test_split_words_single() {
        assert_eq!(split_words("hello"), vec!["hello"]);
    }

    // ---- tuning constants and event construction ----

    #[test]
    fn test_constants() {
        const { assert!(MIN_NEW_AUDIO_SECS > 0.0) };
        const { assert!(MAX_CHUNK_SECS > 0.0) };
        const { assert!(MAX_CHUNK_SECS <= 30.0) };
        const { assert!(OVERLAP_SECS >= 0.0) };
        const { assert!(POLL_INTERVAL_MS > 0) };
        assert_eq!(TARGET_RATE, 16_000);
    }

    #[test]
    fn test_streaming_event_partial_text() {
        let event = StreamingEvent::PartialText {
            text: "hello".to_string(),
            replace_chars: 3,
        };
        match event {
            StreamingEvent::PartialText {
                text,
                replace_chars,
            } => {
                assert_eq!(text, "hello");
                assert_eq!(replace_chars, 3);
            }
            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
        }
    }

    // ---- stitch / matching: longer inputs, case and punctuation ----

    #[test]
    fn test_stitch_long_committed_short_chunk() {
        let committed: Vec<String> = (0..100).map(|i| format!("w{i}")).collect();
        let chunk: Vec<String> = vec!["w99".into(), "new1".into()];
        let result = stitch(&committed, &chunk);
        assert_eq!(result, vec!["new1"]);
    }

    #[test]
    fn test_longest_suffix_prefix_match_case_insensitive() {
        let a: Vec<String> = vec!["Hello".into(), "WORLD".into()];
        let b: Vec<String> = vec!["hello".into(), "world".into(), "test".into()];
        assert_eq!(longest_suffix_prefix_match(&a, &b), 2);
    }

    #[test]
    fn test_longest_suffix_prefix_match_punctuation() {
        // "come." and "come" match once trailing punctuation is stripped.
        let a: Vec<String> = vec!["it'll".into(), "come.".into()];
        let b: Vec<String> = vec!["come".into(), "pop".into(), "up".into()];
        assert_eq!(longest_suffix_prefix_match(&a, &b), 1);
    }

    #[test]
    fn test_longest_suffix_prefix_match_leading_punctuation() {
        let a: Vec<String> = vec!["hello".into(), "world".into()];
        let b: Vec<String> = vec!["\"world".into(), "today".into()];
        assert_eq!(longest_suffix_prefix_match(&a, &b), 1);
    }

    #[test]
    fn test_stitch_punctuation_mismatch() {
        // The committed text ends in "come." while the chunk repeats it as "come".
        let committed: Vec<String> = vec![
            "keep".into(),
            "talking".into(),
            "and".into(),
            "eventually".into(),
            "it'll".into(),
            "come.".into(),
        ];
        let chunk: Vec<String> = vec![
            "eventually".into(),
            "it'll".into(),
            "come".into(),
            "pop".into(),
            "up".into(),
        ];
        let result = stitch(&committed, &chunk);
        assert_eq!(result, vec!["pop", "up"]);
    }

    #[test]
    fn test_normalize_for_match() {
        assert_eq!(normalize_for_match("hello"), "hello");
        assert_eq!(normalize_for_match("hello."), "hello");
        assert_eq!(normalize_for_match("hello,"), "hello");
        assert_eq!(normalize_for_match("\"hello\""), "hello");
        assert_eq!(normalize_for_match("..."), "");
    }

    // ---- common_prefix_len (results are byte lengths) ----

    #[test]
    fn test_common_prefix_len_identical() {
        assert_eq!(common_prefix_len("hello world", "hello world"), 11);
    }

    #[test]
    fn test_common_prefix_len_prefix_match() {
        assert_eq!(common_prefix_len("hello world", "hello world foo"), 11);
    }

    #[test]
    fn test_common_prefix_len_diverges() {
        assert_eq!(common_prefix_len("hello world", "hello earth"), 6);
    }

    #[test]
    fn test_common_prefix_len_empty() {
        assert_eq!(common_prefix_len("", "hello"), 0);
        assert_eq!(common_prefix_len("hello", ""), 0);
    }

    #[test]
    fn test_common_prefix_len_no_common() {
        assert_eq!(common_prefix_len("abc", "xyz"), 0);
    }

    #[test]
    fn test_common_prefix_len_unicode() {
        // "café " is 5 chars but 6 bytes, because 'é' takes two bytes in UTF-8.
        assert_eq!(common_prefix_len("café latte", "café mocha"), 6);
    }

    #[test]
    fn test_common_prefix_len_revision_scenario() {
        let old = "My name is Jacob Frack and I am";
        let new_text = "My name is Jacob Freck and I am a principal";
        // The shared prefix is "My name is Jacob Fr".
        assert_eq!(common_prefix_len(old, new_text), 19);
    }

    // ---- has_enough_new_audio ----

    #[test]
    fn test_has_enough_audio_above_threshold() {
        assert!(has_enough_new_audio(50_000, 10_000, 32_000));
    }

    #[test]
    fn test_has_enough_audio_exactly_at_threshold() {
        assert!(has_enough_new_audio(42_000, 10_000, 32_000));
    }

    #[test]
    fn test_has_enough_audio_below_threshold() {
        assert!(!has_enough_new_audio(30_000, 10_000, 32_000));
    }

    #[test]
    fn test_has_enough_audio_no_new_samples() {
        assert!(!has_enough_new_audio(10_000, 10_000, 32_000));
    }

    #[test]
    fn test_has_enough_audio_consumed_beyond_total() {
        // A boundary past the total counts as zero new samples.
        assert!(!has_enough_new_audio(5_000, 10_000, 32_000));
    }

    #[test]
    fn test_has_enough_audio_zero_threshold() {
        assert!(has_enough_new_audio(100, 100, 0));
    }

    // ---- compute_chunk_bounds ----

    #[test]
    fn test_chunk_bounds_basic() {
        let (start, end) = compute_chunk_bounds(10_000, 2_000, 20_000, 8_000);
        // start = 10_000 - 2_000; end is capped at start + 8_000.
        assert_eq!(start, 8_000); assert_eq!(end, 16_000); }

    #[test]
    fn test_chunk_bounds_no_cap() {
        let (start, end) = compute_chunk_bounds(10_000, 2_000, 14_000, 80_000);
        assert_eq!(start, 8_000);
        assert_eq!(end, 14_000);
    }

    #[test]
    fn test_chunk_bounds_overlap_exceeds_consumed() {
        let (start, end) = compute_chunk_bounds(1_000, 5_000, 10_000, 80_000);
        // The start saturates at zero instead of underflowing.
        assert_eq!(start, 0);
        assert_eq!(end, 10_000);
    }

    #[test]
    fn test_chunk_bounds_zero_overlap() {
        let (start, end) = compute_chunk_bounds(5_000, 0, 10_000, 80_000);
        assert_eq!(start, 5_000);
        assert_eq!(end, 10_000);
    }

    #[test]
    fn test_chunk_bounds_exact_max() {
        let (start, end) = compute_chunk_bounds(5_000, 1_000, 12_000, 8_000);
        // Available audio equals the cap exactly, so the end is the total.
        assert_eq!(start, 4_000);
        assert_eq!(end, 12_000); }

    #[test]
    fn test_chunk_bounds_with_real_constants() {
        let overlap_samples = (OVERLAP_SECS * TARGET_RATE as f32) as usize;
        let max_chunk_samples = (MAX_CHUNK_SECS * TARGET_RATE as f32) as usize;
        let consumed = 5 * TARGET_RATE as usize;
        let total = 10 * TARGET_RATE as usize;
        let (start, end) =
            compute_chunk_bounds(consumed, overlap_samples, total, max_chunk_samples);
        assert!(start < consumed);
        assert!(end <= total);
        assert!(end - start <= max_chunk_samples);
    }

    // ---- compute_new_words ----

    #[test]
    fn test_compute_new_words_empty_text() {
        let committed: Vec<String> = vec!["hello".into()];
        assert!(compute_new_words("", false, &committed).is_empty());
    }

    #[test]
    fn test_compute_new_words_no_committed() {
        let result = compute_new_words("hello world", false, &[]);
        assert_eq!(result, vec!["hello", "world"]);
    }

    #[test]
    fn test_compute_new_words_with_overlap() {
        let committed: Vec<String> = vec!["the".into(), "quick".into(), "brown".into()];
        let result = compute_new_words("quick brown fox", false, &committed);
        assert_eq!(result, vec!["fox"]);
    }

    #[test]
    fn test_compute_new_words_identical_result() {
        let committed: Vec<String> = vec!["hello".into(), "world".into()];
        let result = compute_new_words("hello world", false, &committed);
        assert!(result.is_empty());
    }

    #[test]
    fn test_compute_new_words_no_overlap() {
        let committed: Vec<String> = vec!["alpha".into(), "beta".into()];
        let result = compute_new_words("gamma delta", false, &committed);
        assert_eq!(result, vec!["gamma", "delta"]);
    }

    #[test]
    fn test_compute_new_words_filler_removal_produces_empty() {
        let result = compute_new_words("um", true, &[]);
        // NOTE(review): nothing is asserted here, so this only checks that the
        // call does not panic; the expected output depends on
        // `remove_filler_words`, which is not visible from this file.
        let _ = result;
    }

    #[test]
    fn test_compute_new_words_whitespace_only() {
        let result = compute_new_words(" ", false, &[]);
        assert!(result.is_empty());
    }

    // ---- format_partial_event ----

    #[test]
    fn test_format_partial_event_empty() {
        assert!(format_partial_event(&[]).is_none());
    }

    #[test]
    fn test_format_partial_event_single_word() {
        let words: Vec<String> = vec!["hello".into()];
        let event = format_partial_event(&words).unwrap();
        match event {
            StreamingEvent::PartialText {
                text,
                replace_chars,
            } => {
                assert_eq!(text, " hello");
                assert_eq!(replace_chars, 0);
            }
            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
        }
    }

    #[test]
    fn test_format_partial_event_multiple_words() {
        let words: Vec<String> = vec!["hello".into(), "world".into()];
        let event = format_partial_event(&words).unwrap();
        match event {
            StreamingEvent::PartialText {
                text,
                replace_chars,
            } => {
                assert_eq!(text, " hello world");
                assert_eq!(replace_chars, 0);
            }
            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
        }
    }

    #[test]
    fn test_format_partial_event_leading_space() {
        let words: Vec<String> = vec!["x".into()];
        let event = format_partial_event(&words).unwrap();
        match event {
            StreamingEvent::PartialText { text, .. } => {
                assert!(text.starts_with(' '));
            }
            _ => panic!("unexpected variant"),
        }
    }

    // ---- compute_new_words + format_partial_event used together ----

    #[test]
    fn test_emission_pipeline_with_overlap() {
        let committed: Vec<String> = vec!["the".into(), "quick".into()];
        let new_words = compute_new_words("the quick brown fox", false, &committed);
        assert_eq!(new_words, vec!["brown", "fox"]);
        let event = format_partial_event(&new_words).unwrap();
        match event {
            StreamingEvent::PartialText {
                text,
                replace_chars,
            } => {
                assert_eq!(text, " brown fox");
                assert_eq!(replace_chars, 0);
            }
            StreamingEvent::SpeechDetected => panic!("unexpected variant"),
        }
    }

    #[test]
    fn test_emission_pipeline_no_novelty() {
        let committed: Vec<String> = vec!["hello".into(), "world".into()];
        let new_words = compute_new_words("hello world", false, &committed);
        assert!(new_words.is_empty());
        assert!(format_partial_event(&new_words).is_none());
    }
}