piper_phoneme_streaming/lang_detect.rs
1use std::collections::VecDeque;
2
3use crate::semantic::Language;
4use crate::text_expand::ExpandUnit;
5
/// Maximum number of recent words kept in the sliding context window.
const CONTEXT_WINDOW_SIZE: usize = 5;

/// Fast-path threshold: one word detected as a different language with at
/// least this confidence switches the language immediately and restarts the
/// context window from that word.
const SINGLE_WORD_SWITCH_THRESHOLD: f64 = 0.80;

/// Slow-path threshold: the joined context window must be detected as a
/// different language with at least this confidence before the language
/// switches. 0.70 = 0.5 base + 0.20 hysteresis.
const CONTEXT_SWITCH_THRESHOLD: f64 = 0.70;
13
14/// Trait for language detection backends.
15///
16/// Implementations receive a context string and return the most likely language
17/// plus its confidence score, or `None` if detection fails.
18pub(crate) trait LanguageDetector: Send + Sync {
19 fn detect(&self, context: &str) -> Option<(Language, f64)>;
20}
21
22/// Streaming language detector that wraps a [`LanguageDetector`] backend.
23///
24/// Takes [`ExpandUnit`] values one at a time and returns the detected
25/// [`Language`] for each:
26///
27/// - `Word` units trigger detection via a sliding context window with
28/// hysteresis — language only switches when confidence exceeds 0.5 + 0.20.
29/// - `Number` and `Mark` units inherit the current language without detection.
30/// - Sentence boundaries (`.`, `?`, `!`) should be signalled via
31/// [`reset_context`](Self::reset_context) to clear the sliding window.
32pub struct StreamingLanguageDetector {
33 detector: Box<dyn LanguageDetector>,
34 current_language: Language,
35 context_window: VecDeque<String>,
36}
37
38impl StreamingLanguageDetector {
39 pub(crate) fn new(default_language: Language, detector: Box<dyn LanguageDetector>) -> Self {
40 Self {
41 detector,
42 current_language: default_language,
43 context_window: VecDeque::new(),
44 }
45 }
46
47 /// Build a detector backed by the `lingua` library.
48 ///
49 /// `languages` must contain at least two languages; `default_language` is
50 /// the starting assumption before any text is seen.
51 ///
52 pub fn with_lingua(languages: &[Language], default_language: Language) -> Self {
53 Self::new(default_language, Box::new(LinguaDetector::new(languages)))
54 }
55
56 /// Push one expand unit and get back the language for that unit.
57 ///
58 /// Words run through the detection algorithm; numbers and marks inherit
59 /// the current language without running detection.
60 pub fn push(&mut self, unit: &ExpandUnit) -> Language {
61 match unit {
62 ExpandUnit::Word(word) => self.detect_for_word(word),
63 ExpandUnit::Number(_) | ExpandUnit::Mark(_) => self.current_language,
64 }
65 }
66
67 /// Clear the context window on sentence boundaries (`.`, `?`, `!`).
68 pub fn reset_context(&mut self) {
69 self.context_window.clear();
70 }
71
72 fn detect_for_word(&mut self, word: &str) -> Language {
73 // Fast path: a single unambiguous word with high confidence triggers an
74 // immediate switch and resets the context window so the new language is
75 // not dragged back by old context.
76 if let Some((word_lang, word_conf)) = self.detector.detect(word)
77 && word_lang != self.current_language
78 && word_conf >= SINGLE_WORD_SWITCH_THRESHOLD
79 {
80 self.current_language = word_lang;
81 self.context_window.clear();
82 self.context_window.push_back(word.to_string());
83 return self.current_language;
84 }
85
86 // Slow path: accumulate a sliding context window and switch only when
87 // the aggregated confidence exceeds the hysteresis threshold.
88 self.context_window.push_back(word.to_string());
89 if self.context_window.len() > CONTEXT_WINDOW_SIZE {
90 self.context_window.pop_front();
91 }
92
93 let context: String = self
94 .context_window
95 .iter()
96 .map(String::as_str)
97 .collect::<Vec<_>>()
98 .join(" ");
99
100 if let Some((ctx_lang, ctx_conf)) = self.detector.detect(&context)
101 && ctx_lang != self.current_language
102 && ctx_conf >= CONTEXT_SWITCH_THRESHOLD
103 {
104 self.current_language = ctx_lang;
105 }
106
107 self.current_language
108 }
109}
110
111// ---------------------------------------------------------------------------
112// lingua backend
113// ---------------------------------------------------------------------------
114
115struct LinguaDetector {
116 detector: lingua::LanguageDetector,
117}
118
119impl LinguaDetector {
120 fn new(languages: &[Language]) -> Self {
121 let lingua_langs: Vec<lingua::Language> = languages
122 .iter()
123 .map(|l| match l {
124 Language::English => lingua::Language::English,
125 Language::Vietnamese => lingua::Language::Vietnamese,
126 })
127 .collect();
128 let detector = lingua::LanguageDetectorBuilder::from_languages(&lingua_langs)
129 .with_minimum_relative_distance(0.25)
130 .build();
131 Self { detector }
132 }
133}
134
135impl LanguageDetector for LinguaDetector {
136 fn detect(&self, context: &str) -> Option<(Language, f64)> {
137 let confidences = self.detector.compute_language_confidence_values(context);
138 confidences.first().map(|(lingua_lang, confidence)| {
139 let lang = match lingua_lang {
140 lingua::Language::English => Language::English,
141 lingua::Language::Vietnamese => Language::Vietnamese,
142 };
143 (lang, *confidence)
144 })
145 }
146}
147
148// ---------------------------------------------------------------------------
149// Tests
150//
151// Each test case is: (preferred_language, &[(word, expected_language)])
152// Every word is pushed as ExpandUnit::Word; the output language is asserted
153// per word so failures point to the exact token where behavior diverges.
154// ---------------------------------------------------------------------------
155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::text_expand::ExpandUnit;
    use std::sync::{Arc, Mutex};
    use Language::{English as EN, Vietnamese as VI};

    /// Detector backed by the real `lingua` models for English + Vietnamese.
    fn det(default: Language) -> StreamingLanguageDetector {
        StreamingLanguageDetector::with_lingua(&[EN, VI], default)
    }

    /// Wraps `text` as an `ExpandUnit::Word`.
    fn word_unit(text: &str) -> ExpandUnit {
        ExpandUnit::Word(text.to_string())
    }

    /// Pushes `words` without asserting anything, to build up context.
    fn prime(d: &mut StreamingLanguageDetector, words: &[&str]) {
        for w in words {
            d.push(&word_unit(w));
        }
    }

    /// Core helper: push each `(word, expected_language)` pair and assert per step.
    fn check(preferred: Language, steps: &[(&str, Language)]) {
        let mut d = det(preferred);
        for (word, expected) in steps {
            let got = d.push(&word_unit(word));
            assert_eq!(
                got, *expected,
                "preferred={preferred:?} word={word:?} expected={expected:?} got={got:?}"
            );
        }
    }

    // -------------------------------------------------------------------------
    // Pure-language — window fills with one language, never switches
    // -------------------------------------------------------------------------

    #[test]
    fn case_pure_english() {
        check(
            EN,
            &[
                ("the", EN),   // window: [the]
                ("quick", EN), // window: [the, quick]
                ("brown", EN), // window: [the, quick, brown]
                ("fox", EN),   // window: [the, quick, brown, fox]
                ("jumps", EN), // window: [the, quick, brown, fox, jumps]
            ],
        );
    }

    #[test]
    fn case_pure_vietnamese() {
        check(
            VI,
            &[
                ("xin", VI),  // window: [xin]
                ("chào", VI), // window: [xin, chào]
                ("bạn", VI),  // window: [xin, chào, bạn]
                ("tên", VI),  // window: [xin, chào, bạn, tên]
                ("là", VI),   // window: [xin, chào, bạn, tên, là]
            ],
        );
    }

    // -------------------------------------------------------------------------
    // EN → VI transition
    //
    // "chào" has single-word VI confidence = 1.0 ≥ 0.80 → fast switch at the
    // first clear Vietnamese word. Context resets; VI stable thereafter.
    // -------------------------------------------------------------------------

    #[test]
    fn case_en_to_vi_transition() {
        check(
            EN,
            &[
                // ── build EN context ──
                ("the", EN),
                ("quick", EN),
                ("brown", EN),
                ("fox", EN),
                ("jumps", EN),
                // ── first VI word: high single-word confidence → fast switch ──
                ("chào", VI), // fast path: VI 1.0 ≥ 0.80 → switch; context reset to [chào]
                // ── settled in VI ──
                ("bạn", VI),
                ("tôi", VI),
                ("muốn", VI),
                ("học", VI),
            ],
        );
    }

    // -------------------------------------------------------------------------
    // VI → EN transition
    //
    // "the" has single-word EN confidence ≥ 0.80 → fast switch immediately.
    // Context resets to ["the"]; EN stable for all following EN words.
    // -------------------------------------------------------------------------

    #[test]
    fn case_vi_to_en_transition() {
        check(
            VI,
            &[
                // ── build VI context ──
                ("xin", VI),
                ("chào", VI),
                ("bạn", VI),
                ("tên", VI),
                ("là", VI),
                // ── first EN word: fast switch ──
                ("the", EN), // fast path: EN ≥ 0.80 → switch; context reset to [the]
                // ── settled in EN ──
                ("quick", EN),
                ("brown", EN),
                ("fox", EN),
                ("jumps", EN),
            ],
        );
    }

    // -------------------------------------------------------------------------
    // Reset, then the next language dominates immediately
    //
    // End-to-end check with the real models. Note that "the" is confident
    // enough to fast-switch even against a full VI window (see
    // `case_vi_to_en_transition`), so this case alone does not prove that
    // `reset_context` empties the window; `scripted_reset_context_clears_window`
    // pins that behaviour with a deterministic backend.
    // -------------------------------------------------------------------------

    #[test]
    fn case_reset_vi_context_then_en() {
        let mut d = det(VI);
        prime(&mut d, &["xin", "chào", "bạn"]);
        d.reset_context();

        let steps = [
            ("the", EN), // window: [the] — triggers switch on first word
            ("quick", EN),
            ("brown", EN),
            ("fox", EN),
            ("jumps", EN),
        ];
        for (word, expected) in &steps {
            let got = d.push(&word_unit(word));
            assert_eq!(
                got, *expected,
                "after reset word={word:?} expected={expected:?} got={got:?}"
            );
        }
    }

    // -------------------------------------------------------------------------
    // Numbers and marks — always inherit current language, never enter window
    // -------------------------------------------------------------------------

    #[test]
    fn case_number_inherits_en() {
        let mut d = det(EN);
        assert_eq!(d.push(&ExpandUnit::Number("42".into())), EN);
        assert_eq!(d.push(&ExpandUnit::Number("0".into())), EN);
    }

    #[test]
    fn case_number_inherits_vi() {
        let mut d = det(VI);
        assert_eq!(d.push(&ExpandUnit::Number("100".into())), VI);
        assert_eq!(d.push(&ExpandUnit::Number("1000".into())), VI);
    }

    #[test]
    fn case_mark_inherits_en() {
        let mut d = det(EN);
        assert_eq!(d.push(&ExpandUnit::Mark(' ')), EN);
        assert_eq!(d.push(&ExpandUnit::Mark(',')), EN);
        assert_eq!(d.push(&ExpandUnit::Mark('.')), EN);
    }

    #[test]
    fn case_mark_inherits_vi() {
        let mut d = det(VI);
        assert_eq!(d.push(&ExpandUnit::Mark(' ')), VI);
        assert_eq!(d.push(&ExpandUnit::Mark(',')), VI);
    }

    /// Numbers do not enter the context window; they inherit the current language.
    /// With fast-path switching, "chào" switches to VI immediately.
    #[test]
    fn case_number_does_not_affect_transition_timing() {
        let mut d = det(EN);
        prime(&mut d, &["the", "quick", "brown", "fox", "jumps"]);
        assert_eq!(d.push(&word_unit("chào")), VI); // fast path: VI 1.0 → switch
        assert_eq!(d.push(&ExpandUnit::Number("100".into())), VI); // inherits VI
        assert_eq!(d.push(&word_unit("bạn")), VI); // still VI
        assert_eq!(d.push(&ExpandUnit::Number("7".into())), VI); // inherits VI
    }

    /// Marks behave identically — no window effect.
    #[test]
    fn case_mark_does_not_affect_transition_timing() {
        let mut d = det(EN);
        prime(&mut d, &["the", "quick", "brown", "fox", "jumps"]);
        assert_eq!(d.push(&word_unit("chào")), VI); // fast path: VI 1.0 → switch
        assert_eq!(d.push(&ExpandUnit::Mark(',')), VI); // inherits VI
        assert_eq!(d.push(&word_unit("bạn")), VI); // still VI
    }

    // -------------------------------------------------------------------------
    // Mixed sentences — fast path switches on clear single-word signals
    //
    // "trong tiếng anh hello world có nghĩa là xin chào"
    // = "in English, 'hello world' means 'xin chào'"
    //
    // "hello" (EN 0.886) and "world" (EN 0.851) both exceed the fast-path
    // threshold → immediate EN switch. "có" exceeds the VI threshold →
    // fast switch back to VI.
    // -------------------------------------------------------------------------

    #[test]
    fn case_vi_sentence_with_embedded_en_words() {
        check(
            VI,
            &[
                // ── pure VI context ──
                ("trong", VI),
                ("tiếng", VI),
                ("anh", VI),
                // ── embedded English words — fast-path switches to EN ──
                ("hello", EN), // fast path: EN 0.886 ≥ 0.80 → switch; context reset to [hello]
                ("world", EN), // stays EN (EN 0.851 ≥ 0.80 but already EN)
                // ── back to Vietnamese — fast-path switches back ──
                ("có", VI), // fast path: VI ≥ 0.80 → switch back
                ("nghĩa", VI),
                ("là", VI),
                ("xin", VI),
                ("chào", VI),
            ],
        );
    }

    /// A single Vietnamese word with high single-word confidence fast-switches
    /// out of EN, and the next clear English word fast-switches back.
    #[test]
    fn case_en_sentence_with_single_embedded_vi_word() {
        check(
            EN,
            &[
                ("the", EN),
                ("quick", EN),
                ("brown", EN),
                ("fox", EN),
                // ── single VI word: VI 1.0 ≥ 0.80 → fast switch ──
                ("chào", VI), // fast path: VI 1.0 → switch; context reset to [chào]
                // ── next clear EN word fast-switches back ──
                ("jumps", EN), // fast path: EN ≥ 0.80 → switch back
                ("over", EN),
                ("lazy", EN),
                ("dog", EN),
                ("today", EN),
            ],
        );
    }

    // -------------------------------------------------------------------------
    // Numbers in mixed-language context
    // -------------------------------------------------------------------------

    /// "giá 100 đồng" — number throughout inherits VI; reset then switches EN.
    #[test]
    fn case_number_in_vi_then_reset_to_en() {
        let mut d = det(VI);
        prime(&mut d, &["giá", "tiền", "là"]);
        assert_eq!(d.push(&ExpandUnit::Number("100".into())), VI);
        d.reset_context();
        assert_eq!(d.push(&word_unit("the")), EN); // immediate switch after reset
        assert_eq!(d.push(&word_unit("price")), EN);
    }

    /// Number during the EN→VI transition: with fast-path, "chào" switches
    /// immediately and the number inherits VI right away.
    #[test]
    fn case_number_tracks_language_through_en_to_vi_transition() {
        let mut d = det(EN);
        prime(&mut d, &["the", "quick", "brown", "fox", "jumps"]);
        assert_eq!(d.push(&word_unit("chào")), VI); // fast path: VI 1.0 → switch
        assert_eq!(d.push(&ExpandUnit::Number("42".into())), VI); // inherits VI immediately
        assert_eq!(d.push(&word_unit("bạn")), VI); // still VI
        assert_eq!(d.push(&ExpandUnit::Number("7".into())), VI); // inherits VI
    }

    // -------------------------------------------------------------------------
    // Scripted backend — deterministic coverage of the slow path, the window
    // bound and `reset_context`, none of which the model-based cases above
    // exercise (every switch there goes through the fast path).
    //
    // Scoring: a word starting with `V`/`v` is a strong/weak Vietnamese vote
    // (+5/+1), `E`/`e` a strong/weak English vote (−5/−1), anything else is
    // neutral. Confidence = min(1.0, 0.5 + 0.12·|sum|), so one weak word gives
    // 0.62 (below both thresholds), two agreeing weak words give 0.74 (slow
    // path only) and one strong word gives 1.0 (fast path).
    // -------------------------------------------------------------------------

    struct Scripted {
        seen: Arc<Mutex<Vec<String>>>,
    }

    impl LanguageDetector for Scripted {
        fn detect(&self, context: &str) -> Option<(Language, f64)> {
            self.seen.lock().unwrap().push(context.to_string());
            let sum: i32 = context
                .split(' ')
                .map(|w| match w.chars().next() {
                    Some('V') => 5,
                    Some('v') => 1,
                    Some('E') => -5,
                    Some('e') => -1,
                    _ => 0,
                })
                .sum();
            let lang = if sum > 0 { VI } else { EN };
            Some((lang, (0.5 + 0.12 * f64::from(sum.abs())).min(1.0)))
        }
    }

    /// Detector on the scripted backend plus a handle to every context string
    /// the backend was asked about, in call order.
    fn scripted(default: Language) -> (StreamingLanguageDetector, Arc<Mutex<Vec<String>>>) {
        let seen = Arc::new(Mutex::new(Vec::new()));
        let backend = Scripted { seen: Arc::clone(&seen) };
        (StreamingLanguageDetector::new(default, Box::new(backend)), seen)
    }

    #[test]
    fn scripted_slow_path_switches_on_accumulated_context() {
        let (mut d, _seen) = scripted(EN);
        assert_eq!(d.push(&word_unit("v1")), EN); // 0.62 — below both thresholds
        assert_eq!(d.push(&word_unit("v2")), VI); // "v1 v2" → 0.74 ≥ 0.70
    }

    #[test]
    fn scripted_reset_context_clears_window() {
        let (mut d, seen) = scripted(EN);
        assert_eq!(d.push(&word_unit("v1")), EN);
        d.reset_context();
        // Without the reset, "v1 v2" would reach 0.74 and switch to VI.
        assert_eq!(d.push(&word_unit("v2")), EN);
        assert_eq!(seen.lock().unwrap().last().map(String::as_str), Some("v2"));
    }

    #[test]
    fn scripted_fast_path_restarts_window() {
        let (mut d, seen) = scripted(EN);
        prime(&mut d, &["e1", "e2"]);
        assert_eq!(d.push(&word_unit("V1")), VI); // strong word → fast switch
        assert_eq!(d.push(&word_unit("n1")), VI); // neutral word keeps VI
        // The old English words are gone from the context.
        assert_eq!(seen.lock().unwrap().last().map(String::as_str), Some("V1 n1"));
    }

    #[test]
    fn scripted_window_is_bounded() {
        let (mut d, seen) = scripted(EN);
        let words: Vec<String> = (0..=CONTEXT_WINDOW_SIZE).map(|i| format!("n{i}")).collect();
        for w in &words {
            assert_eq!(d.push(&word_unit(w)), EN);
        }
        // One word more than the window holds: the oldest has been evicted.
        let last = seen.lock().unwrap().last().cloned();
        assert_eq!(last, Some(words[1..].join(" ")));
    }
}