Skip to main content

piper_phoneme_streaming/g2p/
streaming.rs

1use std::collections::HashMap;
2use std::sync::Arc;
3
4use crate::embedded_data::materialized_data_dir;
5use crate::error::Result;
6use crate::lang_detect::StreamingLanguageDetector;
7use crate::phoneme::PhonemeData;
8use crate::{
9    G2pToken, Language, SentenceUnit, StreamingSentencePhonemeUpgrade,
10    StreamingSentencePhonemeUpgradeSession, TextExpand, TextUnit, WordPhonemizer,
11};
12
13/// A stateful session for streaming grapheme-to-phoneme conversion.
14///
15/// This session holds the partial text expansions and sentence syntax state
16/// between character pushes. It must be created via [`StreamingG2P::new_session`].
17pub struct StreamingG2pSession {
18    expander: TextExpand,
19    session: StreamingSentencePhonemeUpgradeSession,
20}
21
22/// The core engine for streaming Text-to-Phoneme (G2P) conversion.
23///
24/// `StreamingG2P` processes text incrementally, expanding abbreviations/numbers
25/// and resolving phonemes dynamically. It supports multi-language text using
26/// internal heuristics.
27pub struct StreamingG2P {
28    phonemizers: HashMap<Language, WordPhonemizer>,
29    stream_sentence_upgrade: StreamingSentencePhonemeUpgrade,
30    default_language: Language,
31    languages: Vec<Language>,
32}
33
34impl StreamingG2P {
35    /// Constructs a single-language streaming G2P engine.
36    pub fn new(lang: Language) -> Result<Self> {
37        Self::with_languages(&[lang], lang)
38    }
39
40    /// Constructs a multi-language streaming G2P engine.
41    ///
42    /// It intelligently detects the language of the incoming stream from the provided
43    /// `languages` list. If detection fails or is ambiguous, it falls back to the
44    /// `default_language`.
45    pub fn with_languages(languages: &[Language], default_language: Language) -> Result<Self> {
46        let data_dir = materialized_data_dir()?;
47        let phdata = Arc::new(PhonemeData::load(data_dir)?);
48
49        let mut phonemizers = HashMap::new();
50        for &lang in languages {
51            phonemizers.insert(
52                lang,
53                WordPhonemizer::new_with_data(lang, Arc::clone(&phdata))?,
54            );
55        }
56        Ok(Self {
57            phonemizers,
58            stream_sentence_upgrade: StreamingSentencePhonemeUpgrade::new(default_language)?,
59            default_language,
60            languages: languages.to_vec(),
61        })
62    }
63
64    /// Creates a new isolated phonemization session.
65    ///
66    /// The session must be mutated with `push_text` and finalized with `finish`.
67    pub fn new_session(&self) -> StreamingG2pSession {
68        let expander = if self.languages.len() == 1 {
69            TextExpand::with_language(self.default_language)
70        } else {
71            let detector =
72                StreamingLanguageDetector::with_lingua(&self.languages, self.default_language);
73            TextExpand::with_detector(&self.languages, self.default_language, detector)
74        };
75
76        StreamingG2pSession {
77            session: self.stream_sentence_upgrade.new_session(),
78            expander,
79        }
80    }
81
82    /// Pushes a chunk of text into the streaming g2p processor.
83    ///
84    /// Yields any phonemes that have been fully resolved (typically at word boundaries).
85    /// Partial words or unresolved expansions are buffered internally within the `session`.
86    pub fn push_text(
87        &self,
88        session: &mut StreamingG2pSession,
89        text: &str,
90    ) -> Result<Vec<G2pToken>> {
91        let mut outs = Vec::new();
92        for ch in text.chars() {
93            if let Some((unit, lang)) = session.expander.push(ch) {
94                let text_unit = TextUnit::from_expand_unit(unit, lang);
95                let phonemizer = self.phonemizer_for(lang);
96                let su = SentenceUnit::from_text_unit(text_unit, phonemizer)?;
97                outs.extend(session.session.push(su));
98            }
99        }
100        Ok(outs)
101    }
102
103    /// Finalizes the stream and flushes any pending text buffered in the session.
104    ///
105    /// This should be called exactly once at the end of the text stream to guarantee
106    /// the final phonemes are emitted.
107    pub fn finish(&self, session: &mut StreamingG2pSession) -> Result<Vec<G2pToken>> {
108        let mut outs = Vec::new();
109        while let Some((unit, lang)) = session.expander.finish() {
110            let text_unit = TextUnit::from_expand_unit(unit, lang);
111            let phonemizer = self.phonemizer_for(lang);
112            let su = SentenceUnit::from_text_unit(text_unit, phonemizer)?;
113            outs.extend(session.session.push(su));
114        }
115        outs.extend(session.session.finish());
116        Ok(outs)
117    }
118
119    fn phonemizer_for(&self, lang: Language) -> &WordPhonemizer {
120        self.phonemizers
121            .get(&lang)
122            .or_else(|| self.phonemizers.get(&self.default_language))
123            .expect("at least the default language phonemizer must be present")
124    }
125}