jpreprocess_njd/
lib.rs

1mod contrib;
2mod node;
3mod open_jtalk;
4
5use jpreprocess_core::{word_entry::WordEntry, JPreprocessResult};
6use jpreprocess_dictionary::tokenizer::Token;
7use jpreprocess_window::{IterQuintMut, IterQuintMutTrait};
8
9pub use contrib::*;
10pub use node::*;
11pub use open_jtalk::*;
12
13#[derive(Clone, Debug, PartialEq)]
14pub struct NJD {
15    pub nodes: Vec<NJDNode>,
16}
17
18impl NJD {
19    pub fn remove_silent_node(&mut self) {
20        self.nodes.retain(|node| !node.get_pron().is_empty())
21    }
22
23    pub fn from_tokens<'a, T: Token>(
24        tokens: impl 'a + IntoIterator<Item = T>,
25    ) -> JPreprocessResult<Self> {
26        let mut nodes = Vec::new();
27        for mut token in tokens {
28            let (string, entry) = token.fetch()?;
29            nodes.extend(NJDNode::load(string, &entry));
30        }
31
32        Ok(Self { nodes })
33    }
34    pub fn from_strings(njd_features: Vec<String>) -> Self {
35        Self {
36            nodes: njd_features
37                .iter()
38                .flat_map(|feature| NJDNode::load_csv(feature))
39                .collect(),
40        }
41    }
42
43    pub fn preprocess(&mut self) {
44        use open_jtalk::*;
45
46        pronunciation::njd_set_pronunciation(self);
47        digit_sequence::njd_digit_sequence(self);
48        digit::njd_set_digit(self);
49        accent_phrase::njd_set_accent_phrase(self);
50        accent_type::njd_set_accent_type(self);
51        unvoiced_vowel::njd_set_unvoiced_vowel(self);
52        // long vowel estimator is deprecated
53        // long_vowel::njd_set_long_vowel(self);
54    }
55}
56
57impl<'a> FromIterator<(&'a str, &'a WordEntry)> for NJD {
58    fn from_iter<I: IntoIterator<Item = (&'a str, &'a WordEntry)>>(iter: I) -> Self {
59        let nodes = iter
60            .into_iter()
61            .flat_map(|(text, word_entry)| NJDNode::load(text, word_entry))
62            .collect();
63        Self { nodes }
64    }
65}
66impl<'a> FromIterator<&'a str> for NJD {
67    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
68        let nodes = iter.into_iter().flat_map(NJDNode::load_csv).collect();
69        Self { nodes }
70    }
71}
72
73impl IterQuintMutTrait for NJD {
74    type Item = NJDNode;
75    fn iter_quint_mut(&mut self) -> IterQuintMut<'_, Self::Item> {
76        IterQuintMut::new(&mut self.nodes)
77    }
78    fn iter_quint_mut_range(&mut self, start: usize, end: usize) -> IterQuintMut<'_, Self::Item> {
79        IterQuintMut::new(&mut self.nodes[start..end])
80    }
81}
82
83impl From<NJD> for Vec<String> {
84    fn from(njd: NJD) -> Self {
85        njd.nodes.into_iter().map(|node| node.to_string()).collect()
86    }
87}