jpreprocess_core/
word_entry.rs

1use crate::{word_details::WordDetails, word_line::WordDetailsLine, JPreprocessResult};
2use serde::{Deserialize, Serialize};
3
4#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
5pub enum WordEntry {
6    Single(WordDetails),
7    Multiple(Vec<(String, WordDetails)>),
8}
9
10impl Default for WordEntry {
11    fn default() -> Self {
12        Self::Single(WordDetails::default())
13    }
14}
15
16impl WordEntry {
17    pub fn load(details: &[&str]) -> JPreprocessResult<Self> {
18        WordDetailsLine::from_strs(details).try_into()
19    }
20
21    pub fn get_with_string(&self, string: &str) -> Vec<(String, WordDetails)> {
22        match &self {
23            Self::Single(word_details) => vec![(string.to_string(), word_details.to_owned())],
24            Self::Multiple(word_details_vec) => {
25                let mut result = Vec::with_capacity(word_details_vec.len());
26                let mut len = 0;
27                for (i, (orig, word_details)) in word_details_vec.iter().enumerate() {
28                    if i + 1 < word_details_vec.len() {
29                        // not the last word
30                        result.push((orig.to_string(), word_details.to_owned()));
31                        len += orig.len();
32                    } else {
33                        // the last word
34                        result.push((string[len..].to_string(), word_details.to_owned()));
35                    }
36                }
37                result
38            }
39        }
40    }
41
42    pub fn to_str_vec(&self, orig: String) -> [String; 9] {
43        let mut line = WordDetailsLine::from(self);
44
45        if matches!(self, Self::Single(_)) {
46            line.orig = orig;
47        }
48
49        [
50            format!(
51                "{},{},{},{}",
52                line.pos, line.pos_group1, line.pos_group2, line.pos_group3
53            ),
54            line.ctype.to_string(),
55            line.cform.to_string(),
56            line.orig.to_string(),
57            line.read.to_string(),
58            line.pron.to_string(),
59            line.acc_morasize.to_string(),
60            line.chain_rule.to_string(),
61            line.chain_flag.to_string(),
62        ]
63    }
64}
65
66impl TryFrom<WordDetailsLine> for WordEntry {
67    type Error = crate::JPreprocessError;
68    fn try_from(value: WordDetailsLine) -> Result<Self, Self::Error> {
69        if value.orig.contains(':') {
70            let mut iter = value
71                .orig
72                .split(':')
73                .zip(value.read.split(':'))
74                .zip(value.pron.split(':'))
75                .zip(value.acc_morasize.split(':'))
76                .map(|(((orig, read), pron), acc_morasize)| (orig, read, pron, acc_morasize));
77
78            let mut word_details = Vec::new();
79
80            let (orig_base, base) = {
81                let (orig, read, pron, acc_morasize) = iter.next().unwrap();
82
83                let details = WordDetailsLine {
84                    orig: orig.to_string(),
85                    read: read.to_string(),
86                    pron: pron.to_string(),
87                    acc_morasize: acc_morasize.to_string(),
88                    ..value
89                };
90
91                (orig.to_string(), WordDetails::try_from(details)?)
92            };
93
94            word_details.push((orig_base, base.clone()));
95
96            for (orig, read, pron, acc_morasize) in iter {
97                let mut extended = base.clone();
98                extended.extend_splited(read, pron, acc_morasize)?;
99                word_details.push((orig.to_string(), extended))
100            }
101
102            Ok(Self::Multiple(word_details))
103        } else {
104            Ok(Self::Single(WordDetails::try_from(value)?))
105        }
106    }
107}
108
109impl From<&WordEntry> for WordDetailsLine {
110    fn from(value: &WordEntry) -> Self {
111        match value {
112            WordEntry::Single(details) => details.into(),
113            WordEntry::Multiple(details_vec) => {
114                details_vec.iter().skip(1).fold(
115                    {
116                        let first_elem = &details_vec[0];
117                        Self {
118                            orig: first_elem.0.to_owned(),
119                            ..(&first_elem.1).into()
120                        }
121                    },
122                    |acc, (orig, details)| {
123                        let v: Self = details.into();
124
125                        Self {
126                            orig: format!("{}:{}", acc.orig, orig),   // orig
127                            read: format!("{}:{}", acc.read, v.read), // read
128                            pron: format!("{}:{}", acc.pron, v.pron), // pron
129                            acc_morasize: format!("{}:{}", acc.acc_morasize, v.acc_morasize), // acc/mora_size
130                            ..acc
131                        }
132                    },
133                )
134            }
135        }
136    }
137}
138
#[cfg(test)]
mod tests {
    use crate::{cform::CForm, ctype::CType, pos::*, pron, word_entry::WordEntry};

    /// A row without `:` in its columns loads as one word and serializes back
    /// to the same columns.
    #[test]
    fn load_single_node() {
        let row = ".,名詞,接尾,助数詞,*,*,*,.,テン,テン,0/2,*,";
        let fields: Vec<&str> = row.split(',').collect();
        let surface = fields[0];

        let entry = WordEntry::load(&fields[1..]).unwrap();
        let words = entry.get_with_string(surface);
        assert_eq!(words.len(), 1);

        let (string, details) = &words[0];

        assert_eq!(string, ".");
        assert!(matches!(
            details.pos,
            POS::Meishi(Meishi::Setsubi(Setsubi::Josuushi))
        ));
        assert_eq!(details.ctype, CType::None);
        assert_eq!(details.cform, CForm::None);
        assert_eq!(details.read.as_ref().unwrap(), "テン");
        assert_eq!(details.pron, pron!([Te, N], 0));
        assert_eq!(details.chain_rule.get_rule(&POS::Filler), None);
        assert_eq!(details.chain_flag, None);

        // The trailing chain-flag column is left out of the comparison.
        let columns = entry.to_str_vec(surface.to_owned());
        assert_eq!(columns[0..8].join(","), fields[1..12].join(","));
    }

    /// A row with `:`-separated columns loads as two words and serializes back
    /// to the same columns.
    #[test]
    fn load_multiple_nodes() {
        let row = "あーあ,感動詞,*,*,*,*,*,あー:あ,アー:ア,アー:ア,1/2:1/1,C1,";
        let fields: Vec<&str> = row.split(',').collect();
        let surface = fields[0];

        let entry = WordEntry::load(&fields[1..]).unwrap();
        let words = entry.get_with_string(surface);
        assert_eq!(words.len(), 2);

        assert_eq!(words[0].0, "あー");
        assert_eq!(words[1].0, "あ");

        let first = &words[0].1;
        let second = &words[1].1;

        assert_eq!(&first.pron, &pron!([A, Long], 1));
        assert_eq!(&second.pron, &pron!([A], 1));

        // The trailing chain-flag column is left out of the comparison.
        let columns = entry.to_str_vec(surface.to_owned());
        assert_eq!(columns[0..8].join(","), fields[1..12].join(","));
    }
}
191}