1use std::{
2 fmt::{Debug, Display},
3 str::FromStr,
4};
5
6use once_cell::sync::Lazy;
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9
10use crate::JPreprocessResult;
11
12use super::pos::POS;
13
14#[derive(Debug, thiserror::Error, PartialEq, Eq)]
15pub enum AccentRuleParseError {
16 #[error("Unknown part of speech (POS) {0}")]
17 UnknownPOS(String),
18 #[error("Unrecognized syntax {0}")]
19 SyntaxError(String),
20}
21
22static PARSE_REGEX: Lazy<Regex> = Lazy::new(|| {
23 Regex::new("^((?P<pos>名詞|形容詞|助詞|特殊助動詞|動詞)%)?(?P<accent>[FC][1-5]|P1|P2|P6|P14)?(@(?P<add>[-0-9]+))?$")
24 .expect("Failed to compile accent rule regex")
25});
26
27#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
28pub enum AccentType {
29 F1,
30 F2,
31 F3,
32 F4,
33 F5,
34 C1,
36 C2,
37 C3,
38 C4,
39 C5,
40 P1,
41 P2,
42 P6,
44 P14,
46 None,
47}
48
49impl FromStr for AccentType {
50 type Err = ();
51 fn from_str(s: &str) -> Result<Self, Self::Err> {
52 match s {
53 "F1" => Ok(Self::F1),
54 "F2" => Ok(Self::F2),
55 "F3" => Ok(Self::F3),
56 "F4" => Ok(Self::F4),
57 "F5" => Ok(Self::F5),
58 "C1" => Ok(Self::C1),
59 "C2" => Ok(Self::C2),
60 "C3" => Ok(Self::C3),
61 "C4" => Ok(Self::C4),
62 "C5" => Ok(Self::C5),
63 "P1" => Ok(Self::P1),
64 "P2" => Ok(Self::P2),
65 "P6" => Ok(Self::P6),
66 "P14" => Ok(Self::P14),
67 "" | "*" => Ok(Self::None),
68 _ => Err(()),
69 }
70 }
71}
72
73impl Display for AccentType {
74 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75 f.write_str(match &self {
76 Self::F1 => "F1",
77 Self::F2 => "F2",
78 Self::F3 => "F3",
79 Self::F4 => "F4",
80 Self::F5 => "F5",
81 Self::C1 => "C1",
82 Self::C2 => "C2",
83 Self::C3 => "C3",
84 Self::C4 => "C4",
85 Self::C5 => "C5",
86 Self::P1 => "P1",
87 Self::P2 => "P2",
88 Self::P6 => "P6",
89 Self::P14 => "P14",
90 Self::None => "*",
91 })
92 }
93}
94
95#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
97pub struct ChainRule {
98 pub accent_type: AccentType,
99 pub add_type: isize,
100}
101
102impl ChainRule {
103 pub fn new(accent_type: AccentType, add_type: isize) -> Self {
104 Self {
105 accent_type,
106 add_type,
107 }
108 }
109}
110
111impl Display for ChainRule {
112 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113 if self.add_type == 0 {
114 write!(f, "{}", self.accent_type)
115 } else {
116 write!(f, "{}@{}", self.accent_type, self.add_type)
117 }
118 }
119}
120
121#[derive(Debug)]
122pub enum POSMatch {
123 Default,
124 Doushi,
125 Joshi,
126 Keiyoushi,
127 Meishi,
128}
129
130impl FromStr for POSMatch {
131 type Err = AccentRuleParseError;
132 fn from_str(s: &str) -> Result<Self, Self::Err> {
133 match s {
134 "動詞" => Ok(Self::Doushi),
135 "助詞" => Ok(Self::Joshi),
136 "形容詞" => Ok(Self::Keiyoushi),
137 "名詞" => Ok(Self::Meishi),
138 _ => Err(AccentRuleParseError::UnknownPOS(s.to_string())),
139 }
140 }
141}
142
143#[derive(Clone, PartialEq, Serialize, Deserialize, Debug, Default)]
144pub struct ChainRules {
145 pub default: Option<ChainRule>,
146 pub doushi: Option<ChainRule>,
147 pub joshi: Option<ChainRule>,
148 pub keiyoushi: Option<ChainRule>,
149 pub meishi: Option<ChainRule>,
150}
151
152impl ChainRules {
153 pub fn new(rules: &str) -> Self {
154 let mut result = Self::default();
155 if rules == "*" {
156 return result;
157 }
158
159 for rule in rules.split('/') {
160 if result.push_rule(rule).is_err() {
161 eprintln!("WARN: accent rule parsing has failed in {}. Skipped.", rule);
162 }
163 }
164 result
165 }
166
167 fn push_rule(&mut self, rule_str: &str) -> JPreprocessResult<()> {
168 let (pos, rule) = Self::parse_rule(rule_str)?;
169 match pos {
170 POSMatch::Doushi => self.doushi.replace(rule),
171 POSMatch::Joshi => self.joshi.replace(rule),
172 POSMatch::Keiyoushi => self.keiyoushi.replace(rule),
173 POSMatch::Meishi => self.meishi.replace(rule),
174 POSMatch::Default => self.default.replace(rule),
175 };
176 Ok(())
177 }
178
179 fn parse_rule(rule: &str) -> JPreprocessResult<(POSMatch, ChainRule)> {
180 let capture = PARSE_REGEX
181 .captures(rule)
182 .ok_or_else(|| AccentRuleParseError::SyntaxError(rule.to_string()))?;
183
184 let pos = {
185 if let Some(pos) = capture.name("pos") {
186 POSMatch::from_str(pos.as_str())?
187 } else {
188 POSMatch::Default
189 }
190 };
191
192 let accent_type = if let Some(matched) = capture.name("accent") {
193 AccentType::from_str(matched.as_str()).unwrap()
195 } else {
196 AccentType::None
197 };
198
199 let add_type = capture
200 .name("add")
201 .and_then(|matched| matched.as_str().parse().ok())
202 .unwrap_or(0);
203
204 Ok((pos, ChainRule::new(accent_type, add_type)))
205 }
206
207 pub fn get_rule(&self, pos: &POS) -> Option<&ChainRule> {
208 let rule = match pos {
209 POS::Doushi(_) => self.doushi.as_ref(),
210 POS::Joshi(_) => self.joshi.as_ref(),
211 POS::Keiyoushi(_) => self.keiyoushi.as_ref(),
212 POS::Meishi(_) => self.meishi.as_ref(),
213 _ => None,
214 };
215 rule.or(self.default.as_ref())
216 }
217
218 pub fn unset(&mut self) {
219 self.default = None;
220 self.doushi = None;
221 self.joshi = None;
222 self.keiyoushi = None;
223 self.meishi = None;
224 }
225}
226
227impl Display for ChainRules {
228 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
229 let text = &[
230 ("", &self.default),
231 ("動詞", &self.doushi),
232 ("助詞", &self.joshi),
233 ("形容詞", &self.keiyoushi),
234 ("名詞", &self.meishi),
235 ]
236 .iter()
237 .filter(|(_name, chainrule_option)| chainrule_option.is_some())
238 .fold(String::new(), |acc, (pos, chainrule_option)| {
239 let chainrule = chainrule_option.as_ref().unwrap();
240 let delim = if acc.is_empty() { "" } else { "/" };
241 if pos.is_empty() {
242 format!("{}{}{}", acc, delim, chainrule)
243 } else {
244 format!("{}{}{}%{}", acc, delim, pos, chainrule)
245 }
246 });
247 if text.is_empty() {
248 f.write_str("*")
249 } else {
250 f.write_str(text)
251 }
252 }
253}
254
#[cfg(test)]
mod tests {
    use crate::{accent_rule::AccentType, pos::*};

    use super::ChainRules;

    /// A bare accent type fills the default slot and applies to any POS.
    #[test]
    fn simple_rule() {
        let rules = ChainRules::new("C3");
        let rule = rules.get_rule(&POS::Others).unwrap();
        assert_eq!(rule.accent_type, AccentType::C3);
        assert_eq!(rule.add_type, 0);

        assert_eq!(rules.to_string(), "C3");
    }

    /// A rule with POS prefix, accent type and `@` value round-trips.
    #[test]
    fn single_complex_rule() {
        let rules = ChainRules::new("形容詞%F2@-1");
        let rule = rules.get_rule(&POS::Keiyoushi(Keiyoushi::Jiritsu)).unwrap();
        assert_eq!(rule.accent_type, AccentType::F2);
        assert_eq!(rule.add_type, -1);

        assert_eq!(rules.to_string(), "形容詞%F2@-1");
    }

    /// Several `/`-separated rules fill separate slots; output uses the fixed
    /// slot order and drops a zero `@` value.
    #[test]
    fn multiple_complex_rule() {
        let rules = ChainRules::new("形容詞%F2@0/動詞%F5");
        let rule1 = rules.get_rule(&POS::Keiyoushi(Keiyoushi::Jiritsu)).unwrap();
        assert_eq!(rule1.accent_type, AccentType::F2);
        assert_eq!(rule1.add_type, 0);
        let rule2 = rules.get_rule(&POS::Doushi(Doushi::Jiritsu)).unwrap();
        assert_eq!(rule2.accent_type, AccentType::F5);
        assert_eq!(rule2.add_type, 0);

        assert_eq!(rules.to_string(), "動詞%F5/形容詞%F2");
    }

    /// A POS name outside the accepted set makes the rule fail to parse.
    #[test]
    fn reject_invalid_pos() {
        assert!(ChainRules::parse_rule("特殊助詞%F2@0").is_err());
    }

    /// A bare number lacks the `@` marker, so it is not a valid rule: it must
    /// be skipped without panicking and leave the rule set empty.
    #[test]
    fn add_type_only() {
        let rules = ChainRules::new("-1");
        assert!(rules.get_rule(&POS::Others).is_none());
        assert_eq!(rules.to_string(), "*");
    }

    /// The default rule is used for a POS without its own rule, even when it
    /// is listed after a POS-specific rule.
    #[test]
    fn default_rule_1() {
        let rules = ChainRules::new("形容詞%F2/F5");
        let rule1 = rules.get_rule(&POS::Keiyoushi(Keiyoushi::Jiritsu)).unwrap();
        assert_eq!(rule1.accent_type, AccentType::F2);
        let rule2 = rules.get_rule(&POS::Doushi(Doushi::Jiritsu)).unwrap();
        assert_eq!(rule2.accent_type, AccentType::F5);

        assert_eq!(rules.to_string(), "F5/形容詞%F2");
    }

    /// Same as `default_rule_1` with the default rule listed first.
    #[test]
    fn default_rule_2() {
        let rules = ChainRules::new("F5/形容詞%F2");
        let rule1 = rules.get_rule(&POS::Keiyoushi(Keiyoushi::Jiritsu)).unwrap();
        assert_eq!(rule1.accent_type, AccentType::F2);
        let rule2 = rules.get_rule(&POS::Doushi(Doushi::Jiritsu)).unwrap();
        assert_eq!(rule2.accent_type, AccentType::F5);

        assert_eq!(rules.to_string(), "F5/形容詞%F2");
    }

    /// `*` means "no rules": every slot stays empty and it prints as `*`.
    #[test]
    fn empty() {
        let rules = ChainRules::new("*");
        assert_eq!(rules.default, None);
        assert_eq!(rules.doushi, None);
        assert_eq!(rules.joshi, None);
        assert_eq!(rules.keiyoushi, None);
        assert_eq!(rules.meishi, None);

        assert_eq!(rules.to_string(), "*");
    }
}