kanji_number_parser/
lib.rs

1#[macro_use]
2extern crate failure;
3extern crate num_bigint;
4
5use num_bigint::{BigUint, ToBigUint};
6use num_traits::pow::Pow;
7use std::fmt;
8
9#[derive(Debug, Fail, PartialEq)]
10pub struct KanjiNumberParseError;
11impl fmt::Display for KanjiNumberParseError {
12    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
13        write!(f, "A kanji number parse error occured")
14    }
15}
16
/// Handles a plain digit character (零 through 九).
///
/// The digit that was pending in `$digit` is folded into the running
/// four-digit group `$group`, and `$value` becomes the new pending digit.
macro_rules! num {
    ($group:ident, $digit:ident, $value:expr) => {{
        $group = $group + $digit;
        $digit = $value;
    }};
}
23
/// Handles a small unit character (十, 百, 千).
///
/// The pending digit in `$digit` is multiplied by `$unit` and added to the
/// running four-digit group `$group`; a pending digit of zero is treated as
/// one, so a bare unit counts once. The pending digit is cleared afterwards.
macro_rules! base_disit {
    ($group:ident, $digit:ident, $unit:expr) => {{
        let multiplier = if $digit == 0 { 1 } else { $digit };
        $group += multiplier * $unit;
        $digit = 0;
    }};
}
/// Handles a large unit (万, 億, 兆, ...), i.e. a power of ten `10^$exp`.
///
/// The pending digit `$digit` is folded into the four-digit group `$group`,
/// the group is scaled by `10^$exp` and added to the big total `$total`, and
/// both small accumulators are reset to zero.
///
/// Relies on `to_biguint` and `pow` being resolvable at the call site, and
/// panics if `$group` cannot be converted by `to_biguint`.
macro_rules! pow_disit {
    ($total:ident, $group:ident, $digit:ident, $exp:expr) => {{
        $group += $digit;
        $digit = 0;
        let scale = 10.to_biguint().unwrap().pow($exp as usize);
        $total += $group.to_biguint().unwrap() * scale;
        $group = 0;
    }};
}
41
42
43/// 漢数字で書かれた文字列 `String` を `BigUInt` に変換する
44///
45/// # Examples
46///
47/// ```
48/// use num_bigint::{BigUint, ToBigUint};
49/// use kanji_number_parser::{parse, KanjiNumberParseError};
50///
51/// assert_eq!(
52///    parse(String::from("一億五千万")),
53///    Ok(150000000.to_biguint().unwrap()) as Result<BigUint, KanjiNumberParseError>
54/// );
55/// ```
56pub fn parse(s: String) -> Result<BigUint, KanjiNumberParseError> {
57    let mut buf = 0.to_biguint().unwrap();
58    let mut buf1 = 0;
59    let mut buf4 = 0;
60    let mut pre_c = '_';
61
62    for c in s.chars() {
63        match c {
64            '零' => num!(buf4, buf1, 0),
65            '一' => num!(buf4, buf1, 1),
66            '二' => num!(buf4, buf1, 2),
67            '三' => num!(buf4, buf1, 3),
68            '四' => num!(buf4, buf1, 4),
69            '五' => num!(buf4, buf1, 5),
70            '六' => num!(buf4, buf1, 6),
71            '七' => num!(buf4, buf1, 7),
72            '八' => num!(buf4, buf1, 8),
73            '九' => num!(buf4, buf1, 9),
74            '十' => base_disit!(buf4, buf1, 10),
75            '百' => base_disit!(buf4, buf1, 100),
76            '千' => base_disit!(buf4, buf1, 1000),
77            '万' => pow_disit!(buf, buf4, buf1, 4),
78            '億' => pow_disit!(buf, buf4, buf1, 8),
79            '兆' => pow_disit!(buf, buf4, buf1, 12),
80            '京' => pow_disit!(buf, buf4, buf1, 16),
81            '垓' => pow_disit!(buf, buf4, buf1, 20),
82            '𥝱' => pow_disit!(buf, buf4, buf1, 24),
83            '穣' => pow_disit!(buf, buf4, buf1, 28),
84            '溝' => pow_disit!(buf, buf4, buf1, 32),
85            '澗' => pow_disit!(buf, buf4, buf1, 36),
86            '正' => pow_disit!(buf, buf4, buf1, 40),
87            '載' => pow_disit!(buf, buf4, buf1, 44),
88            '極' => pow_disit!(buf, buf4, buf1, 48),
89            '恒' => {
90                if pre_c == '_' {
91                    pre_c = '恒';
92                } else {
93                    return Err(KanjiNumberParseError);
94                }
95            }
96            '河' => {
97                if pre_c == '恒' {
98                    pre_c = '河';
99                } else {
100                    return Err(KanjiNumberParseError);
101                }
102            }
103            '沙' => {
104                if pre_c == '河' {
105                    pre_c = '_';
106                    pow_disit!(buf, buf4, buf1, 52);
107                } else {
108                    return Err(KanjiNumberParseError);
109                }
110            }
111            '阿' => {
112                if pre_c == '_' {
113                    pre_c = '阿';
114                } else {
115                    return Err(KanjiNumberParseError);
116                }
117            }
118            '僧' => {
119                if pre_c == '阿' {
120                    pre_c = '僧';
121                } else {
122                    return Err(KanjiNumberParseError);
123                }
124            }
125            '祇' => {
126                if pre_c == '僧' {
127                    pre_c = '_';
128                    pow_disit!(buf, buf4, buf1, 56);
129                } else {
130                    return Err(KanjiNumberParseError);
131                }
132            }
133
134            '那' => {
135                if pre_c == '_' {
136                    pre_c = '那';
137                } else {
138                    return Err(KanjiNumberParseError);
139                }
140            }
141            '由' => {
142                if pre_c == '那' {
143                    pre_c = '由';
144                } else {
145                    return Err(KanjiNumberParseError);
146                }
147            }
148            '他' => {
149                if pre_c == '由' {
150                    pre_c = '_';
151                    pow_disit!(buf, buf4, buf1, 60);
152                } else {
153                    return Err(KanjiNumberParseError);
154                }
155            }
156
157            '不' => {
158                if pre_c == '_' {
159                    pre_c = '不';
160                } else {
161                    return Err(KanjiNumberParseError);
162                }
163            }
164            '可' => {
165                if pre_c == '不' {
166                    pre_c = '可';
167                } else {
168                    return Err(KanjiNumberParseError);
169                }
170            }
171            '思' => {
172                if pre_c == '可' {
173                    pre_c = '思';
174                } else {
175                    return Err(KanjiNumberParseError);
176                }
177            }
178            '議' => {
179                if pre_c == '思' {
180                    pre_c = '_';
181                    pow_disit!(buf, buf4, buf1, 64);
182                } else {
183                    return Err(KanjiNumberParseError);
184                }
185            }
186
187            '無' => {
188                if pre_c == '_' {
189                    pre_c = '無';
190                } else {
191                    return Err(KanjiNumberParseError);
192                }
193            }
194            '量' => {
195                if pre_c == '無' {
196                    pre_c = '量';
197                } else {
198                    return Err(KanjiNumberParseError);
199                }
200            }
201            '大' => {
202                if pre_c == '量' {
203                    pre_c = '大';
204                } else {
205                    return Err(KanjiNumberParseError);
206                }
207            }
208            '数' => {
209                if pre_c == '大' {
210                    pre_c = '_';
211                    pow_disit!(buf, buf4, buf1, 68);
212                } else {
213                    return Err(KanjiNumberParseError);
214                }
215            }
216            _ => return Err(KanjiNumberParseError),
217        };
218    }
219    buf4 += buf1;
220    if let Some(buf4) = buf4.to_biguint() {
221        buf += buf4;
222    }
223    Ok(buf)
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for running `parse` on a string literal.
    fn parsed(text: &str) -> Result<BigUint, KanjiNumberParseError> {
        parse(String::from(text))
    }

    /// Builds the expected value from big-endian base-100 digits, i.e. two
    /// decimal digits per entry.
    fn base100(digits: &[u8]) -> BigUint {
        BigUint::from_radix_be(digits, 100).unwrap()
    }

    #[test]
    fn parse_works() {
        // Values small enough to be written as ordinary integer literals.
        let small_cases: [(&str, u32); 6] = [
            ("零", 0),
            ("千百十", 1110),
            ("一千万", 10000000),
            ("四千三百二十一", 4321),
            ("五千三十", 5030),
            ("一億五千万", 150000000),
        ];
        for &(text, expected) in small_cases.iter() {
            assert_eq!(
                parsed(text),
                Ok(expected.to_biguint().unwrap()),
                "input: {}",
                text
            );
        }

        // The same kind of value, spelled out as base-100 digits.
        assert_eq!(parsed("一億五千万"), Ok(base100(&[1, 50, 0, 0, 0])));
        assert_eq!(parsed("一兆五千億"), Ok(base100(&[1, 50, 0, 0, 0, 0, 0])));

        // Every single-character large unit from 溝 down to 万.
        assert_eq!(
            parsed("五千六百七十八溝九千十二穣三千四百五十六𥝱七千八百九十垓千二百三十四京五千六百七十八兆九千十二億三千四百五十六万七千八百九十"),
            Ok(base100(&[
                56, 78, // 32:溝
                90, 12, // 28:穣
                34, 56, // 24:𥝱
                78, 90, // 20:垓
                12, 34, // 16:京
                56, 78, // 12:兆
                90, 12, //  8:億
                34, 56, //  4:万
                78, 90,
            ]))
        );

        // Every multi-character large unit, with nothing below 恒河沙.
        assert_eq!(
            parsed("十二無量大数三千四百五十六不可思議七千八百九十那由他千二百三十四阿僧祇五千六百七十八恒河沙"),
            Ok(base100(&[
                12, // 68:無量大数
                34, 56, // 64:不可思議
                78, 90, // 60:那由他
                12, 34, // 56:阿僧祇
                56, 78, // 52:恒河沙
                0, 0, // 48:極
                0, 0, // 44:載
                0, 0, // 40:正
                0, 0, // 36:澗
                0, 0, // 32:溝
                0, 0, // 28:穣
                0, 0, // 24:𥝱
                0, 0, // 20:垓
                0, 0, // 16:京
                0, 0, // 12:兆
                0, 0, //  8:億
                0, 0, //  4:万
                0, 0,
            ]))
        );

        // Text that is not a kanji number is rejected.
        assert_eq!(parsed("数ではない"), Err(KanjiNumberParseError));
    }
}