1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
use crate::shared::{comments, decode_jis_kanji, decode_jis_radical};
use nom::{
bytes::{
complete::{tag, take_until},
streaming::is_not,
},
character::complete::char,
combinator::{map, map_res, opt},
multi::separated_list1,
sequence::separated_pair,
IResult,
};
use std::path::Path;
use thiserror::Error;
#[cfg(test)]
mod tests;
/// Errors produced by the kradfile parsing functions in this module.
#[derive(Error, Debug)]
pub enum KradError {
/// The input could not be parsed. This variant carries no further detail.
#[error("Error while parsing kradfile")]
Parse,
/// Reading the input file failed; wraps the originating [`std::io::Error`].
#[error("Error while reading kradfile")]
Io(#[from] std::io::Error),
}
/// Byte sequence (space, colon, space) that sits between the kanji and its
/// radical list on each entry line.
const SEPARATOR: &[u8] = b" : ";
/// One parsed entry: a kanji together with the radicals listed for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Decomposition {
/// The kanji, as decoded from the text before the ` : ` separator.
pub kanji: String,
/// The decoded radicals, in the order they appear after the separator.
pub radicals: Vec<String>,
}
/// Result type shared by the parsing entry points: the parsed entries on
/// success, a [`KradError`] on failure.
type KradResult = Result<Vec<Decomposition>, KradError>;
/// Reads the file at `path` and parses its contents into decompositions.
///
/// Accepts anything convertible to a [`Path`] and delegates to the
/// non-generic `parse_file_implementation`.
///
/// # Errors
///
/// Returns [`KradError::Io`] if the file cannot be read and
/// [`KradError::Parse`] if its contents cannot be parsed.
pub fn parse_file<P: AsRef<Path>>(path: P) -> KradResult {
    let location: &Path = path.as_ref();
    parse_file_implementation(location)
}
/// Loads the whole file at `path` into memory and hands the bytes to
/// `parse_bytes`.
///
/// # Errors
///
/// An I/O failure is converted into [`KradError::Io`]; parse failures are
/// passed through from `parse_bytes`.
fn parse_file_implementation(path: &Path) -> KradResult {
    // `?` performs the `io::Error` -> `KradError` conversion via `From`.
    let contents = std::fs::read(path)?;
    parse_bytes(&contents)
}
/// Parses the raw bytes of a kradfile into a list of decompositions.
///
/// Any input left over after the last successfully parsed entry is discarded.
///
/// # Errors
///
/// Returns [`KradError::Parse`] if the parser fails; the underlying parser
/// error is not preserved.
pub fn parse_bytes(b: &[u8]) -> KradResult {
    match lines(b) {
        Ok((_remaining, decompositions)) => Ok(decompositions),
        Err(_) => Err(KradError::Parse),
    }
}
/// Parses one or more entries (each handled by `next_kanji`) separated by
/// newline characters.
fn lines(b: &[u8]) -> IResult<&[u8], Vec<Decomposition>> {
    let mut entries = separated_list1(char('\n'), next_kanji);
    entries(b)
}
/// Parses the next entry, first running the `comments` parser.
///
/// Runs `comments`, then an optional single newline, then `kanji_line`.
/// Whatever `comments` produces is dropped; only the decomposition is returned.
fn next_kanji(b: &[u8]) -> IResult<&[u8], Decomposition> {
    let (rest, (_skipped, decomposition)) =
        separated_pair(comments, opt(char('\n')), kanji_line)(b)?;
    Ok((rest, decomposition))
}
/// Parses a single entry of the form `<kanji> : <radical> <radical> ...`
/// into a [`Decomposition`].
fn kanji_line(b: &[u8]) -> IResult<&[u8], Decomposition> {
    let (rest, (character, parts)) = separated_pair(kanji, tag(SEPARATOR), radicals)(b)?;
    let decomposition = Decomposition {
        kanji: character,
        radicals: parts,
    };
    Ok((rest, decomposition))
}
/// Parses the kanji at the start of an entry: takes everything before the
/// first space and decodes it with `decode_jis_kanji`.
///
/// A decoding failure is reported as a parser error.
fn kanji(b: &[u8]) -> IResult<&[u8], String> {
    let mut decoded_kanji = map_res(take_until(" "), decode_jis_kanji);
    decoded_kanji(b)
}
/// Parses one or more radicals separated by single space characters.
fn radicals(b: &[u8]) -> IResult<&[u8], Vec<String>> {
    let mut radical_list = separated_list1(char(' '), radical);
    radical_list(b)
}
fn radical(b: &[u8]) -> IResult<&[u8], String> {
map_res(is_not(" \n"), decode_jis_radical)(b)
}