1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
use crate::{shared::NomErrorReason, take_uint, IResult, NomErr, TryFromPrimitiveError};
use nom::{
branch::alt,
bytes::complete::{tag, take_while1},
character::streaming::one_of,
combinator::{map, recognize, value},
multi::many_till,
};
use num_enum::TryFromPrimitive;
use serde::{Deserialize, Serialize};
use serde_repr::*;
use std::convert::TryFrom;
use thiserror::Error;
/// A pinyin syllable: a romanized spelling paired with its [`Tone`].
///
/// The serde attribute below renames fields to camelCase when (de)serializing.
/// Comparison, ordering and hashing are derived, so they consider
/// `romanization` first and `tone` second (declaration order).
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct PinYin {
/// Romanized letters of the syllable. When produced by `TryFrom<&str>`,
/// this is the concatenation of the parsed pieces that precede the tone
/// number (with `u:` already rewritten to `ü` and `e^` to `ê`).
pub romanization: String,
/// Tone of the syllable. When produced by `TryFrom<&str>`, this is
/// converted from the number that follows the letters.
pub tone: Tone,
}
/// Tone attached to a [`PinYin`] syllable.
///
/// The enum is `repr(u8)` with `High` pinned to `1`; the remaining variants
/// take the following consecutive values (2 through 5). `TryFromPrimitive`
/// provides the fallible `u8 -> Tone` conversion used by the parser, and the
/// `serde_repr` derives are presumably what make the (de)serialized form the
/// underlying integer rather than the variant name — confirm against the
/// `serde_repr` documentation.
#[derive(
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Clone,
Copy,
TryFromPrimitive,
Serialize_repr,
Deserialize_repr,
)]
#[repr(u8)]
pub enum Tone {
/// Discriminant `1` (explicit; anchors the numbering of the other variants).
High = 1,
/// Discriminant `2`.
Rising,
/// Discriminant `3`.
Low,
/// Discriminant `4`.
Falling,
/// Discriminant `5`.
Neutral,
}
/// Errors produced while converting text into a [`PinYin`].
#[derive(Error, Debug, PartialEq, Eq, Clone)]
pub enum PinYinParseError {
/// The parsed number does not correspond to any [`Tone`] variant.
/// `#[from]` lets `?` convert a `TryFromPrimitiveError<Tone>` directly.
#[error("(Pin yin) Tone not recognized: {0}")]
InvalidTone(#[from] TryFromPrimitiveError<Tone>),
/// The text did not match the expected shape; carries the reason derived
/// from the underlying nom error.
#[error("(Pin yin) Format: {0}")]
Format(NomErrorReason),
}
impl<'a> From<NomErr<'a>> for PinYinParseError {
fn from(err: NomErr<'a>) -> Self {
Self::Format(err.into())
}
}
/// Builds a [`PinYin`] from text made of romanization letters followed by a
/// tone number.
impl TryFrom<&str> for PinYin {
    type Error = PinYinParseError;

    /// Parses `text` into its romanization and tone.
    ///
    /// # Errors
    ///
    /// * [`PinYinParseError::Format`] when the parser rejects the text.
    /// * [`PinYinParseError::InvalidTone`] when the parsed number is not a
    ///   valid [`Tone`] discriminant.
    fn try_from(text: &str) -> Result<Self, Self::Error> {
        // Any input remaining after the tone number is intentionally ignored
        // here (the remainder is bound and dropped).
        let (_rest, (romanization, tone_number)) = parts(text)?;
        Ok(Self {
            romanization,
            tone: Tone::try_from(tone_number)?,
        })
    }
}
fn parts(s: &str) -> IResult<(String, u8)> {
map(pronunciation_parts, |(parts, tone)| (parts.join(""), tone))(s)
}
/// Collects romanization pieces until `take_uint` succeeds, returning the
/// pieces together with the number it produced.
///
/// The alternatives are tried in order, so the two-character escapes
/// (`u:`, `e^`) get a chance before a bare `u`/`e` or a run of other letters.
fn pronunciation_parts(s: &str) -> IResult<(Vec<&str>, u8)> {
    let piece = alt((umlaut, carrot, special_letter, letters));
    many_till(piece, take_uint)(s)
}
fn umlaut(s: &str) -> IResult<&str> {
value("ü", tag("u:"))(s)
}
fn carrot(s: &str) -> IResult<&str> {
value("ê", tag("e^"))(s)
}
/// Recognizes a single bare `u` or `e` and returns it as a one-character
/// slice of the input.
///
/// NOTE(review): `one_of` is imported from `character::streaming`, whereas
/// the neighbouring parsers use `complete` variants; on exhausted input this
/// parser therefore reports "incomplete" rather than a plain error — confirm
/// that this is intended.
fn special_letter(s: &str) -> IResult<&str> {
    let u_or_e = one_of("ue");
    recognize(u_or_e)(s)
}
/// Takes a non-empty run of ASCII letters, stopping before any `u` or `e`.
///
/// `u` and `e` are excluded so that the parsers dedicated to them (including
/// the `u:` / `e^` escapes) can examine those characters themselves.
fn letters(s: &str) -> IResult<&str> {
    fn is_plain_letter(c: char) -> bool {
        !(c == 'u' || c == 'e') && c.is_ascii_alphabetic()
    }
    take_while1(is_plain_letter)(s)
}