1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#![warn(clippy::pedantic, clippy::nursery)]

/// ```
/// use match_pinyin_with_hanzi::match_pinyin_with_hanzi;
/// match_pinyin_with_hanzi("māmā qí mǎ, mǎ màn, māma mà mǎ.", "妈妈骑马,马慢,妈妈骂马。").unwrap();
///
/// // Erhua is also supported.
/// // This sample sentence is taken from Wiktionary: https://en.wiktionary.org/w/index.php?title=%E4%B8%80%E9%BB%9E%E5%85%92&oldid=60782800
/// match_pinyin_with_hanzi("Jiù wèi zhème yīdiǎnr shìr shēngqì, zhídàng de ma?", "就為這麼一點兒事兒生氣,值當的嗎?").unwrap();
/// ```
/// 
/// # Panics
/// Panics when
/// - hanzi runs out
/// 
/// # Errors
/// Returns `Err` when
/// - hanzi's pinyin candidate does not match with the given pinyin
pub fn match_pinyin_with_hanzi(pinyin_str: &str, hanzi_str: &str) -> Result<(), String> {
    use pinyin::ToPinyinMulti;
    use pinyin_parser::PinyinParser;

    let mut hanzi_iter = hanzi_str.chars();
    for pinyin in PinyinParser::new()
        .with_strictness(pinyin_parser::Strictness::StrictAndSeparateApostropheFromCurlyQuote)
        .parse(pinyin_str)
    {
        let (hanzi, candidates) = loop {
            let hanzi = hanzi_iter.next().unwrap_or_else(|| {
                panic!(
                    "hanzi ran out, while matching `{}` with `{}`",
                    pinyin_str, hanzi_str
                )
            });

            if let Some(multi) = hanzi.to_pinyin_multi() {
                let mut candidates = vec![];
                for cand_pinyin in multi {
                    candidates.push(cand_pinyin.with_tone());
                    candidates.push(cand_pinyin.plain()); // to allow light tone
                }
                break (hanzi, candidates);
            }
        };

        if pinyin.ends_with('r') && !["er", "ēr", "ér", "ěr", "èr"].contains(&&pinyin[..]) {
            // Erhua. Get the next Chinese character and verify that it is 儿 or 兒
            loop {
                let expect_儿 = hanzi_iter.next().unwrap_or_else(|| {
                    panic!(
                        "hanzi ran out, expected 儿 or 兒, while matching `{}` with `{}`",
                        pinyin_str, hanzi_str
                    )
                });
                if expect_儿.to_pinyin_multi().is_some() {
                    if "儿兒".contains(expect_儿) {
                        break;
                    }
                    return Err(format!(
                        "expected 儿 or 兒 because of the rhotic pinyin {pinyin}, but instead found a Chinese character {expect_儿}",
                    ));
                }
            }
        } else {
            if candidates.contains(&&pinyin[..]) {
                continue;
            }

            return Err(format!(
                "{pinyin} not found within candidates {candidates:?} possible for the Chinese character {hanzi}. Encountered this while matching `{pinyin_str}` with `{hanzi_str}`.",
            ));
        }
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::match_pinyin_with_hanzi;

    /// Smoke test: a short question whose last syllable is written without a tone mark.
    #[test]
    fn it_works() {
        let outcome = match_pinyin_with_hanzi("Nǐ qù nǎli?", "你去哪里?");
        outcome.unwrap();
    }
}