ucd_parse/
jamo_short_name.rs

1use std::path::Path;
2
3use crate::{
4    common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint},
5    error::Error,
6};
7
8/// A single row in the `Jamo.txt` file.
9///
10/// The `Jamo.txt` file defines the `Jamo_Short_Name` property.
11#[derive(Clone, Debug, Default, Eq, PartialEq)]
12pub struct JamoShortName {
13    /// The codepoint corresponding to this row.
14    pub codepoint: Codepoint,
15    /// The actual "Jamo Short Name." This string contains at most 3 bytes and
16    /// may be empty.
17    pub name: String,
18}
19
20impl UcdFile for JamoShortName {
21    fn relative_file_path() -> &'static Path {
22        Path::new("Jamo.txt")
23    }
24}
25
26impl UcdFileByCodepoint for JamoShortName {
27    fn codepoints(&self) -> CodepointIter {
28        self.codepoint.into_iter()
29    }
30}
31
32impl std::str::FromStr for JamoShortName {
33    type Err = Error;
34
35    fn from_str(line: &str) -> Result<JamoShortName, Error> {
36        let re_parts = regex!(
37            r"(?x)
38                ^
39                (?P<codepoint>[A-Z0-9]+);
40                \s*
41                (?P<name>[A-Z]*)
42                ",
43        );
44
45        let caps = match re_parts.captures(line.trim()) {
46            Some(caps) => caps,
47            None => return err!("invalid Jamo_Short_name line"),
48        };
49        Ok(JamoShortName {
50            codepoint: caps["codepoint"].parse()?,
51            name: caps.name("name").unwrap().as_str().to_string(),
52        })
53    }
54}
55
#[cfg(test)]
mod tests {
    use super::JamoShortName;

    /// Parses `line` as a `Jamo.txt` row, panicking if it is rejected.
    fn parse_row(line: &str) -> JamoShortName {
        line.parse::<JamoShortName>().unwrap()
    }

    /// A row that carries a non-empty short name.
    #[test]
    fn parse1() {
        let parsed = parse_row("1164; YAE # HANGUL JUNGSEONG YAE\n");
        assert_eq!(parsed.codepoint, 0x1164);
        assert_eq!(parsed.name, "YAE");
    }

    /// A row whose short name is empty.
    #[test]
    fn parse2() {
        let parsed = parse_row("110B;     # HANGUL CHOSEONG IEUNG\n");
        assert_eq!(parsed.codepoint, 0x110B);
        assert_eq!(parsed.name, "");
    }
}