ucd_parse/
name_aliases.rs

1use std::path::Path;
2
3use crate::{
4    common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint},
5    error::Error,
6};
7
8/// A single row in the `NameAliases.txt` file.
9///
10/// Note that there are multiple rows for some codepoint. Each row provides a
11/// new alias.
12#[derive(Clone, Debug, Default, Eq, PartialEq)]
13pub struct NameAlias {
14    /// The codepoint corresponding to this row.
15    pub codepoint: Codepoint,
16    /// The alias.
17    pub alias: String,
18    /// The label of this alias.
19    pub label: NameAliasLabel,
20}
21
22impl UcdFile for NameAlias {
23    fn relative_file_path() -> &'static Path {
24        Path::new("NameAliases.txt")
25    }
26}
27
impl UcdFileByCodepoint for NameAlias {
    /// Returns the `CodepointIter` produced by calling `into_iter` on this
    /// row's `codepoint` field.
    fn codepoints(&self) -> CodepointIter {
        self.codepoint.into_iter()
    }
}
33
34impl std::str::FromStr for NameAlias {
35    type Err = Error;
36
37    fn from_str(line: &str) -> Result<NameAlias, Error> {
38        let re_parts = regex!(
39            r"(?x)
40                ^
41                (?P<codepoint>[A-Z0-9]+);
42                \s*
43                (?P<alias>[^;]+);
44                \s*
45                (?P<label>\S+)
46                ",
47        );
48
49        let caps = match re_parts.captures(line.trim()) {
50            Some(caps) => caps,
51            None => return err!("invalid NameAliases line"),
52        };
53        Ok(NameAlias {
54            codepoint: caps["codepoint"].parse()?,
55            alias: caps.name("alias").unwrap().as_str().to_string(),
56            label: caps["label"].parse()?,
57        })
58    }
59}
60
/// The kind of alias a `NameAliases.txt` row provides.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameAliasLabel {
    /// A correction for a serious problem in a character name.
    Correction,
    /// An ISO 6429 name for a C0 or C1 control function, or another
    /// commonly occurring name for a control code.
    Control,
    /// One of a few widely used alternate names for format characters.
    Alternate,
    /// A documented label for a C1 control code point that was never
    /// actually approved in any standard.
    Figment,
    /// A commonly occurring abbreviation (or acronym) for a control code,
    /// format character, space or variation selector.
    Abbreviation,
}
78
79impl Default for NameAliasLabel {
80    fn default() -> NameAliasLabel {
81        // This is arbitrary, but the Default impl is convenient.
82        NameAliasLabel::Correction
83    }
84}
85
86impl std::str::FromStr for NameAliasLabel {
87    type Err = Error;
88
89    fn from_str(s: &str) -> Result<NameAliasLabel, Error> {
90        match s {
91            "correction" => Ok(NameAliasLabel::Correction),
92            "control" => Ok(NameAliasLabel::Control),
93            "alternate" => Ok(NameAliasLabel::Alternate),
94            "figment" => Ok(NameAliasLabel::Figment),
95            "abbreviation" => Ok(NameAliasLabel::Abbreviation),
96            unknown => err!("unknown name alias label: '{}'", unknown),
97        }
98    }
99}
100
#[cfg(test)]
mod tests {
    use super::{NameAlias, NameAliasLabel};

    /// Parses `line` as a `NameAlias`, panicking if parsing fails.
    fn row(line: &str) -> NameAlias {
        line.parse().unwrap()
    }

    #[test]
    fn parse1() {
        let parsed = row("0000;NULL;control\n");
        assert_eq!(parsed.codepoint, 0x0);
        assert_eq!(parsed.alias, "NULL");
        assert_eq!(parsed.label, NameAliasLabel::Control);
    }

    #[test]
    fn parse2() {
        let parsed = row("000B;VERTICAL TABULATION;control\n");
        assert_eq!(parsed.codepoint, 0xB);
        assert_eq!(parsed.alias, "VERTICAL TABULATION");
        assert_eq!(parsed.label, NameAliasLabel::Control);
    }

    #[test]
    fn parse3() {
        let parsed = row("0081;HIGH OCTET PRESET;figment\n");
        assert_eq!(parsed.codepoint, 0x81);
        assert_eq!(parsed.alias, "HIGH OCTET PRESET");
        assert_eq!(parsed.label, NameAliasLabel::Figment);
    }

    #[test]
    fn parse4() {
        let parsed = row("E01EF;VS256;abbreviation\n");
        assert_eq!(parsed.codepoint, 0xE01EF);
        assert_eq!(parsed.alias, "VS256");
        assert_eq!(parsed.label, NameAliasLabel::Abbreviation);
    }
}