ucd_parse/
property_aliases.rs

1use std::path::Path;
2
3use crate::{common::UcdFile, error::Error};
4
/// One parsed record from the `PropertyAliases.txt` file.
///
/// Each record names a property by a short form and a long form, and may
/// carry any number of extra aliases after those two.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct PropertyAlias {
    /// The abbreviated name of the property.
    pub abbreviation: String,
    /// The "long" (full) name of the property.
    pub long: String,
    /// Any further aliases for the property; empty when the record has none.
    pub aliases: Vec<String>,
}
15
16impl UcdFile for PropertyAlias {
17    fn relative_file_path() -> &'static Path {
18        Path::new("PropertyAliases.txt")
19    }
20}
21
22impl std::str::FromStr for PropertyAlias {
23    type Err = Error;
24
25    fn from_str(line: &str) -> Result<PropertyAlias, Error> {
26        let re_parts = regex!(
27            r"(?x)
28                ^
29                \s*(?P<abbrev>[^\s;]+)\s*;
30                \s*(?P<long>[^\s;]+)\s*
31                (?:;(?P<aliases>.*))?
32                ",
33        );
34        let re_aliases = regex!(r"\s*(?P<alias>[^\s;]+)\s*;?\s*");
35
36        let caps = match re_parts.captures(line.trim()) {
37            Some(caps) => caps,
38            None => return err!("invalid PropertyAliases line: '{}'", line),
39        };
40        let mut aliases = vec![];
41        if let Some(m) = caps.name("aliases") {
42            for acaps in re_aliases.captures_iter(m.as_str()) {
43                let alias = acaps.name("alias").unwrap().as_str();
44                aliases.push(alias.to_string());
45            }
46        }
47        Ok(PropertyAlias {
48            abbreviation: caps.name("abbrev").unwrap().as_str().to_string(),
49            long: caps.name("long").unwrap().as_str().to_string(),
50            aliases,
51        })
52    }
53}
54
#[cfg(test)]
mod tests {
    use super::PropertyAlias;

    /// Builds the row a test expects the parser to produce.
    fn expected(
        abbreviation: &str,
        long: &str,
        aliases: &[&str],
    ) -> PropertyAlias {
        PropertyAlias {
            abbreviation: abbreviation.to_string(),
            long: long.to_string(),
            aliases: aliases.iter().map(|a| a.to_string()).collect(),
        }
    }

    /// Two fields, trailing newline, no aliases.
    #[test]
    fn parse1() {
        let parsed: PropertyAlias =
            "cjkAccountingNumeric     ; kAccountingNumeric\n".parse().unwrap();
        assert_eq!(
            parsed,
            expected("cjkAccountingNumeric", "kAccountingNumeric", &[]),
        );
    }

    /// Long name containing an underscore, no aliases.
    #[test]
    fn parse2() {
        let parsed: PropertyAlias =
            "nv                       ; Numeric_Value\n".parse().unwrap();
        assert_eq!(parsed, expected("nv", "Numeric_Value", &[]));
    }

    /// A single additional alias.
    #[test]
    fn parse3() {
        let parsed: PropertyAlias =
            "scf                      ; Simple_Case_Folding         ; sfc\n"
                .parse()
                .unwrap();
        assert_eq!(
            parsed,
            expected("scf", "Simple_Case_Folding", &["sfc"]),
        );
    }

    /// Two additional aliases separated by `;`.
    #[test]
    fn parse4() {
        let parsed: PropertyAlias = "cjkRSUnicode             ; kRSUnicode                  ; Unicode_Radical_Stroke; URS\n"
            .parse()
            .unwrap();
        assert_eq!(
            parsed,
            expected(
                "cjkRSUnicode",
                "kRSUnicode",
                &["Unicode_Radical_Stroke", "URS"],
            ),
        );
    }

    /// No trailing newline on the input line.
    #[test]
    fn parse5() {
        let parsed: PropertyAlias =
            "isc                      ; ISO_Comment".parse().unwrap();
        assert_eq!(parsed, expected("isc", "ISO_Comment", &[]));
    }
}