ucd_parse/
property_value_aliases.rs

1use std::path::Path;
2
3use crate::{common::UcdFile, error::Error};
4
/// One parsed record from the `PropertyValueAliases.txt` file.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct PropertyValueAlias {
    /// Name of the property that this value alias belongs to.
    pub property: String,
    /// Numeric form of this property value, when the row carries one. The
    /// parser in this file only fills it in for rows of the `ccc`
    /// (`Canonical_Combining_Class`) property; every other row gets `None`.
    pub numeric: Option<u8>,
    /// Abbreviated name of this property value.
    pub abbreviation: String,
    /// "Long" name of this property value.
    pub long: String,
    /// Any further aliases for this value; empty when the row lists none.
    pub aliases: Vec<String>,
}
21
22impl UcdFile for PropertyValueAlias {
23    fn relative_file_path() -> &'static Path {
24        Path::new("PropertyValueAliases.txt")
25    }
26}
27
28impl std::str::FromStr for PropertyValueAlias {
29    type Err = Error;
30
31    fn from_str(line: &str) -> Result<PropertyValueAlias, Error> {
32        let re_parts = regex!(
33            r"(?x)
34                ^
35                \s*(?P<prop>[^\s;]+)\s*;
36                \s*(?P<abbrev>[^\s;]+)\s*;
37                \s*(?P<long>[^\s;]+)\s*
38                (?:;(?P<aliases>.*))?
39                ",
40        );
41        let re_parts_ccc = regex!(
42            r"(?x)
43                ^
44                ccc;
45                \s*(?P<num_class>[0-9]+)\s*;
46                \s*(?P<abbrev>[^\s;]+)\s*;
47                \s*(?P<long>[^\s;]+)
48                ",
49        );
50        let re_aliases = regex!(r"\s*(?P<alias>[^\s;]+)\s*;?\s*");
51
52        if line.starts_with("ccc;") {
53            let caps = match re_parts_ccc.captures(line.trim()) {
54                Some(caps) => caps,
55                None => {
56                    return err!("invalid PropertyValueAliases (ccc) line")
57                }
58            };
59            let n = match caps["num_class"].parse() {
60                Ok(n) => n,
61                Err(err) => {
62                    return err!(
63                        "failed to parse ccc number '{}': {}",
64                        &caps["num_class"],
65                        err
66                    )
67                }
68            };
69            let abbrev = caps.name("abbrev").unwrap().as_str();
70            let long = caps.name("long").unwrap().as_str();
71            return Ok(PropertyValueAlias {
72                property: line[0..3].to_string(),
73                numeric: Some(n),
74                abbreviation: abbrev.to_string(),
75                long: long.to_string(),
76                aliases: vec![],
77            });
78        }
79
80        let caps = match re_parts.captures(line.trim()) {
81            Some(caps) => caps,
82            None => return err!("invalid PropertyValueAliases line"),
83        };
84        let mut aliases = vec![];
85        if let Some(m) = caps.name("aliases") {
86            for acaps in re_aliases.captures_iter(m.as_str()) {
87                let alias = acaps.name("alias").unwrap().as_str();
88                if alias == "#" {
89                    // This starts a comment, so stop reading.
90                    break;
91                }
92                aliases.push(alias.to_string());
93            }
94        }
95        Ok(PropertyValueAlias {
96            property: caps.name("prop").unwrap().as_str().to_string(),
97            numeric: None,
98            abbreviation: caps.name("abbrev").unwrap().as_str().to_string(),
99            long: caps.name("long").unwrap().as_str().to_string(),
100            aliases,
101        })
102    }
103}
104
#[cfg(test)]
mod tests {
    use super::PropertyValueAlias;

    /// Parses `line` into a row, panicking if the parser rejects it.
    fn parsed(line: &str) -> PropertyValueAlias {
        line.parse().unwrap()
    }

    /// Builds the row a test expects from borrowed field values.
    fn expected(
        property: &str,
        numeric: Option<u8>,
        abbreviation: &str,
        long: &str,
        aliases: &[&str],
    ) -> PropertyValueAlias {
        PropertyValueAlias {
            property: property.to_string(),
            numeric,
            abbreviation: abbreviation.to_string(),
            long: long.to_string(),
            aliases: aliases.iter().map(|a| a.to_string()).collect(),
        }
    }

    /// A `blk` row with a single extra alias.
    #[test]
    fn parse1() {
        let got = parsed(
            "blk; Arabic_PF_A                      ; Arabic_Presentation_Forms_A      ; Arabic_Presentation_Forms-A\n",
        );
        let want = expected(
            "blk",
            None,
            "Arabic_PF_A",
            "Arabic_Presentation_Forms_A",
            &["Arabic_Presentation_Forms-A"],
        );
        assert_eq!(got, want);
    }

    /// A binary-property row with two extra aliases.
    #[test]
    fn parse2() {
        let got = parsed(
            "AHex; N                               ; No                               ; F                                ; False\n",
        );
        let want = expected("AHex", None, "N", "No", &["F", "False"]);
        assert_eq!(got, want);
    }

    /// An `age` row with no extra aliases.
    #[test]
    fn parse3() {
        let got = parsed("age; 1.1                              ; V1_1\n");
        let want = expected("age", None, "1.1", "V1_1", &[]);
        assert_eq!(got, want);
    }

    /// A `ccc` row, which carries a numeric field.
    #[test]
    fn parse4() {
        let got = parsed("ccc;   0; NR                         ; Not_Reordered\n");
        let want = expected("ccc", Some(0), "NR", "Not_Reordered", &[]);
        assert_eq!(got, want);
    }

    /// A `ccc` row followed by a trailing comment.
    #[test]
    fn parse5() {
        let got =
            parsed("ccc; 133; CCC133                     ; CCC133 # RESERVED\n");
        let want = expected("ccc", Some(133), "CCC133", "CCC133", &[]);
        assert_eq!(got, want);
    }

    /// A generic row with one alias followed by a trailing comment.
    #[test]
    fn parse6() {
        let got = parsed(
            "gc ; P                                ; Punctuation                      ; punct                            # Pc | Pd | Pe | Pf | Pi | Po | Ps\n",
        );
        let want = expected("gc", None, "P", "Punctuation", &["punct"]);
        assert_eq!(got, want);
    }
}