ucd_parse/
property_value_aliases.rs1use std::path::Path;
2
3use crate::{common::UcdFile, error::Error};
4
/// A single parsed row of the `PropertyValueAliases.txt` file.
///
/// Each row associates one property with the abbreviated and long spellings
/// of one of its values, plus any further aliases listed on the same line.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PropertyValueAlias {
    /// The first field of the row: the name of the property the value
    /// belongs to (for example `blk`, `gc` or `ccc`).
    pub property: String,
    /// The numeric field that immediately follows the property name.
    ///
    /// The parser only fills this in for rows of the `ccc` property; it is
    /// `None` for every other row.
    pub numeric: Option<u8>,
    /// The abbreviated name of the property value.
    pub abbreviation: String,
    /// The long name of the property value.
    pub long: String,
    /// Any additional names listed after the long name, in file order.
    ///
    /// Always empty for `ccc` rows.
    pub aliases: Vec<String>,
}
21
22impl UcdFile for PropertyValueAlias {
23 fn relative_file_path() -> &'static Path {
24 Path::new("PropertyValueAliases.txt")
25 }
26}
27
28impl std::str::FromStr for PropertyValueAlias {
29 type Err = Error;
30
31 fn from_str(line: &str) -> Result<PropertyValueAlias, Error> {
32 let re_parts = regex!(
33 r"(?x)
34 ^
35 \s*(?P<prop>[^\s;]+)\s*;
36 \s*(?P<abbrev>[^\s;]+)\s*;
37 \s*(?P<long>[^\s;]+)\s*
38 (?:;(?P<aliases>.*))?
39 ",
40 );
41 let re_parts_ccc = regex!(
42 r"(?x)
43 ^
44 ccc;
45 \s*(?P<num_class>[0-9]+)\s*;
46 \s*(?P<abbrev>[^\s;]+)\s*;
47 \s*(?P<long>[^\s;]+)
48 ",
49 );
50 let re_aliases = regex!(r"\s*(?P<alias>[^\s;]+)\s*;?\s*");
51
52 if line.starts_with("ccc;") {
53 let caps = match re_parts_ccc.captures(line.trim()) {
54 Some(caps) => caps,
55 None => {
56 return err!("invalid PropertyValueAliases (ccc) line")
57 }
58 };
59 let n = match caps["num_class"].parse() {
60 Ok(n) => n,
61 Err(err) => {
62 return err!(
63 "failed to parse ccc number '{}': {}",
64 &caps["num_class"],
65 err
66 )
67 }
68 };
69 let abbrev = caps.name("abbrev").unwrap().as_str();
70 let long = caps.name("long").unwrap().as_str();
71 return Ok(PropertyValueAlias {
72 property: line[0..3].to_string(),
73 numeric: Some(n),
74 abbreviation: abbrev.to_string(),
75 long: long.to_string(),
76 aliases: vec![],
77 });
78 }
79
80 let caps = match re_parts.captures(line.trim()) {
81 Some(caps) => caps,
82 None => return err!("invalid PropertyValueAliases line"),
83 };
84 let mut aliases = vec![];
85 if let Some(m) = caps.name("aliases") {
86 for acaps in re_aliases.captures_iter(m.as_str()) {
87 let alias = acaps.name("alias").unwrap().as_str();
88 if alias == "#" {
89 break;
91 }
92 aliases.push(alias.to_string());
93 }
94 }
95 Ok(PropertyValueAlias {
96 property: caps.name("prop").unwrap().as_str().to_string(),
97 numeric: None,
98 abbreviation: caps.name("abbrev").unwrap().as_str().to_string(),
99 long: caps.name("long").unwrap().as_str().to_string(),
100 aliases,
101 })
102 }
103}
104
#[cfg(test)]
mod tests {
    use super::PropertyValueAlias;

    /// Parses `line` into a row, panicking if the line is rejected.
    fn parse(line: &str) -> PropertyValueAlias {
        line.parse().unwrap()
    }

    /// Builds the row a test expects the parser to produce.
    fn row(
        property: &str,
        numeric: Option<u8>,
        abbreviation: &str,
        long: &str,
        aliases: &[&str],
    ) -> PropertyValueAlias {
        PropertyValueAlias {
            property: property.to_string(),
            numeric,
            abbreviation: abbreviation.to_string(),
            long: long.to_string(),
            aliases: aliases.iter().map(|alias| alias.to_string()).collect(),
        }
    }

    // A generic row with exactly one alias.
    #[test]
    fn parse1() {
        let got = parse(
            "blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A\n",
        );
        let want = row(
            "blk",
            None,
            "Arabic_PF_A",
            "Arabic_Presentation_Forms_A",
            &["Arabic_Presentation_Forms-A"],
        );
        assert_eq!(got, want);
    }

    // A generic row with several aliases.
    #[test]
    fn parse2() {
        let got = parse("AHex; N ; No ; F ; False\n");
        let want = row("AHex", None, "N", "No", &["F", "False"]);
        assert_eq!(got, want);
    }

    // A generic row with no aliases at all.
    #[test]
    fn parse3() {
        let got = parse("age; 1.1 ; V1_1\n");
        let want = row("age", None, "1.1", "V1_1", &[]);
        assert_eq!(got, want);
    }

    // A `ccc` row: the numeric class is captured separately.
    #[test]
    fn parse4() {
        let got = parse("ccc; 0; NR ; Not_Reordered\n");
        let want = row("ccc", Some(0), "NR", "Not_Reordered", &[]);
        assert_eq!(got, want);
    }

    // A `ccc` row followed by a trailing comment.
    #[test]
    fn parse5() {
        let got = parse("ccc; 133; CCC133 ; CCC133 # RESERVED\n");
        let want = row("ccc", Some(133), "CCC133", "CCC133", &[]);
        assert_eq!(got, want);
    }

    // A generic row whose alias list is cut short by a trailing comment.
    #[test]
    fn parse6() {
        let got = parse(
            "gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps\n",
        );
        let want = row("gc", None, "P", "Punctuation", &["punct"]);
        assert_eq!(got, want);
    }
}