1use crate::Error;
2use lazy_static::lazy_static;
3use regex::Regex;
4use std::fs::File;
5use std::io::{self, BufRead};
6use std::marker::PhantomData;
7use std::path::{Path, PathBuf};
8use std::str::FromStr;
9use ucd_parse::CodepointRange;
10
11#[derive(Debug)]
22pub struct CsvLineParser<R, D> {
23 path: Option<PathBuf>,
24 rdr: io::BufReader<R>,
25 line: String,
26 line_number: u64,
27 _data: PhantomData<D>,
28}
29
30impl<D> CsvLineParser<File, D> {
31 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<CsvLineParser<File, D>, Error> {
33 let path = path.as_ref();
34 let file = File::open(path).map_err(|e| Error {
35 mesg: format!("IO Error: {}", e),
36 line: None,
37 path: Some(path.to_path_buf()),
38 })?;
39 Ok(CsvLineParser::new(Some(path.to_path_buf()), file))
40 }
41}
42
43impl<R: io::Read, D> CsvLineParser<R, D> {
44 pub(crate) fn new(path: Option<PathBuf>, rdr: R) -> CsvLineParser<R, D> {
52 CsvLineParser {
53 path,
54 rdr: io::BufReader::new(rdr),
55 line: String::new(),
56 line_number: 0,
57 _data: PhantomData,
58 }
59 }
60}
61
62impl<R: io::Read, D: FromStr<Err = Error>> Iterator for CsvLineParser<R, D> {
63 type Item = Result<D, Error>;
64
65 fn next(&mut self) -> Option<Result<D, Error>> {
66 loop {
67 self.line_number += 1;
68 self.line.clear();
69 let n = match self.rdr.read_line(&mut self.line) {
70 Err(err) => {
71 return Some(Err(Error {
72 mesg: format!("IO Error: {}", err),
73 line: None,
74 path: self.path.clone(),
75 }))
76 }
77 Ok(n) => n,
78 };
79 if n == 0 {
80 return None;
81 }
82 if self.line_number > 1 {
84 break;
85 }
86 }
87 let line_number = self.line_number;
88 Some(self.line.parse().map_err(|mut err: Error| {
89 err.line = Some(line_number);
90 err
91 }))
92 }
93}
94
95#[derive(Clone, Copy, Debug, Eq, PartialEq)]
99pub enum DerivedProperty {
100 PValid,
102 FreePVal,
106 ContextJ,
108 ContextO,
110 Disallowed,
112 IdDis,
116 Unassigned,
118}
119
120impl FromStr for DerivedProperty {
121 type Err = Error;
122
123 fn from_str(word: &str) -> Result<DerivedProperty, Error> {
124 if word.eq("PVALID") {
125 Ok(DerivedProperty::PValid)
126 } else if word.eq("FREE_PVAL") {
127 Ok(DerivedProperty::FreePVal)
128 } else if word.eq("CONTEXTJ") {
129 Ok(DerivedProperty::ContextJ)
130 } else if word.eq("CONTEXTO") {
131 Ok(DerivedProperty::ContextO)
132 } else if word.eq("DISALLOWED") {
133 Ok(DerivedProperty::Disallowed)
134 } else if word.eq("ID_DIS") {
135 Ok(DerivedProperty::IdDis)
136 } else if word.eq("UNASSIGNED") {
137 Ok(DerivedProperty::Unassigned)
138 } else {
139 Err(Error {
140 mesg: format!("Invalid derived property: {}", word),
141 line: None,
142 path: None,
143 })
144 }
145 }
146}
147
148fn parse_codepoint_range(s: &str) -> Result<ucd_parse::CodepointRange, Error> {
149 lazy_static! {
150 static ref PARTS: Regex = Regex::new(r"^(?P<start>[A-Z0-9]+)-(?P<end>[A-Z0-9]+)$").unwrap();
151 }
152 let caps = match PARTS.captures(s) {
153 Some(caps) => caps,
154 None => return err!("invalid codepoint range: '{}'", s),
155 };
156
157 let start = caps["start"].parse()?;
158 let end = caps["end"].parse()?;
159
160 Ok(CodepointRange { start, end })
161}
162
163fn parse_codepoints(s: &str) -> Result<ucd_parse::Codepoints, Error> {
164 if s.contains('-') {
165 let range = parse_codepoint_range(s)?;
166 Ok(ucd_parse::Codepoints::Range(range))
167 } else {
168 let cp = s.parse()?;
169 Ok(ucd_parse::Codepoints::Single(cp))
170 }
171}
172
173fn parse_derived_property_tuple(s: &str) -> Result<(DerivedProperty, DerivedProperty), Error> {
174 lazy_static! {
175 static ref PARTS: Regex = Regex::new(r"^(?P<p1>[A-Z_]+)\s+or\s+(?P<p2>[A-Z_]+)$").unwrap();
176 }
177
178 let caps = match PARTS.captures(s) {
179 Some(caps) => caps,
180 None => return err!("invalid properties: '{}'", s),
181 };
182 let p1 = caps["p1"].parse()?;
183 let p2 = caps["p2"].parse()?;
184
185 Ok((p1, p2))
186}
187
188fn parse_derived_properties(s: &str) -> Result<DerivedProperties, Error> {
189 if s.contains(" or ") {
190 let (p1, p2) = parse_derived_property_tuple(s)?;
191 Ok(DerivedProperties::Tuple((p1, p2)))
192 } else {
193 let p = s.parse()?;
194 Ok(DerivedProperties::Single(p))
195 }
196}
197
198fn parse_precis_table_line(
199 line: &str,
200) -> Result<(ucd_parse::Codepoints, DerivedProperties, &str), Error> {
201 let v: Vec<&str> = line.splitn(3, ',').collect();
202 if v.len() != 3 {
203 return Err(Error {
204 mesg: "Error parsing line".to_string(),
205 line: None,
206 path: None,
207 });
208 }
209
210 let cps = parse_codepoints(v[0])?;
211 let props = parse_derived_properties(v[1])?;
212 let desc = v[2];
213
214 Ok((cps, props, desc))
215}
216
217#[derive(Clone, Copy, Debug, Eq, PartialEq)]
221pub enum DerivedProperties {
222 Single(DerivedProperty),
224 Tuple((DerivedProperty, DerivedProperty)),
226}
227
228impl FromStr for DerivedProperties {
229 type Err = Error;
230
231 fn from_str(s: &str) -> Result<DerivedProperties, Error> {
232 parse_derived_properties(s)
233 }
234}
235
236#[derive(Clone, Debug, Eq, PartialEq)]
238pub struct PrecisDerivedProperty {
239 pub codepoints: ucd_parse::Codepoints,
241 pub properties: DerivedProperties,
243 pub description: String,
245}
246
247impl FromStr for PrecisDerivedProperty {
248 type Err = Error;
249
250 fn from_str(line: &str) -> Result<PrecisDerivedProperty, Error> {
251 let (codepoints, properties, desc) = parse_precis_table_line(line)?;
252 Ok(PrecisDerivedProperty {
253 codepoints,
254 properties,
255 description: desc.to_string(),
256 })
257 }
258}
259
#[cfg(test)]
mod tests {
    use crate::csv_parser::*;

    /// Builds a `ucd_parse::Codepoints` from raw `u32` values: two arguments
    /// produce a `Range`, a single argument produces a `Single`.
    macro_rules! codepoints {
        ($a:expr, $b:expr) => {{
            let tmp_start = ucd_parse::Codepoint::from_u32($a).unwrap();
            let tmp_end = ucd_parse::Codepoint::from_u32($b).unwrap();
            let tmp_range = ucd_parse::CodepointRange {
                start: tmp_start,
                end: tmp_end,
            };
            ucd_parse::Codepoints::Range(tmp_range)
        }};
        ($a:expr) => {{
            let tmp_cp = ucd_parse::Codepoint::from_u32($a).unwrap();
            ucd_parse::Codepoints::Single(tmp_cp)
        }};
    }

    /// Every keyword maps to its variant; unknown words are rejected.
    #[test]
    fn derived_property_from_str() {
        let known = [
            ("PVALID", DerivedProperty::PValid),
            ("FREE_PVAL", DerivedProperty::FreePVal),
            ("CONTEXTJ", DerivedProperty::ContextJ),
            ("CONTEXTO", DerivedProperty::ContextO),
            ("DISALLOWED", DerivedProperty::Disallowed),
            ("ID_DIS", DerivedProperty::IdDis),
            ("UNASSIGNED", DerivedProperty::Unassigned),
        ];
        for &(word, expected) in &known {
            assert_eq!(DerivedProperty::from_str(word).unwrap(), expected);
        }

        assert!(DerivedProperty::from_str("ASDFR").is_err());
    }

    /// Single keywords and `or`-joined pairs parse; malformed input fails.
    #[test]
    fn derived_properties_from_str() {
        assert_eq!(
            DerivedProperties::Single(DerivedProperty::Unassigned),
            DerivedProperties::from_str("UNASSIGNED").unwrap()
        );

        let id_dis_or_free_pval =
            DerivedProperties::Tuple((DerivedProperty::IdDis, DerivedProperty::FreePVal));
        assert_eq!(
            id_dis_or_free_pval,
            DerivedProperties::from_str("ID_DIS or FREE_PVAL").unwrap()
        );
        // The separator is matched with `\s+`, so runs of spaces around `or`
        // must be accepted as well.
        assert_eq!(
            id_dis_or_free_pval,
            DerivedProperties::from_str("ID_DIS   or   FREE_PVAL").unwrap()
        );

        for &bad in &["ID_DIS or INVALID", " or ", "", "INVALID"] {
            assert!(
                DerivedProperties::from_str(bad).is_err(),
                "{:?} should be rejected",
                bad
            );
        }
    }

    /// Ranges and single codepoints parse from hexadecimal text.
    #[test]
    fn codepoints_parse() {
        assert_eq!(
            codepoints!(0x0141, 0x0148),
            parse_codepoints("0141-0148").unwrap()
        );
        assert_eq!(codepoints!(0x0141), parse_codepoints("0141").unwrap());

        let rejected = [
            "ghy0141",
            "",
            "-0148",
            "0148-",
            "124-0148-2345",
            "123454325460148",
        ];
        for &bad in &rejected {
            assert!(
                parse_codepoints(bad).is_err(),
                "{:?} should be rejected",
                bad
            );
        }
    }

    /// Whole table lines parse only when the first two columns are valid.
    #[test]
    fn precis_derived_property_from_str() {
        let accepted = [
            "0020,ID_DIS or FREE_PVAL,SPACE",
            "0000-001F,DISALLOWED,NULL..INFORMATION SEPARATOR ONE",
        ];
        for &line in &accepted {
            assert!(
                PrecisDerivedProperty::from_str(line).is_ok(),
                "{:?} should be accepted",
                line
            );
        }

        let rejected = [",ID_DIS or FREE_PVAL,SPACE", "0020,,SPACE", ",,SPACE", ""];
        for &line in &rejected {
            assert!(
                PrecisDerivedProperty::from_str(line).is_err(),
                "{:?} should be rejected",
                line
            );
        }
    }
}