1use std::collections::HashMap;
3
4use super::{Codepoint, Encoding, EquivalenceType};
5
6use pest::error::{Error, ErrorVariant};
7use pest::iterators::Pair;
8pub use pest::Parser;
9use pest::Position;
10use pest_derive::Parser;
11
/// Parser type for UCM text.
///
/// The implementation is generated by `pest_derive` from the grammar file
/// named in the `#[grammar]` attribute (`../resources/ucm.pest`). The derive
/// is also expected to supply the `Rule` enum that the parsing functions in
/// this module match on.
#[derive(Parser)]
#[grammar = "../resources/ucm.pest"]
pub struct UcmParser;
16
17pub fn parse_debug_dump(ucms: &str) {
20 let ast = UcmParser::parse(Rule::ucm, ucms).unwrap_or_else(|e|{panic!("{:?}", e);});
21 use pest_ascii_tree::into_ascii_tree;
22 eprintln!("{}", into_ascii_tree(ast).unwrap());
23}
24
25pub fn parse(ucms: &str) -> Result<Encoding, Error<Rule>> {
27 fn parse_bytestring(bs: &str) -> Vec<u8> {
28 bs.split("\\x")
29 .filter(|s| s.trim().len() != 0)
30 .map(|s| u8::from_str_radix(s.trim(), 16).unwrap())
31 .collect()
32 }
33
34 let ucm = match UcmParser::parse(Rule::ucm, ucms)?.next() {
35 Some(parsed) => parsed,
36 None => {
37 Err(Error::new_from_pos(
38 ErrorVariant::CustomError {
39 message: "No rules in parsed file?".to_string(),
40 },
41 Position::new(ucms, 0).unwrap(),
42 ))?
43 }
44 };
45
46 let mut codepoints = vec![];
47 let mut metadata = HashMap::new();
48 let mut states = vec![];
49 for i in ucm.into_inner() {
50 let rules: Vec<Pair<_>> = i.clone().into_inner().into_iter().collect();
51 match i.as_rule() {
52 Rule::unicode_record => {
53 let (uni, bytestring, utype) = (&rules[0], &rules[1], &rules[2]);
54 debug_assert_eq!(uni.as_rule(), Rule::unicode_inner);
55 debug_assert!([Rule::type0, Rule::type1, Rule::type2, Rule::type3].iter().any(|r|utype.as_rule() == *r));
56 let uni = char::from_u32(u32::from_str_radix(uni.as_span().as_str(), 16).unwrap())
57 .unwrap();
58 let eq_type = match utype.as_rule() {
59 Rule::type0 => EquivalenceType::Type0,
60 Rule::type1 => EquivalenceType::Type1,
61 Rule::type2 => EquivalenceType::Type2,
62 Rule::type3 => EquivalenceType::Type3,
63 _ => unreachable!(),
64 };
65 let bytestring: Vec<u8> = parse_bytestring(bytestring.as_str());
66 codepoints.push(Codepoint {
67 uni,
68 eq_type,
69 bytestring,
70 });
71 }
72 Rule::metadata_record => {
73 let (key, value) = (&rules[0], &rules[1]);
74 debug_assert_eq!(key.as_rule(), Rule::metadata_key);
75 metadata
76 .insert(key.as_str().to_owned(), value.as_str().to_owned());
77 }
78 Rule::state_record => {
79 let state_row = &rules[0];
80 debug_assert_eq!(state_row.as_rule(), Rule::state_row);
81 states.push(state_row.as_str().to_owned());
82 }
83 _ => {}
84 }
85 }
86 Ok(Encoding { codepoints, metadata, states })
87}