type1_encoding_parser/
lib.rs

1extern crate pom;
2
3use pom::char_class::{alpha, hex_digit, oct_digit, multispace};
4use pom::Parser;
5use pom::parser::*;
6use pom::DataInput;
7use std::collections::HashMap;
8
9use std::str::FromStr;
10
/// A single token or composite object produced by the parser.
///
/// `Clone` and `PartialEq` are derived in addition to `Debug` so that parsed
/// values can be duplicated and compared (for example in tests).
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// Bytes of a `( ... )` literal string or a `< ... >` hexadecimal string.
    LiteralString(Vec<u8>),
    /// Bytes of a `/name`, without the leading slash.
    Name(Vec<u8>),
    /// Textual form of a numeric token that was not stored as an `Integer`.
    Number(String),
    /// A signed integer that fits in an `i64`.
    Integer(i64),
    /// Elements of a `[ ... ]` array.
    Array(Vec<Value>),
    /// Elements of a `{ ... }` procedure body.
    Procedure(Vec<Value>),
    /// An operator word such as `def` or `put`.
    Operator(String),
    /// `true` or `false`.
    Boolean(bool),
    /// Entries of a `<< ... >>` dictionary, keyed by name.
    Dictionary(HashMap<String, Value>),
}
23
24fn hex_char() -> Parser<u8, u8> {
25    let number = is_a(hex_digit).repeat(2);
26    number.collect().convert(|v|u8::from_str_radix(&String::from_utf8(v).unwrap(), 16))
27}
28
29fn comment() -> Parser<u8, ()> {
30    sym(b'%') * none_of(b"\r\n").repeat(0..) * eol().discard()
31}
32
33fn content_space() -> Parser<u8, ()> {
34    is_a(multispace).repeat(0..).discard()
35}
36
37fn operator() -> Parser<u8, String> {
38    (is_a(alpha) | one_of(b"*'\"")).repeat(1..).convert(|v|String::from_utf8(v))
39}
40
41fn oct_char() -> Parser<u8, u8> {
42    let number = is_a(oct_digit).repeat(1..4);
43    number.collect().convert(|v|u8::from_str_radix(&String::from_utf8(v).unwrap(), 8))
44}
45
46fn escape_sequence() -> Parser<u8, Vec<u8>> {
47    sym(b'\\') *
48        ( sym(b'\\').map(|_| vec![b'\\'])
49            | sym(b'(').map(|_| vec![b'('])
50            | sym(b')').map(|_| vec![b')'])
51            | sym(b'n').map(|_| vec![b'\n'])
52            | sym(b'r').map(|_| vec![b'\r'])
53            | sym(b't').map(|_| vec![b'\t'])
54            | sym(b'b').map(|_| vec![b'\x08'])
55            | sym(b'f').map(|_| vec![b'\x0C'])
56            | oct_char().map(|c| vec![c])
57            | eol()     .map(|_| vec![])
58            | empty()   .map(|_| vec![])
59        )
60}
61
62fn nested_literal_string() -> Parser<u8, Vec<u8>> {
63    sym(b'(') *
64        ( none_of(b"\\()").repeat(1..)
65            | escape_sequence()
66            | call(nested_literal_string)
67        ).repeat(0..).map(|segments| {
68            let mut bytes = segments.into_iter().fold(
69                vec![b'('],
70                |mut bytes, mut segment| {
71                    bytes.append(&mut segment);
72                    bytes
73                });
74            bytes.push(b')');
75            bytes
76        })
77        - sym(b')')
78}
79
80fn literal_string() -> Parser<u8, Vec<u8>> {
81    sym(b'(') *
82        ( none_of(b"\\()").repeat(1..)
83            | escape_sequence()
84            | nested_literal_string()
85        ).repeat(0..).map(|segments|segments.concat())
86        - sym(b')')
87}
88
89fn name() -> Parser<u8, Vec<u8>> {
90    sym(b'/') * (none_of(b" \t\n\r\x0C()<>[]{}/%#") | sym(b'#') * hex_char()).repeat(0..)
91}
92
93fn integer() -> Parser<u8, i64> {
94    let number = one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..);
95    number.collect().convert(|v|String::from_utf8(v)).convert(|s|i64::from_str(&s))
96}
97
98fn number() -> Parser<u8, String> {
99    let number = one_of(b"+-").opt() +
100        ( (one_of(b"0123456789") - one_of(b"0123456789").repeat(0..).discard())
101            | (one_of(b"0123456789").repeat(1..) * sym(b'.') - one_of(b"0123456789").repeat(0..))
102            | sym(b'.') - one_of(b"0123456789").repeat(1..)
103        );
104    number.collect().convert(|v|String::from_utf8(v))
105}
106
107fn space() -> Parser<u8, ()> {
108    ( one_of(b" \t\n\r\0\x0C").repeat(1..).discard()
109    ).repeat(0..).discard()
110}
111
112// Dictionaries are not mentioned in the CMap spec but are produced by software like Cairo and Skia and supported other by readers
113fn dictionary() -> Parser<u8, HashMap<String, Value>> {
114    let entry = name() - space() + call(value);
115    let entries = seq(b"<<") * space() * entry.repeat(0..) - seq(b">>");
116    entries.map(|entries| entries.into_iter().fold(
117        HashMap::new(),
118        |mut dict: HashMap<String, Value>, (key, value)| { dict.insert(String::from_utf8(key).unwrap(), value); dict }
119    ))
120}
121
122fn hexadecimal_string() -> Parser<u8, Vec<u8>> {
123    sym(b'<') * hex_char().repeat(0..) - sym(b'>')
124}
125
126fn eol() -> Parser<u8, u8> {
127    sym(b'\r') * sym(b'\n') | sym(b'\n') | sym(b'\r')
128}
129
130fn value() -> Parser<u8, Value> {
131    ( seq(b"true").map(|_| Value::Boolean(true))
132    | seq(b"false").map(|_| Value::Boolean(false))
133    | integer().map(|v| Value::Integer(v))
134    | number().map(|v| Value::Number(v))
135    | name().map(|v| Value::Name(v))
136    | operator().map(|v| Value::Operator(v))
137    | literal_string().map(|v| Value::LiteralString(v))
138    | dictionary().map(|v| Value::Dictionary(v))
139    | hexadecimal_string().map(|v| Value::LiteralString(v))
140    | array().map(|v| Value::Array(v))
141    | procedure().map(|v| Value::Procedure(v))
142    ) - content_space()
143}
144
145
146
147fn array() -> Parser<u8, Vec<Value>> {
148    sym(b'[') * space() * call(value).repeat(0..) - sym(b']')
149}
150
151fn procedure() -> Parser<u8, Vec<Value>> {
152    sym(b'{') * space() * call(value).repeat(0..) - sym(b'}')
153}
154
155
156
157
158fn file() -> Parser<u8,Vec<Value>>
159{
160    ( comment().repeat(0..) * content_space() * value()).repeat(1..)
161}
162
163pub fn parse(input: &[u8]) -> Result<Vec<Value>, pom::Error> {
164    file().parse(&mut DataInput::new(input))
165}
166
167
168pub fn get_encoding_map(input: &[u8]) -> Result<HashMap<u32, Vec<u8>>, &'static str> {
169    let lexed = parse(&input).expect("failed to parse");
170
171    let mut i = 0;
172    let mut map = HashMap::new();
173    while i < lexed.len() {
174        match lexed[i] {
175            Value::Operator(ref o) => {
176                match o.as_ref() {
177                    "array" => {
178                        let count = if let &Value::Integer(ref c) = &lexed[i-1] { Ok(*c) } else { Err("array expected int") }?;
179                        let name = if let &Value::Name(ref n) = &lexed[i-2] { Ok(n) } else { Err("expected name") }?;
180                        i += 1;
181                        if name == b"Encoding" {
182                            while i < lexed.len() {
183                                match lexed[i] {
184                                    Value::Operator(ref o) => {
185                                        match o.as_ref() {
186                                            "put" => {
187                                                let name = if let &Value::Name(ref n) = &lexed[i-1] { Ok(n) } else { Err("expected name") }?;
188                                                let id = if let &Value::Integer(ref c) = &lexed[i-2] { Ok(*c) } else { Err("array expected int") }?;
189                                                map.insert(id as u32, name.clone());
190                                            }
191                                            "def" => {
192                                                break;
193                                            }
194                                            _ => {}
195                                        }
196                                    }
197                                    _ => {}
198                                }
199                                i += 1;
200                            }
201                        }
202                    }
203                    _ => { i += 1; }
204                }
205
206            }
207            _ => { i += 1; }
208        }
209    }
210    Ok(map)
211
212}
213
214
#[cfg(test)]
mod tests {
    use super::parse;
    use std::fs::File;
    use std::io::BufReader;
    use std::io::Read;

    /// Parses `input` and prints each parsed value, or the failed result.
    fn do_parse(input: &[u8]) {
        match parse(input) {
            Ok(values) => {
                for value in values {
                    println!("{:?}", value)
                }
            }
            failure => println!("{:?}", failure),
        }
    }

    /// Smoke test: reads the `example` file from the working directory and
    /// runs the parser over it.
    #[test]
    fn it_works() {
        let file = File::open("example").unwrap();
        let mut reader = BufReader::new(file);
        let mut contents = Vec::new();
        // A read failure must fail the test instead of parsing partial data.
        reader
            .read_to_end(&mut contents)
            .expect("failed to read example file");

        do_parse(&contents);
    }
}