glyphs_reader/
plist.rs

1use std::collections::BTreeMap;
2use std::{borrow::Cow, fmt::Debug};
3
4use kurbo::{Affine, Point};
5use ordered_float::OrderedFloat;
6
7use smol_str::SmolStr;
8
/// A plist dictionary
///
/// Keys are [`SmolStr`]s and values are arbitrary [`Plist`] values; the
/// backing [`BTreeMap`] keeps entries ordered by key.
pub type Dictionary = BTreeMap<SmolStr, Plist>;
11
/// An array of plist values
///
/// Elements may be of any [`Plist`] variant, including nested arrays.
pub type Array = Vec<Plist>;
14
/// An enum representing a property list.
///
/// Each variant corresponds to one kind of value produced by the parser in
/// this module.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Plist {
    /// A `{ key = value; ... }` dictionary.
    Dictionary(Dictionary),
    /// A `( value, ... )` array.
    Array(Array),
    /// A quoted string, or an unquoted atom that was not parsed as a number.
    String(String),
    /// An atom that parsed as a signed 64-bit integer.
    Integer(i64),
    /// An atom that parsed as a float. `OrderedFloat` is what allows this
    /// enum to derive `Eq` and `Hash`.
    Float(OrderedFloat<f64>),
    /// Raw bytes written as `<hexdigits>`.
    Data(Vec<u8>),
}
25
26#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
27pub enum Error {
28    #[error("Unexpected character '{0}'")]
29    UnexpectedChar(char),
30    #[error("Unterminated string")]
31    UnclosedString,
32    #[error("Unterminated data block")]
33    UnclosedData,
34    #[error("Data block did not contain valid paired hex digits")]
35    BadData,
36    #[error("Unknown escape code")]
37    UnknownEscape,
38    #[error("Invalid unicode escape sequence: '{0}'")]
39    InvalidUnicodeEscape(String),
40    #[error("Expected string, found '{token_name}")]
41    NotAString { token_name: &'static str },
42    #[error("Missing '='")]
43    ExpectedEquals,
44    #[error("Missing ','")]
45    ExpectedComma,
46    #[error("Missing ';'")]
47    ExpectedSemicolon,
48    #[error("Missing '{{'")]
49    ExpectedOpenBrace,
50    #[error("Missing '}}'")]
51    ExpectedCloseBrace,
52    #[error("Missing '('")]
53    ExpectedOpenParen,
54    #[error("Missing ')'")]
55    ExpectedCloseParen,
56    #[error("Expected character '{0}'")]
57    ExpectedChar(char),
58    #[error("Expected numeric value")]
59    ExpectedNumber,
60    #[error("Expected string value")]
61    ExpectedString,
62    #[error("Expected '{expected}', found '{found}'")]
63    UnexpectedDataType {
64        expected: &'static str,
65        found: &'static str,
66    },
67    #[error("Unexpected token '{name}'")]
68    UnexpectedToken { name: &'static str },
69    #[error("Expected {value_type}, found '{actual}'")]
70    UnexpectedNumberOfValues {
71        value_type: &'static str,
72        actual: usize,
73    },
74    #[error("parsing failed: '{0}'")]
75    Parse(String),
76}
77
/// A single lexical token produced by [`Token::lex`].
#[derive(Debug, PartialEq)]
pub(crate) enum Token<'a> {
    /// The end of the input was reached (after skipping whitespace).
    Eof,
    /// A `{` character.
    OpenBrace,
    /// A `(` character.
    OpenParen,
    /// The decoded bytes of a `<hexdigits>` block.
    Data(Vec<u8>),
    /// The contents of a double-quoted string; borrowed from the input
    /// unless escape sequences had to be decoded.
    String(Cow<'a, str>),
    /// An unquoted run of atom characters, borrowed from the input.
    Atom(&'a str),
}
87
/// Returns `true` for bytes that can appear in a numeric literal:
/// ASCII digits, `.` and `-`.
fn is_numeric(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'.' | b'-')
}
91
92fn is_alnum(b: u8) -> bool {
93    // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f50813e28a4ec09954ffa359e6f/CFOldStylePList.c#L79
94    is_numeric(b)
95        || b.is_ascii_uppercase()
96        || b.is_ascii_lowercase()
97        || b == b'_'
98        || b == b'$'
99        || b == b'/'
100        || b == b':'
101        || b == b'.'
102        || b == b'-'
103}
104
105// Used for serialization; make sure UUID's get quoted
106fn is_alnum_strict(b: u8) -> bool {
107    is_alnum(b) && b != b'-'
108}
109
/// Returns `true` for ASCII digits and the uppercase hex letters `A`-`F`.
fn is_hex_upper(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'F')
}
113
/// Returns `true` for space, tab, carriage return and line feed.
fn is_ascii_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\r' | b'\n')
}
117
118fn numeric_ok(s: &str) -> bool {
119    let s = s.as_bytes();
120    if s.is_empty() {
121        return false;
122    }
123    let s = if s.len() > 1 && (*s.first().unwrap(), *s.last().unwrap()) == (b'"', b'"') {
124        &s[1..s.len()]
125    } else {
126        s
127    };
128    if s.iter().all(|&b| is_hex_upper(b)) && !s.iter().all(|&b| b.is_ascii_digit()) {
129        return false;
130    }
131    if s.len() > 1 && s[0] == b'0' {
132        return !s.iter().all(|&b| b.is_ascii_digit());
133    }
134    // Prevent parsing of "infinity", "inf", "nan" as numbers, we
135    // want to keep them as strings (e.g. glyphname)
136    // https://doc.rust-lang.org/std/primitive.f64.html#grammar
137    if s.eq_ignore_ascii_case(b"infinity")
138        || s.eq_ignore_ascii_case(b"inf")
139        || s.eq_ignore_ascii_case(b"nan")
140    {
141        return false;
142    }
143    true
144}
145
146fn skip_ws(s: &str, mut ix: usize) -> usize {
147    while ix < s.len() && is_ascii_whitespace(s.as_bytes()[ix]) {
148        ix += 1;
149    }
150    ix
151}
152
153fn escape_string(buf: &mut String, s: &str) {
154    if !s.is_empty() && s.as_bytes().iter().all(|&b| is_alnum_strict(b)) {
155        buf.push_str(s);
156    } else {
157        buf.push('"');
158        let mut start = 0;
159        let mut ix = start;
160        while ix < s.len() {
161            let b = s.as_bytes()[ix];
162            match b {
163                b'"' | b'\\' => {
164                    buf.push_str(&s[start..ix]);
165                    buf.push('\\');
166                    start = ix;
167                }
168                _ => (),
169            }
170            ix += 1;
171        }
172        buf.push_str(&s[start..]);
173        buf.push('"');
174    }
175}
176
177impl Plist {
178    pub fn parse(s: &str) -> Result<Plist, Error> {
179        let (plist, _ix) = Plist::parse_rec(s, 0)?;
180        // TODO: check that we're actually at eof
181        Ok(plist)
182    }
183
184    fn name(&self) -> &'static str {
185        match self {
186            Plist::Array(..) => "array",
187            Plist::Dictionary(..) => "dictionary",
188            Plist::Float(..) => "float",
189            Plist::Integer(..) => "integer",
190            Plist::String(..) => "string",
191            Plist::Data(..) => "data",
192        }
193    }
194
195    pub fn get(&self, key: &str) -> Option<&Plist> {
196        match self {
197            Plist::Dictionary(d) => d.get(key),
198            _ => None,
199        }
200    }
201
202    pub fn as_dict(&self) -> Option<&BTreeMap<SmolStr, Plist>> {
203        match self {
204            Plist::Dictionary(d) => Some(d),
205            _ => None,
206        }
207    }
208
209    pub fn as_array(&self) -> Option<&[Plist]> {
210        match self {
211            Plist::Array(a) => Some(a),
212            _ => None,
213        }
214    }
215
216    pub fn as_str(&self) -> Option<&str> {
217        match self {
218            Plist::String(s) => Some(s),
219            _ => None,
220        }
221    }
222
223    pub fn as_i64(&self) -> Option<i64> {
224        match self {
225            Plist::Integer(i) => Some(*i),
226            Plist::String(raw) => raw.parse().ok(),
227            _ => None,
228        }
229    }
230
231    pub fn as_f64(&self) -> Option<f64> {
232        match self {
233            Plist::Integer(i) => Some(*i as f64),
234            Plist::Float(f) => Some((*f).into_inner()),
235            Plist::String(raw) => raw.parse().ok(),
236            _ => None,
237        }
238    }
239
240    pub fn expect_dict(self) -> Result<Dictionary, Error> {
241        match self {
242            Plist::Dictionary(dict) => Ok(dict),
243            _other => Err(Error::UnexpectedDataType {
244                expected: "dictionary",
245                found: _other.name(),
246            }),
247        }
248    }
249
250    pub fn expect_array(self) -> Result<Array, Error> {
251        match self {
252            Plist::Array(array) => Ok(array),
253            _other => Err(Error::UnexpectedDataType {
254                expected: "array",
255                found: _other.name(),
256            }),
257        }
258    }
259
260    pub fn expect_string(self) -> Result<String, Error> {
261        match self {
262            Plist::String(string) => Ok(string),
263            _other => Err(Error::UnexpectedDataType {
264                expected: "string",
265                found: _other.name(),
266            }),
267        }
268    }
269
270    pub fn expect_data(self) -> Result<Vec<u8>, Error> {
271        match self {
272            Plist::Data(bytes) => Ok(bytes),
273            _other => Err(Error::UnexpectedDataType {
274                expected: "data",
275                found: _other.name(),
276            }),
277        }
278    }
279
280    fn parse_rec(s: &str, ix: usize) -> Result<(Plist, usize), Error> {
281        let (tok, mut ix) = Token::lex(s, ix)?;
282        match tok {
283            Token::Atom(s) => Ok((Plist::parse_atom(s), ix)),
284            Token::String(s) => Ok((Plist::String(s.into()), ix)),
285            Token::Data(bytes) => Ok((Plist::Data(bytes), ix)),
286            Token::OpenBrace => {
287                let mut dict = BTreeMap::new();
288                loop {
289                    if let Some(ix) = Token::expect(s, ix, b'}') {
290                        return Ok((Plist::Dictionary(dict), ix));
291                    }
292                    let (key, next) = Token::lex(s, ix)?;
293                    let key_str = Token::try_into_smolstr(key)?;
294                    let next = Token::expect(s, next, b'=');
295                    if next.is_none() {
296                        return Err(Error::ExpectedEquals);
297                    }
298                    let (val, next) = Self::parse_rec(s, next.unwrap())?;
299                    dict.insert(key_str, val);
300                    if let Some(next) = Token::expect(s, next, b';') {
301                        ix = next;
302                    } else {
303                        return Err(Error::ExpectedSemicolon);
304                    }
305                }
306            }
307            Token::OpenParen => {
308                let mut list = Vec::new();
309                loop {
310                    if let Some(ix) = Token::expect(s, ix, b')') {
311                        return Ok((Plist::Array(list), ix));
312                    }
313                    let (val, next) = Self::parse_rec(s, ix)?;
314                    list.push(val);
315                    if let Some(ix) = Token::expect(s, next, b')') {
316                        return Ok((Plist::Array(list), ix));
317                    }
318                    if let Some(next) = Token::expect(s, next, b',') {
319                        ix = next;
320                        if let Some(next) = Token::expect(s, next, b')') {
321                            return Ok((Plist::Array(list), next));
322                        }
323                    } else {
324                        return Err(Error::ExpectedComma);
325                    }
326                }
327            }
328            _ => Err(Error::UnexpectedToken { name: tok.name() }),
329        }
330    }
331
332    fn parse_atom(s: &str) -> Plist {
333        if numeric_ok(s) {
334            if let Ok(num) = s.parse() {
335                return Plist::Integer(num);
336            }
337            if let Ok(num) = s.parse() {
338                return Plist::Float(num);
339            }
340        }
341        Plist::String(s.into())
342    }
343
344    #[allow(clippy::inherent_to_string, unused)]
345    pub fn to_string(&self) -> String {
346        let mut s = String::new();
347        self.push_to_string(&mut s);
348        s
349    }
350
351    fn push_to_string(&self, s: &mut String) {
352        match self {
353            Plist::Array(a) => {
354                s.push('(');
355                let mut delim = "\n";
356                for el in a {
357                    s.push_str(delim);
358                    el.push_to_string(s);
359                    delim = ",\n";
360                }
361                s.push_str("\n)");
362            }
363            Plist::Dictionary(a) => {
364                s.push_str("{\n");
365                let mut keys: Vec<_> = a.keys().collect();
366                keys.sort();
367                for k in keys {
368                    let el = &a[k];
369                    // TODO: quote if needed?
370                    escape_string(s, k);
371                    s.push_str(" = ");
372                    el.push_to_string(s);
373                    s.push_str(";\n");
374                }
375                s.push('}');
376            }
377            Plist::String(st) => escape_string(s, st),
378            Plist::Integer(i) => {
379                s.push_str(&format!("{i}"));
380            }
381            Plist::Float(f) => {
382                s.push_str(&format!("{f}"));
383            }
384            Plist::Data(data) => {
385                s.push('<');
386                for byte in data {
387                    s.extend(hex_digits_for_byte(*byte))
388                }
389                s.push('>');
390            }
391        }
392    }
393}
394
395impl FromPlist for Plist {
396    fn parse(tokenizer: &mut Tokenizer) -> Result<Self, Error> {
397        let Tokenizer { content, idx } = tokenizer;
398        let (val, end_idx) = Self::parse_rec(content, *idx)?;
399        *idx = end_idx;
400        Ok(val)
401    }
402}
403
404impl Default for Plist {
405    fn default() -> Self {
406        // kind of arbitrary but seems okay
407        Plist::Array(Vec::new())
408    }
409}
410
/// Returns the two lowercase hex digits for `byte`, high nibble first.
fn hex_digits_for_byte(byte: u8) -> [char; 2] {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let high = DIGITS[usize::from(byte >> 4)];
    let low = DIGITS[usize::from(byte & 0x0f)];
    [char::from(high), char::from(low)]
}
422
423fn byte_from_hex(hex: [u8; 2]) -> Result<u8, Error> {
424    fn hex_digit_to_byte(digit: u8) -> Result<u8, Error> {
425        match digit {
426            b'0'..=b'9' => Ok(digit - b'0'),
427            b'a'..=b'f' => Ok(digit - b'a' + 10),
428            b'A'..=b'F' => Ok(digit - b'A' + 10),
429            _ => Err(Error::BadData),
430        }
431    }
432    let maj = hex_digit_to_byte(hex[0])? << 4;
433    let min = hex_digit_to_byte(hex[1])?;
434    Ok(maj | min)
435}
436
437impl<'a> Token<'a> {
438    fn lex(s: &'a str, ix: usize) -> Result<(Token<'a>, usize), Error> {
439        let start = skip_ws(s, ix);
440        if start == s.len() {
441            return Ok((Token::Eof, start));
442        }
443        let b = s.as_bytes()[start];
444        match b {
445            b'{' => Ok((Token::OpenBrace, start + 1)),
446            b'(' => Ok((Token::OpenParen, start + 1)),
447            b'<' => {
448                let data_start = start + 1;
449                let data_end = data_start
450                    + s.as_bytes()[data_start..]
451                        .iter()
452                        .position(|b| *b == b'>')
453                        .ok_or(Error::UnclosedData)?;
454                let chunks = s.as_bytes()[data_start..data_end].chunks_exact(2);
455                if !chunks.remainder().is_empty() {
456                    return Err(Error::BadData);
457                }
458                let data = chunks
459                    .map(|x| byte_from_hex(x.try_into().unwrap()))
460                    .collect::<Result<_, _>>()?;
461                Ok((Token::Data(data), data_end + 1))
462            }
463            b'"' => {
464                let mut ix = start + 1;
465                let mut cow_start = ix;
466                let mut buf = String::new();
467                while ix < s.len() {
468                    let b = s.as_bytes()[ix];
469                    match b {
470                        b'"' => {
471                            // End of string
472                            let string = if buf.is_empty() {
473                                s[cow_start..ix].into()
474                            } else {
475                                buf.push_str(&s[cow_start..ix]);
476                                buf.into()
477                            };
478                            return Ok((Token::String(string), ix + 1));
479                        }
480                        b'\\' => {
481                            buf.push_str(&s[cow_start..ix]);
482                            if ix + 1 == s.len() {
483                                return Err(Error::UnclosedString);
484                            }
485                            let (c, len) = parse_escape(&s[ix..])?;
486                            buf.push(c);
487                            ix += len;
488                            cow_start = ix;
489                        }
490                        _ => ix += 1,
491                    }
492                }
493                Err(Error::UnclosedString)
494            }
495            _ => {
496                if is_alnum(b) {
497                    let mut ix = start + 1;
498                    while ix < s.len() {
499                        if !is_alnum(s.as_bytes()[ix]) {
500                            break;
501                        }
502                        ix += 1;
503                    }
504                    Ok((Token::Atom(&s[start..ix]), ix))
505                } else {
506                    Err(Error::UnexpectedChar(s[start..].chars().next().unwrap()))
507                }
508            }
509        }
510    }
511
512    fn try_into_smolstr(self) -> Result<SmolStr, Error> {
513        match self {
514            Token::Atom(s) => Ok(s.into()),
515            Token::String(s) => Ok(s.into()),
516            _ => Err(Error::NotAString {
517                token_name: self.name(),
518            }),
519        }
520    }
521
522    pub fn as_str(&self) -> Option<&str> {
523        match self {
524            Token::Atom(s) => Some(*s),
525            Token::String(s) => Some(s),
526            Token::Eof => None,
527            Token::OpenBrace => None,
528            Token::OpenParen => None,
529            Token::Data(_) => None,
530        }
531    }
532
533    fn expect(s: &str, ix: usize, delim: u8) -> Option<usize> {
534        let ix = skip_ws(s, ix);
535        if ix < s.len() {
536            let b = s.as_bytes()[ix];
537            if b == delim {
538                return Some(ix + 1);
539            }
540        }
541        None
542    }
543
544    pub(crate) fn name(&self) -> &'static str {
545        match self {
546            Token::Atom(..) => "Atom",
547            Token::String(..) => "String",
548            Token::Eof => "Eof",
549            Token::OpenBrace => "OpenBrace",
550            Token::OpenParen => "OpenParen",
551            Token::Data(_) => "Data",
552        }
553    }
554}
555
556fn parse_escape(s: &str) -> Result<(char, usize), Error> {
557    // checked before this is called
558    assert!(s.starts_with('\\') && s.len() > 1);
559
560    let mut ix = 1;
561    let b = s.as_bytes()[ix];
562    match b {
563        b'"' | b'\\' => Ok((b as _, 2)),
564        b'n' => Ok(('\n', 2)),
565        b'r' => Ok(('\r', 2)),
566        b't' => Ok(('\t', 2)),
567        // unicode escapes
568        b'U' if s.len() >= 3 => {
569            // here we will parse up to 4 hexdigits:
570            // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f5/CFOldStylePList.c#L150C2-L150C6
571            ix += 1;
572            let (val, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
573            ix += len;
574            let result = if !is_surrogate(val) || !s.as_bytes()[ix..].starts_with(b"\\U") {
575                // we can't cast! this is a utf-16 value, not a codepoint
576                char::decode_utf16([val]).next()
577            } else {
578                ix += 2;
579                let (val2, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
580                ix += len;
581                char::decode_utf16([val, val2]).next()
582            };
583            result
584                .transpose()
585                .ok()
586                .flatten()
587                .ok_or_else(|| Error::InvalidUnicodeEscape(s[..ix].to_string()))
588                .map(|c| (c, ix))
589        }
590        b'0'..=b'3' if s.len() >= 4 => {
591            // octal escape
592            let b1 = s.as_bytes()[ix + 1];
593            let b2 = s.as_bytes()[ix + 2];
594            if (b'0'..=b'7').contains(&b1) && (b'0'..=b'7').contains(&b2) {
595                let oct = (b - b'0') * 64 + (b1 - b'0') * 8 + (b2 - b'0');
596                ix += 3;
597                Ok((oct as _, ix))
598            } else {
599                Err(Error::UnknownEscape)
600            }
601        }
602        _ => Err(Error::UnknownEscape),
603    }
604}
605
/// Returns `true` if `val` lies in the UTF-16 surrogate range
/// `0xD800..=0xDFFF`.
fn is_surrogate(val: u16) -> bool {
    (0xD800..=0xDFFF).contains(&val)
}
609
610// parse up to four hex digits as a u16
611// returns an error if the first byte is not a valid ascii hex digit,
612// then will read up to four bytes
613fn parse_hex_digit(bytes: &[u8]) -> Result<(u16, usize), Error> {
614    match bytes {
615        &[] => Err(Error::UnknownEscape),
616        &[one, ..] if !one.is_ascii_hexdigit() => Err(Error::UnknownEscape),
617        other => Ok(other
618            .iter()
619            .take(4)
620            .map_while(|b| (*b as char).to_digit(16).map(|x| x as u16))
621            .fold((0u16, 0usize), |(num, len), hexval| {
622                ((num << 4) + hexval, len + 1)
623            })),
624    }
625}
626
627impl From<String> for Plist {
628    fn from(x: String) -> Plist {
629        Plist::String(x)
630    }
631}
632
633impl From<i64> for Plist {
634    fn from(x: i64) -> Plist {
635        Plist::Integer(x)
636    }
637}
638
639impl From<f64> for Plist {
640    fn from(x: f64) -> Plist {
641        Plist::Float(x.into())
642    }
643}
644
645impl From<Vec<Plist>> for Plist {
646    fn from(x: Vec<Plist>) -> Plist {
647        Plist::Array(x)
648    }
649}
650
651impl From<Dictionary> for Plist {
652    fn from(x: Dictionary) -> Plist {
653        Plist::Dictionary(x)
654    }
655}
656
657pub(crate) fn parse_int(s: &str) -> Result<i64, Error> {
658    if numeric_ok(s) {
659        if let Ok(num) = s.parse::<i64>() {
660            return Ok(num);
661        }
662        if let Ok(num) = s.parse::<f64>() {
663            return Ok(num as i64);
664        }
665    }
666    Err(Error::ExpectedNumber)
667}
668
669pub(crate) fn parse_float(s: &str) -> Result<f64, Error> {
670    if numeric_ok(s)
671        && let Ok(num) = s.parse::<f64>()
672    {
673        return Ok(num);
674    }
675    Err(Error::ExpectedNumber)
676}
677
/// This type can be parsed from a Plist string
///
/// Implementors read their value from a [`Tokenizer`], advancing it past
/// whatever they consume.
pub trait FromPlist
where
    Self: Sized,
{
    /// Parses one value of this type at the tokenizer's current position.
    fn parse(tokenizer: &mut Tokenizer) -> Result<Self, Error>;

    /// Convenience entry point: parses a value from the start of `plist`
    /// using a fresh [`Tokenizer`].
    fn parse_plist(plist: &str) -> Result<Self, Error> {
        Tokenizer::new(plist).parse()
    }
}
689
690impl<T> FromPlist for Vec<T>
691where
692    T: FromPlist,
693{
694    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, crate::plist::Error> {
695        tokenizer.parse_delimited_vec(VecDelimiters::CSV_IN_PARENS)
696    }
697}
698
699impl<T: FromPlist> FromPlist for BTreeMap<SmolStr, T> {
700    fn parse(tokenizer: &mut Tokenizer) -> Result<Self, Error> {
701        tokenizer.parse_map()
702    }
703}
704
705impl<T> FromPlist for Option<T>
706where
707    T: FromPlist,
708{
709    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, crate::plist::Error> {
710        Ok(Some(tokenizer.parse()?))
711    }
712}
713
/// A cursor over plist text, used by [`FromPlist`] implementations.
pub struct Tokenizer<'a> {
    /// The complete text being parsed.
    content: &'a str,
    /// Byte offset into `content` of the next unread input.
    idx: usize,
}
718
719impl Debug for Tokenizer<'_> {
720    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
721        let start = self.idx;
722        let end = (start + 16).min(self.content.len());
723        f.debug_struct("Tokenizer")
724            .field("content", &&self.content[start..end])
725            .field("idx", &self.idx)
726            .finish()
727    }
728}
729
730impl<'a> Tokenizer<'a> {
731    pub fn new(content: &'a str) -> Tokenizer<'a> {
732        Tokenizer { content, idx: 0 }
733    }
734
735    pub(crate) fn peek(&mut self) -> Result<Token<'a>, Error> {
736        let (tok, _) = Token::lex(self.content, self.idx)?;
737        Ok(tok)
738    }
739
740    pub(crate) fn lex(&mut self) -> Result<Token<'a>, Error> {
741        let (tok, idx) = Token::lex(self.content, self.idx)?;
742        self.idx = idx;
743        Ok(tok)
744    }
745
746    pub(crate) fn eat(&mut self, delim: u8) -> Result<(), Error> {
747        let Some(idx) = Token::expect(self.content, self.idx, delim) else {
748            return Err(Error::ExpectedChar(delim as char));
749        };
750        self.idx = idx;
751        Ok(())
752    }
753
754    /// Jump over the next thing, regardless of whether it's simple (atom or string) or complex
755    /// (bracketed or braced construct)
756    ///
757    /// Named to match parse_rec.
758    pub(crate) fn skip_rec(&mut self) -> Result<(), Error> {
759        match self.lex()? {
760            Token::Atom(..) | Token::String(..) | Token::Data(..) => Ok(()),
761            Token::OpenBrace => loop {
762                if self.eat(b'}').is_ok() {
763                    return Ok(());
764                }
765                let key = self.lex()?;
766                Token::try_into_smolstr(key)?;
767                self.eat(b'=')?;
768                self.skip_rec()?;
769                self.eat(b';')?;
770            },
771            Token::OpenParen => {
772                if self.eat(b')').is_ok() {
773                    return Ok(());
774                }
775                loop {
776                    self.skip_rec()?;
777                    if self.eat(b')').is_ok() {
778                        return Ok(());
779                    }
780                    self.eat(b',')?;
781                    if self.eat(b')').is_ok() {
782                        return Ok(());
783                    }
784                }
785            }
786            other => Err(Error::UnexpectedToken { name: other.name() }),
787        }
788    }
789
790    pub(crate) fn parse_delimited_vec<T>(
791        &mut self,
792        delim: VecDelimiters,
793    ) -> Result<Vec<T>, crate::plist::Error>
794    where
795        T: FromPlist,
796    {
797        let mut list = Vec::new();
798        self.eat(delim.start)?;
799        loop {
800            if self.eat(delim.end).is_ok() {
801                return Ok(list);
802            }
803            list.push(self.parse()?);
804            if self.eat(delim.end).is_ok() {
805                return Ok(list);
806            }
807            self.eat(delim.sep)?;
808            // handle possible traliing separator
809            if self.eat(delim.end).is_ok() {
810                return Ok(list);
811            }
812        }
813    }
814
815    pub(crate) fn parse_map<T: FromPlist>(&mut self) -> Result<BTreeMap<SmolStr, T>, Error> {
816        self.eat(b'{')?;
817        let mut map = BTreeMap::new();
818        loop {
819            if self.eat(b'}').is_ok() {
820                break;
821            }
822            let key = self.parse::<SmolStr>()?;
823            self.eat(b'=')?;
824            map.insert(key, self.parse()?);
825            self.eat(b';')?;
826        }
827        Ok(map)
828    }
829
830    pub(crate) fn parse<T>(&mut self) -> Result<T, crate::plist::Error>
831    where
832        T: FromPlist,
833    {
834        T::parse(self)
835    }
836}
837
/// The three bytes that frame a list for
/// [`Tokenizer::parse_delimited_vec`].
pub(crate) struct VecDelimiters {
    /// Byte that opens the list.
    start: u8,
    /// Byte that closes the list.
    end: u8,
    /// Byte that separates consecutive elements.
    sep: u8,
}
843
impl VecDelimiters {
    /// Comma-separated values in parentheses: `(a, b, c)`.
    pub(crate) const CSV_IN_PARENS: VecDelimiters = VecDelimiters {
        start: b'(',
        end: b')',
        sep: b',',
    };
    /// Comma-separated values in braces: `{a, b, c}`.
    pub(crate) const CSV_IN_BRACES: VecDelimiters = VecDelimiters {
        start: b'{',
        end: b'}',
        sep: b',',
    };
}
856
857impl FromPlist for OrderedFloat<f64> {
858    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
859        let val: f64 = tokenizer.parse()?;
860        Ok(val.into())
861    }
862}
863
864impl FromPlist for f64 {
865    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
866        match tokenizer.lex()? {
867            Token::Atom(val) => parse_float(val),
868            Token::String(val) => parse_float(&val),
869            _ => Err(Error::ExpectedNumber),
870        }
871    }
872}
873
874impl FromPlist for i64 {
875    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
876        match tokenizer.lex()? {
877            Token::Atom(val) => parse_int(val),
878            Token::String(val) => parse_int(&val),
879            _ => Err(Error::ExpectedNumber),
880        }
881    }
882}
883
884impl FromPlist for bool {
885    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
886        match tokenizer.lex()? {
887            Token::Atom(val) => parse_int(val).map(|v| v == 1),
888            Token::String(val) => parse_int(&val).map(|v| v == 1),
889            _ => Err(Error::ExpectedNumber),
890        }
891    }
892}
893
894impl FromPlist for String {
895    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
896        match tokenizer.lex()? {
897            Token::Atom(val) => Ok(val.to_string()),
898            Token::String(val) => Ok(val.to_string()),
899            _ => Err(Error::ExpectedString),
900        }
901    }
902}
903
904impl FromPlist for SmolStr {
905    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
906        match tokenizer.lex()? {
907            Token::Atom(val) => Ok(val.into()),
908            Token::String(val) => Ok(val.into()),
909            _ => Err(Error::ExpectedString),
910        }
911    }
912}
913
914/// Hand-written because Glyphs 2 points don't look like Glyphs 3 points
915impl FromPlist for Point {
916    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
917        let delims = if let Token::OpenBrace = tokenizer.peek()? {
918            VecDelimiters::CSV_IN_BRACES
919        } else {
920            VecDelimiters::CSV_IN_PARENS
921        };
922        let coords: Vec<f64> = tokenizer.parse_delimited_vec(delims)?;
923        if coords.len() != 2 {
924            return Err(Error::Parse("wrong number of coords in point".to_string()));
925        }
926        Ok((coords[0], coords[1]).into())
927    }
928}
929
930/// Hand-written because it's a String that becomes a Thing
931impl FromPlist for Affine {
932    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
933        let tok = tokenizer.lex()?;
934        let raw = match &tok {
935            Token::Atom(val) => *val,
936            Token::String(val) => val,
937            _ => return Err(Error::ExpectedString),
938        };
939        let raw = &raw[1..raw.len() - 1];
940        let coords: Vec<f64> = raw.split(", ").map(|c| c.parse().unwrap()).collect();
941        Ok(Affine::new([
942            coords[0], coords[1], coords[2], coords[3], coords[4], coords[5],
943        ]))
944    }
945}
946
947#[cfg(test)]
948mod tests {
949    use ascii_plist_derive::FromPlist;
950    use std::collections::BTreeMap;
951
952    use super::*;
953
954    #[test]
955    fn parse_unquoted_strings() {
956        let contents = r#"
957        {
958            name = "UFO Filename";
959            value1 = ../../build/instance_ufos/Testing_Rg.ufo;
960            value2 = _;
961            value3 = $;
962            value4 = /;
963            value5 = :;
964            value6 = .;
965            value7 = -;
966        }
967        "#;
968
969        let plist = Plist::parse(contents).unwrap();
970        let plist_expected = Plist::Dictionary(BTreeMap::from_iter([
971            ("name".into(), Plist::String("UFO Filename".into())),
972            (
973                "value1".into(),
974                Plist::String("../../build/instance_ufos/Testing_Rg.ufo".into()),
975            ),
976            ("value2".into(), Plist::String("_".into())),
977            ("value3".into(), Plist::String("$".into())),
978            ("value4".into(), Plist::String("/".into())),
979            ("value5".into(), Plist::String(":".into())),
980            ("value6".into(), Plist::String(".".into())),
981            ("value7".into(), Plist::String("-".into())),
982        ]));
983        assert_eq!(plist, plist_expected);
984    }
985
986    #[test]
987    fn parse_int_map() {
988        let contents = r#"
989        {
990            foo = 5;
991            bar = 32;
992        }"#;
993
994        let foobar = BTreeMap::<SmolStr, i64>::parse_plist(contents).unwrap();
995        assert_eq!(foobar.get("foo"), Some(&5));
996        assert_eq!(foobar.get("bar"), Some(&32));
997    }
998
999    #[test]
1000    #[should_panic(expected = "ExpectedNumber")]
1001    fn parse_map_fail() {
1002        let contents = r#"
1003        {
1004            foo = hello;
1005            bar = 32;
1006        }"#;
1007
1008        let _foobar = BTreeMap::<SmolStr, i64>::parse_plist(contents).unwrap();
1009    }
1010
1011    #[test]
1012    fn parse_binary_data() {
1013        let contents = r#"
1014        {
1015            mydata = <deadbeef>;
1016        }
1017            "#;
1018        let plist = Plist::parse(contents).unwrap();
1019        let data = plist.get("mydata").unwrap().clone().expect_data().unwrap();
1020        assert_eq!(data, [0xde, 0xad, 0xbe, 0xef])
1021    }
1022
1023    #[test]
1024    fn hex_to_ascii() {
1025        assert_eq!(hex_digits_for_byte(0x01), ['0', '1']);
1026        assert_eq!(hex_digits_for_byte(0x00), ['0', '0']);
1027        assert_eq!(hex_digits_for_byte(0xff), ['f', 'f']);
1028        assert_eq!(hex_digits_for_byte(0xf0), ['f', '0']);
1029        assert_eq!(hex_digits_for_byte(0x0f), ['0', 'f']);
1030    }
1031
1032    #[test]
1033    fn ascii_to_hex() {
1034        assert_eq!(byte_from_hex([b'0', b'1']), Ok(0x01));
1035        assert_eq!(byte_from_hex([b'0', b'0']), Ok(0x00));
1036        assert_eq!(byte_from_hex([b'f', b'f']), Ok(0xff));
1037        assert_eq!(byte_from_hex([b'f', b'0']), Ok(0xf0));
1038        assert_eq!(byte_from_hex([b'0', b'f']), Ok(0x0f));
1039    }
1040
1041    // in arrays the trailing comma is optional but supported
1042    #[test]
1043    fn array_optional_trailing_comma() {
1044        let _ = env_logger::builder().is_test(true).try_init();
1045        // we include a list that is not parsed in derive because that
1046        // takes a second codepath.
1047        let trailing = r#"
1048        {
1049            items = (
1050                "a",
1051                "b",
1052            );
1053            skip_me = (
1054                "c",
1055                "d",
1056            );
1057        }"#;
1058
1059        let no_trailing = r#"
1060        {
1061            items = (
1062                "a",
1063                "b"
1064            );
1065            skip_me = (
1066                "c",
1067                "d"
1068            );
1069        }"#;
1070
1071        #[derive(Default, FromPlist)]
1072        struct TestMe {
1073            items: Vec<String>,
1074        }
1075
1076        let trailing = TestMe::parse_plist(trailing).unwrap();
1077        assert_eq!(trailing.items, ["a", "b"]);
1078        let no_trailing = TestMe::parse_plist(no_trailing).unwrap();
1079        assert_eq!(trailing.items, no_trailing.items);
1080    }
1081
1082    #[test]
1083    fn parse_to_plist_type() {
1084        let plist_str = r#"
1085        {
1086            name = "meta";
1087            value = (
1088                {
1089                    data = latn;
1090                    tag = dlng;
1091                    num = 5;
1092                },
1093                {
1094                    data = "latn,cyrl";
1095                    tag = slng;
1096                    num = -3.0;
1097                }
1098            );
1099        }"#;
1100
1101        let plist = Plist::parse_plist(plist_str).unwrap();
1102        let root = plist.expect_dict().unwrap();
1103        assert_eq!(root.get("name").unwrap().as_str(), Some("meta"));
1104        let value = root.get("value").unwrap().as_array().unwrap();
1105        assert_eq!(value.len(), 2);
1106        let first = value[0].as_dict().unwrap();
1107        assert_eq!(first.get("data").and_then(Plist::as_str), Some("latn"));
1108        assert_eq!(first.get("tag").and_then(Plist::as_str), Some("dlng"));
1109        assert_eq!(first.get("num").and_then(Plist::as_i64), Some(5));
1110        let second = value[1].as_dict().unwrap();
1111        assert_eq!(
1112            second.get("data").and_then(Plist::as_str),
1113            Some("latn,cyrl")
1114        );
1115        assert_eq!(second.get("tag").and_then(Plist::as_str), Some("slng"));
1116        assert_eq!(second.get("num").and_then(Plist::as_f64), Some(-3.0));
1117    }
1118
1119    #[test]
1120    fn parse_hex_digit_sanity() {
1121        assert_eq!(parse_hex_digit(b"2019"), Ok((0x2019, 4)));
1122        assert_eq!(parse_hex_digit(b"201"), Ok((0x201, 3)));
1123        assert_eq!(parse_hex_digit(b"201z"), Ok((0x201, 3)));
1124        assert_eq!(parse_hex_digit(b"fu"), Ok((0xf, 1)));
1125        assert_eq!(parse_hex_digit(b"z"), Err(Error::UnknownEscape));
1126    }
1127
    // partially borrowed from Python: https://github.com/fonttools/openstep-plist/blob/2fa77b267d67/tests/test_parser.py#L135
1129    #[test]
1130    fn escape_parsing_good() {
1131        for (input, expected, expected_len) in [
1132            ("\\n", '\n', 2),                    // octal escape
1133            ("\\012", '\n', 4),                  // octal escape
1134            ("\\U2019", '\u{2019}', 6),          // unicode escape (’)
1135            ("\\UD83D\\UDCA9", '\u{1F4A9}', 12), // surrogate pair (💩)
1136        ] {
1137            let (result, len) = parse_escape(input).unwrap();
1138            {
1139                assert_eq!((result, len), (expected, expected_len));
1140            }
1141        }
1142    }
1143
1144    #[test]
1145    fn escape_parsing_bad() {
1146        assert_eq!(
1147            parse_escape("\\UD83D"),
1148            Err(Error::InvalidUnicodeEscape("\\UD83D".to_string()))
1149        );
1150    }
1151
1152    #[test]
1153    fn parsing_escape_in_string() {
1154        for (input, expected, expected_len) in [
1155            ("\"a\\012b\"", "a\nb", 8),
1156            ("\"a\\nb\"", "a\nb", 6),
1157            ("\"a\\U000Ab\"", "a\nb", 10),
1158        ] {
1159            let (token, len) = Token::lex(input, 0).unwrap();
1160            assert_eq!(token, Token::String(Cow::Borrowed(expected)));
1161            assert_eq!(len, expected_len);
1162        }
1163    }
1164
1165    #[test]
1166    fn parse_quoted_and_unquoted() {
1167        assert_eq!(
1168            (
1169                Ok(1),
1170                Ok(1),
1171                Ok(true),
1172                Ok(true),
1173                Ok(false),
1174                Ok(false),
1175                Ok(1.0),
1176                Ok(1.0)
1177            ),
1178            (
1179                Tokenizer::new("1").parse::<i64>(),
1180                Tokenizer::new("\"1\"").parse::<i64>(),
1181                Tokenizer::new("1").parse::<bool>(),
1182                Tokenizer::new("\"1\"").parse::<bool>(),
1183                Tokenizer::new("0").parse::<bool>(),
1184                Tokenizer::new("\"0\"").parse::<bool>(),
1185                Tokenizer::new("1").parse::<f64>(),
1186                Tokenizer::new("\"1\"").parse::<f64>(),
1187            )
1188        );
1189    }
1190}