glyphs_reader/
plist.rs

1use std::collections::BTreeMap;
2use std::{borrow::Cow, fmt::Debug};
3
4use kurbo::{Affine, Point};
5use ordered_float::OrderedFloat;
6
7use smol_str::SmolStr;
8
/// A plist dictionary
///
/// Keys are [`SmolStr`]s held in a [`BTreeMap`], so iteration visits
/// entries in sorted key order.
pub type Dictionary = BTreeMap<SmolStr, Plist>;
11
/// An array of plist values
///
/// Elements may be of mixed [`Plist`] variants.
pub type Array = Vec<Plist>;
14
/// An enum representing a property list.
///
/// A value is either a container (dictionary or array) or a scalar
/// (string, integer, float or raw bytes).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Plist {
    /// A `{ key = value; ... }` mapping.
    Dictionary(Dictionary),
    /// A `( value, ... )` sequence.
    Array(Array),
    /// A quoted string, or an unquoted atom that is not a number.
    String(String),
    /// An atom that parses as an `i64`.
    Integer(i64),
    /// An atom that parses as a float; wrapped in [`OrderedFloat`] so the
    /// enum can derive `Eq` and `Hash`.
    Float(OrderedFloat<f64>),
    /// Raw bytes, written as paired hex digits between `<` and `>`.
    Data(Vec<u8>),
}
25
26#[derive(Clone, Debug, PartialEq, Eq, thiserror::Error)]
27pub enum Error {
28    #[error("Unexpected character '{0}'")]
29    UnexpectedChar(char),
30    #[error("Unterminated string")]
31    UnclosedString,
32    #[error("Unterminated data block")]
33    UnclosedData,
34    #[error("Data block did not contain valid paired hex digits")]
35    BadData,
36    #[error("Unknown escape code")]
37    UnknownEscape,
38    #[error("Invalid unicode escape sequence: '{0}'")]
39    InvalidUnicodeEscape(String),
40    #[error("Expected string, found '{token_name}")]
41    NotAString { token_name: &'static str },
42    #[error("Missing '='")]
43    ExpectedEquals,
44    #[error("Missing ','")]
45    ExpectedComma,
46    #[error("Missing ';'")]
47    ExpectedSemicolon,
48    #[error("Missing '{{'")]
49    ExpectedOpenBrace,
50    #[error("Missing '}}'")]
51    ExpectedCloseBrace,
52    #[error("Missing '('")]
53    ExpectedOpenParen,
54    #[error("Missing ')'")]
55    ExpectedCloseParen,
56    #[error("Expected character '{0}'")]
57    ExpectedChar(char),
58    #[error("Expected numeric value")]
59    ExpectedNumber,
60    #[error("Expected string value")]
61    ExpectedString,
62    #[error("Expected '{expected}', found '{found}'")]
63    UnexpectedDataType {
64        expected: &'static str,
65        found: &'static str,
66    },
67    #[error("Unexpected token '{name}'")]
68    UnexpectedToken { name: &'static str },
69    #[error("Expected {value_type}, found '{actual}'")]
70    UnexpectedNumberOfValues {
71        value_type: &'static str,
72        actual: usize,
73    },
74    #[error("parsing failed: '{0}'")]
75    Parse(String),
76}
77
/// A single lexical token of the plist text format.
///
/// Tokens borrow from the input where possible; see [`Token::lex`].
#[derive(Debug, PartialEq)]
pub(crate) enum Token<'a> {
    /// Only whitespace (or nothing) remained in the input.
    Eof,
    /// A `{`, which opens a dictionary.
    OpenBrace,
    /// A `(`, which opens an array.
    OpenParen,
    /// The decoded bytes of a `<hex...>` data block.
    Data(Vec<u8>),
    /// A quoted string; owned only when escapes had to be decoded.
    String(Cow<'a, str>),
    /// An unquoted run of `is_alnum` bytes.
    Atom(&'a str),
}
87
/// Returns `true` for bytes that may appear in a number: ASCII digits,
/// `.` and `-`.
fn is_numeric(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'.' | b'-')
}
91
92fn is_alnum(b: u8) -> bool {
93    // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f50813e28a4ec09954ffa359e6f/CFOldStylePList.c#L79
94    is_numeric(b)
95        || b.is_ascii_uppercase()
96        || b.is_ascii_lowercase()
97        || b == b'_'
98        || b == b'$'
99        || b == b'/'
100        || b == b':'
101        || b == b'.'
102        || b == b'-'
103}
104
105// Used for serialization; make sure UUID's get quoted
106fn is_alnum_strict(b: u8) -> bool {
107    is_alnum(b) && b != b'-'
108}
109
/// Returns `true` for ASCII digits and the upper-case hex letters `A`-`F`.
fn is_hex_upper(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'F')
}
113
/// Returns `true` for space, tab, carriage return and line feed.
fn is_ascii_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\r' | b'\n')
}
117
118fn numeric_ok(s: &str) -> bool {
119    let s = s.as_bytes();
120    if s.is_empty() {
121        return false;
122    }
123    let s = if s.len() > 1 && (*s.first().unwrap(), *s.last().unwrap()) == (b'"', b'"') {
124        &s[1..s.len()]
125    } else {
126        s
127    };
128    if s.iter().all(|&b| is_hex_upper(b)) && !s.iter().all(|&b| b.is_ascii_digit()) {
129        return false;
130    }
131    if s.len() > 1 && s[0] == b'0' {
132        return !s.iter().all(|&b| b.is_ascii_digit());
133    }
134    // Prevent parsing of "infinity", "inf", "nan" as numbers, we
135    // want to keep them as strings (e.g. glyphname)
136    // https://doc.rust-lang.org/std/primitive.f64.html#grammar
137    if s.eq_ignore_ascii_case(b"infinity")
138        || s.eq_ignore_ascii_case(b"inf")
139        || s.eq_ignore_ascii_case(b"nan")
140    {
141        return false;
142    }
143    true
144}
145
146fn skip_ws(s: &str, mut ix: usize) -> usize {
147    while ix < s.len() && is_ascii_whitespace(s.as_bytes()[ix]) {
148        ix += 1;
149    }
150    ix
151}
152
153fn escape_string(buf: &mut String, s: &str) {
154    if !s.is_empty() && s.as_bytes().iter().all(|&b| is_alnum_strict(b)) {
155        buf.push_str(s);
156    } else {
157        buf.push('"');
158        let mut start = 0;
159        let mut ix = start;
160        while ix < s.len() {
161            let b = s.as_bytes()[ix];
162            match b {
163                b'"' | b'\\' => {
164                    buf.push_str(&s[start..ix]);
165                    buf.push('\\');
166                    start = ix;
167                }
168                _ => (),
169            }
170            ix += 1;
171        }
172        buf.push_str(&s[start..]);
173        buf.push('"');
174    }
175}
176
177impl Plist {
178    pub fn parse(s: &str) -> Result<Plist, Error> {
179        let (plist, _ix) = Plist::parse_rec(s, 0)?;
180        // TODO: check that we're actually at eof
181        Ok(plist)
182    }
183
184    fn name(&self) -> &'static str {
185        match self {
186            Plist::Array(..) => "array",
187            Plist::Dictionary(..) => "dictionary",
188            Plist::Float(..) => "float",
189            Plist::Integer(..) => "integer",
190            Plist::String(..) => "string",
191            Plist::Data(..) => "data",
192        }
193    }
194
195    pub fn get(&self, key: &str) -> Option<&Plist> {
196        match self {
197            Plist::Dictionary(d) => d.get(key),
198            _ => None,
199        }
200    }
201
202    pub fn as_dict(&self) -> Option<&BTreeMap<SmolStr, Plist>> {
203        match self {
204            Plist::Dictionary(d) => Some(d),
205            _ => None,
206        }
207    }
208
209    pub fn as_array(&self) -> Option<&[Plist]> {
210        match self {
211            Plist::Array(a) => Some(a),
212            _ => None,
213        }
214    }
215
216    pub fn as_str(&self) -> Option<&str> {
217        match self {
218            Plist::String(s) => Some(s),
219            _ => None,
220        }
221    }
222
223    pub fn as_i64(&self) -> Option<i64> {
224        match self {
225            Plist::Integer(i) => Some(*i),
226            _ => None,
227        }
228    }
229
230    pub fn as_f64(&self) -> Option<f64> {
231        match self {
232            Plist::Integer(i) => Some(*i as f64),
233            Plist::Float(f) => Some((*f).into_inner()),
234            _ => None,
235        }
236    }
237
238    pub fn expect_dict(self) -> Result<Dictionary, Error> {
239        match self {
240            Plist::Dictionary(dict) => Ok(dict),
241            _other => Err(Error::UnexpectedDataType {
242                expected: "dictionary",
243                found: _other.name(),
244            }),
245        }
246    }
247
248    pub fn expect_array(self) -> Result<Array, Error> {
249        match self {
250            Plist::Array(array) => Ok(array),
251            _other => Err(Error::UnexpectedDataType {
252                expected: "array",
253                found: _other.name(),
254            }),
255        }
256    }
257
258    pub fn expect_string(self) -> Result<String, Error> {
259        match self {
260            Plist::String(string) => Ok(string),
261            _other => Err(Error::UnexpectedDataType {
262                expected: "string",
263                found: _other.name(),
264            }),
265        }
266    }
267
268    pub fn expect_data(self) -> Result<Vec<u8>, Error> {
269        match self {
270            Plist::Data(bytes) => Ok(bytes),
271            _other => Err(Error::UnexpectedDataType {
272                expected: "data",
273                found: _other.name(),
274            }),
275        }
276    }
277
278    fn parse_rec(s: &str, ix: usize) -> Result<(Plist, usize), Error> {
279        let (tok, mut ix) = Token::lex(s, ix)?;
280        match tok {
281            Token::Atom(s) => Ok((Plist::parse_atom(s), ix)),
282            Token::String(s) => Ok((Plist::String(s.into()), ix)),
283            Token::Data(bytes) => Ok((Plist::Data(bytes), ix)),
284            Token::OpenBrace => {
285                let mut dict = BTreeMap::new();
286                loop {
287                    if let Some(ix) = Token::expect(s, ix, b'}') {
288                        return Ok((Plist::Dictionary(dict), ix));
289                    }
290                    let (key, next) = Token::lex(s, ix)?;
291                    let key_str = Token::try_into_smolstr(key)?;
292                    let next = Token::expect(s, next, b'=');
293                    if next.is_none() {
294                        return Err(Error::ExpectedEquals);
295                    }
296                    let (val, next) = Self::parse_rec(s, next.unwrap())?;
297                    dict.insert(key_str, val);
298                    if let Some(next) = Token::expect(s, next, b';') {
299                        ix = next;
300                    } else {
301                        return Err(Error::ExpectedSemicolon);
302                    }
303                }
304            }
305            Token::OpenParen => {
306                let mut list = Vec::new();
307                loop {
308                    if let Some(ix) = Token::expect(s, ix, b')') {
309                        return Ok((Plist::Array(list), ix));
310                    }
311                    let (val, next) = Self::parse_rec(s, ix)?;
312                    list.push(val);
313                    if let Some(ix) = Token::expect(s, next, b')') {
314                        return Ok((Plist::Array(list), ix));
315                    }
316                    if let Some(next) = Token::expect(s, next, b',') {
317                        ix = next;
318                        if let Some(next) = Token::expect(s, next, b')') {
319                            return Ok((Plist::Array(list), next));
320                        }
321                    } else {
322                        return Err(Error::ExpectedComma);
323                    }
324                }
325            }
326            _ => Err(Error::UnexpectedToken { name: tok.name() }),
327        }
328    }
329
330    fn parse_atom(s: &str) -> Plist {
331        if numeric_ok(s) {
332            if let Ok(num) = s.parse() {
333                return Plist::Integer(num);
334            }
335            if let Ok(num) = s.parse() {
336                return Plist::Float(num);
337            }
338        }
339        Plist::String(s.into())
340    }
341
342    #[allow(clippy::inherent_to_string, unused)]
343    pub fn to_string(&self) -> String {
344        let mut s = String::new();
345        self.push_to_string(&mut s);
346        s
347    }
348
349    fn push_to_string(&self, s: &mut String) {
350        match self {
351            Plist::Array(a) => {
352                s.push('(');
353                let mut delim = "\n";
354                for el in a {
355                    s.push_str(delim);
356                    el.push_to_string(s);
357                    delim = ",\n";
358                }
359                s.push_str("\n)");
360            }
361            Plist::Dictionary(a) => {
362                s.push_str("{\n");
363                let mut keys: Vec<_> = a.keys().collect();
364                keys.sort();
365                for k in keys {
366                    let el = &a[k];
367                    // TODO: quote if needed?
368                    escape_string(s, k);
369                    s.push_str(" = ");
370                    el.push_to_string(s);
371                    s.push_str(";\n");
372                }
373                s.push('}');
374            }
375            Plist::String(st) => escape_string(s, st),
376            Plist::Integer(i) => {
377                s.push_str(&format!("{i}"));
378            }
379            Plist::Float(f) => {
380                s.push_str(&format!("{f}"));
381            }
382            Plist::Data(data) => {
383                s.push('<');
384                for byte in data {
385                    s.extend(hex_digits_for_byte(*byte))
386                }
387                s.push('>');
388            }
389        }
390    }
391}
392
393impl FromPlist for Plist {
394    fn parse(tokenizer: &mut Tokenizer) -> Result<Self, Error> {
395        let Tokenizer { content, idx } = tokenizer;
396        let (val, end_idx) = Self::parse_rec(content, *idx)?;
397        *idx = end_idx;
398        Ok(val)
399    }
400}
401
402impl Default for Plist {
403    fn default() -> Self {
404        // kind of arbitrary but seems okay
405        Plist::Array(Vec::new())
406    }
407}
408
/// Returns the two lower-case hex digits of `byte`, high nibble first.
fn hex_digits_for_byte(byte: u8) -> [char; 2] {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    [byte >> 4, byte & 0x0f].map(|nibble| char::from(DIGITS[usize::from(nibble)]))
}
420
421fn byte_from_hex(hex: [u8; 2]) -> Result<u8, Error> {
422    fn hex_digit_to_byte(digit: u8) -> Result<u8, Error> {
423        match digit {
424            b'0'..=b'9' => Ok(digit - b'0'),
425            b'a'..=b'f' => Ok(digit - b'a' + 10),
426            b'A'..=b'F' => Ok(digit - b'A' + 10),
427            _ => Err(Error::BadData),
428        }
429    }
430    let maj = hex_digit_to_byte(hex[0])? << 4;
431    let min = hex_digit_to_byte(hex[1])?;
432    Ok(maj | min)
433}
434
impl<'a> Token<'a> {
    /// Reads the next token of `s`, starting at byte offset `ix`.
    ///
    /// Leading whitespace is skipped. On success the token is returned
    /// together with the offset of the first byte after it. [`Token::Eof`]
    /// is returned when nothing but whitespace remains.
    ///
    /// Note that closing delimiters and separators (`}`, `)`, `=`, `;`,
    /// `,`) are not tokens; they are consumed with [`Token::expect`] and
    /// produce [`Error::UnexpectedChar`] here.
    fn lex(s: &'a str, ix: usize) -> Result<(Token<'a>, usize), Error> {
        let start = skip_ws(s, ix);
        if start == s.len() {
            return Ok((Token::Eof, start));
        }
        let b = s.as_bytes()[start];
        match b {
            b'{' => Ok((Token::OpenBrace, start + 1)),
            b'(' => Ok((Token::OpenParen, start + 1)),
            b'<' => {
                // data block: everything up to the next `>` must be
                // pairs of hex digits
                let data_start = start + 1;
                let data_end = data_start
                    + s.as_bytes()[data_start..]
                        .iter()
                        .position(|b| *b == b'>')
                        .ok_or(Error::UnclosedData)?;
                let chunks = s.as_bytes()[data_start..data_end].chunks_exact(2);
                if !chunks.remainder().is_empty() {
                    // odd number of digits
                    return Err(Error::BadData);
                }
                let data = chunks
                    .map(|x| byte_from_hex(x.try_into().unwrap()))
                    .collect::<Result<_, _>>()?;
                Ok((Token::Data(data), data_end + 1))
            }
            b'"' => {
                // quoted string: `buf` collects decoded text once an
                // escape has been seen; `cow_start` marks the start of the
                // part of the input not yet copied into `buf`
                let mut ix = start + 1;
                let mut cow_start = ix;
                let mut buf = String::new();
                while ix < s.len() {
                    let b = s.as_bytes()[ix];
                    match b {
                        b'"' => {
                            // End of string
                            let string = if buf.is_empty() {
                                // no escapes: borrow straight from the input
                                s[cow_start..ix].into()
                            } else {
                                buf.push_str(&s[cow_start..ix]);
                                buf.into()
                            };
                            return Ok((Token::String(string), ix + 1));
                        }
                        b'\\' => {
                            buf.push_str(&s[cow_start..ix]);
                            // `parse_escape` needs at least one byte
                            // after the backslash
                            if ix + 1 == s.len() {
                                return Err(Error::UnclosedString);
                            }
                            let (c, len) = parse_escape(&s[ix..])?;
                            buf.push(c);
                            ix += len;
                            cow_start = ix;
                        }
                        _ => ix += 1,
                    }
                }
                Err(Error::UnclosedString)
            }
            _ => {
                if is_alnum(b) {
                    // unquoted atom: the longest run of `is_alnum` bytes
                    let mut ix = start + 1;
                    while ix < s.len() {
                        if !is_alnum(s.as_bytes()[ix]) {
                            break;
                        }
                        ix += 1;
                    }
                    Ok((Token::Atom(&s[start..ix]), ix))
                } else {
                    Err(Error::UnexpectedChar(s[start..].chars().next().unwrap()))
                }
            }
        }
    }

    /// Converts an atom or string token into a [`SmolStr`].
    ///
    /// Any other token yields [`Error::NotAString`].
    fn try_into_smolstr(self) -> Result<SmolStr, Error> {
        match self {
            Token::Atom(s) => Ok(s.into()),
            Token::String(s) => Ok(s.into()),
            _ => Err(Error::NotAString {
                token_name: self.name(),
            }),
        }
    }

    /// Returns the text of an atom or string token, `None` for all others.
    pub fn as_str(&self) -> Option<&str> {
        match self {
            Token::Atom(s) => Some(*s),
            Token::String(s) => Some(s),
            Token::Eof => None,
            Token::OpenBrace => None,
            Token::OpenParen => None,
            Token::Data(_) => None,
        }
    }

    /// Checks whether the first non-whitespace byte at or after `ix` is
    /// `delim`.
    ///
    /// Returns the offset just past the delimiter if so, `None` otherwise
    /// (including at end of input).
    fn expect(s: &str, ix: usize, delim: u8) -> Option<usize> {
        let ix = skip_ws(s, ix);
        if ix < s.len() {
            let b = s.as_bytes()[ix];
            if b == delim {
                return Some(ix + 1);
            }
        }
        None
    }

    /// Name of the variant, used in error messages.
    pub(crate) fn name(&self) -> &'static str {
        match self {
            Token::Atom(..) => "Atom",
            Token::String(..) => "String",
            Token::Eof => "Eof",
            Token::OpenBrace => "OpenBrace",
            Token::OpenParen => "OpenParen",
            Token::Data(_) => "Data",
        }
    }
}
553
554fn parse_escape(s: &str) -> Result<(char, usize), Error> {
555    // checked before this is called
556    assert!(s.starts_with('\\') && s.len() > 1);
557
558    let mut ix = 1;
559    let b = s.as_bytes()[ix];
560    match b {
561        b'"' | b'\\' => Ok((b as _, 2)),
562        b'n' => Ok(('\n', 2)),
563        b'r' => Ok(('\r', 2)),
564        b't' => Ok(('\t', 2)),
565        // unicode escapes
566        b'U' if s.len() >= 3 => {
567            // here we will parse up to 4 hexdigits:
568            // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f5/CFOldStylePList.c#L150C2-L150C6
569            ix += 1;
570            let (val, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
571            ix += len;
572            let result = if !is_surrogate(val) || !s.as_bytes()[ix..].starts_with(b"\\U") {
573                // we can't cast! this is a utf-16 value, not a codepoint
574                char::decode_utf16([val]).next()
575            } else {
576                ix += 2;
577                let (val2, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
578                ix += len;
579                char::decode_utf16([val, val2]).next()
580            };
581            result
582                .transpose()
583                .ok()
584                .flatten()
585                .ok_or_else(|| Error::InvalidUnicodeEscape(s[..ix].to_string()))
586                .map(|c| (c, ix))
587        }
588        b'0'..=b'3' if s.len() >= 4 => {
589            // octal escape
590            let b1 = s.as_bytes()[ix + 1];
591            let b2 = s.as_bytes()[ix + 2];
592            if (b'0'..=b'7').contains(&b1) && (b'0'..=b'7').contains(&b2) {
593                let oct = (b - b'0') * 64 + (b1 - b'0') * 8 + (b2 - b'0');
594                ix += 3;
595                Ok((oct as _, ix))
596            } else {
597                Err(Error::UnknownEscape)
598            }
599        }
600        _ => Err(Error::UnknownEscape),
601    }
602}
603
/// Returns `true` if `val` is a UTF-16 surrogate code unit
/// (`0xD800..=0xDFFF`).
fn is_surrogate(val: u16) -> bool {
    (0xD800..=0xDFFF).contains(&val)
}
607
608// parse up to four hex digits as a u16
609// returns an error if the first byte is not a valid ascii hex digit,
610// then will read up to four bytes
611fn parse_hex_digit(bytes: &[u8]) -> Result<(u16, usize), Error> {
612    match bytes {
613        &[] => Err(Error::UnknownEscape),
614        &[one, ..] if !one.is_ascii_hexdigit() => Err(Error::UnknownEscape),
615        other => Ok(other
616            .iter()
617            .take(4)
618            .map_while(|b| (*b as char).to_digit(16).map(|x| x as u16))
619            .fold((0u16, 0usize), |(num, len), hexval| {
620                ((num << 4) + hexval, len + 1)
621            })),
622    }
623}
624
625impl From<String> for Plist {
626    fn from(x: String) -> Plist {
627        Plist::String(x)
628    }
629}
630
631impl From<i64> for Plist {
632    fn from(x: i64) -> Plist {
633        Plist::Integer(x)
634    }
635}
636
637impl From<f64> for Plist {
638    fn from(x: f64) -> Plist {
639        Plist::Float(x.into())
640    }
641}
642
643impl From<Vec<Plist>> for Plist {
644    fn from(x: Vec<Plist>) -> Plist {
645        Plist::Array(x)
646    }
647}
648
649impl From<Dictionary> for Plist {
650    fn from(x: Dictionary) -> Plist {
651        Plist::Dictionary(x)
652    }
653}
654
655pub(crate) fn parse_int(s: &str) -> Result<i64, Error> {
656    if numeric_ok(s) {
657        if let Ok(num) = s.parse::<i64>() {
658            return Ok(num);
659        }
660        if let Ok(num) = s.parse::<f64>() {
661            return Ok(num as i64);
662        }
663    }
664    Err(Error::ExpectedNumber)
665}
666
667pub(crate) fn parse_float(s: &str) -> Result<f64, Error> {
668    if numeric_ok(s) {
669        if let Ok(num) = s.parse::<f64>() {
670            return Ok(num);
671        }
672    }
673    Err(Error::ExpectedNumber)
674}
675
676/// This type can be parsed from a Plist string
677pub trait FromPlist
678where
679    Self: Sized,
680{
681    fn parse(tokenizer: &mut Tokenizer) -> Result<Self, Error>;
682
683    fn parse_plist(plist: &str) -> Result<Self, Error> {
684        Tokenizer::new(plist).parse()
685    }
686}
687
688impl<T> FromPlist for Vec<T>
689where
690    T: FromPlist,
691{
692    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, crate::plist::Error> {
693        tokenizer.parse_delimited_vec(VecDelimiters::CSV_IN_PARENS)
694    }
695}
696
697impl<T: FromPlist> FromPlist for BTreeMap<SmolStr, T> {
698    fn parse(tokenizer: &mut Tokenizer) -> Result<Self, Error> {
699        tokenizer.parse_map()
700    }
701}
702
703impl<T> FromPlist for Option<T>
704where
705    T: FromPlist,
706{
707    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, crate::plist::Error> {
708        Ok(Some(tokenizer.parse()?))
709    }
710}
711
/// A cursor over plist text, used by [`FromPlist`] implementations.
pub struct Tokenizer<'a> {
    /// The complete input.
    content: &'a str,
    /// Byte offset into `content` where the next token is read from.
    idx: usize,
}
716
717impl Debug for Tokenizer<'_> {
718    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
719        let start = self.idx;
720        let end = (start + 16).min(self.content.len());
721        f.debug_struct("Tokenizer")
722            .field("content", &&self.content[start..end])
723            .field("idx", &self.idx)
724            .finish()
725    }
726}
727
728impl<'a> Tokenizer<'a> {
729    pub fn new(content: &'a str) -> Tokenizer<'a> {
730        Tokenizer { content, idx: 0 }
731    }
732
733    pub(crate) fn peek(&mut self) -> Result<Token<'a>, Error> {
734        let (tok, _) = Token::lex(self.content, self.idx)?;
735        Ok(tok)
736    }
737
738    pub(crate) fn lex(&mut self) -> Result<Token<'a>, Error> {
739        let (tok, idx) = Token::lex(self.content, self.idx)?;
740        self.idx = idx;
741        Ok(tok)
742    }
743
744    pub(crate) fn eat(&mut self, delim: u8) -> Result<(), Error> {
745        let Some(idx) = Token::expect(self.content, self.idx, delim) else {
746            return Err(Error::ExpectedChar(delim as char));
747        };
748        self.idx = idx;
749        Ok(())
750    }
751
752    /// Jump over the next thing, regardless of whether it's simple (atom or string) or complex
753    /// (bracketed or braced construct)
754    ///
755    /// Named to match parse_rec.
756    pub(crate) fn skip_rec(&mut self) -> Result<(), Error> {
757        match self.lex()? {
758            Token::Atom(..) | Token::String(..) | Token::Data(..) => Ok(()),
759            Token::OpenBrace => loop {
760                if self.eat(b'}').is_ok() {
761                    return Ok(());
762                }
763                let key = self.lex()?;
764                Token::try_into_smolstr(key)?;
765                self.eat(b'=')?;
766                self.skip_rec()?;
767                self.eat(b';')?;
768            },
769            Token::OpenParen => {
770                if self.eat(b')').is_ok() {
771                    return Ok(());
772                }
773                loop {
774                    self.skip_rec()?;
775                    if self.eat(b')').is_ok() {
776                        return Ok(());
777                    }
778                    self.eat(b',')?;
779                    if self.eat(b')').is_ok() {
780                        return Ok(());
781                    }
782                }
783            }
784            other => Err(Error::UnexpectedToken { name: other.name() }),
785        }
786    }
787
788    pub(crate) fn parse_delimited_vec<T>(
789        &mut self,
790        delim: VecDelimiters,
791    ) -> Result<Vec<T>, crate::plist::Error>
792    where
793        T: FromPlist,
794    {
795        let mut list = Vec::new();
796        self.eat(delim.start)?;
797        loop {
798            if self.eat(delim.end).is_ok() {
799                return Ok(list);
800            }
801            list.push(self.parse()?);
802            if self.eat(delim.end).is_ok() {
803                return Ok(list);
804            }
805            self.eat(delim.sep)?;
806            // handle possible traliing separator
807            if self.eat(delim.end).is_ok() {
808                return Ok(list);
809            }
810        }
811    }
812
813    pub(crate) fn parse_map<T: FromPlist>(&mut self) -> Result<BTreeMap<SmolStr, T>, Error> {
814        self.eat(b'{')?;
815        let mut map = BTreeMap::new();
816        loop {
817            if self.eat(b'}').is_ok() {
818                break;
819            }
820            let key = self.parse::<SmolStr>()?;
821            self.eat(b'=')?;
822            map.insert(key, self.parse()?);
823            self.eat(b';')?;
824        }
825        Ok(map)
826    }
827
828    pub(crate) fn parse<T>(&mut self) -> Result<T, crate::plist::Error>
829    where
830        T: FromPlist,
831    {
832        T::parse(self)
833    }
834}
835
/// The three bytes that frame a list for
/// [`Tokenizer::parse_delimited_vec`].
pub(crate) struct VecDelimiters {
    /// Byte that opens the list.
    start: u8,
    /// Byte that closes the list.
    end: u8,
    /// Byte between two elements.
    sep: u8,
}
841
impl VecDelimiters {
    /// Comma separated values in parentheses: `(a, b)`.
    pub(crate) const CSV_IN_PARENS: VecDelimiters = VecDelimiters {
        start: b'(',
        end: b')',
        sep: b',',
    };
    /// Comma separated values in braces: `{a, b}`.
    pub(crate) const CSV_IN_BRACES: VecDelimiters = VecDelimiters {
        start: b'{',
        end: b'}',
        sep: b',',
    };
}
854
855impl FromPlist for OrderedFloat<f64> {
856    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
857        let val: f64 = tokenizer.parse()?;
858        Ok(val.into())
859    }
860}
861
862impl FromPlist for f64 {
863    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
864        match tokenizer.lex()? {
865            Token::Atom(val) => parse_float(val),
866            Token::String(val) => parse_float(&val),
867            _ => Err(Error::ExpectedNumber),
868        }
869    }
870}
871
872impl FromPlist for i64 {
873    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
874        match tokenizer.lex()? {
875            Token::Atom(val) => parse_int(val),
876            Token::String(val) => parse_int(&val),
877            _ => Err(Error::ExpectedNumber),
878        }
879    }
880}
881
882impl FromPlist for bool {
883    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
884        match tokenizer.lex()? {
885            Token::Atom(val) => parse_int(val).map(|v| v == 1),
886            Token::String(val) => parse_int(&val).map(|v| v == 1),
887            _ => Err(Error::ExpectedNumber),
888        }
889    }
890}
891
892impl FromPlist for String {
893    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
894        match tokenizer.lex()? {
895            Token::Atom(val) => Ok(val.to_string()),
896            Token::String(val) => Ok(val.to_string()),
897            _ => Err(Error::ExpectedString),
898        }
899    }
900}
901
902impl FromPlist for SmolStr {
903    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
904        match tokenizer.lex()? {
905            Token::Atom(val) => Ok(val.into()),
906            Token::String(val) => Ok(val.into()),
907            _ => Err(Error::ExpectedString),
908        }
909    }
910}
911
912/// Hand-written because Glyphs 2 points don't look like Glyphs 3 points
913impl FromPlist for Point {
914    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
915        let delims = if let Token::OpenBrace = tokenizer.peek()? {
916            VecDelimiters::CSV_IN_BRACES
917        } else {
918            VecDelimiters::CSV_IN_PARENS
919        };
920        let coords: Vec<f64> = tokenizer.parse_delimited_vec(delims)?;
921        if coords.len() != 2 {
922            return Err(Error::Parse("wrong number of coords in point".to_string()));
923        }
924        Ok((coords[0], coords[1]).into())
925    }
926}
927
928/// Hand-written because it's a String that becomes a Thing
929impl FromPlist for Affine {
930    fn parse(tokenizer: &mut Tokenizer<'_>) -> Result<Self, Error> {
931        let tok = tokenizer.lex()?;
932        let raw = match &tok {
933            Token::Atom(val) => *val,
934            Token::String(val) => val,
935            _ => return Err(Error::ExpectedString),
936        };
937        let raw = &raw[1..raw.len() - 1];
938        let coords: Vec<f64> = raw.split(", ").map(|c| c.parse().unwrap()).collect();
939        Ok(Affine::new([
940            coords[0], coords[1], coords[2], coords[3], coords[4], coords[5],
941        ]))
942    }
943}
944
945#[cfg(test)]
946mod tests {
947    use ascii_plist_derive::FromPlist;
948    use std::collections::BTreeMap;
949
950    use super::*;
951
952    #[test]
953    fn parse_unquoted_strings() {
954        let contents = r#"
955        {
956            name = "UFO Filename";
957            value1 = ../../build/instance_ufos/Testing_Rg.ufo;
958            value2 = _;
959            value3 = $;
960            value4 = /;
961            value5 = :;
962            value6 = .;
963            value7 = -;
964        }
965        "#;
966
967        let plist = Plist::parse(contents).unwrap();
968        let plist_expected = Plist::Dictionary(BTreeMap::from_iter([
969            ("name".into(), Plist::String("UFO Filename".into())),
970            (
971                "value1".into(),
972                Plist::String("../../build/instance_ufos/Testing_Rg.ufo".into()),
973            ),
974            ("value2".into(), Plist::String("_".into())),
975            ("value3".into(), Plist::String("$".into())),
976            ("value4".into(), Plist::String("/".into())),
977            ("value5".into(), Plist::String(":".into())),
978            ("value6".into(), Plist::String(".".into())),
979            ("value7".into(), Plist::String("-".into())),
980        ]));
981        assert_eq!(plist, plist_expected);
982    }
983
984    #[test]
985    fn parse_int_map() {
986        let contents = r#"
987        {
988            foo = 5;
989            bar = 32;
990        }"#;
991
992        let foobar = BTreeMap::<SmolStr, i64>::parse_plist(contents).unwrap();
993        assert_eq!(foobar.get("foo"), Some(&5));
994        assert_eq!(foobar.get("bar"), Some(&32));
995    }
996
997    #[test]
998    #[should_panic(expected = "ExpectedNumber")]
999    fn parse_map_fail() {
1000        let contents = r#"
1001        {
1002            foo = hello;
1003            bar = 32;
1004        }"#;
1005
1006        let _foobar = BTreeMap::<SmolStr, i64>::parse_plist(contents).unwrap();
1007    }
1008
1009    #[test]
1010    fn parse_binary_data() {
1011        let contents = r#"
1012        {
1013            mydata = <deadbeef>;
1014        }
1015            "#;
1016        let plist = Plist::parse(contents).unwrap();
1017        let data = plist.get("mydata").unwrap().clone().expect_data().unwrap();
1018        assert_eq!(data, [0xde, 0xad, 0xbe, 0xef])
1019    }
1020
1021    #[test]
1022    fn hex_to_ascii() {
1023        assert_eq!(hex_digits_for_byte(0x01), ['0', '1']);
1024        assert_eq!(hex_digits_for_byte(0x00), ['0', '0']);
1025        assert_eq!(hex_digits_for_byte(0xff), ['f', 'f']);
1026        assert_eq!(hex_digits_for_byte(0xf0), ['f', '0']);
1027        assert_eq!(hex_digits_for_byte(0x0f), ['0', 'f']);
1028    }
1029
1030    #[test]
1031    fn ascii_to_hex() {
1032        assert_eq!(byte_from_hex([b'0', b'1']), Ok(0x01));
1033        assert_eq!(byte_from_hex([b'0', b'0']), Ok(0x00));
1034        assert_eq!(byte_from_hex([b'f', b'f']), Ok(0xff));
1035        assert_eq!(byte_from_hex([b'f', b'0']), Ok(0xf0));
1036        assert_eq!(byte_from_hex([b'0', b'f']), Ok(0x0f));
1037    }
1038
1039    // in arrays the trailing comma is optional but supported
1040    #[test]
1041    fn array_optional_trailing_comma() {
1042        let _ = env_logger::builder().is_test(true).try_init();
1043        // we include a list that is not parsed in derive because that
1044        // takes a second codepath.
1045        let trailing = r#"
1046        {
1047            items = (
1048                "a",
1049                "b",
1050            );
1051            skip_me = (
1052                "c",
1053                "d",
1054            );
1055        }"#;
1056
1057        let no_trailing = r#"
1058        {
1059            items = (
1060                "a",
1061                "b"
1062            );
1063            skip_me = (
1064                "c",
1065                "d"
1066            );
1067        }"#;
1068
1069        #[derive(Default, FromPlist)]
1070        struct TestMe {
1071            items: Vec<String>,
1072        }
1073
1074        let trailing = TestMe::parse_plist(trailing).unwrap();
1075        assert_eq!(trailing.items, ["a", "b"]);
1076        let no_trailing = TestMe::parse_plist(no_trailing).unwrap();
1077        assert_eq!(trailing.items, no_trailing.items);
1078    }
1079
1080    #[test]
1081    fn parse_to_plist_type() {
1082        let plist_str = r#"
1083        {
1084            name = "meta";
1085            value = (
1086                {
1087                    data = latn;
1088                    tag = dlng;
1089                    num = 5;
1090                },
1091                {
1092                    data = "latn,cyrl";
1093                    tag = slng;
1094                    num = -3.0;
1095                }
1096            );
1097        }"#;
1098
1099        let plist = Plist::parse_plist(plist_str).unwrap();
1100        let root = plist.expect_dict().unwrap();
1101        assert_eq!(root.get("name").unwrap().as_str(), Some("meta"));
1102        let value = root.get("value").unwrap().as_array().unwrap();
1103        assert_eq!(value.len(), 2);
1104        let first = value[0].as_dict().unwrap();
1105        assert_eq!(first.get("data").and_then(Plist::as_str), Some("latn"));
1106        assert_eq!(first.get("tag").and_then(Plist::as_str), Some("dlng"));
1107        assert_eq!(first.get("num").and_then(Plist::as_i64), Some(5));
1108        let second = value[1].as_dict().unwrap();
1109        assert_eq!(
1110            second.get("data").and_then(Plist::as_str),
1111            Some("latn,cyrl")
1112        );
1113        assert_eq!(second.get("tag").and_then(Plist::as_str), Some("slng"));
1114        assert_eq!(second.get("num").and_then(Plist::as_f64), Some(-3.0));
1115    }
1116
1117    #[test]
1118    fn parse_hex_digit_sanity() {
1119        assert_eq!(parse_hex_digit(b"2019"), Ok((0x2019, 4)));
1120        assert_eq!(parse_hex_digit(b"201"), Ok((0x201, 3)));
1121        assert_eq!(parse_hex_digit(b"201z"), Ok((0x201, 3)));
1122        assert_eq!(parse_hex_digit(b"fu"), Ok((0xf, 1)));
1123        assert_eq!(parse_hex_digit(b"z"), Err(Error::UnknownEscape));
1124    }
1125
1126    // partially borrowed from from python: https://github.com/fonttools/openstep-plist/blob/2fa77b267d67/tests/test_parser.py#L135
1127    #[test]
1128    fn escape_parsing_good() {
1129        for (input, expected, expected_len) in [
1130            ("\\n", '\n', 2),                    // octal escape
1131            ("\\012", '\n', 4),                  // octal escape
1132            ("\\U2019", '\u{2019}', 6),          // unicode escape (’)
1133            ("\\UD83D\\UDCA9", '\u{1F4A9}', 12), // surrogate pair (💩)
1134        ] {
1135            let (result, len) = parse_escape(input).unwrap();
1136            {
1137                assert_eq!((result, len), (expected, expected_len));
1138            }
1139        }
1140    }
1141
1142    #[test]
1143    fn escape_parsing_bad() {
1144        assert_eq!(
1145            parse_escape("\\UD83D"),
1146            Err(Error::InvalidUnicodeEscape("\\UD83D".to_string()))
1147        );
1148    }
1149
1150    #[test]
1151    fn parsing_escape_in_string() {
1152        for (input, expected, expected_len) in [
1153            ("\"a\\012b\"", "a\nb", 8),
1154            ("\"a\\nb\"", "a\nb", 6),
1155            ("\"a\\U000Ab\"", "a\nb", 10),
1156        ] {
1157            let (token, len) = Token::lex(input, 0).unwrap();
1158            assert_eq!(token, Token::String(Cow::Borrowed(expected)));
1159            assert_eq!(len, expected_len);
1160        }
1161    }
1162
1163    #[test]
1164    fn parse_quoted_and_unquoted_ints_and_bools() {
1165        assert_eq!(
1166            (Ok(1), Ok(1), Ok(true), Ok(true), Ok(false), Ok(false)),
1167            (
1168                Tokenizer::new("1").parse::<i64>(),
1169                Tokenizer::new("\"1\"").parse::<i64>(),
1170                Tokenizer::new("1").parse::<bool>(),
1171                Tokenizer::new("\"1\"").parse::<bool>(),
1172                Tokenizer::new("0").parse::<bool>(),
1173                Tokenizer::new("\"0\"").parse::<bool>(),
1174            )
1175        );
1176    }
1177}