openstep_plist/
lib.rs

1use std::{borrow::Cow, collections::BTreeMap, fmt::Debug};
2
3use serde::{Deserialize, Serialize};
4use smol_str::SmolStr;
5
6pub mod de;
7pub mod error;
8pub mod ser;
9
10pub use crate::error::Error;
11use crate::error::LineColumn;
12pub use ser::to_string;
13
/// A plist dictionary: a map from string keys to [`Plist`] values.
///
/// Backed by a [`BTreeMap`], so iteration visits entries in sorted key order.
pub type Dictionary = BTreeMap<SmolStr, Plist>;
16
/// An array of plist values, kept in the order in which they were added.
pub type Array = Vec<Plist>;
19
/// An enum representing a property list.
///
/// A value is either a container ([`Plist::Dictionary`], [`Plist::Array`]) or a
/// leaf (string, integer, float, raw bytes).
///
/// The serde representation is untagged: each variant is (de)serialized as its
/// payload alone, without a wrapper naming the variant.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Plist {
    /// A `{ key = value; ... }` mapping from string keys to values.
    Dictionary(Dictionary),
    /// A `( value, value, ... )` sequence of values.
    Array(Array),
    /// A quoted string, or an unquoted atom that was not recognised as a number.
    String(String),
    /// A signed 64-bit integer.
    Integer(i64),
    /// A 64-bit floating point number.
    Float(f64),
    /// Raw bytes, written in the text format as a `<...>` hexadecimal literal.
    #[serde(with = "serde_bytes")]
    Data(Vec<u8>),
}
32
33impl PartialEq for Plist {
34    fn eq(&self, other: &Self) -> bool {
35        match (self, other) {
36            (Plist::Dictionary(a), Plist::Dictionary(b)) => a == b,
37            (Plist::Array(a), Plist::Array(b)) => a == b,
38            (Plist::String(a), Plist::String(b)) => a == b,
39            (Plist::Integer(a), Plist::Integer(b)) => a == b,
40            (Plist::Float(a), Plist::Float(b)) => (a * 1000.0).round() == (b * 1000.0).round(),
41            (Plist::Data(a), Plist::Data(b)) => a == b,
42            _ => false,
43        }
44    }
45}
46
/// A lexical token produced by [`Token::lex`].
///
/// Closing delimiters and separators (`}`, `)`, `=`, `;`, `,`) are not tokens;
/// they are matched directly with [`Token::expect`].
#[derive(Debug)]
pub(crate) enum Token<'a> {
    /// Only whitespace remained in the input.
    Eof,
    /// A `{`, which opens a dictionary.
    OpenBrace,
    /// A `(`, which opens an array.
    OpenParen,
    /// The bytes decoded from a `<...>` hexadecimal literal.
    Data(Vec<u8>),
    /// The contents of a `"..."` string; borrowed from the input unless escape
    /// sequences had to be decoded.
    String(Cow<'a, str>),
    /// An unquoted run of bytes accepted by `is_alnum`, borrowed from the input.
    Atom(&'a str),
}
56
/// Returns `true` if `b` is an ASCII digit, a decimal point or a minus sign.
fn is_numeric(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'.' | b'-')
}
60
61fn is_alnum(b: u8) -> bool {
62    // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f50813e28a4ec09954ffa359e6f/CFOldStylePList.c#L79
63    is_numeric(b)
64        || b.is_ascii_uppercase()
65        || b.is_ascii_lowercase()
66        || b == b'_'
67        || b == b'$'
68        || b == b'/'
69        || b == b':'
70        || b == b'.'
71        || b == b'-'
72}
73
74// Used for serialization; make sure UUID's get quoted
75fn is_alnum_strict(b: u8) -> bool {
76    is_alnum(b) && b != b'-' && b != b'/'
77}
78
/// Returns `true` if `b` is a hexadecimal digit written with digits or
/// upper-case letters (`0`-`9`, `A`-`F`); lower-case `a`-`f` is rejected.
fn is_hex_upper(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'F')
}
82
/// Returns `true` for the whitespace bytes skipped between tokens: space, tab,
/// carriage return and line feed.
fn is_ascii_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\r' | b'\n')
}
86
87fn numeric_ok(s: &str) -> bool {
88    let s = s.as_bytes();
89    if s.is_empty() {
90        return false;
91    }
92    let s = if s.len() > 1 && (*s.first().unwrap(), *s.last().unwrap()) == (b'"', b'"') {
93        &s[1..s.len()]
94    } else {
95        s
96    };
97    if s.iter().all(|&b| is_hex_upper(b)) && !s.iter().all(|&b| b.is_ascii_digit()) {
98        return false;
99    }
100    if s.len() > 1 && s[0] == b'0' {
101        return !s.iter().all(|&b| b.is_ascii_digit());
102    }
103    // Prevent parsing of "infinity", "inf", "nan" as numbers, we
104    // want to keep them as strings (e.g. glyphname)
105    // https://doc.rust-lang.org/std/primitive.f64.html#grammar
106    if s.eq_ignore_ascii_case(b"infinity")
107        || s.eq_ignore_ascii_case(b"inf")
108        || s.eq_ignore_ascii_case(b"nan")
109    {
110        return false;
111    }
112    true
113}
114
115fn skip_ws(s: &str, mut ix: usize) -> usize {
116    while ix < s.len() && is_ascii_whitespace(s.as_bytes()[ix]) {
117        ix += 1;
118    }
119    ix
120}
121
122impl Plist {
123    pub fn parse(s: &str) -> Result<Plist, Error> {
124        let (plist, _ix) = Plist::parse_rec(s, 0)?;
125        // TODO: check that we're actually at eof
126        Ok(plist)
127    }
128
129    fn name(&self) -> &'static str {
130        match self {
131            Plist::Array(..) => "array",
132            Plist::Dictionary(..) => "dictionary",
133            Plist::Float(..) => "float",
134            Plist::Integer(..) => "integer",
135            Plist::String(..) => "string",
136            Plist::Data(..) => "data",
137        }
138    }
139
140    pub fn get(&self, key: &str) -> Option<&Plist> {
141        match self {
142            Plist::Dictionary(d) => d.get(key),
143            _ => None,
144        }
145    }
146
147    pub fn as_dict(&self) -> Option<&BTreeMap<SmolStr, Plist>> {
148        match self {
149            Plist::Dictionary(d) => Some(d),
150            _ => None,
151        }
152    }
153
154    pub fn as_array(&self) -> Option<&[Plist]> {
155        match self {
156            Plist::Array(a) => Some(a),
157            _ => None,
158        }
159    }
160
161    pub fn as_str(&self) -> Option<&str> {
162        match self {
163            Plist::String(s) => Some(s),
164            _ => None,
165        }
166    }
167
168    pub fn as_i64(&self) -> Option<i64> {
169        match self {
170            Plist::Integer(i) => Some(*i),
171            _ => None,
172        }
173    }
174
175    pub fn as_f64(&self) -> Option<f64> {
176        match self {
177            Plist::Integer(i) => Some(*i as f64),
178            Plist::Float(f) => Some(*f),
179            _ => None,
180        }
181    }
182
183    pub fn flatten_to_integer(&self) -> Plist {
184        match self {
185            Plist::Float(f) => {
186                if f.fract() == 0.0 {
187                    Plist::Integer(*f as i64)
188                } else {
189                    Plist::Float(*f)
190                }
191            }
192            Plist::String(s) => {
193                if let Ok(num) = s.parse() {
194                    Plist::Integer(num)
195                } else {
196                    self.clone()
197                }
198            }
199            _ => self.clone(),
200        }
201    }
202    pub fn flatten_to_string(&self) -> Plist {
203        match self {
204            Plist::Integer(i) => Plist::String(i.to_string()),
205            Plist::Float(f) => {
206                if f.fract() == 0.0 {
207                    Plist::String((*f as i64).to_string())
208                } else {
209                    Plist::String(f.to_string())
210                }
211            }
212            _ => self.clone(),
213        }
214    }
215
216    pub fn expect_dict(self) -> Result<Dictionary, Error> {
217        match self {
218            Plist::Dictionary(dict) => Ok(dict),
219            _other => Err(Error::UnexpectedDataType {
220                expected: "dictionary",
221                found: _other.name(),
222            }),
223        }
224    }
225
226    pub fn expect_array(self) -> Result<Array, Error> {
227        match self {
228            Plist::Array(array) => Ok(array),
229            _other => Err(Error::UnexpectedDataType {
230                expected: "array",
231                found: _other.name(),
232            }),
233        }
234    }
235
236    pub fn expect_string(self) -> Result<String, Error> {
237        match self {
238            Plist::String(string) => Ok(string),
239            _other => Err(Error::UnexpectedDataType {
240                expected: "string",
241                found: _other.name(),
242            }),
243        }
244    }
245
246    pub fn expect_data(self) -> Result<Vec<u8>, Error> {
247        match self {
248            Plist::Data(bytes) => Ok(bytes),
249            _other => Err(Error::UnexpectedDataType {
250                expected: "data",
251                found: _other.name(),
252            }),
253        }
254    }
255
256    fn parse_rec(s: &str, ix: usize) -> Result<(Plist, usize), Error> {
257        let (tok, mut ix) = Token::lex(s, ix)?;
258        match tok {
259            Token::Atom(s) => Ok((Plist::parse_atom(s), ix)),
260            Token::String(s) => Ok((Plist::String(s.into()), ix)),
261            Token::Data(bytes) => Ok((Plist::Data(bytes), ix)),
262            Token::OpenBrace => {
263                let mut dict = BTreeMap::new();
264                loop {
265                    if let Some(ix) = Token::expect(s, ix, b'}') {
266                        return Ok((Plist::Dictionary(dict), ix));
267                    }
268                    let (key, next) = Token::lex(s, ix)?;
269                    let key_str = Token::try_into_smolstr(key).map_err(|e| e.at(s, ix))?;
270                    let next = Token::expect(s, next, b'=');
271                    if next.is_none() {
272                        return Err(Error::ExpectedEquals {
273                            lc: LineColumn::from_pos(s, ix),
274                        });
275                    }
276                    let (val, next) = Self::parse_rec(s, next.unwrap())?;
277                    dict.insert(key_str, val);
278                    if let Some(next_semicolon) = Token::expect(s, next, b';') {
279                        ix = next_semicolon;
280                    } else {
281                        return Err(Error::ExpectedSemicolon {
282                            lc: LineColumn::from_pos(s, next),
283                        });
284                    }
285                }
286            }
287            Token::OpenParen => {
288                let mut list = Vec::new();
289                loop {
290                    if let Some(ix) = Token::expect(s, ix, b')') {
291                        return Ok((Plist::Array(list), ix));
292                    }
293                    let (val, next) = Self::parse_rec(s, ix)?;
294                    list.push(val);
295                    if let Some(ix) = Token::expect(s, next, b')') {
296                        return Ok((Plist::Array(list), ix));
297                    }
298                    if let Some(next_comma) = Token::expect(s, next, b',') {
299                        ix = next_comma;
300                        if let Some(next_paren) = Token::expect(s, next_comma, b')') {
301                            return Ok((Plist::Array(list), next_paren));
302                        }
303                    } else {
304                        return Err(Error::ExpectedComma {
305                            lc: LineColumn::from_pos(s, next),
306                        });
307                    }
308                }
309            }
310            _ => Err(Error::UnexpectedToken {
311                name: tok.name(),
312                lc: LineColumn::from_pos(s, ix),
313            }),
314        }
315    }
316
317    fn parse_atom(s: &str) -> Plist {
318        if numeric_ok(s) {
319            if let Ok(num) = s.parse() {
320                return Plist::Integer(num);
321            }
322            if let Ok(num) = s.parse() {
323                return Plist::Float(num);
324            }
325        }
326        Plist::String(s.into())
327    }
328
329    #[allow(clippy::inherent_to_string, unused)]
330    pub fn to_string(&self) -> String {
331        crate::ser::to_string(&self).unwrap()
332    }
333
334    pub fn is_meaningful(&self) -> bool {
335        match self {
336            Plist::Array(a) => !a.is_empty(),
337            Plist::Dictionary(d) => !d.is_empty(),
338            Plist::String(s) => !s.is_empty(),
339            Plist::Integer(i) => *i != 0,
340            Plist::Float(f) => *f != 0.0,
341            Plist::Data(d) => !d.is_empty(),
342        }
343    }
344}
345
346impl Default for Plist {
347    fn default() -> Self {
348        // kind of arbitrary but seems okay
349        Plist::Array(Vec::new())
350    }
351}
352
353fn byte_from_hex(hex: [u8; 2]) -> Result<u8, Error> {
354    fn hex_digit_to_byte(digit: u8) -> Result<u8, Error> {
355        match digit {
356            b'0'..=b'9' => Ok(digit - b'0'),
357            b'a'..=b'f' => Ok(digit - b'a' + 10),
358            b'A'..=b'F' => Ok(digit - b'A' + 10),
359            _ => Err(Error::BadDataInternal),
360        }
361    }
362    let maj = hex_digit_to_byte(hex[0])? << 4;
363    let min = hex_digit_to_byte(hex[1])?;
364    Ok(maj | min)
365}
366
367impl<'a> Token<'a> {
368    fn lex(s: &'a str, ix: usize) -> Result<(Token<'a>, usize), Error> {
369        let start = skip_ws(s, ix);
370        if start == s.len() {
371            return Ok((Token::Eof, start));
372        }
373        let b = s.as_bytes()[start];
374        match b {
375            b'{' => Ok((Token::OpenBrace, start + 1)),
376            b'(' => Ok((Token::OpenParen, start + 1)),
377            b'<' => {
378                let data_start = start + 1;
379                let data_end = data_start
380                    + s.as_bytes()[data_start..]
381                        .iter()
382                        .position(|b| *b == b'>')
383                        .ok_or(Error::UnclosedData {
384                            lc: LineColumn::from_pos(s, start),
385                        })?;
386                let chunks = s.as_bytes()[data_start..data_end].chunks_exact(2);
387                if !chunks.remainder().is_empty() {
388                    return Err(Error::BadData {
389                        lc: LineColumn::from_pos(s, data_start),
390                    });
391                }
392                let data = chunks
393                    .map(|x| byte_from_hex(x.try_into().unwrap()))
394                    .collect::<Result<_, _>>()
395                    .map_err(|e| e.at(s, data_start))?;
396                Ok((Token::Data(data), data_end + 1))
397            }
398            b'"' => {
399                let mut ix = start + 1;
400                let mut cow_start = ix;
401                let mut buf = String::new();
402                while ix < s.len() {
403                    let b = s.as_bytes()[ix];
404                    match b {
405                        b'"' => {
406                            // End of string
407                            let string = if buf.is_empty() {
408                                s[cow_start..ix].into()
409                            } else {
410                                buf.push_str(&s[cow_start..ix]);
411                                buf.into()
412                            };
413                            return Ok((Token::String(string), ix + 1));
414                        }
415                        b'\\' => {
416                            buf.push_str(&s[cow_start..ix]);
417                            if ix + 1 == s.len() {
418                                return Err(Error::UnclosedString {
419                                    lc: LineColumn::from_pos(s, start),
420                                });
421                            }
422                            let (c, len) = parse_escape(&s[ix..]).map_err(|e| e.at(s, start))?;
423                            buf.push(c);
424                            ix += len;
425                            cow_start = ix;
426                        }
427                        _ => ix += 1,
428                    }
429                }
430                Err(Error::UnclosedString {
431                    lc: LineColumn::from_pos(s, start),
432                })
433            }
434            _ => {
435                if is_alnum(b) {
436                    let mut ix = start + 1;
437                    while ix < s.len() {
438                        if !is_alnum(s.as_bytes()[ix]) {
439                            break;
440                        }
441                        ix += 1;
442                    }
443                    Ok((Token::Atom(&s[start..ix]), ix))
444                } else {
445                    Err(Error::UnexpectedChar {
446                        ch: s[start..].chars().next().unwrap(),
447                        lc: LineColumn::from_pos(s, start),
448                    })
449                }
450            }
451        }
452    }
453
454    fn try_into_smolstr(self) -> Result<SmolStr, Error> {
455        match self {
456            Token::Atom(s) => Ok(s.into()),
457            Token::String(s) => Ok(s.into()),
458            _ => Err(Error::NotAStringInternal {
459                token_name: self.name(),
460            }),
461        }
462    }
463
464    fn expect(s: &str, ix: usize, delim: u8) -> Option<usize> {
465        let ix = skip_ws(s, ix);
466        if ix < s.len() {
467            let b = s.as_bytes()[ix];
468            if b == delim {
469                return Some(ix + 1);
470            }
471        }
472        None
473    }
474
475    pub(crate) fn name(&self) -> &'static str {
476        match self {
477            Token::Atom(..) => "Atom",
478            Token::String(..) => "String",
479            Token::Eof => "Eof",
480            Token::OpenBrace => "OpenBrace",
481            Token::OpenParen => "OpenParen",
482            Token::Data(_) => "Data",
483        }
484    }
485}
486
487fn parse_escape(s: &str) -> Result<(char, usize), Error> {
488    // checked before this is called
489    assert!(s.starts_with('\\') && s.len() > 1);
490
491    let mut ix = 1;
492    let b = s.as_bytes()[ix];
493    match b {
494        b'"' | b'\\' => Ok((b as _, 2)),
495        b'n' => Ok(('\n', 2)),
496        b'r' => Ok(('\r', 2)),
497        b't' => Ok(('\t', 2)),
498        // unicode escapes
499        b'U' if s.len() >= 3 => {
500            // here we will parse up to 4 hexdigits:
501            // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f5/CFOldStylePList.c#L150C2-L150C6
502            ix += 1;
503            let (val, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
504            ix += len;
505            let result = if !is_surrogate(val) || !s.as_bytes()[ix..].starts_with(b"\\U") {
506                // we can't cast! this is a utf-16 value, not a codepoint
507                char::decode_utf16([val]).next()
508            } else {
509                ix += 2;
510                let (val2, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
511                ix += len;
512                char::decode_utf16([val, val2]).next()
513            };
514            result
515                .transpose()
516                .ok()
517                .flatten()
518                .ok_or_else(|| Error::InvalidUnicodeEscapeInternal {
519                    seq: s[..ix].to_string(),
520                })
521                .map(|c| (c, ix))
522        }
523        b'0'..=b'3' if s.len() >= 4 => {
524            // octal escape
525            let b1 = s.as_bytes()[ix + 1];
526            let b2 = s.as_bytes()[ix + 2];
527            if (b'0'..=b'7').contains(&b1) && (b'0'..=b'7').contains(&b2) {
528                let oct = (b - b'0') * 64 + (b1 - b'0') * 8 + (b2 - b'0');
529                ix += 3;
530                Ok((oct as _, ix))
531            } else {
532                Err(Error::UnknownEscapeInternal)
533            }
534        }
535        _ => Err(Error::UnknownEscapeInternal),
536    }
537}
538
/// Returns `true` if `val` lies in the UTF-16 surrogate range
/// `0xD800..=0xDFFF`.
fn is_surrogate(val: u16) -> bool {
    (0xD800..=0xDFFF).contains(&val)
}
542
543// parse up to four hex digits as a u16
544// returns an error if the first byte is not a valid ascii hex digit,
545// then will read up to four bytes
546fn parse_hex_digit(bytes: &[u8]) -> Result<(u16, usize), Error> {
547    match bytes {
548        &[] => Err(Error::UnknownEscapeInternal),
549        &[one, ..] if !one.is_ascii_hexdigit() => Err(Error::UnknownEscapeInternal),
550        other => Ok(other
551            .iter()
552            .take(4)
553            .map_while(|b| (*b as char).to_digit(16).map(|x| x as u16))
554            .fold((0u16, 0usize), |(num, len), hexval| {
555                ((num << 4) + hexval, len + 1)
556            })),
557    }
558}
559
560impl From<bool> for Plist {
561    fn from(x: bool) -> Plist {
562        Plist::Integer(x as i64)
563    }
564}
565
566impl From<String> for Plist {
567    fn from(x: String) -> Plist {
568        Plist::String(x)
569    }
570}
571
572impl From<SmolStr> for Plist {
573    fn from(x: SmolStr) -> Plist {
574        Plist::String(x.into())
575    }
576}
577
578impl From<i64> for Plist {
579    fn from(x: i64) -> Plist {
580        Plist::Integer(x)
581    }
582}
583
584impl From<f64> for Plist {
585    fn from(x: f64) -> Plist {
586        Plist::Float(x)
587    }
588}
589
590impl From<Dictionary> for Plist {
591    fn from(x: Dictionary) -> Plist {
592        Plist::Dictionary(x)
593    }
594}
595
596impl<T> From<Vec<T>> for Plist
597where
598    T: Into<Plist>,
599{
600    fn from(x: Vec<T>) -> Plist {
601        Plist::Array(x.into_iter().map(Into::into).collect())
602    }
603}
604
/// Unit tests for the parser, the typed accessors and the error messages.
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::*;

    /// Unquoted values, including ones consisting of a single punctuation
    /// byte, are parsed as strings.
    #[test]
    fn parse_unquoted_strings() {
        let contents = r#"
        {
            name = "UFO Filename";
            value1 = ../../build/instance_ufos/Testing_Rg.ufo;
            value2 = _;
            value3 = $;
            value4 = /;
            value5 = :;
            value6 = .;
            value7 = -;
        }
        "#;

        let plist = Plist::parse(contents).unwrap();
        let plist_expected = Plist::Dictionary(BTreeMap::from_iter([
            ("name".into(), Plist::String("UFO Filename".into())),
            (
                "value1".into(),
                Plist::String("../../build/instance_ufos/Testing_Rg.ufo".into()),
            ),
            ("value2".into(), Plist::String("_".into())),
            ("value3".into(), Plist::String("$".into())),
            ("value4".into(), Plist::String("/".into())),
            ("value5".into(), Plist::String(":".into())),
            ("value6".into(), Plist::String(".".into())),
            ("value7".into(), Plist::String("-".into())),
        ]));
        assert_eq!(plist, plist_expected);
    }

    /// A `<...>` hex literal is decoded into its bytes.
    #[test]
    fn parse_binary_data() {
        let contents = r#"
        {
            mydata = <deadbeef>;
        }
            "#;
        let plist = Plist::parse(contents).unwrap();
        let data = plist.get("mydata").unwrap().clone().expect_data().unwrap();
        assert_eq!(data, [0xde, 0xad, 0xbe, 0xef]);
    }

    /// A `Plist::Data` value can be deserialized back into a `Plist` through
    /// the crate's deserializer without error.
    #[test]
    fn test_serde_binary_data() {
        // Disabled variant of this test that starts from plist text instead of
        // a hand-built value:
        // let contents = r#"
        // {
        //     de.kutilek.scrawl.data = <89504e470d0a1a0a>;
        // }
        // "#;
        // let plist = Plist::parse(contents).unwrap();
        // println!("{:?}", plist);
        let plist = Plist::Data(vec![0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
        let mut deserializer = crate::de::Deserializer::from_plist(&plist);
        Plist::deserialize(&mut deserializer).unwrap();
    }

    /// `byte_from_hex` combines two hex digits, high nibble first.
    #[test]
    fn ascii_to_hex() {
        assert_eq!(byte_from_hex([b'0', b'1']), Ok(0x01));
        assert_eq!(byte_from_hex([b'0', b'0']), Ok(0x00));
        assert_eq!(byte_from_hex([b'f', b'f']), Ok(0xff));
        assert_eq!(byte_from_hex([b'f', b'0']), Ok(0xf0));
        assert_eq!(byte_from_hex([b'0', b'f']), Ok(0x0f));
    }

    /// Nested containers parse into the expected variants and the typed
    /// accessors return their contents.
    #[test]
    fn parse_to_plist_type() {
        let plist_str = r#"
        {
            name = "meta";
            value = (
                {
                    data = latn;
                    tag = dlng;
                    num = 5;
                },
                {
                    data = "latn,cyrl";
                    tag = slng;
                    num = -3.0;
                }
            );
        }"#;

        let plist = Plist::parse(plist_str).unwrap();
        let root = plist.expect_dict().unwrap();
        assert_eq!(root.get("name").unwrap().as_str(), Some("meta"));
        let value = root.get("value").unwrap().as_array().unwrap();
        assert_eq!(value.len(), 2);
        let first = value[0].as_dict().unwrap();
        assert_eq!(first.get("data").and_then(Plist::as_str), Some("latn"));
        assert_eq!(first.get("tag").and_then(Plist::as_str), Some("dlng"));
        assert_eq!(first.get("num").and_then(Plist::as_i64), Some(5));
        let second = value[1].as_dict().unwrap();
        assert_eq!(
            second.get("data").and_then(Plist::as_str),
            Some("latn,cyrl")
        );
        assert_eq!(second.get("tag").and_then(Plist::as_str), Some("slng"));
        assert_eq!(second.get("num").and_then(Plist::as_f64), Some(-3.0));
    }

    /// Parse errors render a message that mentions a line and a column.
    #[test]
    fn error_messages_with_line_column() {
        // Test missing semicolon with line/column information
        let input = r#"{
            name = "test";
            value = 123
        }"#;
        let err = Plist::parse(input).unwrap_err();
        let err_msg = format!("{}", err);
        assert!(err_msg.contains("line 3"));
        assert!(err_msg.contains("column"));

        // Test unexpected character with line/column information
        let input2 = "{ foo = bar@ }";
        let err2 = Plist::parse(input2).unwrap_err();
        let err_msg2 = format!("{}", err2);
        // Error message should contain line and column
        assert!(err_msg2.contains("line"));
        assert!(err_msg2.contains("column"));

        // Test missing equals sign with line/column information
        let input3 = r#"{
            foo bar = 1;
        }"#;
        let err3 = Plist::parse(input3).unwrap_err();
        let err_msg3 = format!("{}", err3);
        // After the key "foo" the parser expects '=' but finds the atom "bar"
        assert!(err_msg3.contains("line") && err_msg3.contains("column"));
    }
}
744}