Skip to main content

ocapn_syrup/
format.rs

1use std::str::FromStr;
2use std::{fmt::Display, hash::Hash};
3
4use nom::{
5    branch::alt,
6    bytes::streaming::{tag, take},
7    character::streaming::digit1,
8    error::context,
9    multi::{length_count, many_till},
10    sequence::{pair, preceded, terminated},
11    Finish, IResult, Parser,
12};
13use num_bigint::{BigInt, Sign};
14
15/// Represent a parsed syrup value.
16///
17/// Value implements equality, ordering and hashing traits (Eq, Ord, Hash) based
18/// on the binary on-the-wire representation. This is used to canonicalize
19/// values according to the syrup specification.
20#[derive(Debug, Clone)]
21pub enum Value {
22    Boolean(bool),
23    Float(f32),
24    Double(f64),
25    Integer(BigInt),
26    Binary(Vec<u8>),
27    String(String),
28    Symbol(String),
29    Dictionary(Vec<(Self, Self)>),
30    Sequence(Vec<Self>),
31    Record { label: Box<Self>, fields: Vec<Self> },
32    Set(Vec<Self>),
33}
34
35impl Value {
36    /// Create a syrup boolean value.
37    pub fn boolean(b: bool) -> Value {
38        Value::Boolean(b)
39    }
40    /// Create a syrup float value.
41    pub fn float(f: f32) -> Value {
42        Value::Float(f)
43    }
44    /// Create a syrup double value.
45    pub fn double(d: f64) -> Value {
46        Value::Double(d)
47    }
48    /// Create a syrup integer value.
49    pub fn integer<T: Into<BigInt>>(i: T) -> Value {
50        Value::Integer(i.into())
51    }
52    /// Create a syrup binary data value.
53    pub fn binary<'a, T: Into<&'a [u8]>>(b: T) -> Value {
54        Value::Binary(b.into().to_vec())
55    }
56    /// Create a syrup utf-8 string value.
57    pub fn string<'a, T: Into<&'a str>>(s: T) -> Value {
58        Value::String(s.into().to_string())
59    }
60    /// Create a syrup symbol value.
61    pub fn symbol<'a, T: Into<&'a str>>(s: T) -> Value {
62        Value::Symbol(s.into().to_string())
63    }
64    /// Create a canonicalized syrup dictionary value.
65    pub fn dictionary(mut d: Vec<(Value, Value)>) -> Value {
66        d.sort();
67        Value::Dictionary(d)
68    }
69    /// Create a syrup sequence value.
70    pub fn sequence(s: Vec<Value>) -> Value {
71        Value::Sequence(s)
72    }
73    /// Create a syrup record value.
74    pub fn record(label: Value, fields: Vec<Value>) -> Value {
75        Value::Record {
76            label: Box::new(label),
77            fields,
78        }
79    }
80    /// Create a canonicalized syrup set value.
81    pub fn set(mut s: Vec<Value>) -> Value {
82        s.sort();
83        Value::Set(s)
84    }
85
86    /// Compare one syrup value to another, according to canonicalization rules
87    /// for sorting.
88    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
89        self.to_vec().cmp(other.to_vec().as_ref())
90    }
91
92    /// Render syrup value to its binary on-the-wire representation.
93    pub fn to_vec(&self) -> Vec<u8> {
94        match self {
95            Value::Boolean(true) => [b't'].to_vec(),
96            Value::Boolean(false) => [b'f'].to_vec(),
97            Value::Float(f) => [[b'F'].as_slice(), f.to_be_bytes().as_slice()].concat(),
98            Value::Double(d) => [[b'D'].as_slice(), d.to_be_bytes().as_slice()].concat(),
99            Value::Integer(big_int) => {
100                let suffix = if big_int.sign() == Sign::Minus {
101                    "-"
102                } else {
103                    "+"
104                };
105                format!("{}{}", big_int.magnitude().to_str_radix(10), suffix)
106                    .as_bytes()
107                    .to_vec()
108            }
109            Value::Binary(b) => [format!("{}:", b.len()).as_bytes(), b].concat(),
110            Value::String(s) => {
111                [format!("{}\"", s.as_bytes().len()).as_bytes(), s.as_bytes()].concat()
112            }
113            Value::Symbol(s) => {
114                [format!("{}'", s.as_bytes().len()).as_bytes(), s.as_bytes()].concat()
115            }
116            Value::Dictionary(d) => [
117                [b'{'].as_slice(),
118                d.iter()
119                    .map(|(k, v)| vec![k.to_vec(), v.to_vec()].concat())
120                    .collect::<Vec<Vec<u8>>>()
121                    .concat()
122                    .as_slice(),
123                [b'}'].as_slice(),
124            ]
125            .concat(),
126            Value::Sequence(s) => [
127                [b'['].as_slice(),
128                s.iter()
129                    .map(|v| v.to_vec())
130                    .collect::<Vec<Vec<u8>>>()
131                    .concat()
132                    .as_slice(),
133                [b']'].as_slice(),
134            ]
135            .concat(),
136            Value::Record { label, fields } => [
137                [b'<'].as_slice(),
138                label.to_vec().as_slice(),
139                fields
140                    .iter()
141                    .map(|v| v.to_vec())
142                    .collect::<Vec<Vec<u8>>>()
143                    .concat()
144                    .as_slice(),
145                [b'>'].as_slice(),
146            ]
147            .concat(),
148            Value::Set(s) => [
149                [b'#'].as_slice(),
150                s.iter()
151                    .map(|v| v.to_vec())
152                    .collect::<Vec<Vec<u8>>>()
153                    .concat()
154                    .as_slice(),
155                [b'$'].as_slice(),
156            ]
157            .concat(),
158        }
159    }
160}
161
/// Error raised while processing the syrup format.
#[derive(Debug, PartialEq)]
pub enum Error {
    /// Free-form message; `Display` prints it unchanged.
    Message(String),
    /// Description of a parse failure; `Display` prints it unchanged.
    Parse(String),
    /// The parser reported that it needs more input.
    Incomplete,
}
169
170impl Error {
171    pub(crate) fn message<T: ToString>(s: T) -> Error {
172        Error::Message(s.to_string())
173    }
174}
175
176pub type Result<T> = std::result::Result<T, Error>;
177
178impl Display for Error {
179    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
180        match self {
181            Error::Message(msg) => f.write_str(msg),
182            Error::Parse(msg) => f.write_str(msg),
183            Error::Incomplete => f.write_str("incomplete"),
184        }
185    }
186}
187
188impl From<nom::Err<nom::error::Error<&[u8]>>> for Error {
189    fn from(value: nom::Err<nom::error::Error<&[u8]>>) -> Self {
190        match value {
191            nom::Err::Incomplete(_) => Error::Incomplete,
192            nom::Err::Error(e) => e.into(),
193            nom::Err::Failure(e) => e.into(),
194        }
195    }
196}
197
198impl From<nom::error::Error<&[u8]>> for Error {
199    fn from(e: nom::error::Error<&[u8]>) -> Self {
200        Error::Parse(format!(
201            "near {}: {}",
202            String::from_utf8_lossy(e.input),
203            e.code.description()
204        ))
205    }
206}
207
208impl std::error::Error for Error {}
209
210impl TryFrom<&[u8]> for Value {
211    type Error = Error;
212
213    fn try_from(v: &[u8]) -> Result<Self> {
214        value(v)
215            .finish()
216            .map(|(_, res)| res)
217            .map_err(|e| Error::Parse(format!("{:?}", e)))
218    }
219}
220
221impl FromStr for Value {
222    type Err = Error;
223
224    fn from_str(s: &str) -> Result<Self> {
225        value(s.as_bytes())
226            .finish()
227            .map(|(_, res)| res)
228            .map_err(|e| e.into())
229    }
230}
231
232pub(crate) fn value<'a>(input: &'a [u8]) -> IResult<&'a [u8], Value> {
233    context(
234        "value",
235        alt((
236            boolean_value,
237            float_value,
238            double_value,
239            integer_value,
240            binary_value,
241            string_value,
242            symbol_value,
243            dictionary_value,
244            sequence_value,
245            record_value,
246            set_value,
247        )),
248    )(input)
249}
250
251pub fn parse_value(input: &[u8]) -> IResult<Vec<u8>, Value> {
252    match value(input) {
253        Ok((rest, value)) => Ok((rest.to_vec(), value)),
254        Err(nom::Err::Incomplete(e)) => Err(nom::Err::Incomplete(e)),
255        Err(nom::Err::Failure(e)) => Err(nom::Err::Error(nom::error::Error {
256            input: e.input.to_vec(),
257            code: e.code,
258        })),
259        Err(nom::Err::Error(e)) => Err(nom::Err::Error(nom::error::Error {
260            input: e.input.to_vec(),
261            code: e.code,
262        })),
263    }
264}
265
266fn boolean_value(input: &[u8]) -> IResult<&[u8], Value> {
267    context("boolean", alt((tag("t"), tag("f"))))(input).map(|(next_input, res)| {
268        (
269            next_input,
270            match res {
271                b"t" => Value::Boolean(true),
272                b"f" => Value::Boolean(false),
273                _ => unreachable!("parser"),
274            },
275        )
276    })
277}
278
279fn float_value(input: &[u8]) -> IResult<&[u8], Value> {
280    context("float", preceded(tag("F"), take(4u8)))(input).map(|(next_input, res)| {
281        (
282            next_input,
283            Value::Float(f32::from_be_bytes(res.try_into().unwrap())),
284        )
285    })
286}
287
288fn double_value(input: &[u8]) -> IResult<&[u8], Value> {
289    context("double", preceded(tag("D"), take(8u8)))(input).map(|(next_input, res)| {
290        (
291            next_input,
292            Value::Double(f64::from_be_bytes(res.try_into().unwrap())),
293        )
294    })
295}
296
297fn integer_value(input: &[u8]) -> IResult<&[u8], Value> {
298    context("integer", pair(digit1, alt((tag("+"), tag("-")))))(input).map(|(next_input, res)| {
299        let (num_str, sign_str) = res;
300        let sign = match sign_str {
301            b"+" => Sign::Plus,
302            b"-" => Sign::Minus,
303            _ => unreachable!(),
304        };
305        (
306            next_input,
307            Value::Integer(
308                BigInt::from_radix_be(
309                    sign,
310                    num_str
311                        .iter()
312                        .map(|d| d - 0x30)
313                        .collect::<Vec<u8>>()
314                        .as_slice(),
315                    10,
316                )
317                .unwrap(),
318            ),
319        )
320    })
321}
322
323fn binary_value(input: &[u8]) -> IResult<&[u8], Value> {
324    context(
325        "binary",
326        length_count(
327            terminated(digit1, tag(":"))
328                .map(|res| u32::from_str(String::from_utf8_lossy(res).as_ref()).unwrap()),
329            take(1u8),
330        ),
331    )(input)
332    .map(|(next_input, res)| {
333        (
334            next_input,
335            Value::Binary(res.iter().map(|b| b[0]).collect()),
336        )
337    })
338}
339
340fn string_value(input: &[u8]) -> IResult<&[u8], Value> {
341    context(
342        "string",
343        length_count(
344            terminated(digit1, tag("\""))
345                .map(|res| u32::from_str(String::from_utf8_lossy(res).as_ref()).unwrap()),
346            take(1u8),
347        ),
348    )(input)
349    .map(|(next_input, res)| {
350        (
351            next_input,
352            Value::String(
353                String::from_utf8_lossy(res.iter().map(|b| b[0]).collect::<Vec<u8>>().as_slice())
354                    .into_owned(),
355            ),
356        )
357    })
358}
359
360fn symbol_value(input: &[u8]) -> IResult<&[u8], Value> {
361    context(
362        "symbol",
363        length_count(
364            terminated(digit1, tag("\'"))
365                .map(|res| u32::from_str(String::from_utf8_lossy(res).as_ref()).unwrap()),
366            take(1u8),
367        ),
368    )(input)
369    .map(|(next_input, res)| {
370        (
371            next_input,
372            Value::Symbol(
373                String::from_utf8_lossy(res.iter().map(|b| b[0]).collect::<Vec<u8>>().as_slice())
374                    .into_owned(),
375            ),
376        )
377    })
378}
379
380fn sequence_value(input: &[u8]) -> IResult<&[u8], Value> {
381    context("sequence", preceded(tag("["), many_till(value, tag("]"))))(input)
382        .map(|(next_input, res)| (next_input, Value::Sequence(res.0)))
383}
384
385fn dictionary_value(input: &[u8]) -> IResult<&[u8], Value> {
386    context(
387        "dictionary",
388        preceded(tag("{"), many_till(pair(value, value), tag("}"))),
389    )(input)
390    .map(|(next_input, mut res)| {
391        res.0.sort();
392        (next_input, Value::Dictionary(res.0))
393    })
394}
395
396fn record_value(input: &[u8]) -> IResult<&[u8], Value> {
397    context(
398        "sequence",
399        preceded(tag("<"), pair(value, many_till(value, tag(">")))),
400    )(input)
401    .map(|(next_input, res)| {
402        (
403            next_input,
404            Value::Record {
405                label: Box::new(res.0),
406                fields: res.1 .0,
407            },
408        )
409    })
410}
411
412fn set_value(input: &[u8]) -> IResult<&[u8], Value> {
413    context("sequence", preceded(tag("#"), many_till(value, tag("$"))))(input).map(
414        |(next_input, mut res)| {
415            res.0.sort();
416            (next_input, Value::Set(res.0))
417        },
418    )
419}
420
421impl PartialEq for Value {
422    fn eq(&self, other: &Self) -> bool {
423        return self.cmp(other).is_eq();
424    }
425}
426
427impl Eq for Value {}
428
429impl Hash for Value {
430    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
431        self.to_vec().hash(state);
432    }
433}
434
435impl PartialOrd for Value {
436    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
437        Some(self.cmp(other))
438    }
439}
440
441impl Ord for Value {
442    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
443        self.cmp(other)
444    }
445}
446
#[cfg(test)]
mod tests {
    use std::{fs::File, io::Read, path::PathBuf};

    use nom::AsBytes;

    use super::*;

    /// Parse a byte slice through the `TryFrom<&[u8]>` implementation.
    fn parse_bytes(bytes: &[u8]) -> Result<Value> {
        Value::try_from(bytes)
    }

    /// Build one animal dictionary for the `parse_zoo` expectation.
    fn animal(
        species: &[u8],
        name: &str,
        age: i32,
        weight: f64,
        alive: bool,
        eats: &[&[u8]],
    ) -> Value {
        let diet = eats.iter().map(|food| Value::binary(*food)).collect();
        Value::dictionary(vec![
            (Value::symbol("species"), Value::binary(species)),
            (Value::symbol("name"), Value::string(name)),
            (Value::symbol("age"), Value::integer(age)),
            (Value::symbol("weight"), Value::double(weight)),
            (Value::symbol("alive?"), Value::boolean(alive)),
            (Value::symbol("eats"), Value::set(diet)),
        ])
    }

    /// Scalars decode from raw bytes via `TryFrom<&[u8]>`.
    #[test]
    fn try_from_slice() {
        assert_eq!(parse_bytes(b"t"), Ok(Value::boolean(true)));
        assert_eq!(parse_bytes(b"f"), Ok(Value::boolean(false)));
        assert_eq!(parse_bytes(b"F\x3d\xcc\xcc\xcd"), Ok(Value::float(0.1)));
        assert_eq!(
            parse_bytes(b"D\x3f\xb9\x99\x99\x99\x99\x99\x9a"),
            Ok(Value::double(0.1))
        );
    }

    /// Input matching no value kind is reported as a parse error.
    #[test]
    fn invalid() {
        // TODO: improve nom error messages
        let outcome: Result<Value> = Value::from_str("nope");
        assert_eq!(
            outcome,
            Err(Error::Parse(String::from("near nope: Tag")))
        )
    }

    /// Every value kind decodes from its textual wire form.
    #[test]
    fn from_str() {
        let cases: Vec<(&str, Value)> = vec![
            ("t", Value::boolean(true)),
            ("f", Value::boolean(false)),
            ("42+", Value::integer(42)),
            ("42-", Value::integer(-42)),
            ("5:hello", Value::binary(b"hello".as_slice())),
            ("3\"foo", Value::string("foo")),
            ("3'foo", Value::symbol("foo")),
            (
                "[1+2+3+]",
                Value::sequence(vec![
                    Value::integer(1),
                    Value::integer(2),
                    Value::integer(3),
                ]),
            ),
            (
                // Entries arrive unsorted and must come back in canonical order.
                "{3\"goo4\"muck3\"foo3\"bar}",
                Value::Dictionary(vec![
                    (Value::string("foo"), Value::string("bar")),
                    (Value::string("goo"), Value::string("muck")),
                ]),
            ),
            (
                "<6:person5:Alice30+t>",
                Value::record(
                    Value::binary(b"person".as_slice()),
                    vec![
                        Value::binary(b"Alice".as_slice()),
                        Value::integer(30),
                        Value::boolean(true),
                    ],
                ),
            ),
            (
                "#3\"foo3\"bar$",
                Value::set(vec![Value::string("bar"), Value::string("foo")]),
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(Value::from_str(input), Ok(expected));
        }
    }

    /// Canonical inputs survive a parse-then-encode round trip unchanged.
    #[test]
    fn round_trip_from_str_to_vec() {
        let canonical = [
            "t",
            "f",
            "10+",
            "10-",
            "5:hello",
            "3\"foo",
            "4'none",
            "[1+2+3+]",
            "{3\"foo3\"bar3\"goo4\"muck}",
            "<6:person5:Alice30+t>",
            "#3\"bar3\"foo$",
        ];
        for s in canonical {
            let encoded = Value::from_str(s).unwrap().to_vec();
            assert_eq!(encoded, s.as_bytes().to_vec(), "round trip value: {}", s);
        }
    }

    /// The `testdata/zoo.bin` fixture decodes to the expected record.
    #[test]
    fn parse_zoo() {
        let zoo_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("testdata")
            .join("zoo.bin");
        let mut zoo_file = File::open(zoo_path).expect("open testdata/zoo.bin");
        let mut buf = vec![];
        zoo_file
            .read_to_end(&mut buf)
            .expect("read testdata/zoo.bin");
        let zoo_actual: Value = buf.as_bytes().try_into().expect("parse zoo.bin");

        let inhabitants = vec![
            animal(
                b"cat",
                "Tabatha",
                12,
                8.2,
                true,
                &[
                    b"mice".as_slice(),
                    b"fish".as_slice(),
                    b"kibble".as_slice(),
                ],
            ),
            animal(
                b"monkey",
                "George",
                6,
                17.24,
                false,
                &[b"bananas".as_slice(), b"insects".as_slice()],
            ),
            animal(b"ghost", "Casper", -12, -34.5, false, &[]),
        ];
        let zoo_expected = Value::record(
            Value::binary(b"zoo".as_slice()),
            vec![
                Value::string("The Grand Menagerie"),
                Value::sequence(inhabitants),
            ],
        );
        assert_eq!(zoo_expected, zoo_actual);
    }
}