1use std::str::FromStr;
2use std::{fmt::Display, hash::Hash};
3
4use nom::{
5 branch::alt,
6 bytes::streaming::{tag, take},
7 character::streaming::digit1,
8 error::context,
9 multi::{length_count, many_till},
10 sequence::{pair, preceded, terminated},
11 Finish, IResult, Parser,
12};
13use num_bigint::{BigInt, Sign};
14
/// A single value of the data model handled by this module.
///
/// Values are produced by the parser functions in this file and serialised
/// back to bytes with `Value::to_vec`.
#[derive(Debug, Clone)]
pub enum Value {
    /// Boolean, written as the single byte `t` or `f`.
    Boolean(bool),
    /// 32-bit float, written as `F` followed by its four big-endian bytes.
    Float(f32),
    /// 64-bit float, written as `D` followed by its eight big-endian bytes.
    Double(f64),
    /// Signed big integer, written as decimal digits followed by `+` or `-`.
    Integer(BigInt),
    /// Raw bytes, written as `<len>:` followed by the bytes.
    Binary(Vec<u8>),
    /// Text, written as `<byte len>"` followed by the UTF-8 bytes.
    String(String),
    /// Symbol, written as `<byte len>'` followed by the UTF-8 bytes.
    Symbol(String),
    /// Key/value pairs, written between `{` and `}`.
    Dictionary(Vec<(Self, Self)>),
    /// Ordered list of values, written between `[` and `]`.
    Sequence(Vec<Self>),
    /// A label value followed by field values, written between `<` and `>`.
    Record { label: Box<Self>, fields: Vec<Self> },
    /// Collection of values, written between `#` and `$`.
    Set(Vec<Self>),
}
34
35impl Value {
36 pub fn boolean(b: bool) -> Value {
38 Value::Boolean(b)
39 }
40 pub fn float(f: f32) -> Value {
42 Value::Float(f)
43 }
44 pub fn double(d: f64) -> Value {
46 Value::Double(d)
47 }
48 pub fn integer<T: Into<BigInt>>(i: T) -> Value {
50 Value::Integer(i.into())
51 }
52 pub fn binary<'a, T: Into<&'a [u8]>>(b: T) -> Value {
54 Value::Binary(b.into().to_vec())
55 }
56 pub fn string<'a, T: Into<&'a str>>(s: T) -> Value {
58 Value::String(s.into().to_string())
59 }
60 pub fn symbol<'a, T: Into<&'a str>>(s: T) -> Value {
62 Value::Symbol(s.into().to_string())
63 }
64 pub fn dictionary(mut d: Vec<(Value, Value)>) -> Value {
66 d.sort();
67 Value::Dictionary(d)
68 }
69 pub fn sequence(s: Vec<Value>) -> Value {
71 Value::Sequence(s)
72 }
73 pub fn record(label: Value, fields: Vec<Value>) -> Value {
75 Value::Record {
76 label: Box::new(label),
77 fields,
78 }
79 }
80 pub fn set(mut s: Vec<Value>) -> Value {
82 s.sort();
83 Value::Set(s)
84 }
85
86 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
89 self.to_vec().cmp(other.to_vec().as_ref())
90 }
91
92 pub fn to_vec(&self) -> Vec<u8> {
94 match self {
95 Value::Boolean(true) => [b't'].to_vec(),
96 Value::Boolean(false) => [b'f'].to_vec(),
97 Value::Float(f) => [[b'F'].as_slice(), f.to_be_bytes().as_slice()].concat(),
98 Value::Double(d) => [[b'D'].as_slice(), d.to_be_bytes().as_slice()].concat(),
99 Value::Integer(big_int) => {
100 let suffix = if big_int.sign() == Sign::Minus {
101 "-"
102 } else {
103 "+"
104 };
105 format!("{}{}", big_int.magnitude().to_str_radix(10), suffix)
106 .as_bytes()
107 .to_vec()
108 }
109 Value::Binary(b) => [format!("{}:", b.len()).as_bytes(), b].concat(),
110 Value::String(s) => {
111 [format!("{}\"", s.as_bytes().len()).as_bytes(), s.as_bytes()].concat()
112 }
113 Value::Symbol(s) => {
114 [format!("{}'", s.as_bytes().len()).as_bytes(), s.as_bytes()].concat()
115 }
116 Value::Dictionary(d) => [
117 [b'{'].as_slice(),
118 d.iter()
119 .map(|(k, v)| vec![k.to_vec(), v.to_vec()].concat())
120 .collect::<Vec<Vec<u8>>>()
121 .concat()
122 .as_slice(),
123 [b'}'].as_slice(),
124 ]
125 .concat(),
126 Value::Sequence(s) => [
127 [b'['].as_slice(),
128 s.iter()
129 .map(|v| v.to_vec())
130 .collect::<Vec<Vec<u8>>>()
131 .concat()
132 .as_slice(),
133 [b']'].as_slice(),
134 ]
135 .concat(),
136 Value::Record { label, fields } => [
137 [b'<'].as_slice(),
138 label.to_vec().as_slice(),
139 fields
140 .iter()
141 .map(|v| v.to_vec())
142 .collect::<Vec<Vec<u8>>>()
143 .concat()
144 .as_slice(),
145 [b'>'].as_slice(),
146 ]
147 .concat(),
148 Value::Set(s) => [
149 [b'#'].as_slice(),
150 s.iter()
151 .map(|v| v.to_vec())
152 .collect::<Vec<Vec<u8>>>()
153 .concat()
154 .as_slice(),
155 [b'$'].as_slice(),
156 ]
157 .concat(),
158 }
159 }
160}
161
/// Errors reported by this module.
#[derive(Debug, PartialEq)]
pub enum Error {
    /// Free-form message, built with `Error::message`.
    Message(String),
    /// Description of a parse failure.
    Parse(String),
    /// The parser reported `nom::Err::Incomplete`, i.e. it needed more input.
    Incomplete,
}
169
170impl Error {
171 pub(crate) fn message<T: ToString>(s: T) -> Error {
172 Error::Message(s.to_string())
173 }
174}
175
/// `Result` alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
177
178impl Display for Error {
179 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
180 match self {
181 Error::Message(msg) => f.write_str(msg),
182 Error::Parse(msg) => f.write_str(msg),
183 Error::Incomplete => f.write_str("incomplete"),
184 }
185 }
186}
187
188impl From<nom::Err<nom::error::Error<&[u8]>>> for Error {
189 fn from(value: nom::Err<nom::error::Error<&[u8]>>) -> Self {
190 match value {
191 nom::Err::Incomplete(_) => Error::Incomplete,
192 nom::Err::Error(e) => e.into(),
193 nom::Err::Failure(e) => e.into(),
194 }
195 }
196}
197
198impl From<nom::error::Error<&[u8]>> for Error {
199 fn from(e: nom::error::Error<&[u8]>) -> Self {
200 Error::Parse(format!(
201 "near {}: {}",
202 String::from_utf8_lossy(e.input),
203 e.code.description()
204 ))
205 }
206}
207
// Marker impl: `Debug` (derived) and `Display` (implemented in this file) supply what the trait requires.
impl std::error::Error for Error {}
209
210impl TryFrom<&[u8]> for Value {
211 type Error = Error;
212
213 fn try_from(v: &[u8]) -> Result<Self> {
214 value(v)
215 .finish()
216 .map(|(_, res)| res)
217 .map_err(|e| Error::Parse(format!("{:?}", e)))
218 }
219}
220
221impl FromStr for Value {
222 type Err = Error;
223
224 fn from_str(s: &str) -> Result<Self> {
225 value(s.as_bytes())
226 .finish()
227 .map(|(_, res)| res)
228 .map_err(|e| e.into())
229 }
230}
231
232pub(crate) fn value<'a>(input: &'a [u8]) -> IResult<&'a [u8], Value> {
233 context(
234 "value",
235 alt((
236 boolean_value,
237 float_value,
238 double_value,
239 integer_value,
240 binary_value,
241 string_value,
242 symbol_value,
243 dictionary_value,
244 sequence_value,
245 record_value,
246 set_value,
247 )),
248 )(input)
249}
250
251pub fn parse_value(input: &[u8]) -> IResult<Vec<u8>, Value> {
252 match value(input) {
253 Ok((rest, value)) => Ok((rest.to_vec(), value)),
254 Err(nom::Err::Incomplete(e)) => Err(nom::Err::Incomplete(e)),
255 Err(nom::Err::Failure(e)) => Err(nom::Err::Error(nom::error::Error {
256 input: e.input.to_vec(),
257 code: e.code,
258 })),
259 Err(nom::Err::Error(e)) => Err(nom::Err::Error(nom::error::Error {
260 input: e.input.to_vec(),
261 code: e.code,
262 })),
263 }
264}
265
266fn boolean_value(input: &[u8]) -> IResult<&[u8], Value> {
267 context("boolean", alt((tag("t"), tag("f"))))(input).map(|(next_input, res)| {
268 (
269 next_input,
270 match res {
271 b"t" => Value::Boolean(true),
272 b"f" => Value::Boolean(false),
273 _ => unreachable!("parser"),
274 },
275 )
276 })
277}
278
279fn float_value(input: &[u8]) -> IResult<&[u8], Value> {
280 context("float", preceded(tag("F"), take(4u8)))(input).map(|(next_input, res)| {
281 (
282 next_input,
283 Value::Float(f32::from_be_bytes(res.try_into().unwrap())),
284 )
285 })
286}
287
288fn double_value(input: &[u8]) -> IResult<&[u8], Value> {
289 context("double", preceded(tag("D"), take(8u8)))(input).map(|(next_input, res)| {
290 (
291 next_input,
292 Value::Double(f64::from_be_bytes(res.try_into().unwrap())),
293 )
294 })
295}
296
297fn integer_value(input: &[u8]) -> IResult<&[u8], Value> {
298 context("integer", pair(digit1, alt((tag("+"), tag("-")))))(input).map(|(next_input, res)| {
299 let (num_str, sign_str) = res;
300 let sign = match sign_str {
301 b"+" => Sign::Plus,
302 b"-" => Sign::Minus,
303 _ => unreachable!(),
304 };
305 (
306 next_input,
307 Value::Integer(
308 BigInt::from_radix_be(
309 sign,
310 num_str
311 .iter()
312 .map(|d| d - 0x30)
313 .collect::<Vec<u8>>()
314 .as_slice(),
315 10,
316 )
317 .unwrap(),
318 ),
319 )
320 })
321}
322
323fn binary_value(input: &[u8]) -> IResult<&[u8], Value> {
324 context(
325 "binary",
326 length_count(
327 terminated(digit1, tag(":"))
328 .map(|res| u32::from_str(String::from_utf8_lossy(res).as_ref()).unwrap()),
329 take(1u8),
330 ),
331 )(input)
332 .map(|(next_input, res)| {
333 (
334 next_input,
335 Value::Binary(res.iter().map(|b| b[0]).collect()),
336 )
337 })
338}
339
340fn string_value(input: &[u8]) -> IResult<&[u8], Value> {
341 context(
342 "string",
343 length_count(
344 terminated(digit1, tag("\""))
345 .map(|res| u32::from_str(String::from_utf8_lossy(res).as_ref()).unwrap()),
346 take(1u8),
347 ),
348 )(input)
349 .map(|(next_input, res)| {
350 (
351 next_input,
352 Value::String(
353 String::from_utf8_lossy(res.iter().map(|b| b[0]).collect::<Vec<u8>>().as_slice())
354 .into_owned(),
355 ),
356 )
357 })
358}
359
360fn symbol_value(input: &[u8]) -> IResult<&[u8], Value> {
361 context(
362 "symbol",
363 length_count(
364 terminated(digit1, tag("\'"))
365 .map(|res| u32::from_str(String::from_utf8_lossy(res).as_ref()).unwrap()),
366 take(1u8),
367 ),
368 )(input)
369 .map(|(next_input, res)| {
370 (
371 next_input,
372 Value::Symbol(
373 String::from_utf8_lossy(res.iter().map(|b| b[0]).collect::<Vec<u8>>().as_slice())
374 .into_owned(),
375 ),
376 )
377 })
378}
379
380fn sequence_value(input: &[u8]) -> IResult<&[u8], Value> {
381 context("sequence", preceded(tag("["), many_till(value, tag("]"))))(input)
382 .map(|(next_input, res)| (next_input, Value::Sequence(res.0)))
383}
384
385fn dictionary_value(input: &[u8]) -> IResult<&[u8], Value> {
386 context(
387 "dictionary",
388 preceded(tag("{"), many_till(pair(value, value), tag("}"))),
389 )(input)
390 .map(|(next_input, mut res)| {
391 res.0.sort();
392 (next_input, Value::Dictionary(res.0))
393 })
394}
395
396fn record_value(input: &[u8]) -> IResult<&[u8], Value> {
397 context(
398 "sequence",
399 preceded(tag("<"), pair(value, many_till(value, tag(">")))),
400 )(input)
401 .map(|(next_input, res)| {
402 (
403 next_input,
404 Value::Record {
405 label: Box::new(res.0),
406 fields: res.1 .0,
407 },
408 )
409 })
410}
411
412fn set_value(input: &[u8]) -> IResult<&[u8], Value> {
413 context("sequence", preceded(tag("#"), many_till(value, tag("$"))))(input).map(
414 |(next_input, mut res)| {
415 res.0.sort();
416 (next_input, Value::Set(res.0))
417 },
418 )
419}
420
421impl PartialEq for Value {
422 fn eq(&self, other: &Self) -> bool {
423 return self.cmp(other).is_eq();
424 }
425}
426
// `eq` is defined through a byte-wise comparison of `to_vec()` output, so it is a total equivalence relation.
impl Eq for Value {}
428
429impl Hash for Value {
430 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
431 self.to_vec().hash(state);
432 }
433}
434
435impl PartialOrd for Value {
436 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
437 Some(self.cmp(other))
438 }
439}
440
441impl Ord for Value {
442 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
443 self.cmp(other)
444 }
445}
446
// Unit tests for parsing, canonical ordering and serialisation of `Value`.
#[cfg(test)]
mod tests {
    use std::{fs::File, io::Read, path::PathBuf};

    use nom::AsBytes;

    use super::*;

    /// `TryFrom<&[u8]>` parses booleans and big-endian float/double payloads.
    #[test]
    fn try_from_slice() {
        assert_eq!(b"t".as_slice().try_into(), Ok(Value::boolean(true)),);
        assert_eq!(b"f".as_slice().try_into(), Ok(Value::boolean(false)),);
        assert_eq!(
            b"F\x3d\xcc\xcc\xcd".as_slice().try_into(),
            Ok(Value::float(0.1)),
        );
        assert_eq!(
            b"D\x3f\xb9\x99\x99\x99\x99\x99\x9a".as_slice().try_into(),
            Ok(Value::double(0.1)),
        );
    }

    /// Input matching no value parser is reported as `Error::Parse`.
    #[test]
    fn invalid() {
        assert_eq!(
            Value::from_str("nope"),
            Err::<Value, Error>(Error::Parse("near nope: Tag".to_string()))
        )
    }

    /// `FromStr` parses one example of every textual variant.
    #[test]
    fn from_str() {
        assert_eq!(Value::from_str("t"), Ok(Value::boolean(true)),);
        assert_eq!(Value::from_str("f"), Ok(Value::boolean(false)),);
        assert_eq!(Value::from_str("42+"), Ok(Value::integer(42)),);
        assert_eq!(Value::from_str("42-"), Ok(Value::integer(-42)),);
        assert_eq!(
            Value::from_str("5:hello"),
            Ok(Value::binary(b"hello".as_slice()))
        );
        assert_eq!(Value::from_str("3\"foo"), Ok(Value::string("foo")));
        assert_eq!(Value::from_str("3'foo"), Ok(Value::symbol("foo")));
        assert_eq!(
            Value::from_str("[1+2+3+]"),
            Ok(Value::sequence(vec![
                Value::integer(1),
                Value::integer(2),
                Value::integer(3),
            ]))
        );
        // The input lists "goo" before "foo"; the parser sorts the entries.
        assert_eq!(
            Value::from_str("{3\"goo4\"muck3\"foo3\"bar}"),
            Ok(Value::Dictionary(vec![
                (Value::string("foo"), Value::string("bar")),
                (Value::string("goo"), Value::string("muck"))
            ]))
        );
        assert_eq!(
            Value::from_str("<6:person5:Alice30+t>"),
            Ok(Value::record(
                Value::binary(b"person".as_slice()),
                vec![
                    Value::binary(b"Alice".as_slice()),
                    Value::integer(30),
                    Value::boolean(true),
                ]
            ))
        );
        // The input lists "foo" before "bar"; the parser sorts the members.
        assert_eq!(
            Value::from_str("#3\"foo3\"bar$"),
            Ok(Value::set(vec![Value::string("bar"), Value::string("foo")]))
        );
    }

    /// Parsing already-sorted text and serialising it again yields the same bytes.
    #[test]
    fn round_trip_from_str_to_vec() {
        for s in [
            "t",
            "f",
            "10+",
            "10-",
            "5:hello",
            "3\"foo",
            "4'none",
            "[1+2+3+]",
            "{3\"foo3\"bar3\"goo4\"muck}",
            "<6:person5:Alice30+t>",
            "#3\"bar3\"foo$",
        ] {
            assert_eq!(
                Value::from_str(s).unwrap().to_vec(),
                s.as_bytes().to_vec(),
                "round trip value: {}",
                s
            );
        }
    }

    /// Parses the `testdata/zoo.bin` fixture (located relative to
    /// `CARGO_MANIFEST_DIR`) and compares it with a hand-built expected value.
    #[test]
    fn parse_zoo() {
        let zoo_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("testdata")
            .join("zoo.bin");
        let mut zoo_file = File::open(zoo_path).expect("open testdata/zoo.bin");
        let mut buf = vec![];
        zoo_file
            .read_to_end(&mut buf)
            .expect("read testdata/zoo.bin");
        let zoo_actual: Value = buf.as_bytes().try_into().expect("parse zoo.bin");
        // `Value::dictionary` and `Value::set` sort their contents, so the
        // order in which entries are listed below does not matter.
        let zoo_expected = Value::record(
            Value::binary(b"zoo".as_slice()),
            vec![
                Value::string("The Grand Menagerie"),
                Value::sequence(vec![
                    Value::dictionary(vec![
                        (Value::symbol("species"), Value::binary(b"cat".as_slice())),
                        (Value::symbol("name"), Value::string("Tabatha")),
                        (Value::symbol("age"), Value::integer(12)),
                        (Value::symbol("weight"), Value::double(8.2)),
                        (Value::symbol("alive?"), Value::boolean(true)),
                        (
                            Value::symbol("eats"),
                            Value::set(vec![
                                Value::binary(b"mice".as_slice()),
                                Value::binary(b"fish".as_slice()),
                                Value::binary(b"kibble".as_slice()),
                            ]),
                        ),
                    ]),
                    Value::dictionary(vec![
                        (
                            Value::symbol("species"),
                            Value::binary(b"monkey".as_slice()),
                        ),
                        (Value::symbol("name"), Value::string("George")),
                        (Value::symbol("age"), Value::integer(6)),
                        (Value::symbol("weight"), Value::double(17.24)),
                        (Value::symbol("alive?"), Value::boolean(false)),
                        (
                            Value::symbol("eats"),
                            Value::set(vec![
                                Value::binary(b"bananas".as_slice()),
                                Value::binary(b"insects".as_slice()),
                            ]),
                        ),
                    ]),
                    Value::dictionary(vec![
                        (Value::symbol("species"), Value::binary(b"ghost".as_slice())),
                        (Value::symbol("name"), Value::string("Casper")),
                        (Value::symbol("age"), Value::integer(-12)),
                        (Value::symbol("weight"), Value::double(-34.5)),
                        (Value::symbol("alive?"), Value::boolean(false)),
                        (Value::symbol("eats"), Value::set(vec![])),
                    ]),
                ]),
            ],
        );
        assert_eq!(zoo_expected, zoo_actual);
    }
}