nachricht/
value.rs

1//! The atom of a `nachricht` is the `Value`.
2//! Values are encoded on wire as headers and, if necessary, additional bytes which directly follow the header. Record layouts and
//! values with datatype `Value::Symbol` are defined within a symbol table and can be referenced later within the wire format,
//! so you pay their full bandwidth costs only once. This encoding is transparent: there is no need
5//! to manually define a symbol table within the model.
6
7use crate::header::{Header, Sign};
8use crate::error::{DecodeError, DecoderError, EncodeError};
9use std::mem::size_of;
10use std::io::Write;
11use std::convert::TryInto;
12use std::str::from_utf8;
13use std::iter::repeat;
14use std::borrow::Cow;
15use std::collections::{BTreeMap, HashMap};
16
/// The possible values according to the `nachricht` data model.
///
/// Text and binary payloads are held in a [`Cow`], so a value can either borrow its data
/// (for example from a decode buffer) or own it.
#[derive(Debug, Clone, PartialEq)]
pub enum Value<'a> {
    /// The null value.
    Null,
    /// A boolean.
    Bool(bool),
    /// A 32 bit floating point number.
    F32(f32),
    /// A 64 bit floating point number.
    F64(f64),
    /// Arbitrary binary data.
    Bytes(Cow<'a, [u8]>),
    /// An integer, stored as a sign and an unsigned 64 bit magnitude.
    Int(Sign, u64),
    /// A string.
    Str(Cow<'a, str>),
    /// A symbol. The encoder writes each distinct symbol in full only once and emits a
    /// reference to it on every later occurrence.
    Symbol(Cow<'a, str>),
    /// A record: named fields, kept ordered by field name (the container is a `BTreeMap`).
    Record(BTreeMap<Cow<'a, str>, Value<'a>>),
    /// A list of key-value pairs in which the keys are values themselves.
    Map(Vec<(Value<'a>, Value<'a>)>),
    /// A sequence of values.
    Array(Vec<Value<'a>>),
}
32
33impl<'a> Value<'a> {
34
35    const PROTECTED_CHARS: &'static str = "\n\\$ ,:\"'()[]{}#";
36
37    fn b64(input: &[u8]) -> String {
38        const CHAR_SET: &'static [char] = &['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
39            'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
40            'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
41            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
42        ];
43        let mut array = [0; 4];
44        input.chunks(3).flat_map(|chunk| {
45            let len = chunk.len();
46            array[1..1 + len].copy_from_slice(chunk);
47            for i in 0..(3 - len) {
48                array[3 - i] = 0;
49            }
50            let x = u32::from_be_bytes(array);
51            (0..=len).map(move |o| CHAR_SET[(x >> (18 - 6*o) & 0x3f) as usize]).chain(repeat('=').take(3-len))
52        }).collect()
53    }
54
55    fn typename(&self) -> &'static str {
56        match *self {
57            Self::Null      => "null",
58            Self::Bool(_)   => "bool",
59            Self::F32(_)    => "f32",
60            Self::F64(_)    => "f64",
61            Self::Bytes(_)  => "bytes",
62            Self::Int(_, _) => "integer",
63            Self::Str(_)    => "string",
64            Self::Symbol(_) => "symbol",
65            Self::Record(_) => "record",
66            Self::Map(_)    => "map",
67            Self::Array(_)  => "array",
68        }
69    }
70
71}
72
73
74
75impl<'a> std::fmt::Display for Value<'a> {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        match self {
78            Value::Null         => f.write_str("null"),
79            Value::Bool(true)   => f.write_str("true"),
80            Value::Bool(false)  => f.write_str("false"),
81            Value::F32(v)       => write!(f, "${}", v),
82            Value::F64(v)       => write!(f, "$${}", v),
83            Value::Bytes(v)     => write!(f, "'{}'", Self::b64(v).as_str()),
84            Value::Int(s, v)    => write!(f, "{}{}", match s { Sign::Pos => "", Sign::Neg => "-" }, v),
85            Value::Str(v)       => write!(f, "\"{}\"", v.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n")),
86            Value::Symbol(v) if v.chars().any(|c| Self::PROTECTED_CHARS.contains(c))
87                                => write!(f, "#\"{}\"", v.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n")),
88            Value::Symbol(v)    => write!(f, "#{}", v),
89            Value::Record(v)    => write!(f, "(\n{}\n)", v.iter()
90                .flat_map(|(k, f)| format!("{}: {},", if k.chars().any(|c| Self::PROTECTED_CHARS.contains(c)) {
91                    format!("\"{}\"", k.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n"))
92                } else {
93                    format!("{}", k )
94                }, f).lines().map(|line| format!("  {}", line)).collect::<Vec<String>>())
95                .collect::<Vec<String>>().join("\n")),
96            Value::Map(v)       => write!(f, "{{\n{}\n}}", v.iter()
97                .flat_map(|(k, f)| format!("{}: {},", k, f).lines().map(|line| format!("  {}", line)).collect::<Vec<String>>())
98                .collect::<Vec<String>>().join("\n")),
99            Value::Array(v)    => write!(f, "[\n{}\n]", v.iter()
100                .flat_map(|f| format!("{},", f).lines().map(|line| format!("  {}", line)).collect::<Vec<String>>())
101                .collect::<Vec<String>>().join("\n")),
102        }
103    }
104}
105
/// An entry of the decoder's symbol table, i.e. something a reference header can point to.
#[derive(Debug, PartialEq, Clone)]
#[repr(u8)]
pub enum Refable<'a> {
    /// A single symbol.
    Sym(&'a str),
    /// A record layout: the field names of a record.
    Rec(Vec<&'a str>),
}
112
113impl<'a> Refable<'a> {
114    pub fn name(&self) -> &'static str {
115        match *self {
116            Refable::Sym(_) => "Sym",
117            Refable::Rec(_) => "Rec",
118        }
119    }
120}
121
/// Used to encode `nachricht` fields. This uses a symbol table to allow referencing symbols and
/// record layouts which get repeated.
pub struct Encoder<'w, W: Write> {
    /// Destination the encoded bytes are written to.
    writer: &'w mut W,
    /// Next free index of the table; it is handed out to the next symbol or record layout that
    /// gets entered.
    next_free: usize,
    /// Map symbol -> entry in the table
    symbols: HashMap<Cow<'w, str>, usize>,
    /// Map record layout (the list of field names) -> entry in the table
    records: HashMap<Vec<Cow<'w, str>>, usize>,
}
133
134impl<'w, W: Write> Encoder<'w, W> {
135
136    /// Encode a field to the given writer. The resulting `usize` is the amount of bytes that got written.
137    pub fn encode(field: &'w Value, writer: &'w mut W) -> Result<usize, EncodeError> {
138        Self { writer, symbols: HashMap::new(), records: HashMap::new(), next_free: 0 }.encode_inner(field)
139    }
140
141    fn encode_inner(&mut self, field: &'w Value) -> Result<usize, EncodeError> {
142        let mut c = 0;
143        match &field {
144            Value::Null        => Header::Null.encode(self.writer),
145            Value::Bool(true)  => Header::True.encode(self.writer),
146            Value::Bool(false) => Header::False.encode(self.writer),
147            Value::F32(v)    => {
148                c += Header::F32.encode(self.writer)?;
149                self.writer.write_all(&v.to_be_bytes())?;
150                Ok(c + size_of::<f32>())
151            },
152            Value::F64(v)    => {
153                c += Header::F64.encode(self.writer)?;
154                self.writer.write_all(&v.to_be_bytes())?;
155                Ok(c + size_of::<f64>())
156            },
157            Value::Bytes(v)  => {
158                c += Header::Bin(v.len()).encode(self.writer)?;
159                self.writer.write_all(v)?;
160                Ok(c + v.len())
161            },
162            Value::Int(s, v) => Header::Int(*s, *v).encode(self.writer),
163            Value::Str(v) => {
164                c += Header::Str(v.len()).encode(self.writer)?;
165                self.writer.write_all(v.as_bytes())?;
166                Ok(c + v.len())
167            },
168            Value::Symbol(v) => self.encode_symbol(v),
169            Value::Array(inner) => {
170                c += Header::Arr(inner.len()).encode(self.writer)?;
171                for field in inner.iter() {
172                    c += self.encode_inner(field)?;
173                }
174                Ok(c)
175            },
176            Value::Record(inner) => self.encode_record(inner),
177            Value::Map(inner) => {
178                c += Header::Map(inner.len()).encode(self.writer)?;
179                for (key, val) in inner.iter() {
180                    c += self.encode_inner(key)?;
181                    c += self.encode_inner(val)?;
182                }
183                Ok(c)
184            },
185        }
186    }
187
188    fn encode_record(&mut self, inner: &'w BTreeMap<Cow<'w, str>, Value<'w>>) -> Result<usize, EncodeError> {
189        let mut c = match self.records.get(&inner.keys().map(|i| i.clone()).collect::<Vec<_>>()) {
190            Some(i) => Header::Ref(*i).encode(self.writer)?,
191            None    => {
192                let mut x = Header::Rec(inner.len()).encode(self.writer)?;
193                for sym in inner.keys() {
194                    x += self.encode_symbol(sym)?;
195                }
196                let index = self.next();
197                self.records.insert(inner.keys().map(|i| i.clone()).collect(), index);
198                x
199            }
200        };
201        for val in inner.values() {
202            c += self.encode_inner(val)?;
203        }
204        Ok(c)
205    }
206
207    fn encode_symbol(&mut self, symbol: &'w str) -> Result<usize, EncodeError> {
208        match self.symbols.get(symbol) {
209            Some(i) => Header::Ref(*i).encode(self.writer),
210            None    => {
211                let index = self.next();
212                self.symbols.insert(symbol.into(), index);
213                let c = Header::Sym(symbol.len()).encode(self.writer)?;
214                self.writer.write_all(symbol.as_bytes())?;
215                Ok(c + symbol.len())
216            }
217        }
218    }
219
220    fn next(&mut self) -> usize {
221        self.next_free += 1;
222        self.next_free - 1
223    }
224
225}
/// Used to decode `nachricht` fields. This uses a symbol table to allow the decoding of encountered references.
pub struct Decoder<'a> {
    /// Symbols and record layouts in the order they were encountered; reference headers index
    /// into this table.
    symbols: Vec<Refable<'a>>,
    /// The complete input buffer.
    buf: &'a [u8],
    /// Offset into `buf` of the next byte to be read.
    pos: usize,
}
232
233impl<'a> Decoder<'a> {
234
235    /// Decode a single value from the given buffer. All strings, keys, symbols and byte data will be borrowed from the
236    /// buffer instead of copied. This means that the decoded field may only live as long as the buffer does. However,
237    /// some allocations still occur: containers need their own heap space.
238    pub fn decode<B: ?Sized + AsRef<[u8]>>(buf: &'a B) -> Result<(Value<'a>, usize), DecoderError> {
239        let mut decoder = Self { buf: buf.as_ref(), symbols: Vec::new(), pos: 0 };
240        let value = decoder.decode_value().map_err(|e| e.at(decoder.pos))?;
241        Ok((value, decoder.pos))
242    }
243
244    fn decode_value(&mut self) -> Result<Value<'a>, DecodeError> {
245        let header = self.decode_header()?;
246        match header {
247            Header::Null      => Ok(Value::Null),
248            Header::True      => Ok(Value::Bool(true)),
249            Header::False     => Ok(Value::Bool(false)),
250            Header::F32       => Ok(Value::F32(<f32>::from_be_bytes(self.decode_slice(4)?.try_into().unwrap()))),
251            Header::F64       => Ok(Value::F64(<f64>::from_be_bytes(self.decode_slice(8)?.try_into().unwrap()))),
252            Header::Bin(v)    => Ok(Value::Bytes(Cow::Borrowed(self.decode_slice(v)?))),
253            Header::Int(s, v) => Ok(Value::Int(s, v)),
254            Header::Arr(v) => {
255                let mut elements = Vec::with_capacity(0);
256                elements.try_reserve(v)?;
257                for _ in 0..v {
258                    elements.push(self.decode_value()?);
259                }
260                Ok(Value::Array(elements))
261            },
262            Header::Map(v) => {
263                let mut elements = Vec::with_capacity(0);
264                elements.try_reserve(v)?;
265                for _ in 0..v {
266                    let key = self.decode_value()?;
267                    let val = self.decode_value()?;
268                    elements.push((key, val));
269                }
270                Ok(Value::Map(elements))
271            }
272            Header::Str(v) => Ok(Value::Str(Cow::Borrowed(from_utf8(&self.decode_slice(v)?)?))),
273            Header::Sym(v) => {
274                let sym = from_utf8(&self.decode_slice(v)?)?;
275                self.symbols.push(Refable::Sym(sym));
276                Ok(Value::Symbol(Cow::Borrowed(sym)))
277            },
278            Header::Rec(v) => {
279                let mut fields = BTreeMap::new();
280                let mut keys = Vec::with_capacity(0);
281                keys.try_reserve(v)?;
282                for _ in 0..v {
283                    match self.decode_value()? {
284                        Value::Symbol(Cow::Borrowed(sym)) => { keys.push(sym); },
285                        x => { return Err(DecodeError::IllegalKey(x.typename())); }
286                    }
287                }
288                self.symbols.push(Refable::Rec(keys.clone()));
289                for key in keys {
290                    let val = self.decode_value()?;
291                    fields.insert(Cow::Borrowed(key), val);
292                }
293                Ok(Value::Record(fields))
294            },
295            Header::Ref(v) => {
296                match self.symbols.get(v) {
297                    Some(Refable::Sym(s)) => Ok(Value::Symbol(Cow::Borrowed(s))),
298                    Some(Refable::Rec(ref s)) => {
299                        let mut fields = BTreeMap::<Cow<'a, str>, Value<'a>>::new();
300                        for key in s.clone() {
301                            fields.insert(Cow::Borrowed(key), self.decode_value()?);
302                        }
303                        Ok(Value::Record(fields))
304                    }
305                    None => Err(DecodeError::InvalidRef(v))
306                }
307            },
308        }
309    }
310
311    fn decode_header(&mut self) -> Result<Header, DecodeError> {
312        let (header, c) = Header::decode(&self.buf[self.pos..])?;
313        self.pos += c;
314        Ok(header)
315    }
316
317    fn decode_slice(&mut self, len: usize) -> Result<&'a [u8], DecodeError> {
318        if self.buf[self.pos..].len() < len {
319            Err(DecodeError::Eof)
320        } else {
321            self.pos += len;
322            Ok(&self.buf[self.pos - len .. self.pos])
323        }
324    }
325
326}
327
328
#[cfg(test)]
mod test {
    use super::{Value, Sign, Encoder, Decoder, DecodeError};
    use std::borrow::Cow;
    use std::collections::BTreeMap;

    #[test]
    fn simple_values() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Null, &mut buf);
        assert_roundtrip(Value::Bool(true), &mut buf);
        assert_roundtrip(Value::Bool(false), &mut buf);
        for i in (0..u64::MAX).step_by(3_203_431_780_337) {
            assert_roundtrip(Value::Int(Sign::Pos, i), &mut buf);
            // Zero is only tested with a positive sign.
            assert_roundtrip(Value::Int(Sign::Neg, if i == 0 { 1 } else { i }), &mut buf);
        }
    }

    #[test]
    fn floats() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::F64(f64::MAX), &mut buf);
        assert_roundtrip(Value::F64(f64::MIN), &mut buf);
        assert_roundtrip(Value::F64(std::f64::consts::PI), &mut buf);
        assert_roundtrip(Value::F32(f32::MAX), &mut buf);
        assert_roundtrip(Value::F32(f32::MIN), &mut buf);
        assert_roundtrip(Value::F32(std::f32::consts::PI), &mut buf);
    }

    #[test]
    fn strings() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Str(Cow::Borrowed("Üben von Xylophon und Querflöte ist ja zweckmäßig.")), &mut buf);
    }

    #[test]
    fn symbols() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Array(vec![
                Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
                Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
                Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
                Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
        ]), &mut buf);
    }

    #[test]
    fn bytes() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Bytes(Cow::Borrowed(&[1, 2, 3, 4, 255])), &mut buf);
    }

    #[test]
    fn array_mixed() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Array(vec![
                Value::Int(Sign::Pos, 1),
                Value::Str(Cow::Borrowed("Jessica")),
                Value::Symbol(Cow::Borrowed("FelisCatus")),
                Value::F32(std::f32::consts::PI),
        ]), &mut buf);
    }

    #[test]
    fn array_long() {
        let mut buf = Vec::new();
        for i in 0..1 << 10 {
            assert_roundtrip(Value::Array(vec![ Value::Int(Sign::Pos, 1); i as usize ]), &mut buf);
        }
    }

    #[test]
    fn map() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Map(vec![
                (Value::Str(Cow::Borrowed("first")),  Value::Int(Sign::Pos, 1)),
                (Value::Str(Cow::Borrowed("second")), Value::Int(Sign::Pos, 2)),
                (Value::Str(Cow::Borrowed("third")),  Value::Int(Sign::Pos, 3)),
                (Value::Str(Cow::Borrowed("fourth")), Value::Int(Sign::Pos, 4)),
        ]), &mut buf);
    }

    #[test]
    fn record() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Array(vec![
                Value::Record(BTreeMap::from([
                        (Cow::Borrowed("name"), Value::Str(Cow::Borrowed("Jessica"))),
                        (Cow::Borrowed("species"), Value::Symbol(Cow::Borrowed("PrionailurusViverrinus"))),
                ])),
                Value::Record(BTreeMap::from([
                        (Cow::Borrowed("name"), Value::Str(Cow::Borrowed("Wantan"))),
                        (Cow::Borrowed("species"), Value::Symbol(Cow::Borrowed("LynxLynx"))),
                ])),
        ]), &mut buf);
    }

    #[test]
    fn errors() {
        let buf = [];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::Eof));
        let buf = [2 << 5 | 2, 0xc3, 0x28];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::Utf8(_)));
        let buf = [7 << 5 | 0];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::InvalidRef(0)));
        let buf = [5 << 5 | 1, 5 << 5];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::IllegalKey("record")));
    }

    #[test]
    fn too_big_allocations() {
        let mut buf = [0u8; 9];
        buf[0] = 0x7f;
        for i in (1..u64::MAX).step_by(3_203_431_780_337) {
            let i = i.to_be_bytes();
            buf[1..].copy_from_slice(&i[..]);
            assert!(Decoder::decode(&buf).is_err()); // should never panic
        }
    }

    #[test]
    fn display_record_key() {
        let value = Value::Record(BTreeMap::from([(Cow::Borrowed("true or false"), Value::Bool(false))]));
        assert_eq!("(\n  \"true or false\": false,\n)", format!("{}", &value));
    }

    /// Encodes `val` into the (cleared) `buf`, decodes it again and asserts that both values
    /// are equal.
    fn assert_roundtrip(val: Value, buf: &mut Vec<u8>) {
        buf.clear();
        // Fail right here if encoding goes wrong instead of decoding a partial buffer later.
        Encoder::encode(&val, buf).expect("encoding the test value failed");
        assert_eq!(val, Decoder::decode(buf).unwrap().0);
    }

}