1use crate::header::{Header, Sign};
8use crate::error::{DecodeError, DecoderError, EncodeError};
9use std::mem::size_of;
10use std::io::Write;
11use std::convert::TryInto;
12use std::str::from_utf8;
13use std::iter::repeat;
14use std::borrow::Cow;
15use std::collections::{BTreeMap, HashMap};
16
/// A dynamically typed value that [`Encoder`] can serialise and [`Decoder`] produces.
///
/// Text and byte payloads are stored as [`Cow`]s, so a value may either borrow its data
/// (as the decoder does, pointing into the input buffer) or own it.
#[derive(Debug, Clone, PartialEq)]
pub enum Value<'a> {
    /// The absence of a value; displayed as `null`.
    Null,
    /// A boolean; displayed as `true` / `false`.
    Bool(bool),
    /// A 32-bit float; displayed with a single `$` prefix.
    F32(f32),
    /// A 64-bit float; displayed with a `$$` prefix.
    F64(f64),
    /// Raw bytes; displayed as base64 between single quotes.
    Bytes(Cow<'a, [u8]>),
    /// An integer stored as a sign plus an unsigned 64-bit magnitude.
    Int(Sign, u64),
    /// A UTF-8 string.
    Str(Cow<'a, str>),
    /// A symbol. The encoder writes each distinct symbol once and refers back to it afterwards.
    Symbol(Cow<'a, str>),
    /// A record: values keyed by field name. Being a `BTreeMap`, fields are kept in key order.
    Record(BTreeMap<Cow<'a, str>, Value<'a>>),
    /// A list of key/value pairs whose keys may be arbitrary values, kept in insertion order.
    Map(Vec<(Value<'a>, Value<'a>)>),
    /// An ordered sequence of values.
    Array(Vec<Value<'a>>),
}
32
33impl<'a> Value<'a> {
34
35 const PROTECTED_CHARS: &'static str = "\n\\$ ,:\"'()[]{}#";
36
37 fn b64(input: &[u8]) -> String {
38 const CHAR_SET: &'static [char] = &['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
39 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
40 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
41 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
42 ];
43 let mut array = [0; 4];
44 input.chunks(3).flat_map(|chunk| {
45 let len = chunk.len();
46 array[1..1 + len].copy_from_slice(chunk);
47 for i in 0..(3 - len) {
48 array[3 - i] = 0;
49 }
50 let x = u32::from_be_bytes(array);
51 (0..=len).map(move |o| CHAR_SET[(x >> (18 - 6*o) & 0x3f) as usize]).chain(repeat('=').take(3-len))
52 }).collect()
53 }
54
55 fn typename(&self) -> &'static str {
56 match *self {
57 Self::Null => "null",
58 Self::Bool(_) => "bool",
59 Self::F32(_) => "f32",
60 Self::F64(_) => "f64",
61 Self::Bytes(_) => "bytes",
62 Self::Int(_, _) => "integer",
63 Self::Str(_) => "string",
64 Self::Symbol(_) => "symbol",
65 Self::Record(_) => "record",
66 Self::Map(_) => "map",
67 Self::Array(_) => "array",
68 }
69 }
70
71}
72
73
74
75impl<'a> std::fmt::Display for Value<'a> {
76 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77 match self {
78 Value::Null => f.write_str("null"),
79 Value::Bool(true) => f.write_str("true"),
80 Value::Bool(false) => f.write_str("false"),
81 Value::F32(v) => write!(f, "${}", v),
82 Value::F64(v) => write!(f, "$${}", v),
83 Value::Bytes(v) => write!(f, "'{}'", Self::b64(v).as_str()),
84 Value::Int(s, v) => write!(f, "{}{}", match s { Sign::Pos => "", Sign::Neg => "-" }, v),
85 Value::Str(v) => write!(f, "\"{}\"", v.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n")),
86 Value::Symbol(v) if v.chars().any(|c| Self::PROTECTED_CHARS.contains(c))
87 => write!(f, "#\"{}\"", v.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n")),
88 Value::Symbol(v) => write!(f, "#{}", v),
89 Value::Record(v) => write!(f, "(\n{}\n)", v.iter()
90 .flat_map(|(k, f)| format!("{}: {},", if k.chars().any(|c| Self::PROTECTED_CHARS.contains(c)) {
91 format!("\"{}\"", k.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n"))
92 } else {
93 format!("{}", k )
94 }, f).lines().map(|line| format!(" {}", line)).collect::<Vec<String>>())
95 .collect::<Vec<String>>().join("\n")),
96 Value::Map(v) => write!(f, "{{\n{}\n}}", v.iter()
97 .flat_map(|(k, f)| format!("{}: {},", k, f).lines().map(|line| format!(" {}", line)).collect::<Vec<String>>())
98 .collect::<Vec<String>>().join("\n")),
99 Value::Array(v) => write!(f, "[\n{}\n]", v.iter()
100 .flat_map(|f| format!("{},", f).lines().map(|line| format!(" {}", line)).collect::<Vec<String>>())
101 .collect::<Vec<String>>().join("\n")),
102 }
103 }
104}
105
/// Something that an encoded back-reference can point at.
///
/// The decoder keeps a table of these, appended to in the order the items appear in the input.
#[derive(Debug, PartialEq, Clone)]
#[repr(u8)]
pub enum Refable<'a> {
    /// A previously seen symbol.
    Sym(&'a str),
    /// The ordered key list of a previously seen record.
    Rec(Vec<&'a str>),
}

impl<'a> Refable<'a> {
    /// Returns the name of the variant: `"Sym"` or `"Rec"`.
    pub fn name(&self) -> &'static str {
        match self {
            Self::Sym(_) => "Sym",
            Self::Rec(_) => "Rec",
        }
    }
}
121
/// Serialiser that writes a [`Value`] tree to a [`Write`] sink.
///
/// While encoding it remembers every symbol and record key list it has already written, so that
/// later occurrences can be emitted as a `Header::Ref` to the earlier one.
pub struct Encoder<'w, W: Write> {
    /// Destination of the encoded bytes.
    writer: &'w mut W,
    /// Index that the next newly written symbol or record key list will receive.
    next_free: usize,
    /// Symbols written so far, mapped to the index they were assigned.
    symbols: HashMap<Cow<'w, str>, usize>,
    /// Record key lists written so far, mapped to the index they were assigned.
    records: HashMap<Vec<Cow<'w, str>>, usize>,
}
133
134impl<'w, W: Write> Encoder<'w, W> {
135
136 pub fn encode(field: &'w Value, writer: &'w mut W) -> Result<usize, EncodeError> {
138 Self { writer, symbols: HashMap::new(), records: HashMap::new(), next_free: 0 }.encode_inner(field)
139 }
140
141 fn encode_inner(&mut self, field: &'w Value) -> Result<usize, EncodeError> {
142 let mut c = 0;
143 match &field {
144 Value::Null => Header::Null.encode(self.writer),
145 Value::Bool(true) => Header::True.encode(self.writer),
146 Value::Bool(false) => Header::False.encode(self.writer),
147 Value::F32(v) => {
148 c += Header::F32.encode(self.writer)?;
149 self.writer.write_all(&v.to_be_bytes())?;
150 Ok(c + size_of::<f32>())
151 },
152 Value::F64(v) => {
153 c += Header::F64.encode(self.writer)?;
154 self.writer.write_all(&v.to_be_bytes())?;
155 Ok(c + size_of::<f64>())
156 },
157 Value::Bytes(v) => {
158 c += Header::Bin(v.len()).encode(self.writer)?;
159 self.writer.write_all(v)?;
160 Ok(c + v.len())
161 },
162 Value::Int(s, v) => Header::Int(*s, *v).encode(self.writer),
163 Value::Str(v) => {
164 c += Header::Str(v.len()).encode(self.writer)?;
165 self.writer.write_all(v.as_bytes())?;
166 Ok(c + v.len())
167 },
168 Value::Symbol(v) => self.encode_symbol(v),
169 Value::Array(inner) => {
170 c += Header::Arr(inner.len()).encode(self.writer)?;
171 for field in inner.iter() {
172 c += self.encode_inner(field)?;
173 }
174 Ok(c)
175 },
176 Value::Record(inner) => self.encode_record(inner),
177 Value::Map(inner) => {
178 c += Header::Map(inner.len()).encode(self.writer)?;
179 for (key, val) in inner.iter() {
180 c += self.encode_inner(key)?;
181 c += self.encode_inner(val)?;
182 }
183 Ok(c)
184 },
185 }
186 }
187
188 fn encode_record(&mut self, inner: &'w BTreeMap<Cow<'w, str>, Value<'w>>) -> Result<usize, EncodeError> {
189 let mut c = match self.records.get(&inner.keys().map(|i| i.clone()).collect::<Vec<_>>()) {
190 Some(i) => Header::Ref(*i).encode(self.writer)?,
191 None => {
192 let mut x = Header::Rec(inner.len()).encode(self.writer)?;
193 for sym in inner.keys() {
194 x += self.encode_symbol(sym)?;
195 }
196 let index = self.next();
197 self.records.insert(inner.keys().map(|i| i.clone()).collect(), index);
198 x
199 }
200 };
201 for val in inner.values() {
202 c += self.encode_inner(val)?;
203 }
204 Ok(c)
205 }
206
207 fn encode_symbol(&mut self, symbol: &'w str) -> Result<usize, EncodeError> {
208 match self.symbols.get(symbol) {
209 Some(i) => Header::Ref(*i).encode(self.writer),
210 None => {
211 let index = self.next();
212 self.symbols.insert(symbol.into(), index);
213 let c = Header::Sym(symbol.len()).encode(self.writer)?;
214 self.writer.write_all(symbol.as_bytes())?;
215 Ok(c + symbol.len())
216 }
217 }
218 }
219
220 fn next(&mut self) -> usize {
221 self.next_free += 1;
222 self.next_free - 1
223 }
224
225}
/// Cursor-based deserialiser that reads a [`Value`] out of a byte buffer, borrowing payloads from
/// that buffer instead of copying them.
pub struct Decoder<'a> {
    /// Symbols and record key lists seen so far, in order of appearance; `Header::Ref` indexes
    /// into this table.
    symbols: Vec<Refable<'a>>,
    /// The complete input.
    buf: &'a [u8],
    /// Offset into `buf` of the next unread byte.
    pos: usize,
}
232
233impl<'a> Decoder<'a> {
234
235 pub fn decode<B: ?Sized + AsRef<[u8]>>(buf: &'a B) -> Result<(Value<'a>, usize), DecoderError> {
239 let mut decoder = Self { buf: buf.as_ref(), symbols: Vec::new(), pos: 0 };
240 let value = decoder.decode_value().map_err(|e| e.at(decoder.pos))?;
241 Ok((value, decoder.pos))
242 }
243
244 fn decode_value(&mut self) -> Result<Value<'a>, DecodeError> {
245 let header = self.decode_header()?;
246 match header {
247 Header::Null => Ok(Value::Null),
248 Header::True => Ok(Value::Bool(true)),
249 Header::False => Ok(Value::Bool(false)),
250 Header::F32 => Ok(Value::F32(<f32>::from_be_bytes(self.decode_slice(4)?.try_into().unwrap()))),
251 Header::F64 => Ok(Value::F64(<f64>::from_be_bytes(self.decode_slice(8)?.try_into().unwrap()))),
252 Header::Bin(v) => Ok(Value::Bytes(Cow::Borrowed(self.decode_slice(v)?))),
253 Header::Int(s, v) => Ok(Value::Int(s, v)),
254 Header::Arr(v) => {
255 let mut elements = Vec::with_capacity(0);
256 elements.try_reserve(v)?;
257 for _ in 0..v {
258 elements.push(self.decode_value()?);
259 }
260 Ok(Value::Array(elements))
261 },
262 Header::Map(v) => {
263 let mut elements = Vec::with_capacity(0);
264 elements.try_reserve(v)?;
265 for _ in 0..v {
266 let key = self.decode_value()?;
267 let val = self.decode_value()?;
268 elements.push((key, val));
269 }
270 Ok(Value::Map(elements))
271 }
272 Header::Str(v) => Ok(Value::Str(Cow::Borrowed(from_utf8(&self.decode_slice(v)?)?))),
273 Header::Sym(v) => {
274 let sym = from_utf8(&self.decode_slice(v)?)?;
275 self.symbols.push(Refable::Sym(sym));
276 Ok(Value::Symbol(Cow::Borrowed(sym)))
277 },
278 Header::Rec(v) => {
279 let mut fields = BTreeMap::new();
280 let mut keys = Vec::with_capacity(0);
281 keys.try_reserve(v)?;
282 for _ in 0..v {
283 match self.decode_value()? {
284 Value::Symbol(Cow::Borrowed(sym)) => { keys.push(sym); },
285 x => { return Err(DecodeError::IllegalKey(x.typename())); }
286 }
287 }
288 self.symbols.push(Refable::Rec(keys.clone()));
289 for key in keys {
290 let val = self.decode_value()?;
291 fields.insert(Cow::Borrowed(key), val);
292 }
293 Ok(Value::Record(fields))
294 },
295 Header::Ref(v) => {
296 match self.symbols.get(v) {
297 Some(Refable::Sym(s)) => Ok(Value::Symbol(Cow::Borrowed(s))),
298 Some(Refable::Rec(ref s)) => {
299 let mut fields = BTreeMap::<Cow<'a, str>, Value<'a>>::new();
300 for key in s.clone() {
301 fields.insert(Cow::Borrowed(key), self.decode_value()?);
302 }
303 Ok(Value::Record(fields))
304 }
305 None => Err(DecodeError::InvalidRef(v))
306 }
307 },
308 }
309 }
310
311 fn decode_header(&mut self) -> Result<Header, DecodeError> {
312 let (header, c) = Header::decode(&self.buf[self.pos..])?;
313 self.pos += c;
314 Ok(header)
315 }
316
317 fn decode_slice(&mut self, len: usize) -> Result<&'a [u8], DecodeError> {
318 if self.buf[self.pos..].len() < len {
319 Err(DecodeError::Eof)
320 } else {
321 self.pos += len;
322 Ok(&self.buf[self.pos - len .. self.pos])
323 }
324 }
325
326}
327
328
#[cfg(test)]
mod test {
    use super::{Value, Sign, Encoder, Decoder, DecodeError};
    use std::borrow::Cow;
    use std::collections::BTreeMap;

    /// Null, booleans and a sparse sweep over the whole `u64` range with both signs.
    #[test]
    fn simple_values() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Null, &mut buf);
        assert_roundtrip(Value::Bool(true), &mut buf);
        assert_roundtrip(Value::Bool(false), &mut buf);
        for i in (0..u64::MAX).step_by(3_203_431_780_337) {
            assert_roundtrip(Value::Int(Sign::Pos, i), &mut buf);
            // Negative zero is skipped; -1 is used in its place.
            assert_roundtrip(Value::Int(Sign::Neg, if i == 0 { 1 } else { i }), &mut buf);
        }
    }

    /// Extreme and ordinary values of both float widths.
    #[test]
    fn floats() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::F64(f64::MAX), &mut buf);
        assert_roundtrip(Value::F64(f64::MIN), &mut buf);
        assert_roundtrip(Value::F64(std::f64::consts::PI), &mut buf);
        assert_roundtrip(Value::F32(f32::MAX), &mut buf);
        assert_roundtrip(Value::F32(f32::MIN), &mut buf);
        assert_roundtrip(Value::F32(std::f32::consts::PI), &mut buf);
    }

    /// A string containing multi-byte UTF-8 characters.
    #[test]
    fn strings() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Str(Cow::Borrowed("Üben von Xylophon und Querflöte ist ja zweckmäßig.")), &mut buf);
    }

    /// The same symbol repeated, which exercises back-references.
    #[test]
    fn symbols() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Array(vec![
            Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
            Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
            Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
            Value::Symbol(Cow::Borrowed("PrionailurusViverrinus")),
        ]), &mut buf);
    }

    #[test]
    fn bytes() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Bytes(Cow::Borrowed(&[1, 2, 3, 4, 255])), &mut buf);
    }

    #[test]
    fn array_mixed() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Array(vec![
            Value::Int(Sign::Pos, 1),
            Value::Str(Cow::Borrowed("Jessica")),
            Value::Symbol(Cow::Borrowed("FelisCatus")),
            Value::F32(std::f32::consts::PI),
        ]), &mut buf);
    }

    /// Arrays of every length from 0 to 1023.
    #[test]
    fn array_long() {
        let mut buf = Vec::new();
        for i in 0..1 << 10 {
            assert_roundtrip(Value::Array(vec![ Value::Int(Sign::Pos, 1); i as usize ]), &mut buf);
        }
    }

    #[test]
    fn map() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Map(vec![
            (Value::Str(Cow::Borrowed("first")), Value::Int(Sign::Pos, 1)),
            (Value::Str(Cow::Borrowed("second")), Value::Int(Sign::Pos, 2)),
            (Value::Str(Cow::Borrowed("third")), Value::Int(Sign::Pos, 3)),
            (Value::Str(Cow::Borrowed("fourth")), Value::Int(Sign::Pos, 4)),
        ]), &mut buf);
    }

    /// Two records with the same key list, so the second one is written as a reference.
    #[test]
    fn record() {
        let mut buf = Vec::new();
        assert_roundtrip(Value::Array(vec![
            Value::Record(BTreeMap::from([
                (Cow::Borrowed("name"), Value::Str(Cow::Borrowed("Jessica"))),
                (Cow::Borrowed("species"), Value::Symbol(Cow::Borrowed("PrionailurusViverrinus"))),
            ])),
            Value::Record(BTreeMap::from([
                (Cow::Borrowed("name"), Value::Str(Cow::Borrowed("Wantan"))),
                (Cow::Borrowed("species"), Value::Symbol(Cow::Borrowed("LynxLynx"))),
            ])),
        ]), &mut buf);
    }

    /// Malformed inputs must produce the matching error variant.
    #[test]
    fn errors() {
        // Empty input.
        let buf = [];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::Eof));
        // A two-byte string whose payload is not valid UTF-8.
        let buf = [2 << 5 | 2, 0xc3, 0x28];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::Utf8(_)));
        // A reference into an empty table.
        let buf = [7 << 5 | 0];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::InvalidRef(0)));
        // A record whose only key is itself a record.
        let buf = [5 << 5 | 1, 5 << 5];
        assert!(matches!(Decoder::decode(&buf).unwrap_err().into_inner(), DecodeError::IllegalKey("record")));
    }

    /// Huge claimed lengths with no data behind them must be rejected instead of aborting.
    #[test]
    fn too_big_allocations() {
        let mut buf = [0u8; 9];
        // Presumably a header that takes its length from the following eight bytes; confirm
        // against `Header::decode`.
        buf[0] = 0x7f;
        for i in (1..u64::MAX).step_by(3_203_431_780_337) {
            let i = i.to_be_bytes();
            buf[1..].copy_from_slice(&i[..]);
            assert!(Decoder::decode(&buf).is_err());
        }
    }

    /// Record keys containing protected characters are displayed quoted.
    #[test]
    fn display_record_key() {
        let value = Value::Record(BTreeMap::from([(Cow::Borrowed("true or false"), Value::Bool(false))]));
        assert_eq!("(\n \"true or false\": false,\n)", format!("{}", &value));
    }

    /// Encodes `val` into `buf` (cleared first) and checks that decoding yields an equal value.
    fn assert_roundtrip(val: Value, buf: &mut Vec<u8>) {
        buf.clear();
        // An encoding failure is reported here, not as a confusing decode mismatch below.
        assert!(Encoder::encode(&val, buf).is_ok(), "encoding failed for {:?}", val);
        assert_eq!(val, Decoder::decode(buf).unwrap().0);
    }

}