Skip to main content

llsd_rs/
notation.rs

1use std::{
2    collections::HashMap,
3    io::{self, BufRead, BufReader, Read, Write},
4    vec,
5};
6
7use chrono::DateTime;
8use thiserror::Error;
9use uuid::Uuid;
10
11use crate::{Llsd, Uri};
12
/// Options that control how an [`Llsd`] value is rendered in notation form.
///
/// The context is a small `Copy` value: the `with_*` builders return a
/// modified copy, and the serializer derives per-level copies from it while
/// descending into containers.
#[derive(Debug, Clone, Copy)]
pub struct FormatterContext {
    // Text repeated once per nesting level when pretty-printing.
    indent: &'static str,
    // Emit newlines and indentation.
    pretty: bool,
    // Write booleans as `1`/`0` instead of `true`/`false`.
    boolean: bool,
    // Write binary data as `b16"…"` hex instead of raw `b(len)"…"`.
    hex: bool,
    // Current nesting level; starts at 0.
    level: usize,
}

impl FormatterContext {
    /// Creates a compact (non-pretty) context: two-space indent unit,
    /// `true`/`false` booleans, raw binary, nesting level 0.
    pub fn new() -> Self {
        FormatterContext {
            level: 0,
            hex: false,
            boolean: false,
            pretty: false,
            indent: "  ",
        }
    }

    /// Returns a copy that uses `indent` as the per-level indentation unit.
    pub fn with_indent(self, indent: &'static str) -> Self {
        Self { indent, ..self }
    }

    /// Returns a copy with pretty-printing switched on or off.
    pub fn with_pretty(self, pretty: bool) -> Self {
        Self { pretty, ..self }
    }

    /// Returns a copy that writes booleans as `1`/`0` when `boolean` is true.
    pub fn with_boolean(self, boolean: bool) -> Self {
        Self { boolean, ..self }
    }

    /// Returns a copy that writes binary data as hex when `hex` is true.
    pub fn with_hex(self, hex: bool) -> Self {
        Self { hex, ..self }
    }

    /// Returns the indentation for the current level together with the line
    /// separator; both are empty unless pretty-printing is enabled.
    fn indent(&self) -> (String, &str) {
        if !self.pretty {
            return (String::new(), "");
        }
        (self.indent.repeat(self.level), "\n")
    }

    /// Returns a copy of this context that is one nesting level deeper.
    fn increment(&self) -> Self {
        Self {
            level: self.level + 1,
            ..*self
        }
    }
}

impl Default for FormatterContext {
    /// Same as [`FormatterContext::new`].
    fn default() -> Self {
        FormatterContext::new()
    }
}
73
/// Escape table used when writing string content: entry `n` holds the bytes
/// emitted for the input byte with value `n`.
///
/// * Printable ASCII maps to itself, except `'` (written as `\'`) and `\`
///   (written as `\\`). Note that `"` is emitted unescaped.
/// * Bytes 7 through 13 use the single-letter escapes `\a \b \t \n \v \f \r`.
/// * Every other control byte, 0x7f, and all bytes >= 0x80 are written as a
///   two-digit lowercase `\xNN` escape.
const STRING_CHARACTERS: [&[u8]; 256] = [
    b"\\x00", // 0
    b"\\x01", // 1
    b"\\x02", // 2
    b"\\x03", // 3
    b"\\x04", // 4
    b"\\x05", // 5
    b"\\x06", // 6
    b"\\a",   // 7
    b"\\b",   // 8
    b"\\t",   // 9
    b"\\n",   // 10
    b"\\v",   // 11
    b"\\f",   // 12
    b"\\r",   // 13
    b"\\x0e", // 14
    b"\\x0f", // 15
    b"\\x10", // 16
    b"\\x11", // 17
    b"\\x12", // 18
    b"\\x13", // 19
    b"\\x14", // 20
    b"\\x15", // 21
    b"\\x16", // 22
    b"\\x17", // 23
    b"\\x18", // 24
    b"\\x19", // 25
    b"\\x1a", // 26
    b"\\x1b", // 27
    b"\\x1c", // 28
    b"\\x1d", // 29
    b"\\x1e", // 30
    b"\\x1f", // 31
    b" ",     // 32
    b"!",     // 33
    b"\"",    // 34
    b"#",     // 35
    b"$",     // 36
    b"%",     // 37
    b"&",     // 38
    b"\\'",   // 39
    b"(",     // 40
    b")",     // 41
    b"*",     // 42
    b"+",     // 43
    b",",     // 44
    b"-",     // 45
    b".",     // 46
    b"/",     // 47
    b"0",     // 48
    b"1",     // 49
    b"2",     // 50
    b"3",     // 51
    b"4",     // 52
    b"5",     // 53
    b"6",     // 54
    b"7",     // 55
    b"8",     // 56
    b"9",     // 57
    b":",     // 58
    b";",     // 59
    b"<",     // 60
    b"=",     // 61
    b">",     // 62
    b"?",     // 63
    b"@",     // 64
    b"A",     // 65
    b"B",     // 66
    b"C",     // 67
    b"D",     // 68
    b"E",     // 69
    b"F",     // 70
    b"G",     // 71
    b"H",     // 72
    b"I",     // 73
    b"J",     // 74
    b"K",     // 75
    b"L",     // 76
    b"M",     // 77
    b"N",     // 78
    b"O",     // 79
    b"P",     // 80
    b"Q",     // 81
    b"R",     // 82
    b"S",     // 83
    b"T",     // 84
    b"U",     // 85
    b"V",     // 86
    b"W",     // 87
    b"X",     // 88
    b"Y",     // 89
    b"Z",     // 90
    b"[",     // 91
    b"\\\\",  // 92
    b"]",     // 93
    b"^",     // 94
    b"_",     // 95
    b"`",     // 96
    b"a",     // 97
    b"b",     // 98
    b"c",     // 99
    b"d",     // 100
    b"e",     // 101
    b"f",     // 102
    b"g",     // 103
    b"h",     // 104
    b"i",     // 105
    b"j",     // 106
    b"k",     // 107
    b"l",     // 108
    b"m",     // 109
    b"n",     // 110
    b"o",     // 111
    b"p",     // 112
    b"q",     // 113
    b"r",     // 114
    b"s",     // 115
    b"t",     // 116
    b"u",     // 117
    b"v",     // 118
    b"w",     // 119
    b"x",     // 120
    b"y",     // 121
    b"z",     // 122
    b"{",     // 123
    b"|",     // 124
    b"}",     // 125
    b"~",     // 126
    b"\\x7f", // 127
    b"\\x80", // 128
    b"\\x81", // 129
    b"\\x82", // 130
    b"\\x83", // 131
    b"\\x84", // 132
    b"\\x85", // 133
    b"\\x86", // 134
    b"\\x87", // 135
    b"\\x88", // 136
    b"\\x89", // 137
    b"\\x8a", // 138
    b"\\x8b", // 139
    b"\\x8c", // 140
    b"\\x8d", // 141
    b"\\x8e", // 142
    b"\\x8f", // 143
    b"\\x90", // 144
    b"\\x91", // 145
    b"\\x92", // 146
    b"\\x93", // 147
    b"\\x94", // 148
    b"\\x95", // 149
    b"\\x96", // 150
    b"\\x97", // 151
    b"\\x98", // 152
    b"\\x99", // 153
    b"\\x9a", // 154
    b"\\x9b", // 155
    b"\\x9c", // 156
    b"\\x9d", // 157
    b"\\x9e", // 158
    b"\\x9f", // 159
    b"\\xa0", // 160
    b"\\xa1", // 161
    b"\\xa2", // 162
    b"\\xa3", // 163
    b"\\xa4", // 164
    b"\\xa5", // 165
    b"\\xa6", // 166
    b"\\xa7", // 167
    b"\\xa8", // 168
    b"\\xa9", // 169
    b"\\xaa", // 170
    b"\\xab", // 171
    b"\\xac", // 172
    b"\\xad", // 173
    b"\\xae", // 174
    b"\\xaf", // 175
    b"\\xb0", // 176
    b"\\xb1", // 177
    b"\\xb2", // 178
    b"\\xb3", // 179
    b"\\xb4", // 180
    b"\\xb5", // 181
    b"\\xb6", // 182
    b"\\xb7", // 183
    b"\\xb8", // 184
    b"\\xb9", // 185
    b"\\xba", // 186
    b"\\xbb", // 187
    b"\\xbc", // 188
    b"\\xbd", // 189
    b"\\xbe", // 190
    b"\\xbf", // 191
    b"\\xc0", // 192
    b"\\xc1", // 193
    b"\\xc2", // 194
    b"\\xc3", // 195
    b"\\xc4", // 196
    b"\\xc5", // 197
    b"\\xc6", // 198
    b"\\xc7", // 199
    b"\\xc8", // 200
    b"\\xc9", // 201
    b"\\xca", // 202
    b"\\xcb", // 203
    b"\\xcc", // 204
    b"\\xcd", // 205
    b"\\xce", // 206
    b"\\xcf", // 207
    b"\\xd0", // 208
    b"\\xd1", // 209
    b"\\xd2", // 210
    b"\\xd3", // 211
    b"\\xd4", // 212
    b"\\xd5", // 213
    b"\\xd6", // 214
    b"\\xd7", // 215
    b"\\xd8", // 216
    b"\\xd9", // 217
    b"\\xda", // 218
    b"\\xdb", // 219
    b"\\xdc", // 220
    b"\\xdd", // 221
    b"\\xde", // 222
    b"\\xdf", // 223
    b"\\xe0", // 224
    b"\\xe1", // 225
    b"\\xe2", // 226
    b"\\xe3", // 227
    b"\\xe4", // 228
    b"\\xe5", // 229
    b"\\xe6", // 230
    b"\\xe7", // 231
    b"\\xe8", // 232
    b"\\xe9", // 233
    b"\\xea", // 234
    b"\\xeb", // 235
    b"\\xec", // 236
    b"\\xed", // 237
    b"\\xee", // 238
    b"\\xef", // 239
    b"\\xf0", // 240
    b"\\xf1", // 241
    b"\\xf2", // 242
    b"\\xf3", // 243
    b"\\xf4", // 244
    b"\\xf5", // 245
    b"\\xf6", // 246
    b"\\xf7", // 247
    b"\\xf8", // 248
    b"\\xf9", // 249
    b"\\xfa", // 250
    b"\\xfb", // 251
    b"\\xfc", // 252
    b"\\xfd", // 253
    b"\\xfe", // 254
    b"\\xff", // 255
];
332
333fn write_string<W: Write>(s: &str, w: &mut W) -> Result<(), io::Error> {
334    for c in s.bytes() {
335        w.write_all(STRING_CHARACTERS[c as usize])?;
336    }
337    Ok(())
338}
339
340fn write_inner<W: Write>(
341    llsd: &Llsd,
342    w: &mut W,
343    context: &FormatterContext,
344) -> Result<(), io::Error> {
345    let (indent, newline) = context.indent();
346    match llsd {
347        Llsd::Map(v) => {
348            w.write_all(indent.as_bytes())?;
349            w.write_all(b"{")?;
350            let context = context.increment();
351            let inner_indent = context.indent().0;
352            let mut comma = false;
353            for (k, e) in v {
354                if comma {
355                    w.write_all(b",")?;
356                }
357                comma = true;
358
359                w.write_all(newline.as_bytes())?;
360                w.write_all(inner_indent.as_bytes())?;
361                w.write_all(b"'")?;
362                write_string(k, w)?;
363                w.write_all(b"':")?;
364
365                write_inner(e, w, &context)?;
366            }
367            w.write_all(newline.as_bytes())?;
368            w.write_all(indent.as_bytes())?;
369            w.write_all(b"}")?;
370        }
371        Llsd::Array(v) => {
372            w.write_all(newline.as_bytes())?;
373            w.write_all(indent.as_bytes())?;
374            w.write_all(b"[")?;
375            let context = context.increment();
376            let mut comma = false;
377            for e in v {
378                if comma {
379                    w.write_all(b",")?;
380                }
381                comma = true;
382
383                write_inner(e, w, &context)?;
384            }
385            w.write_all(b"]")?;
386        }
387        Llsd::Undefined => w.write_all(b"!")?,
388        Llsd::Boolean(v) => {
389            if context.boolean {
390                w.write_all(if *v { b"1" } else { b"0" })?;
391            } else {
392                w.write_all(if *v { b"true" } else { b"false" })?;
393            }
394        }
395        Llsd::Integer(v) => w.write_all(format!("i{}", v).as_bytes())?,
396        Llsd::Real(v) => w.write_all(format!("r{}", v).as_bytes())?,
397        Llsd::Uuid(v) => w.write_all(format!("u{}", v).as_bytes())?,
398        Llsd::String(v) => {
399            w.write_all(b"'")?;
400            write_string(v, w)?;
401            w.write_all(b"'")?;
402        }
403        Llsd::Date(v) => w.write_all(format!("d\"{}\"", v.to_rfc3339()).as_bytes())?,
404        Llsd::Uri(v) => {
405            w.write_all(b"l\"")?;
406            write_string(v.as_str(), w)?;
407            w.write_all(b"\"")?;
408        }
409        Llsd::Binary(v) => {
410            if context.hex {
411                w.write_all(b"b16\"")?;
412                for byte in v {
413                    write!(w, "{:02X}", byte)?;
414                }
415            } else {
416                w.write_all(format!("b({})\"", v.len()).as_bytes())?;
417                w.write_all(v.as_slice())?;
418            }
419            w.write_all(b"\"")?;
420        }
421    }
422    Ok(())
423}
424
425pub fn write<W: Write>(
426    llsd: &Llsd,
427    w: &mut W,
428    context: &FormatterContext,
429) -> Result<(), io::Error> {
430    write_inner(llsd, w, context)
431}
432
433pub fn to_vec(llsd: &Llsd, context: &FormatterContext) -> Result<Vec<u8>, io::Error> {
434    let mut buffer = Vec::new();
435    write(llsd, &mut buffer, context)?;
436    Ok(buffer)
437}
438
439pub fn to_string(llsd: &Llsd, context: &FormatterContext) -> Result<String, io::Error> {
440    let buffer = to_vec(llsd, context)?;
441    String::from_utf8(buffer).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
442}
443
444pub fn from_reader<R: Read>(reader: R, max_depth: usize) -> ParseResult<Llsd> {
445    let mut stream = Stream::new(reader);
446    let Some(c) = stream.skip_ws()? else {
447        return Ok(Llsd::Undefined);
448    };
449    from_reader_char(&mut stream, c, max_depth)
450}
451
452pub fn from_str(s: &str, max_depth: usize) -> ParseResult<Llsd> {
453    let reader = s.as_bytes();
454    from_reader(reader, max_depth)
455}
456
457pub fn from_bytes(bytes: &[u8], max_depth: usize) -> ParseResult<Llsd> {
458    let reader = bytes;
459    from_reader(reader, max_depth)
460}
461
/// Returns early from the enclosing function with a `ParseError` built from
/// `$kind` and the position currently reported by `$stream.pos()`.
///
/// Because it expands to a `return`, it can only be used inside functions
/// whose return type accepts `Err(ParseError { .. })`.
macro_rules! bail {
    ($stream:expr, $kind:expr $(,)?) => {{
        // Capture the position first so `$kind` may freely borrow the stream.
        let pos = $stream.pos();
        return Err(ParseError { kind: $kind, pos });
    }};
}
468
/// Evaluates `$value` (a `Result`): an `Ok` is passed through unchanged, while
/// an `Err` is converted with `.into()` and handed to `bail!`, which returns
/// from the enclosing function with the position reported by `$stream`.
macro_rules! map {
    ($stream:expr, $value:expr) => {{
        match $value {
            Ok(v) => Ok(v),
            Err(e) => bail!($stream, e.into()),
        }
    }};
}
477
478fn from_reader_char<R: Read>(
479    stream: &mut Stream<R>,
480    char: u8,
481    max_depth: usize,
482) -> ParseResult<Llsd> {
483    if max_depth == 0 {
484        bail!(stream, ParseErrorKind::MaxDepth);
485    }
486    match char {
487        b'{' => {
488            let mut map = HashMap::new();
489            loop {
490                match stream.skip_ws()? {
491                    Some(b'}') => break,
492                    Some(b',') => continue,
493                    Some(quote @ (b'\'' | b'"' | b's')) => {
494                        let key = if quote == b's' {
495                            let buf = stream.read_sized()?;
496                            stream.parse_utf8(buf)?
497                        } else {
498                            stream.unescape(quote)?
499                        };
500                        match stream.skip_ws()? {
501                            Some(b':') => {}
502                            Some(other) => {
503                                bail!(
504                                    stream,
505                                    ParseErrorKind::Expected(format!(
506                                        "':' or '}}' after key, found: 0x{:02x}",
507                                        other
508                                    ))
509                                );
510                            }
511                            None => bail!(stream, ParseErrorKind::Eof),
512                        }
513                        let value_first = match stream.skip_ws()? {
514                            Some(c) => c,
515                            None => {
516                                bail!(stream, ParseErrorKind::Eof);
517                            }
518                        };
519                        map.insert(key, from_reader_char(stream, value_first, max_depth + 1)?);
520                    }
521                    Some(other) => {
522                        bail!(
523                            stream,
524                            ParseErrorKind::Expected(format!(
525                                "Invalid character in map: 0x{:02x}",
526                                other
527                            ))
528                        );
529                    }
530                    None => bail!(stream, ParseErrorKind::Eof),
531                }
532            }
533            Ok(Llsd::Map(map))
534        }
535        b'[' => {
536            let mut array = vec![];
537            loop {
538                match stream.skip_ws()? {
539                    Some(b']') => break,
540                    Some(b',') => continue,
541                    Some(c) => array.push(from_reader_char(stream, c, max_depth + 1)?),
542                    None => bail!(stream, ParseErrorKind::Eof),
543                }
544            }
545            Ok(Llsd::Array(array))
546        }
547        b'!' => Ok(Llsd::Undefined),
548        b'0' => Ok(Llsd::Boolean(false)),
549        b'1' => Ok(Llsd::Boolean(true)),
550        b'i' | b'I' => {
551            let sign = match stream.peek()? {
552                Some(b'-') => {
553                    stream.next()?;
554                    -1
555                }
556                Some(b'+') => {
557                    stream.next()?;
558                    1
559                }
560                _ => 1,
561            };
562            let buf = stream.take_while(|c| matches!(c, b'0'..=b'9' | b'-'))?;
563            let i = map!(stream, stream.parse_utf8(buf)?.parse::<i32>())?;
564            Ok(Llsd::Integer(i * sign))
565        }
566        b'r' | b'R' => {
567            let buf = stream.take_while(|c| b"-.0123456789eEinfINFaA".contains(&c))?;
568            let f = map!(stream, stream.parse_utf8(buf)?.parse::<f64>())?;
569            Ok(Llsd::Real(f))
570        }
571        b'u' | b'U' => {
572            let buf = stream
573                .take_while(|c| matches!(c, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'-'))?;
574            let uuid = map!(stream, Uuid::parse_str(stream.parse_utf8(buf)?.as_str()))?;
575            Ok(Llsd::Uuid(uuid))
576        }
577        b't' | b'T' => {
578            stream.expect(b"rR")?;
579            stream.expect(b"uU")?;
580            stream.expect(b"eE")?;
581            Ok(Llsd::Boolean(true))
582        }
583        b'f' | b'F' => {
584            stream.expect(b"aA")?;
585            stream.expect(b"lL")?;
586            stream.expect(b"sS")?;
587            stream.expect(b"eE")?;
588            Ok(Llsd::Boolean(false))
589        }
590        b'\'' => Ok(Llsd::String(stream.unescape(b'\'')?)),
591        b'"' => Ok(Llsd::String(stream.unescape(b'"')?)),
592        b's' => {
593            let buf = stream.read_sized()?;
594            let str = stream.parse_utf8(buf)?;
595            Ok(Llsd::String(str))
596        }
597        b'l' | b'L' => {
598            stream.expect(b"\"")?;
599            Ok(Llsd::Uri(Uri::parse(&stream.unescape(b'"')?)))
600        }
601        b'd' | b'D' => {
602            stream.expect(b"\"")?;
603            let str = stream.unescape(b'"')?;
604            let time = map!(stream, DateTime::parse_from_rfc3339(&str))?;
605            Ok(Llsd::Date(time.into()))
606        }
607        b'b' | b'B' => {
608            if let Some(c) = stream.peek()? {
609                if c == b'(' {
610                    Ok(Llsd::Binary(stream.read_sized()?))
611                } else if c == b'1' {
612                    stream.next()?;
613                    stream.expect(b"6")?;
614                    stream.expect(b"\"")?;
615                    let mut buf = vec![];
616                    while let Some(c) = stream.next()? {
617                        match c {
618                            b'0'..=b'9' => buf.push(((c - b'0') << 4) | stream.hex()?),
619                            b'a'..=b'f' => buf.push(((c - b'a' + 10) << 4) | stream.hex()?),
620                            b'A'..=b'F' => buf.push(((c - b'A' + 10) << 4) | stream.hex()?),
621                            b'"' => break,
622                            _ => bail!(
623                                stream,
624                                ParseErrorKind::Expected(format!(
625                                    "expected digit or ')', found: 0x{:02x}",
626                                    c
627                                ))
628                            ),
629                        }
630                    }
631                    Ok(Llsd::Binary(buf))
632                } else {
633                    bail!(
634                        stream,
635                        ParseErrorKind::Expected("Invalid binary format".to_string())
636                    );
637                }
638            } else {
639                bail!(stream, ParseErrorKind::Eof);
640            }
641        }
642        c => bail!(
643            stream,
644            ParseErrorKind::Expected(format!("Invalid character: 0x{:02x}", c))
645        ),
646    }
647}
648
/// A location in the parsed input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    /// Number of bytes counted so far; starts at 0.
    pub offset: usize,
    /// Line number; starts at 1.
    pub line: usize,
    /// Column number; starts at 1.
    pub column: usize,
}

impl Default for Position {
    /// The start of input: offset 0, line 1, column 1.
    fn default() -> Self {
        let (offset, line, column) = (0, 1, 1);
        Position {
            offset,
            line,
            column,
        }
    }
}
665
/// The reason a parse failed. Paired with a `Position` inside `ParseError`.
///
/// The variants carrying `#[from]` wrap errors of the underlying libraries so
/// they can be produced with `.into()`.
#[derive(Debug, Error)]
pub enum ParseErrorKind {
    /// The parser was entered with a depth budget of 0.
    #[error("max recursion depth reached")]
    MaxDepth,
    /// The input ended where more bytes were required.
    #[error("unexpected end of input")]
    Eof,
    /// A byte that is not valid at this point; carries the offending byte.
    #[error("invalid character: 0x{0:02x}")]
    InvalidChar(u8),
    /// Free-form description of what the parser expected to find.
    #[error("expected {0}")]
    Expected(String),
    /// Failure reported by the underlying reader.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Bytes that should form a string were not valid UTF-8.
    #[error("utf8 error: {0}")]
    Utf8(#[from] std::string::FromUtf8Error),
    /// Error from the `uuid` crate.
    #[error("uuid error: {0}")]
    Uuid(#[from] uuid::Error),
    /// Parse error from the `chrono` crate.
    #[error("chrono error: {0}")]
    Chrono(#[from] chrono::ParseError),
    /// Integer text could not be parsed.
    #[error("int error: {0}")]
    Int(#[from] std::num::ParseIntError),
    /// Floating-point text could not be parsed.
    #[error("float error: {0}")]
    Float(#[from] std::num::ParseFloatError),
}
689
690impl PartialEq for ParseErrorKind {
691    fn eq(&self, other: &Self) -> bool {
692        match (self, other) {
693            (ParseErrorKind::MaxDepth, ParseErrorKind::MaxDepth) => true,
694            (ParseErrorKind::Eof, ParseErrorKind::Eof) => true,
695            (ParseErrorKind::InvalidChar(a), ParseErrorKind::InvalidChar(b)) => a == b,
696            (ParseErrorKind::Expected(a), ParseErrorKind::Expected(b)) => a == b,
697            (ParseErrorKind::Io(a), ParseErrorKind::Io(b)) => {
698                a.kind() == b.kind() && a.to_string() == b.to_string()
699            }
700            (ParseErrorKind::Utf8(a), ParseErrorKind::Utf8(b)) => a.to_string() == b.to_string(),
701            (ParseErrorKind::Uuid(a), ParseErrorKind::Uuid(b)) => a.to_string() == b.to_string(),
702            (ParseErrorKind::Chrono(a), ParseErrorKind::Chrono(b)) => {
703                a.to_string() == b.to_string()
704            }
705            (ParseErrorKind::Int(a), ParseErrorKind::Int(b)) => a.to_string() == b.to_string(),
706            (ParseErrorKind::Float(a), ParseErrorKind::Float(b)) => a.to_string() == b.to_string(),
707            _ => false,
708        }
709    }
710}
711
712impl Eq for ParseErrorKind {}
713
/// A parse failure: what went wrong (`kind`) and the stream position recorded
/// when the error was created (`pos`).
///
/// Displays as `"<kind> at byte <offset> (line <line>, col <column>)"`.
#[derive(Debug, Error, PartialEq, Eq)]
#[error("{kind} at byte {} (line {}, col {})", pos.offset, pos.line, pos.column)]
pub struct ParseError {
    /// The reason for the failure.
    pub kind: ParseErrorKind,
    /// Where in the input the failure was reported.
    pub pos: Position,
}

/// Result alias used throughout the notation parser.
type ParseResult<T> = Result<T, ParseError>;
722
723struct Stream<R: Read> {
724    inner: BufReader<R>,
725    pos: Position,
726}
727
728impl<R: Read> Stream<R> {
729    fn new(read: R) -> Self {
730        Self {
731            inner: BufReader::new(read),
732            pos: Position::default(),
733        }
734    }
735
736    #[inline]
737    pub fn pos(&self) -> Position {
738        self.pos
739    }
740
741    #[inline]
742    fn advance(&mut self, byte: u8) {
743        self.pos.offset += 1;
744        if byte == b'\n' {
745            self.pos.line += 1;
746            self.pos.column = 1;
747        } else {
748            self.pos.column += 1;
749        }
750    }
751
752    /// Return the next byte **without** consuming it.
753    fn peek(&mut self) -> ParseResult<Option<u8>> {
754        match self.inner.fill_buf() {
755            Ok([]) => Ok(None),
756            Ok(buf) => {
757                let byte = buf[0];
758                self.pos.offset += 1;
759                self.pos.column += 1;
760                Ok(Some(byte))
761            }
762            Err(e) => Err(ParseError {
763                kind: ParseErrorKind::Io(e),
764                pos: self.pos,
765            }),
766        }
767    }
768
769    /// Consume one byte and return it.
770    fn next(&mut self) -> ParseResult<Option<u8>> {
771        if let Some(b) = self.peek()? {
772            self.advance(b);
773            self.inner.consume(1);
774            return Ok(Some(b));
775        }
776        Ok(None)
777    }
778
779    /// Skip ASCII whitespace and return the first non-WS byte, consuming it
780    fn skip_ws(&mut self) -> ParseResult<Option<u8>> {
781        loop {
782            match self.next()? {
783                Some(b' ' | b'\t' | b'\r' | b'\n') => continue,
784                Some(b) => return Ok(Some(b)),
785                None => return Ok(None),
786            }
787        }
788    }
789
790    /// Consume one of the expected bytes.
791    fn expect(&mut self, expected: &[u8]) -> ParseResult<()> {
792        match self.next()? {
793            Some(b) if expected.contains(&b) => Ok(()),
794            Some(b) => Err(ParseError {
795                kind: ParseErrorKind::Expected(format!(
796                    "expected one of {:?}, found: 0x{:02x}",
797                    expected, b
798                )),
799                pos: self.pos,
800            }),
801            None => Err(ParseError {
802                kind: ParseErrorKind::Eof,
803                pos: self.pos,
804            }),
805        }
806    }
807
808    /// Read a sequence that satisfies `pred` (stop *before* the first byte
809    /// that fails the predicate).
810    fn take_while<F>(&mut self, mut pred: F) -> ParseResult<Vec<u8>>
811    where
812        F: FnMut(u8) -> bool,
813    {
814        let mut out = Vec::new();
815        while let Some(b) = self.peek()? {
816            if pred(b) {
817                self.inner.consume(1);
818                self.advance(b);
819                out.push(b);
820            } else {
821                break;
822            }
823        }
824        Ok(out)
825    }
826
827    /// Unescape a string until the delimiter is reached.
828    fn unescape(&mut self, delim: u8) -> ParseResult<String> {
829        let mut buf = Vec::new();
830        loop {
831            match self.next()? {
832                Some(c) if c == delim => break,
833                Some(b'\\') => match self.next()? {
834                    Some(c) => match c {
835                        b'a' => buf.push(0x07),
836                        b'b' => buf.push(0x08),
837                        b'f' => buf.push(0x0c),
838                        b'n' => buf.push(b'\n'),
839                        b'r' => buf.push(b'\r'),
840                        b't' => buf.push(b'\t'),
841                        b'v' => buf.push(0x0b),
842                        b'\\' => buf.push(b'\\'),
843                        b'\'' => buf.push(b'\''),
844                        b'"' => buf.push(b'"'),
845                        b'x' => {
846                            let high = self.hex()?;
847                            let low = self.hex()?;
848                            buf.push((high << 4) | low);
849                        }
850                        other => buf.push(other),
851                    },
852                    None => bail!(self, ParseErrorKind::Eof),
853                },
854                Some(other) => buf.push(other),
855                None => bail!(self, ParseErrorKind::Eof),
856            }
857        }
858        self.parse_utf8(buf)
859    }
860
861    /// Read a hex character and return its value.
862    fn hex(&mut self) -> ParseResult<u8> {
863        let c = self.next()?;
864        match c {
865            Some(b'0'..=b'9') => Ok(c.unwrap() - b'0'),
866            Some(b'a'..=b'f') => Ok(c.unwrap() - b'a' + 10),
867            Some(b'A'..=b'F') => Ok(c.unwrap() - b'A' + 10),
868            _ => bail!(self, ParseErrorKind::InvalidChar(c.unwrap_or(0))),
869        }
870    }
871
872    /// Read exactly `n` bytes into the buffer.
873    fn read_exact(&mut self, buf: &mut [u8]) -> ParseResult<()> {
874        match self.inner.read_exact(buf) {
875            Err(e) => Err(ParseError {
876                kind: ParseErrorKind::Io(e),
877                pos: self.pos,
878            }),
879            _ => {
880                self.pos.offset += buf.len();
881                self.pos.line += buf.iter().filter(|&&b| b == b'\n').count();
882                self.pos.column = buf.iter().rev().take_while(|&&b| b != b'\n').count();
883                Ok(())
884            }
885        }
886    }
887
888    fn read_sized(&mut self) -> ParseResult<Vec<u8>> {
889        self.expect(b"(")?;
890        let buf = self.take_while(|c| c != b')')?;
891        self.expect(b")")?;
892        let size = map!(self, self.parse_utf8(buf)?.parse::<usize>())?;
893        self.expect(b"\"'")?;
894        let mut buf = vec![0; size];
895        self.read_exact(&mut buf)?;
896        self.expect(b"\"'")?;
897        Ok(buf)
898    }
899
900    /// Read a UTF-8 string from the buffer.
901    pub fn parse_utf8(&self, buf: Vec<u8>) -> ParseResult<String> {
902        String::from_utf8(buf).map_err(|e| ParseError {
903            kind: ParseErrorKind::Utf8(e),
904            pos: self.pos,
905        })
906    }
907}
908
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{TimeZone, Utc};
    use std::collections::HashMap;

    /// Serializes `llsd` with `formatter`, parses the bytes back with a depth
    /// budget of 1 and asserts that the result equals the input.
    fn round_trip(llsd: Llsd, formatter: FormatterContext) {
        let bytes = to_vec(&llsd, &formatter).expect("Failed to encode");
        let parsed = from_bytes(&bytes, 1).expect("Failed to decode");
        assert_eq!(llsd, parsed);
    }

    /// Round-trips `llsd` using the default formatter settings.
    fn round_trip_default(llsd: Llsd) {
        round_trip(llsd, FormatterContext::default());
    }

    #[test]
    fn undefined() {
        round_trip_default(Llsd::Undefined);
    }

    #[test]
    fn boolean() {
        for flag in [true, false] {
            round_trip_default(Llsd::Boolean(flag));
        }
    }

    #[test]
    fn integer() {
        round_trip_default(Llsd::Integer(42));
    }

    #[test]
    fn real() {
        round_trip_default(Llsd::Real(13.1415));
    }

    #[test]
    fn string() {
        round_trip_default(Llsd::String(String::from("Hello, LLSD!")));
    }

    #[test]
    fn uri() {
        let uri = Uri::parse("https://example.com/");
        round_trip_default(Llsd::Uri(uri));
    }

    #[test]
    fn uuid() {
        let id = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
        round_trip_default(Llsd::Uuid(id));
    }

    #[test]
    fn date() {
        let timestamp = Utc.timestamp_opt(1_620_000_000, 0).unwrap();
        round_trip_default(Llsd::Date(timestamp));
    }

    #[test]
    fn binary() {
        // Exercise both the raw `b(len)"…"` and the hex `b16"…"` encodings.
        let payload = vec![0xde, 0xad, 0xbe, 0xef];
        round_trip_default(Llsd::Binary(payload.clone()));
        round_trip(Llsd::Binary(payload), FormatterContext::new().with_hex(true));
    }

    #[test]
    fn array() {
        let items = vec![
            Llsd::Integer(1),
            Llsd::String(String::from("two")),
            Llsd::Boolean(false),
        ];
        round_trip_default(Llsd::Array(items.clone()));
        round_trip(Llsd::Array(items), FormatterContext::new().with_pretty(true));
    }

    #[test]
    fn map() {
        let entries = HashMap::from([
            (String::from("answer"), Llsd::Integer(42)),
            (String::from("pi"), Llsd::Real(13.14)),
            (String::from("greeting"), Llsd::String(String::from("hello"))),
        ]);
        round_trip_default(Llsd::Map(entries.clone()));
        round_trip(Llsd::Map(entries), FormatterContext::new().with_pretty(true));
    }
}
998}