datum/
decoder.rs

/*
 * datum-rs - Quick to implement S-expression format
 * Written starting in 2024 by contributors (see CREDITS.txt at repository's root)
 * To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
 * A copy of the Unlicense should have been supplied as COPYING.txt in this repository. Alternatively, you can find it at <https://unlicense.org/>.
 */
7
8use crate::{datum_error, DatumChar, DatumOffset, DatumPipe, DatumResult};
9
10/// Decoder's state machine
11#[derive(Clone, Copy, PartialEq, Eq, Debug)]
12enum DatumDecoderState {
13    Normal,
14    Escaping(DatumOffset),
15    HexEscape(DatumOffset, u32),
16}
17
18/// Decoder for the Datum encoding layer.
19#[derive(Clone, Copy, PartialEq, Eq, Debug)]
20pub struct DatumDecoder(DatumDecoderState);
21
22impl Default for DatumDecoder {
23    #[inline]
24    fn default() -> DatumDecoder {
25        DatumDecoder(DatumDecoderState::Normal)
26    }
27}
28
29impl DatumPipe for DatumDecoder {
30    type Input = char;
31    type Output = DatumChar;
32
33    fn feed<F: FnMut(DatumOffset, DatumChar) -> DatumResult<()>>(
34        &mut self,
35        at: DatumOffset,
36        char: Option<char>,
37        f: &mut F,
38    ) -> DatumResult<()> {
39        if char.is_none() {
40            return if self.0 != DatumDecoderState::Normal {
41                self.0 = DatumDecoderState::Normal;
42                Err(datum_error!(Interrupted, at, "decoder: interrupted"))
43            } else {
44                Ok(())
45            };
46        }
47        let char = char.unwrap();
48        if char == '\r' {
49            return Ok(());
50        }
51        let new_state = match self.0 {
52            DatumDecoderState::Normal => {
53                if char == '\\' {
54                    Ok(DatumDecoderState::Escaping(at))
55                } else {
56                    match DatumChar::identify(char) {
57                        Some(v) => {
58                            f(at, v)?;
59                            Ok(DatumDecoderState::Normal)
60                        }
61                        None => Err(datum_error!(BadData, at, "decoder: forbidden character")),
62                    }
63                }
64            }
65            DatumDecoderState::Escaping(start) => match char {
66                'r' => {
67                    f(start, DatumChar::content('\r'))?;
68                    Ok(DatumDecoderState::Normal)
69                }
70                'n' => {
71                    f(start, DatumChar::content('\n'))?;
72                    Ok(DatumDecoderState::Normal)
73                }
74                't' => {
75                    f(start, DatumChar::content('\t'))?;
76                    Ok(DatumDecoderState::Normal)
77                }
78                'x' => Ok(DatumDecoderState::HexEscape(start, 0)),
79                '\n' => Err(datum_error!(
80                    BadData,
81                    at,
82                    "decoder: newline in escape sequence"
83                )),
84                _ => {
85                    f(start, DatumChar::content(char))?;
86                    Ok(DatumDecoderState::Normal)
87                }
88            },
89            DatumDecoderState::HexEscape(start, v) => {
90                if char == ';' {
91                    if let Some(rustchar) = char::from_u32(v) {
92                        f(start, DatumChar::content(rustchar))?;
93                        Ok(DatumDecoderState::Normal)
94                    } else {
95                        Err(datum_error!(
96                            BadData,
97                            at,
98                            "decoder: invalid unicode in hex escape"
99                        ))
100                    }
101                } else {
102                    let mut v_new = v;
103                    v_new <<= 4;
104                    if let Some(digit) = char.to_digit(16) {
105                        v_new |= digit;
106                        Ok(DatumDecoderState::HexEscape(start, v_new))
107                    } else {
108                        Err(datum_error!(BadData, at, "decoder: invalid hex digit"))
109                    }
110                }
111            }
112        }?;
113        self.0 = new_state;
114        Ok(())
115    }
116}