cbor_data/
reader.rs

1use crate::{constants::*, Tags};
2
// Early-exit guard for functions that return `Option`.
//
// `check!(cond)` returns `None` from the enclosing function unless `cond` holds.
// `check!(cond, value)` does the same and otherwise evaluates to `value`, so it can be
// used in expression position (e.g. as a match arm).
macro_rules! check {
    ($cond:expr) => {
        if !$cond {
            return None;
        }
    };
    ($cond:expr, $value:expr) => {
        if $cond {
            $value
        } else {
            return None;
        }
    };
}
17
/// Low-level representation of major type 7 values.
///
/// Bool, null, and undefined are represented by `L0`, while `L2`, `L4`, and `L8` carry the
/// underlying bytes of floating-point numbers (16-, 32-, and 64-bit IEEE 754).
///
/// The variant name reflects how many bytes follow the initial header byte, as produced by
/// `careful_literal`.
pub enum Literal {
    /// Value stored directly in the low five bits of the initial byte (always below 24).
    L0(u8),
    /// Value stored in the single byte following the initial byte.
    L1(u8),
    /// Two following bytes, read big-endian (bit pattern of a 16-bit float).
    L2(u16),
    /// Four following bytes, read big-endian (bit pattern of a 32-bit float).
    L4(u32),
    /// Eight following bytes, read big-endian (bit pattern of a 64-bit float).
    L8(u64),
}
29
/// Extracts the major type (the top three bits of the first byte) of the item at the
/// start of `bytes`.
///
/// Returns `None` when `bytes` is empty.
#[inline]
pub(crate) fn major(bytes: &[u8]) -> Option<u8> {
    bytes.first().map(|&head| head >> 5)
}
34
35pub(crate) fn careful_literal(bytes: &[u8]) -> Option<(Literal, &[u8])> {
36    let (int, b, rest) = integer(bytes)?;
37    match b.len() {
38        1 => Some((Literal::L0(int as u8), rest)),
39        2 => Some((Literal::L1(int as u8), rest)),
40        3 => Some((Literal::L2(int as u16), rest)),
41        5 => Some((Literal::L4(int as u32), rest)),
42        9 => Some((Literal::L8(int as u64), rest)),
43        _ => None,
44    }
45}
46
47pub(crate) fn integer(bytes: &[u8]) -> Option<(u64, &[u8], &[u8])> {
48    match bytes[0] & 31 {
49        // fun fact: explicit bounds checks make the code a lot smaller and faster because
50        // otherwise the panic’s line number dictates a separate check for each array access
51        24 => check!(
52            bytes.len() > 1,
53            Some((bytes[1] as u64, &bytes[..2], &bytes[2..]))
54        ),
55        25 => check!(
56            bytes.len() > 2,
57            Some((
58                ((bytes[1] as u64) << 8) | (bytes[2] as u64),
59                &bytes[..3],
60                &bytes[3..]
61            ))
62        ),
63        26 => check!(
64            bytes.len() > 4,
65            Some((
66                // fun fact: these expressions compile down to mov-shl-bswap
67                ((bytes[1] as u64) << 24)
68                    | ((bytes[2] as u64) << 16)
69                    | ((bytes[3] as u64) << 8)
70                    | (bytes[4] as u64),
71                &bytes[..5],
72                &bytes[5..],
73            ))
74        ),
75        27 => check!(
76            bytes.len() > 8,
77            Some((
78                ((bytes[1] as u64) << 56)
79                    | ((bytes[2] as u64) << 48)
80                    | ((bytes[3] as u64) << 40)
81                    | ((bytes[4] as u64) << 32)
82                    | ((bytes[5] as u64) << 24)
83                    | ((bytes[6] as u64) << 16)
84                    | ((bytes[7] as u64) << 8)
85                    | (bytes[8] as u64),
86                &bytes[..9],
87                &bytes[9..],
88            ))
89        ),
90        x if x < 24 => Some(((x as u64), &bytes[..1], &bytes[1..])),
91        _ => None,
92    }
93}
94
95// inline to reuse the bounds check already made by the caller
96#[inline(always)]
97pub(crate) fn indefinite(bytes: &[u8]) -> Option<(u64, &[u8], &[u8])> {
98    if bytes[0] & 31 == INDEFINITE_SIZE {
99        // since an item takes at least 1 byte, u64::MAX is an impossible size
100        Some((u64::MAX, &bytes[..1], &bytes[1..]))
101    } else {
102        None
103    }
104}
105
106pub(crate) fn float(bytes: &[u8]) -> Option<(f64, &[u8], &[u8])> {
107    integer(bytes).and_then(|(x, b, rest)| match b.len() {
108        3 => Some((half::f16::from_bits(x as u16).to_f64(), b, rest)),
109        5 => Some((f32::from_bits(x as u32) as f64, b, rest)),
110        9 => Some((f64::from_bits(x), b, rest)),
111        _ => None,
112    })
113}
114
115pub(crate) fn tags(bytes: &[u8]) -> Option<(Tags, &[u8])> {
116    let mut remaining = bytes;
117    while let Some(value) = remaining.get(0) {
118        if (*value >> 5) != MAJOR_TAG {
119            break;
120        }
121        let (_, _, r) = integer(remaining)?;
122        remaining = r;
123    }
124    let len = bytes.len() - remaining.len();
125    Some((Tags::new(&bytes[..len]), remaining))
126}
127
#[cfg(test)]
mod tests {
    use crate::{index_str, Cbor, CborOwned, ItemKind};
    use serde_json::json;

    /// Canonical CBOR for `{"a": {"b": 12}, "c": null}`, encoded via serde.
    fn sample() -> CborOwned {
        let document = json!({
            "a": {
                "b": 12
            },
            "c": null
        });
        let encoded = serde_cbor::to_vec(&document).unwrap();
        CborOwned::canonical(encoded).unwrap()
    }

    #[test]
    fn must_read_serde() {
        let doc = sample();

        // nested lookup through a dotted path
        let nested = doc.index(index_str("a.b")).unwrap();
        assert_eq!(nested.kind(), ItemKind::Pos(12));

        // top-level lookup
        let top_level = doc.index(index_str("c")).unwrap();
        assert_eq!(top_level.kind(), ItemKind::Null);
    }

    #[test]
    fn indefinite_strings() {
        // (expected text, encoded bytes)
        let cases = vec![
            // 2 chunks (with unicode)
            (
                "exampleα≤β",
                vec![
                    0x7fu8, 0x67, 101, 120, 97, 109, 112, 108, 101, 0x67, 206, 177, 226, 137, 164,
                    206, 178, 0xff,
                ],
            ),
            // 1 chunk
            (
                "example",
                vec![0x7fu8, 0x67, 101, 120, 97, 109, 112, 108, 101, 0xff],
            ),
            // 0 chunks
            ("", vec![0x7fu8, 0xff]),
            // empty chunk
            ("", vec![0x7fu8, 0x60, 0xff]),
        ];

        for (expected, encoded) in cases {
            // read the bytes in place first ...
            let borrowed = Cbor::unchecked(&*encoded);
            assert!(
                matches!(borrowed.kind(), ItemKind::Str(s) if s == expected),
                "value was {:?}",
                borrowed.kind()
            );

            // ... then again after canonicalisation
            let owned = CborOwned::canonical(encoded).unwrap();
            assert!(
                matches!(owned.kind(), ItemKind::Str(s) if s.as_str().unwrap() == expected),
                "value was {:?}",
                owned.kind()
            );
        }
    }

    #[test]
    fn float() {
        // 0xfa announces a four-byte float; the bits 0x0000_3311 form a subnormal f32
        const EXPECTED: f64 = 1.8319174824118334e-41;
        let encoded = vec![0xfau8, 0, 0, 51, 17];

        let borrowed = Cbor::unchecked(&*encoded);
        assert_eq!(borrowed.kind(), ItemKind::Float(EXPECTED));

        let owned = CborOwned::canonical(encoded).unwrap();
        assert_eq!(owned.kind(), ItemKind::Float(EXPECTED));
    }
}