sem_reg/data_conversion/
byte_seq.rs

1use std::mem;
2use zerocopy::{AsBytes, FromBytes};
3
/// A growable byte buffer paired with a read cursor.
///
/// Bytes are appended at the end of the buffer, while reads consume bytes starting at the
/// cursor and move it forward.
#[derive(Debug, Clone, Default)]
pub struct ByteSeq {
    /// The stored bytes.
    bytes: Vec<u8>,
    /// Index into `bytes` of the next byte to be read.
    read_index: usize,
}
9
10impl ByteSeq {
11    pub fn new() -> Self {
12        Self {
13            bytes: Vec::new(),
14            read_index: 0,
15        }
16    }
17
18    pub fn with_capacity(capacity: usize) -> Self {
19        Self {
20            bytes: Vec::with_capacity(capacity),
21            read_index: 0,
22        }
23    }
24
25    pub fn from_bytes(bytes: Vec<u8>) -> Self {
26        Self {
27            bytes,
28            read_index: 0,
29        }
30    }
31
32    pub fn as_slice(&self) -> &[u8] {
33        &self.bytes
34    }
35
36    pub fn read_index(&self) -> usize {
37        self.read_index
38    }
39
40    pub fn len(&self) -> usize {
41        self.bytes.len()
42    }
43
44    pub fn num_bytes_left(&self) -> usize {
45        self.bytes.len() - self.read_index
46    }
47
48    pub fn seek(&mut self, index: usize) -> bool {
49        if index <= self.bytes.len() {
50            self.read_index = index;
51            true
52        } else {
53            false
54        }
55    }
56
57    pub fn seek_by(&mut self, num_bytes: usize) -> bool {
58        self.seek(self.read_index + num_bytes)
59    }
60
61    pub fn assert_const(&mut self, r#const: &[u8]) -> Result<(), ParseError> {
62        self.bytes[self.read_index..]
63            .starts_with(r#const)
64            .then(|| {
65                self.read_index += r#const.len();
66                ()
67            })
68            .ok_or(ParseError::ExpectedConst(self.read_index))
69    }
70
71    pub fn push_const(&mut self, r#const: &[u8]) {
72        self.bytes.extend_from_slice(r#const);
73    }
74
75    pub fn assert_zero(&mut self) -> Result<(), ParseError> {
76        self.bytes[self.read_index..]
77            .starts_with(&[0x00])
78            .then(|| {
79                self.read_index += 1;
80                ()
81            })
82            .ok_or(ParseError::ExpectedZero(self.read_index))
83    }
84
85    pub fn push_zero(&mut self) {
86        self.bytes.push(0);
87    }
88
89    pub fn read_int<T: FromBytes>(&mut self) -> Result<T, ParseError> {
90        //! This and the respective push-function can just use the native byte order, because this code is just for Windows, which always uses little endian, and registry values should also always use little endian.
91
92        if let Some((value, size)) = self.get_int(self.read_index) {
93            self.read_index += size;
94            Ok(value)
95        } else {
96            Err(ParseError::ExpectedInt(self.read_index))
97        }
98    }
99
100    fn get_int<T: FromBytes>(&self, index: usize) -> Option<(T, usize)> {
101        T::read_from_prefix(&self.bytes[index..]).map(|value| (value, mem::size_of::<T>()))
102    }
103
104    pub fn push_int<T: AsBytes>(&mut self, int: T) {
105        self.bytes.extend_from_slice(T::as_bytes(&int));
106    }
107
108    pub fn read_vlq_64(&mut self) -> Result<u64, ParseError> {
109        if let Some((value, size)) = self.get_vlq_64(self.read_index) {
110            self.read_index += size;
111            Ok(value)
112        } else {
113            Err(ParseError::ExpectedVlq64(self.read_index))
114        }
115    }
116
117    fn get_vlq_64(&self, start_index: usize) -> Option<(u64, usize)> {
118        let mut value = 0;
119
120        let mut index = start_index;
121        let mut shift = 0;
122
123        loop {
124            let byte = if let Some(byte) = self.bytes.get(index) {
125                byte
126            } else {
127                // No concluding byte - not VLQ.
128                break None;
129            };
130
131            if shift == 63 && byte & 0b1111_1110 != 0 {
132                // Bits other than LSB would result in overflow. This also rules out even more loop iterations, because it ensures that the MSB is zero, which leads to the loop successfully breaking below.
133                break None;
134            }
135
136            value += ((byte & 0b0111_1111) as u64) << shift;
137
138            index += 1;
139            shift += 7;
140
141            if byte & 0b1000_0000 == 0 {
142                // Concluding byte - done.
143                break Some((value, index - start_index));
144            }
145        }
146    }
147
148    pub fn push_vlq_64(&mut self, mut value: u64) {
149        loop {
150            let mut byte = (value & 0b0111_1111) as u8;
151            value >>= 7;
152            if value != 0 {
153                // One or more bytes will follow. Set continuation bit.
154                byte |= 0b1000_0000;
155            }
156            self.bytes.push(byte);
157
158            if value == 0 {
159                break;
160            }
161        }
162    }
163
164    pub fn read_zigzag_vlq_64(&mut self) -> Result<i64, ParseError> {
165        if let Some((value, size)) = self.get_zigzag_vlq_64(self.read_index) {
166            self.read_index += size;
167            Ok(value)
168        } else {
169            Err(ParseError::ExpectedVlq64(self.read_index))
170        }
171    }
172
173    fn get_zigzag_vlq_64(&self, start_index: usize) -> Option<(i64, usize)> {
174        self.get_vlq_64(start_index)
175            .map(|(value, size)| (Self::zigzag_64_decode(value), size))
176    }
177
178    pub fn push_zigzag_vlq_64(&mut self, value: i64) {
179        self.push_vlq_64(Self::zigzag_64_encode(value));
180    }
181
182    fn zigzag_64_decode(encoded: u64) -> i64 {
183        //! Performs zigzag decoding on an unsigned integer to retrieve the original signed integer.
184
185        // Get rid of sign bit and correct placement of data bits (shifting unsigned data type inserts zeroes).
186        let data_bits = (encoded >> 1) as i64;
187
188        // Negate data bits if sign bit (LSB) is set.
189        if encoded & 1 != 0 {
190            !data_bits
191        } else {
192            data_bits
193        }
194    }
195
196    fn zigzag_64_encode(value: i64) -> u64 {
197        // Shift data bits by 1 to swap them with sign bit - negated, if negative. Then potentially add an LSB sign bit.
198        (if value >= 0 {
199            value << 1
200        } else {
201            !value << 1 | 1
202        }) as u64
203    }
204
205    pub fn exhausted(&self) -> bool {
206        self.read_index >= self.bytes.len()
207    }
208
209    pub fn assert_exhausted(&self) -> Result<(), ParseError> {
210        self.exhausted()
211            .then_some(())
212            .ok_or(ParseError::DataAfterExpectedEnd)
213    }
214
215    pub fn extend(&mut self, other: &Self) {
216        self.bytes.extend_from_slice(&other.bytes);
217    }
218}
219
220impl From<ByteSeq> for Vec<u8> {
221    fn from(value: ByteSeq) -> Self {
222        value.bytes
223    }
224}
225
226#[derive(thiserror::Error, PartialEq, Debug)]
227pub enum ParseError {
228    /// Expected certain bytes. This and some other variants bring the byte index with it where the respective item was expected, but not found.
229    #[error("expected one or more constant bytes at index {0}")]
230    ExpectedConst(usize),
231    /// Expected a zero-byte.
232    #[error("expected a zero byte at index {0}")]
233    ExpectedZero(usize),
234    /// Expected an integer with a certain byte size (little endian).
235    #[error("expected a fixed-width integer at byte index {0}")]
236    ExpectedInt(usize),
237    /// Expected a VLQ (variable-length quantity) with a maximum of 64 data bits (little endian; possibly also zigzag-encoded).
238    #[error("expected a variable-length quantity at byte index {0}")]
239    ExpectedVlq64(usize),
240    /// Encountered an exceptional value.
241    #[error("value not in expected range")]
242    ValueNotInRange,
243    /// Different parts of the data don't harmonize with each other.
244    #[error("parts of data inconsistent with each other")]
245    InconsistentData,
246    /// Expected the end of the byte stream, but still found data.
247    #[error("expected end of byte stream, got more data")]
248    DataAfterExpectedEnd,
249}