irox_tools/codec/
varint.rs

1// SPDX-License-Identifier: MIT
2// Copyright 2025 IROX Contributors
3//
4
5use crate::buf::FixedU8Buf;
6use crate::IntegerValue;
7use irox_bits::{Bits, BitsError, Error, MutBits};
8
9///
10/// Encodes up to 9 bytes in the sqlite4 varint format.
11pub trait EncodeVarintTo {
12    ///
13    /// Encodes up to 9 bytes in the sqlite4 varint format.  Returns the number of
14    /// bytes actually written.
15    fn encode_varint_to<T: MutBits + ?Sized>(&self, out: &mut T) -> Result<usize, BitsError>;
16}
17
18///
19/// Encodes up to 9 bytes in the sqlite4 varint format.  Returns the number of
20/// bytes actually written.
21pub fn encode_varint_to<T: MutBits + ?Sized>(
22    value: IntegerValue,
23    out: &mut T,
24) -> Result<usize, BitsError> {
25    let v = value.to_be_u64();
26    Ok(if v <= 0xF0 {
27        // 240
28        out.write_u8(v as u8)?;
29        1
30    } else if v <= 0x8EF {
31        // 2287
32        let a0 = ((v - 240) / 256 + 241) as u8;
33        let a1 = ((v - 240) & 0xFF) as u8;
34        out.write_all_bytes(&[a0, a1])?;
35        2
36    } else if v <= 0x108EF {
37        // 67823
38        let a0 = 249u8;
39        let a1 = ((v - 2288) / 256) as u8;
40        let a2 = ((v - 2288) & 0xFF) as u8;
41        out.write_all_bytes(&[a0, a1, a2])?;
42        3
43    } else if v <= 0x00FF_FFFF {
44        let v = v as u32 | 0xFA000000;
45        out.write_be_u32(v)?;
46        4
47    } else if v <= 0xFFFF_FFFF {
48        let v = v as u32;
49        out.write_u8(250)?;
50        out.write_be_u32(v)?;
51        5
52    } else if v <= 0xFF_FFFF_FFFF {
53        let [_, _, _, a1, a2, a3, a4, a5] = v.to_be_bytes();
54        out.write_all_bytes(&[252, a1, a2, a3, a4, a5])?;
55        6
56    } else if v <= 0xFFFF_FFFF_FFFF {
57        let [_, _, a1, a2, a3, a4, a5, a6] = v.to_be_bytes();
58        out.write_all_bytes(&[253, a1, a2, a3, a4, a5, a6])?;
59        7
60    } else if v <= 0xFF_FFFF_FFFF_FFFF {
61        let [_, a1, a2, a3, a4, a5, a6, a7] = v.to_be_bytes();
62        out.write_all_bytes(&[254, a1, a2, a3, a4, a5, a6, a7])?;
63        8
64    } else {
65        out.write_u8(255)?;
66        out.write_be_u64(v)?;
67        9
68    })
69}
70
71impl<V> EncodeVarintTo for V
72where
73    V: Into<IntegerValue> + Copy,
74{
75    fn encode_varint_to<T: MutBits + ?Sized>(&self, out: &mut T) -> Result<usize, BitsError> {
76        encode_varint_to(Into::<IntegerValue>::into(*self), out)
77    }
78}
79
80///
81/// Decodes up to 9 bytes in the sqlite4 varint format
82pub fn decode_varint<T: Bits>(inp: &mut T) -> Result<u64, Error> {
83    let mut out = 0;
84    let a0 = inp.read_u8()? as u64;
85    match a0 {
86        241..=248 => {
87            let a1 = inp.read_u8()? as u64;
88            out = 240 + 258 * (a0 - 241) + a1;
89        }
90        249 => {
91            let a1 = inp.read_u8()? as u64;
92            let a2 = inp.read_u8()? as u64;
93            out = 2288 + 258 * a1 + a2;
94        }
95        250 => {
96            let a1 = inp.read_u8()?;
97            let a2 = inp.read_u8()?;
98            let a3 = inp.read_u8()?;
99            out = u32::from_be_bytes([0, a1, a2, a3]) as u64;
100        }
101        251 => {
102            out = inp.read_be_u32()? as u64;
103        }
104        252 => {
105            let a1 = (inp.read_u8()? as u64) << 32;
106            out = a1 | inp.read_be_u32()? as u64;
107        }
108        253 => {
109            let a1 = (inp.read_be_u16()? as u64) << 32;
110            out = a1 | inp.read_be_u32()? as u64;
111        }
112        254 => {
113            let a1 = (inp.read_u8()? as u64) << 40;
114            let a2 = (inp.read_be_u16()? as u64) << 32;
115            out = a1 | a2 | inp.read_be_u32()? as u64;
116        }
117        255 => {
118            out = inp.read_be_u64()?;
119        }
120        _ => {}
121    }
122    Ok(out)
123}
124
125///
126/// Decodes up to 9 bytes in the sqlite4 varint format
127pub trait DecodeVarint {
128    fn decode_varint(&mut self) -> Result<u64, Error>;
129}
130
131impl<T: Bits> DecodeVarint for T {
132    fn decode_varint(&mut self) -> Result<u64, Error> {
133        decode_varint(self)
134    }
135}
136
137///
138/// The required length to encode in the group varint format.  Either 1, 2, 3, or 4 bytes.
139pub const fn gvarint_length(value: IntegerValue) -> u8 {
140    let value = value.to_be_u32();
141    match value {
142        0x0000_0000..=0x0000_00FF => 1,
143        0x0000_0100..=0x0000_FFFF => 2,
144        0x0001_0000..=0x00FF_FFFF => 3,
145        _ => 4,
146    }
147}
148///
149/// The required length to encode in the group varint format.  Either 1, 2, 3, or 4 bytes.
150pub trait GroupVarintRequiredLength {
151    fn gvarint_length(&self) -> u8;
152}
153impl<T> GroupVarintRequiredLength for T
154where
155    T: Into<IntegerValue> + Copy,
156{
157    fn gvarint_length(&self) -> u8 {
158        gvarint_length(Into::<IntegerValue>::into(*self))
159    }
160}
161
162///
163/// Writes only the used number of bytes in the integer to the output stream.
164pub trait EncodeUsedBytesTo {
165    fn encode_used_bytes_to<T: MutBits + ?Sized>(&self, out: &mut T) -> Result<usize, BitsError>;
166}
167impl EncodeUsedBytesTo for u32 {
168    fn encode_used_bytes_to<T: MutBits + ?Sized>(&self, out: &mut T) -> Result<usize, BitsError> {
169        let used = self.gvarint_length() as usize;
170        if used == 1 {
171            out.write_u8(*self as u8)?;
172        } else if used == 2 {
173            out.write_be_u16(*self as u16)?;
174        } else if used == 3 {
175            let [_, a, b, c] = self.to_be_bytes();
176            out.write_all_bytes(&[a, b, c])?;
177        } else {
178            out.write_be_u32(*self)?;
179        }
180        Ok(used)
181    }
182}
183
184pub trait DecodeUsedBytesFrom: Sized {
185    fn decode_used_bytes<T: Bits + ?Sized>(inp: &mut T, len: u8) -> Result<Self, Error>;
186}
187impl DecodeUsedBytesFrom for u32 {
188    fn decode_used_bytes<T: Bits + ?Sized>(inp: &mut T, len: u8) -> Result<Self, Error> {
189        let mut out = 0u32;
190        for _ in 0..len {
191            out <<= 8;
192            out |= inp.read_u8()? as u32;
193        }
194        Ok(out)
195    }
196}
197
198///
199/// The 'Group Varint' format, which moves all the control bits to header bytes
200pub trait EncodeGroupVarintTo {
201    fn encode_group_varint_to<T: MutBits + ?Sized>(&self, out: &mut T) -> Result<usize, BitsError>;
202}
203impl EncodeGroupVarintTo for [u32; 4] {
204    fn encode_group_varint_to<T: MutBits + ?Sized>(&self, out: &mut T) -> Result<usize, BitsError> {
205        let mut buf = FixedU8Buf::<16>::new();
206        let [a, b, c, d] = *self;
207        let a = a.encode_used_bytes_to(&mut buf)? as u8;
208        let b = b.encode_used_bytes_to(&mut buf)? as u8;
209        let c = c.encode_used_bytes_to(&mut buf)? as u8;
210        let d = d.encode_used_bytes_to(&mut buf)? as u8;
211        let hdr = ((a - 1) & 0x03) << 6;
212        let hdr = hdr | (((b - 1) & 0x03) << 4);
213        let hdr = hdr | (((c - 1) & 0x03) << 2);
214        let hdr = hdr | ((d - 1) & 0x03);
215        out.write_u8(hdr)?;
216        out.write_all_bytes(buf.as_ref_used())?;
217        Ok(buf.len() + 1)
218    }
219}
220
221pub trait DecodeGroupVarintFrom: Sized {
222    fn decode_group_varint_from<T: Bits>(inp: &mut T) -> Result<Option<[Self; 4]>, Error>;
223}
224impl DecodeGroupVarintFrom for u32 {
225    fn decode_group_varint_from<T: Bits>(inp: &mut T) -> Result<Option<[Self; 4]>, Error> {
226        let Some(ctrl) = inp.next_u8()? else {
227            return Ok(None);
228        };
229        let dl = (ctrl & 0x3) + 1;
230        let cl = ((ctrl >> 2) & 0x3) + 1;
231        let bl = ((ctrl >> 4) & 0x3) + 1;
232        let al = ((ctrl >> 6) & 0x3) + 1;
233
234        Ok(Some([
235            u32::decode_used_bytes(inp, al)?,
236            u32::decode_used_bytes(inp, bl)?,
237            u32::decode_used_bytes(inp, cl)?,
238            u32::decode_used_bytes(inp, dl)?,
239        ]))
240    }
241}
242
#[cfg(test)]
mod test {
    use crate::buf::{Buffer, FixedU8Buf, RoundU8Buffer};
    use crate::codec::{DecodeGroupVarintFrom, EncodeGroupVarintTo};
    use irox_bits::Error;

    /// Four values of widths 2, 3, 1 and 4 bytes encode to the header `0x63`
    /// followed by their used bytes, 11 bytes in total.
    #[test]
    pub fn test_group_encoding() -> Result<(), Error> {
        let values = [0xAAAAu32, 0xBBBBBB, 0xCC, 0xDDDDDDDD];
        let mut encoded = FixedU8Buf::<16>::new();
        let written = values.encode_group_varint_to(&mut encoded)?;
        assert_eq_hex_slice!(
            &[0x63, 0xAA, 0xAA, 0xBB, 0xBB, 0xBB, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD],
            encoded.as_ref()
        );
        assert_eq!(11, written);

        Ok(())
    }

    /// The same 11 bytes decode back to the four values and leave the input
    /// buffer empty.
    #[test]
    pub fn test_group_decoding() -> Result<(), Error> {
        let mut input = RoundU8Buffer::from([
            0x63, 0xAA, 0xAA, 0xBB, 0xBB, 0xBB, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD,
        ]);

        let decoded = u32::decode_group_varint_from(&mut input)?;
        assert!(decoded.is_some());
        if let Some(values) = decoded {
            assert_eq_hex_slice!(&[0xAAAA, 0xBBBBBB, 0xCC, 0xDDDDDDDD], values.as_ref());
        }

        // Every input byte must have been consumed.
        assert_eq!(0, input.len());

        Ok(())
    }
}