Skip to main content

sqlrite/sql/pager/
varint.rs

1//! Variable-length integer encoding.
2//!
3//! Unsigned integers use **LEB128** — little-endian base 128. Each byte
4//! carries 7 data bits; the high bit is 1 while more bytes follow and 0 on
5//! the last byte. Values 0..127 fit in one byte.
6//!
7//! Signed integers use **ZigZag** mapping (positive and negative interleaved
8//! into the unsigned range) and are then LEB128-encoded. Small positive
9//! values like rowid=1 take one byte; small negative values do too.
10//!
11//! These are the encodings used for lengths, column counts, rowids, and
12//! Integer cell values. Fixed-width encodings stay in place for tags (u8)
13//! and `Real` values (f64, 8 bytes).
14
15use crate::error::{Result, SQLRiteError};
16
17/// Upper bound on bytes for a 64-bit LEB128 value: `ceil(64 / 7) = 10`.
18pub const MAX_VARINT_BYTES: usize = 10;
19
20/// Appends a LEB128-encoded `u64` to `out`. Returns the number of bytes written.
21pub fn write_u64(out: &mut Vec<u8>, mut value: u64) -> usize {
22    let mut written = 0;
23    loop {
24        let mut byte = (value & 0x7f) as u8;
25        value >>= 7;
26        written += 1;
27        if value == 0 {
28            out.push(byte);
29            return written;
30        }
31        byte |= 0x80;
32        out.push(byte);
33    }
34}
35
36/// Writes a ZigZag-encoded signed `i64` as LEB128. Returns bytes written.
37pub fn write_i64(out: &mut Vec<u8>, value: i64) -> usize {
38    write_u64(out, zigzag_encode(value))
39}
40
41/// Reads a LEB128 `u64` from `buf` starting at `pos`. Returns `(value, bytes_consumed)`.
42pub fn read_u64(buf: &[u8], pos: usize) -> Result<(u64, usize)> {
43    let mut result: u64 = 0;
44    let mut shift: u32 = 0;
45    for i in 0..MAX_VARINT_BYTES {
46        let byte = *buf.get(pos + i).ok_or_else(|| {
47            SQLRiteError::Internal(format!("varint read past buffer end at offset {}", pos + i))
48        })?;
49        result |= ((byte & 0x7f) as u64) << shift;
50        if byte & 0x80 == 0 {
51            return Ok((result, i + 1));
52        }
53        shift += 7;
54        if shift >= 64 {
55            return Err(SQLRiteError::Internal(
56                "varint u64 overflow (more than 10 bytes)".to_string(),
57            ));
58        }
59    }
60    Err(SQLRiteError::Internal(
61        "varint u64 overflow (no terminator in 10 bytes)".to_string(),
62    ))
63}
64
65/// Reads a ZigZag-encoded signed `i64` (LEB128). Returns `(value, bytes_consumed)`.
66pub fn read_i64(buf: &[u8], pos: usize) -> Result<(i64, usize)> {
67    let (u, n) = read_u64(buf, pos)?;
68    Ok((zigzag_decode(u), n))
69}
70
71/// Returns the number of bytes `write_u64(value)` would produce, without writing.
72pub fn u64_len(value: u64) -> usize {
73    let mut v = value;
74    let mut n = 0;
75    loop {
76        v >>= 7;
77        n += 1;
78        if v == 0 {
79            return n;
80        }
81    }
82}
83
84/// Same as `u64_len` for a zigzagged signed `i64`.
85pub fn i64_len(value: i64) -> usize {
86    u64_len(zigzag_encode(value))
87}
88
89#[inline]
90fn zigzag_encode(v: i64) -> u64 {
91    ((v << 1) ^ (v >> 63)) as u64
92}
93
94#[inline]
95fn zigzag_decode(v: u64) -> i64 {
96    ((v >> 1) as i64) ^ -((v & 1) as i64)
97}
98
99#[cfg(test)]
100mod tests {
101    use super::*;
102
103    fn round_trip_u(v: u64) {
104        let mut buf = Vec::new();
105        let n = write_u64(&mut buf, v);
106        assert_eq!(n, buf.len());
107        assert_eq!(n, u64_len(v));
108        let (back, consumed) = read_u64(&buf, 0).unwrap();
109        assert_eq!(back, v);
110        assert_eq!(consumed, n);
111    }
112
113    fn round_trip_i(v: i64) {
114        let mut buf = Vec::new();
115        let n = write_i64(&mut buf, v);
116        assert_eq!(n, buf.len());
117        assert_eq!(n, i64_len(v));
118        let (back, consumed) = read_i64(&buf, 0).unwrap();
119        assert_eq!(back, v);
120        assert_eq!(consumed, n);
121    }
122
123    #[test]
124    fn u64_round_trips_cover_boundaries() {
125        for v in [
126            0u64,
127            1,
128            127,    // last 1-byte value
129            128,    // first 2-byte value
130            16_383, // last 2-byte value
131            16_384, // first 3-byte value
132            u32::MAX as u64,
133            u64::MAX,
134        ] {
135            round_trip_u(v);
136        }
137    }
138
139    #[test]
140    fn i64_round_trips_cover_signs_and_boundaries() {
141        for v in [
142            0i64,
143            1,
144            -1,
145            63,
146            -64,
147            64,
148            -65,
149            i32::MAX as i64,
150            i32::MIN as i64,
151            i64::MAX,
152            i64::MIN,
153        ] {
154            round_trip_i(v);
155        }
156    }
157
158    #[test]
159    fn reading_past_buffer_end_errors_cleanly() {
160        // A single high-bit byte "needs more" but there isn't more.
161        let buf = [0x80u8];
162        let err = read_u64(&buf, 0).unwrap_err();
163        assert!(format!("{err}").contains("varint"));
164    }
165
166    #[test]
167    fn malformed_overlong_varint_errors() {
168        // 11 consecutive high-bit bytes would overflow.
169        let buf = [0xff; 11];
170        let err = read_u64(&buf, 0).unwrap_err();
171        assert!(format!("{err}").contains("overflow"));
172    }
173
174    #[test]
175    fn small_positive_zigzag_is_one_byte() {
176        assert_eq!(i64_len(0), 1);
177        assert_eq!(i64_len(1), 1);
178        assert_eq!(i64_len(63), 1);
179        assert_eq!(i64_len(-1), 1);
180        assert_eq!(i64_len(-64), 1);
181    }
182
183    #[test]
184    fn concatenated_varints_read_sequentially() {
185        let mut buf = Vec::new();
186        write_u64(&mut buf, 7);
187        write_i64(&mut buf, -42);
188        write_u64(&mut buf, 999);
189        let (a, n1) = read_u64(&buf, 0).unwrap();
190        let (b, n2) = read_i64(&buf, n1).unwrap();
191        let (c, _) = read_u64(&buf, n1 + n2).unwrap();
192        assert_eq!((a, b, c), (7, -42, 999));
193    }
194}