Skip to main content

rustpython_compiler_core/
varint.rs

1//! Variable-length integer encoding utilities.
2//!
3//! Two encodings are used:
4//! - **Little-endian** (low bits first): linetable
5//! - **Big-endian** (high bits first): exception tables
6//!
7//! Both use 6-bit chunks with 0x40 as the continuation bit.
8
9use alloc::vec::Vec;
10
/// Append `val` to `buf` as a little-endian varint (the linetable encoding).
///
/// The value is emitted six bits at a time, least-significant chunk first.
/// Every byte except the final one has `0x40` set to signal that another
/// chunk follows.
///
/// Returns the number of bytes appended to `buf`.
#[inline]
pub fn write_varint(buf: &mut Vec<u8>, mut val: u32) -> usize {
    let mut written = 0;
    loop {
        let chunk = (val & 0x3f) as u8;
        val >>= 6;
        written += 1;
        if val == 0 {
            // Nothing left above this chunk: emit it without the continuation bit.
            buf.push(chunk);
            return written;
        }
        buf.push(0x40 | chunk);
    }
}
22
23/// Write a little-endian signed varint.
24#[inline]
25pub fn write_signed_varint(buf: &mut Vec<u8>, val: i32) -> usize {
26    let uval = if val < 0 {
27        ((0u32.wrapping_sub(val as u32)) << 1) | 1
28    } else {
29        (val as u32) << 1
30    };
31    write_varint(buf, uval)
32}
33
/// Append `val` to `buf` as a big-endian varint (the exception-table encoding).
///
/// The value is split into 6-bit chunks and written most-significant chunk
/// first, skipping chunks that lie entirely above the highest set bit. Every
/// byte except the last has `0x40` set as the continuation bit. A `u32` needs
/// at most six bytes (a 2-bit chunk followed by five 6-bit chunks).
///
/// Returns the number of bytes appended to `buf`.
pub fn write_varint_be(buf: &mut Vec<u8>, val: u32) -> usize {
    // Bit offsets of the chunks that may precede the final one, highest first.
    const LEADING_SHIFTS: [u32; 5] = [30, 24, 18, 12, 6];

    let before = buf.len();
    buf.extend(
        LEADING_SHIFTS
            .iter()
            // A chunk is emitted once the value reaches up to (or beyond) it.
            .filter(|&&shift| val >> shift != 0)
            .map(|&shift| 0x40 | ((val >> shift) & 0x3f) as u8),
    );
    // The lowest chunk is always written and never carries the continuation bit.
    buf.push((val & 0x3f) as u8);
    buf.len() - before
}
55
56/// Write a big-endian varint with the start marker (0x80) on the first byte.
57pub fn write_varint_with_start(data: &mut Vec<u8>, val: u32) {
58    let start_pos = data.len();
59    write_varint_be(data, val);
60    if let Some(first) = data.get_mut(start_pos) {
61        *first |= 0x80;
62    }
63}
64
/// Decode a big-endian varint whose first byte carries the start marker `0x80`.
///
/// Reading begins at `data[*pos]`. Returns `None`, leaving `*pos` unchanged,
/// when `*pos` is at or past the end of `data` or when the byte at `*pos`
/// lacks the start marker. Otherwise `*pos` is advanced past every consumed
/// byte and the decoded value is returned.
///
/// Each byte contributes its low six bits; `0x40` signals that another byte
/// follows. If `data` ends while the continuation bit is still set, the value
/// accumulated so far is returned.
pub fn read_varint_with_start(data: &[u8], pos: &mut usize) -> Option<u32> {
    let (&head, tail) = data.get(*pos..)?.split_first()?;
    if head & 0x80 == 0 {
        return None;
    }

    let mut acc = u32::from(head & 0x3f);
    let mut more = head & 0x40 != 0;
    let mut consumed = 1;
    for &byte in tail {
        if !more {
            break;
        }
        acc = (acc << 6) | u32::from(byte & 0x3f);
        more = byte & 0x40 != 0;
        consumed += 1;
    }

    *pos += consumed;
    Some(acc)
}
85
/// Decode a big-endian varint that has no start marker.
///
/// Reading begins at `data[*pos]`. Returns `None`, leaving `*pos` unchanged,
/// when `*pos` is at or past the end of `data`. Otherwise `*pos` is advanced
/// past every consumed byte and the decoded value is returned.
///
/// Each byte contributes its low six bits; `0x40` signals that another byte
/// follows, and bit `0x80` is ignored. If `data` ends while the continuation
/// bit is still set, the value accumulated so far is returned.
pub fn read_varint(data: &[u8], pos: &mut usize) -> Option<u32> {
    let lead = *data.get(*pos)?;
    *pos += 1;

    let mut decoded = u32::from(lead & 0x3f);
    let mut pending = lead & 0x40 != 0;
    while pending {
        match data.get(*pos) {
            Some(&next) => {
                *pos += 1;
                decoded = (decoded << 6) | u32::from(next & 0x3f);
                pending = next & 0x40 != 0;
            }
            // Input ran out mid-value: keep what has been decoded so far.
            None => break,
        }
    }
    Some(decoded)
}
103
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_le_varint_roundtrip() {
        // Little-endian is only used internally in linetable,
        // no read function needed outside of linetable parsing,
        // so pin the reported lengths and the exact bytes instead.
        let mut buf = Vec::new();
        assert_eq!(write_varint(&mut buf, 0), 1);
        assert_eq!(write_varint(&mut buf, 63), 1);
        assert_eq!(write_varint(&mut buf, 64), 2);
        assert_eq!(write_varint(&mut buf, 4095), 2);
        assert_eq!(buf.len(), 1 + 1 + 2 + 2);
        assert_eq!(buf, [0x00, 0x3f, 0x40, 0x01, 0x7f, 0x3f]);
    }

    #[test]
    fn test_le_signed_varint_encoding() {
        // The sign lives in the lowest bit, the magnitude in the bits above it.
        let mut buf = Vec::new();
        assert_eq!(write_signed_varint(&mut buf, 0), 1);
        assert_eq!(write_signed_varint(&mut buf, 1), 1);
        assert_eq!(write_signed_varint(&mut buf, -1), 1);
        assert_eq!(write_signed_varint(&mut buf, 31), 1);
        assert_eq!(write_signed_varint(&mut buf, -32), 2);
        assert_eq!(buf, [0x00, 0x02, 0x03, 0x3e, 0x41, 0x01]);
    }

    #[test]
    fn test_be_varint_roundtrip() {
        // Includes both sides of every chunk boundary so that each of the
        // one- to six-byte encodings is exercised.
        for &val in &[
            0u32,
            1,
            63,
            64,
            127,
            128,
            4095,
            4096,
            1_000_000,
            (1 << 18) - 1,
            1 << 18,
            (1 << 24) - 1,
            1 << 24,
            (1 << 30) - 1,
            1 << 30,
            u32::MAX,
        ] {
            let mut buf = Vec::new();
            let written = write_varint_be(&mut buf, val);
            assert_eq!(written, buf.len(), "val={val}");
            let mut pos = 0;
            assert_eq!(read_varint(&buf, &mut pos), Some(val), "val={val}");
            assert_eq!(pos, buf.len());
        }
    }

    #[test]
    fn test_be_varint_with_start() {
        let mut buf = Vec::new();
        write_varint_with_start(&mut buf, 42);
        write_varint_with_start(&mut buf, 100);
        write_varint_with_start(&mut buf, 71);
        // Only the first byte of each entry carries the 0x80 start marker.
        assert_eq!(buf, [0xaa, 0xc1, 0x24, 0xc1, 0x07]);

        let mut pos = 0;
        assert_eq!(read_varint_with_start(&buf, &mut pos), Some(42));
        assert_eq!(read_varint_with_start(&buf, &mut pos), Some(100));
        assert_eq!(read_varint_with_start(&buf, &mut pos), Some(71));
        assert_eq!(read_varint_with_start(&buf, &mut pos), None);
        assert_eq!(pos, buf.len());

        // A byte without the start marker is rejected and not consumed.
        let mut mid = 2;
        assert_eq!(read_varint_with_start(&buf, &mut mid), None);
        assert_eq!(mid, 2);
    }
}