loro_protocol/
bytes.rs

/// Simple growable buffer writer with helpers for the protocol primitives.
///
/// This mirrors the JavaScript implementation's `BytesWriter` from
/// `packages/loro-protocol/src/bytes.ts`.
///
/// Bytes are only ever appended; call [`BytesWriter::finalize`] to take the
/// accumulated buffer.
#[derive(Debug, Clone)]
pub struct BytesWriter {
    /// Everything written so far, in output order.
    buf: Vec<u8>,
}

impl BytesWriter {
    /// Creates an empty writer with a small pre-allocated buffer.
    #[must_use]
    pub fn new() -> Self {
        Self {
            buf: Vec::with_capacity(32),
        }
    }

    /// Appends `bytes` verbatim, with no length prefix.
    #[inline]
    pub fn push_bytes(&mut self, bytes: &[u8]) {
        self.buf.extend_from_slice(bytes);
    }

    /// Appends a single byte.
    #[inline]
    pub fn push_byte(&mut self, byte: u8) {
        self.buf.push(byte);
    }

    /// Encode an unsigned LEB128 integer (sufficient for lengths and indices here).
    ///
    /// The value is emitted seven bits at a time, least-significant group
    /// first. Every byte except the last has its high bit set to signal that
    /// another group follows, so `0` encodes as the single byte `0x00`.
    pub fn push_uleb128(&mut self, mut n: u64) {
        // While more than seven significant bits remain, emit the low group
        // with the continuation flag and move on to the next group.
        while n >= 0x80 {
            self.buf.push((n & 0x7f) as u8 | 0x80);
            n >>= 7;
        }
        // `n < 0x80` here, so the cast is lossless and the high bit is clear.
        self.buf.push(n as u8);
    }

    /// Appends `bytes` preceded by its length as an unsigned LEB128 integer.
    #[inline]
    pub fn push_var_bytes(&mut self, bytes: &[u8]) {
        self.push_uleb128(bytes.len() as u64);
        self.push_bytes(bytes);
    }

    /// Appends the UTF-8 bytes of `s`, preceded by their byte length as an
    /// unsigned LEB128 integer.
    #[inline]
    pub fn push_var_string(&mut self, s: &str) {
        self.push_var_bytes(s.as_bytes());
    }

    /// Consumes the writer and returns everything written so far.
    #[inline]
    #[must_use]
    pub fn finalize(self) -> Vec<u8> {
        self.buf
    }

    /// Number of bytes written so far.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.buf.len()
    }

    /// Returns `true` if nothing has been written yet.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.buf.is_empty()
    }
}
63
64impl Default for BytesWriter {
65    fn default() -> Self { Self::new() }
66}
67
/// Forward-only cursor over a borrowed byte slice, with decoders for the
/// primitives produced by [`BytesWriter`].
///
/// Slices returned by the `read_*` methods borrow from the underlying buffer
/// (lifetime `'a`), not from the reader. Every failure is reported as an
/// `Err(String)` carrying a short message.
#[derive(Debug, Clone)]
pub struct BytesReader<'a> {
    /// The complete input being decoded.
    buf: &'a [u8],
    /// Index into `buf` of the next unread byte.
    off: usize,
}

impl<'a> BytesReader<'a> {
    /// Creates a reader positioned at the start of `buf`.
    #[must_use]
    pub fn new(buf: &'a [u8]) -> Self {
        Self { buf, off: 0 }
    }

    /// Number of bytes that have not been consumed yet.
    #[inline]
    #[must_use]
    pub fn remaining(&self) -> usize {
        self.buf.len().saturating_sub(self.off)
    }

    /// Reads one byte and advances the cursor past it.
    ///
    /// # Errors
    ///
    /// Returns an error, leaving the cursor untouched, if no bytes remain.
    pub fn read_byte(&mut self) -> Result<u8, String> {
        let byte = self
            .buf
            .get(self.off)
            .copied()
            .ok_or_else(|| "readByte out of bounds".to_string())?;
        self.off += 1;
        Ok(byte)
    }

    /// Reads exactly `len` bytes and advances the cursor past them.
    ///
    /// # Errors
    ///
    /// Returns an error, leaving the cursor untouched, if fewer than `len`
    /// bytes remain.
    pub fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], String> {
        // Copy the shared reference out so the returned slice borrows the
        // underlying buffer for `'a` rather than `self`.
        let buf: &'a [u8] = self.buf;
        let start = self.off;
        let end = start
            .checked_add(len) // `None` when `start + len` overflows `usize`
            .filter(|&end| end <= buf.len())
            .ok_or_else(|| "readBytes out of bounds".to_string())?;
        self.off = end;
        Ok(&buf[start..end])
    }

    /// Decode an unsigned LEB128 into u64, rejecting values that do not fit.
    ///
    /// Non-minimal encodings (for example `[0x80, 0x00]` for zero) are
    /// accepted.
    ///
    /// # Errors
    ///
    /// Returns an error if the input ends in the middle of the integer, if
    /// the encoding runs past ten bytes, or if the tenth byte carries payload
    /// bits that would overflow a `u64`. The cursor is not rewound on error;
    /// bytes examined before the failure stay consumed.
    pub fn read_uleb128(&mut self) -> Result<u64, String> {
        let mut result: u64 = 0;
        let mut shift: u32 = 0;
        loop {
            let byte = self.read_byte()?;
            let payload = u64::from(byte & 0x7f);
            // Nine groups cover bits 0..=62, so the tenth group (shift 63) has
            // room for a single bit. Anything larger would be silently cut off
            // by the shift below, so reject it instead.
            if shift == 63 && payload > 1 {
                return Err("uleb128 too large".into());
            }
            result |= payload << shift;
            if byte & 0x80 == 0 {
                return Ok(result);
            }
            shift += 7;
            // A continuation flag on the tenth byte means an eleventh group,
            // which can never fit.
            if shift > 63 {
                return Err("uleb128 too large".into());
            }
        }
    }

    /// Reads an unsigned LEB128 length followed by that many bytes.
    ///
    /// # Errors
    ///
    /// Returns an error if the length cannot be decoded, does not fit in
    /// `usize`, or exceeds the bytes remaining. In the last two cases the
    /// length prefix has already been consumed.
    pub fn read_var_bytes(&mut self) -> Result<&'a [u8], String> {
        let len = usize::try_from(self.read_uleb128()?)
            .map_err(|_| "length too large".to_string())?;
        self.read_bytes(len)
    }

    /// Reads a length-prefixed byte string and returns it as an owned `String`.
    ///
    /// # Errors
    ///
    /// Returns an error if [`BytesReader::read_var_bytes`] fails or the bytes
    /// are not valid UTF-8. In the latter case the bytes stay consumed.
    pub fn read_var_string(&mut self) -> Result<String, String> {
        let bytes = self.read_var_bytes()?;
        std::str::from_utf8(bytes)
            .map(str::to_owned)
            .map_err(|_| "invalid UTF-8 string".to_string())
    }

    /// Current cursor position (number of bytes consumed so far).
    #[inline]
    #[must_use]
    pub fn position(&self) -> usize {
        self.off
    }
}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Every value written with `push_uleb128` must read back unchanged.
    #[test]
    fn round_trip_uleb128() {
        // Small values, the 1->2 and 2->3 byte boundaries, and the widest
        // encodings: `2^63 - 1` needs nine bytes and `u64::MAX` needs ten.
        let values: &[u64] = &[
            0,
            1,
            2,
            10,
            127,
            128,
            129,
            255,
            256,
            16383,
            16384,
            0xffff,
            0x1f_ffff,
            0x0fff_ffff,
            0xffff_ffff,
            0x7fff_ffff_ffff_ffff,
            u64::MAX,
        ];

        let mut w = BytesWriter::new();
        for &n in values {
            w.push_uleb128(n);
        }
        let buf = w.finalize();

        let mut r = BytesReader::new(&buf);
        let out: Vec<u64> = values
            .iter()
            .map(|_| r.read_uleb128().unwrap())
            .collect();
        assert_eq!(out, values);
        assert_eq!(r.remaining(), 0);
    }

    /// Length-prefixed byte strings and UTF-8 strings must round-trip,
    /// including the empty payload and a payload whose length needs a
    /// two-byte prefix.
    #[test]
    fn round_trip_varbytes_and_varstring() {
        let empty: Vec<u8> = Vec::new();
        let small: Vec<u8> = vec![1, 2, 3, 4, 5];
        let large: Vec<u8> = (0..5000_usize).map(|i| (i & 0xff) as u8).collect();
        // Mixes 1-byte (ASCII), 3-byte (CJK) and 4-byte (emoji) UTF-8 sequences.
        let text = "hello 世界 🚀";

        let mut w = BytesWriter::new();
        w.push_var_bytes(&empty);
        w.push_var_bytes(&small);
        w.push_var_bytes(&large);
        w.push_var_string(text);
        let buf = w.finalize();

        let mut r = BytesReader::new(&buf);
        assert_eq!(r.read_var_bytes().unwrap(), &empty[..]);
        assert_eq!(r.read_var_bytes().unwrap(), &small[..]);
        assert_eq!(r.read_var_bytes().unwrap(), &large[..]);
        assert_eq!(r.read_var_string().unwrap(), text);
        assert_eq!(r.remaining(), 0);
    }

    /// Malformed input must surface as `Err`, never as a panic.
    #[test]
    fn rejects_truncated_or_invalid_input() {
        // Nothing left to read.
        assert!(BytesReader::new(&[]).read_byte().is_err());
        // Continuation flag set, but the input ends.
        assert!(BytesReader::new(&[0x80]).read_uleb128().is_err());
        // Length prefix promises more bytes than are present.
        assert!(BytesReader::new(&[0x03, b'a']).read_var_bytes().is_err());
        // 0xff never appears in well-formed UTF-8.
        assert!(BytesReader::new(&[0x01, 0xff]).read_var_string().is_err());
    }
}