Skip to main content

protobuf_core/
varint.rs

1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Varint encoding and decoding logic for Protocol Buffers.
16//!
17//! This module provides basic varint operations including encoding, decoding,
18//! and conversion to various protobuf integer types.
19//!
20//! This is a **reference implementation**. Not optimized for performance.
21
22use crate::wire_format::{MAX_VARINT_BYTES, VARINT_CONTINUATION_BIT, VARINT_PAYLOAD_MASK};
23use crate::{ProtobufError, Result};
24use ::std::io::Write;
25
26mod read;
27
28// VarintIterator is re-exported for the public API (return type of read_varints()).
29#[allow(unused_imports)]
30pub use read::{
31    DecodeOutcome, DecodeState, IteratorExtVarint, ReadExtVarint, TryIteratorExtVarint,
32    VarintIterator,
33};
34/// A deserialized varint value.
35///
36/// This type represents the decoded 8-byte value from serialized bytes
37/// to protobuf integer types.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
39pub struct Varint([u8; 8]);
40
41impl Varint {
42    // ============================================================================
43    // Converting from / to [u8; 8]
44    // ============================================================================
45
46    /// Create a new Varint from raw bytes.
47    ///
48    /// The bytes given are, essentially, a little-endian encoded u64.
49    /// Note that this is NOT the "protobuf encoded" varint bytes.
50    pub fn new(bytes: [u8; 8]) -> Self {
51        Self(bytes)
52    }
53
54    /// Get the underlying byte array, the little-endian encoded u64.
55    /// Note that this is NOT the "protobuf encoded" varint bytes.
56    pub fn as_bytes(&self) -> &[u8; 8] {
57        &self.0
58    }
59
60    // ============================================================================
61    // from / to rust integer types, using certain protobuf integer types formats.
62    // ============================================================================
63
64    /// Create a Varint from `u64`, assuming `UInt64` protobuf type.
65    pub fn from_uint64(value: u64) -> Self {
66        let bytes = value.to_le_bytes();
67        Self(bytes)
68    }
69
70    /// Create a Varint from `u32`, assuming `UInt32` protobuf type.
71    pub fn from_uint32(value: u32) -> Self {
72        let bytes = (value as u64).to_le_bytes();
73        Self(bytes)
74    }
75
76    /// Create a Varint from `i64`, assuming `SInt64` protobuf type.
77    pub fn from_sint64(value: i64) -> Self {
78        let zigzag_value = if value < 0 {
79            ((-value) as u64) * 2 - 1
80        } else {
81            (value as u64) * 2
82        };
83        let bytes = zigzag_value.to_le_bytes();
84        Self(bytes)
85    }
86
87    /// Create a Varint from `i32`, assuming `SInt32` protobuf type.
88    pub fn from_sint32(value: i32) -> Self {
89        Self::from_sint64(value as i64)
90    }
91
92    /// Create a Varint from `i64`, assuming `Int64` protobuf type.
93    pub fn from_int64(value: i64) -> Self {
94        let bytes = (value as u64).to_le_bytes();
95        Self(bytes)
96    }
97
98    /// Create a Varint from `i32`, assuming `Int32` protobuf type.
99    pub fn from_int32(value: i32) -> Self {
100        let bytes = (value as u64).to_le_bytes();
101        Self(bytes)
102    }
103
104    /// Create a Varint from `bool`, assuming `Bool` protobuf type.
105    pub fn from_bool(value: bool) -> Self {
106        let bytes = (if value { 1u64 } else { 0u64 }).to_le_bytes();
107        Self(bytes)
108    }
109
110    /// Convert to `u64`, assuming `UInt64` protobuf type.
111    pub fn to_uint64(&self) -> u64 {
112        u64::from_le_bytes(self.0)
113    }
114
115    /// Convert to `u32`, assuming `UInt32` protobuf type.
116    /// Returns an error if the value is out of range for `u32`.
117    pub fn try_to_uint32(&self) -> Result<u32> {
118        let value = self.to_uint64();
119        u32::try_from(value).map_err(|_| ProtobufError::VarintDowncastOutOfRange {
120            value,
121            target_type: "u32",
122        })
123    }
124
125    /// Convert to `i64`, assuming `SInt64` protobuf type.
126    pub fn to_sint64(&self) -> i64 {
127        let value = self.to_uint64();
128        ((value >> 1) as i64) ^ (-((value & 1) as i64))
129    }
130
131    /// Convert to `i32`, assuming `SInt32` protobuf type.
132    /// Returns an error if the value is out of range for `i32`.
133    pub fn try_to_sint32(&self) -> Result<i32> {
134        let sint64_value = self.to_sint64();
135        i32::try_from(sint64_value).map_err(|_| ProtobufError::VarintDowncastOutOfRange {
136            value: sint64_value as u64,
137            target_type: "i32",
138        })
139    }
140
141    /// Convert to `i64`, assuming `Int64` protobuf type.
142    pub fn to_int64(&self) -> i64 {
143        i64::from_le_bytes(self.0)
144    }
145
146    /// Convert to `i32`, assuming `Int32` protobuf type.
147    /// Returns an error if the value is out of range for `i32`.
148    pub fn try_to_int32(&self) -> Result<i32> {
149        let value = self.to_int64();
150        i32::try_from(value).map_err(|_| ProtobufError::VarintDowncastOutOfRange {
151            value: value as u64,
152            target_type: "i32",
153        })
154    }
155
156    /// Convert to `bool`, assuming `Bool` protobuf type.
157    pub fn to_bool(&self) -> bool {
158        self.to_uint64() != 0
159    }
160
161    // ============================================================================
162    // serialization
163    // ============================================================================
164
165    /// Get the size of this varint when encoded as a varint.
166    ///
167    /// This method calculates the exact number of bytes needed to encode
168    /// the underlying value as a protobuf varint.
169    pub fn varint_size(&self) -> usize {
170        let value = self.to_uint64();
171        if value == 0 {
172            1
173        } else {
174            (64 - value.leading_zeros() as usize).div_ceil(7)
175        }
176    }
177
178    /// Encode this varint as a varint and return the bytes with count.
179    ///
180    /// Returns a tuple of (bytes, count) where:
181    /// - bytes: fixed-size array containing the encoded varint
182    /// - count: actual number of bytes used (1-MAX_VARINT_BYTES)
183    ///
184    /// # Example
185    /// ```
186    /// use ::protobuf_core::Varint;
187    ///
188    /// let varint = Varint::from_uint64(150);
189    /// let (bytes, count) = varint.encode();
190    /// assert_eq!(count, 2);
191    /// assert_eq!(&bytes[..count], &[0x96, 0x01]);
192    /// ```
193    pub fn encode(&self) -> ([u8; MAX_VARINT_BYTES], usize) {
194        let value = self.to_uint64();
195        let mut bytes = [0u8; MAX_VARINT_BYTES];
196        let mut bytes_written = 0;
197        let mut remaining_value = value;
198
199        for byte in bytes.iter_mut() {
200            *byte = (remaining_value & VARINT_PAYLOAD_MASK as u64) as u8;
201            remaining_value >>= 7;
202            bytes_written += 1;
203
204            if remaining_value == 0 {
205                break;
206            } else {
207                *byte |= VARINT_CONTINUATION_BIT; // continuation bit
208            }
209        }
210
211        (bytes, bytes_written)
212    }
213}
214
215/// Extension trait for writing varints to Write instances.
216///
217/// This trait provides a convenient method to write varints directly to
218/// any type that implements `std::io::Write`.
219///
220/// # Example
221/// ```
222/// use ::std::io::Write;
223/// use ::protobuf_core::{WriteExtVarint, Varint};
224///
225/// let varint = Varint::from_uint64(150);
226/// let mut writer = Vec::new();
227/// writer.write_varint(&varint).unwrap();
228/// assert_eq!(writer, vec![0x96, 0x01]);
229/// ```
230pub trait WriteExtVarint {
231    /// Write a varint to this writer.
232    ///
233    /// Encodes a Varint as a varint and writes it to this writer.
234    /// Returns the number of bytes written on success.
235    ///
236    /// # Arguments
237    /// * `value` - The Varint to encode and write
238    ///
239    /// # Returns
240    /// * `Ok(usize)` - Number of bytes written
241    /// * `Err(::std::io::Error)` - I/O error from the writer
242    ///
243    /// # Example
244    /// ```
245    /// use ::std::io::Write;
246    /// use ::protobuf_core::{WriteExtVarint, Varint};
247    ///
248    /// let varint = Varint::from_uint64(150);
249    /// let mut buffer = Vec::new();
250    /// let bytes_written = buffer.write_varint(&varint).unwrap();
251    /// assert_eq!(bytes_written, 2);
252    /// assert_eq!(buffer, vec![0x96, 0x01]);
253    /// ```
254    fn write_varint(&mut self, value: &Varint) -> ::std::io::Result<usize>;
255}
256
257impl<W> WriteExtVarint for W
258where
259    W: Write,
260{
261    fn write_varint(&mut self, value: &Varint) -> ::std::io::Result<usize> {
262        let (bytes, count) = value.encode();
263        self.write_all(&bytes[..count])?;
264        Ok(count)
265    }
266}
267
268// ============================================================================
269// Tests (Varint type and WriteExtVarint)
270// ============================================================================
271
#[cfg(test)]
mod tests {
    use super::{Varint, WriteExtVarint};
    use crate::wire_format::MAX_VARINT_BYTES;

    /// Varint encoding of `0x7FFF_FFFF_FFFF_FFFF` (63 payload bits).
    const NINE_BYTE_ENCODING: [u8; 9] = [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F];

    /// Varint encoding of `u64::MAX` (64 payload bits).
    const TEN_BYTE_ENCODING: [u8; 10] =
        [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01];

    /// Writes `value` into a fresh buffer and returns the reported length
    /// together with the bytes actually written.
    fn write_to_vec(value: u64) -> (usize, Vec<u8>) {
        let mut buffer = Vec::new();
        let bytes_written = buffer
            .write_varint(&Varint::from_uint64(value))
            .expect("writing to a Vec cannot fail");
        (bytes_written, buffer)
    }

    #[test]
    fn test_varint_value_creation() {
        let bytes = [0x96, 0x01, 0, 0, 0, 0, 0, 0];
        let varint = Varint::new(bytes);
        assert_eq!(varint.as_bytes(), &bytes);
    }

    #[test]
    fn test_varint_conversions() {
        // Little-endian 0x0196 == 406; ZigZag-decoded that is 203.
        let varint = Varint::new([0x96, 0x01, 0, 0, 0, 0, 0, 0]);

        assert_eq!(varint.to_uint64(), 406);
        assert_eq!(varint.try_to_uint32().expect("406 fits in u32"), 406);
        assert_eq!(varint.to_sint64(), 203);
        assert_eq!(varint.try_to_sint32().expect("203 fits in i32"), 203);
        assert!(varint.to_bool());
    }

    #[test]
    fn test_signed_integer_conversions() {
        // A raw value of 1 is the ZigZag encoding of -1.
        let varint = Varint::new([0x01, 0, 0, 0, 0, 0, 0, 0]);

        assert_eq!(varint.to_sint64(), -1);
        assert_eq!(varint.try_to_sint32().expect("-1 fits in i32"), -1);
    }

    #[test]
    fn test_from_traits() {
        assert_eq!(Varint::from_uint64(150).to_uint64(), 150);
        assert_eq!(Varint::from_uint32(150).to_uint64(), 150);
        assert_eq!(Varint::from_sint64(150).to_sint64(), 150);
        assert_eq!(Varint::from_sint64(-1).to_sint64(), -1);
        assert!(Varint::from_bool(true).to_bool());
        assert!(!Varint::from_bool(false).to_bool());
        assert_eq!(Varint::from_int32(150).to_int64(), 150);
        assert_eq!(Varint::from_int64(150).to_int64(), 150);
    }

    #[test]
    fn test_to_methods() {
        let varint = Varint::new([150, 0, 0, 0, 0, 0, 0, 0]);

        assert_eq!(varint.try_to_uint32().unwrap(), 150);
        assert_eq!(varint.try_to_sint32().unwrap(), 75);
        assert_eq!(varint.to_sint64(), 75);
        assert!(varint.to_bool());
        assert_eq!(varint.try_to_int32().unwrap(), 150);
        assert_eq!(varint.to_int64(), 150);
    }

    #[test]
    fn test_roundtrip_conversions() {
        let original = 150u64;
        assert_eq!(Varint::from_uint64(original).to_uint64(), original);

        let original = 150u32;
        assert_eq!(Varint::from_uint32(original).try_to_uint32().unwrap(), original);

        let original = -1i64;
        assert_eq!(Varint::from_sint64(original).to_sint64(), original);

        let original = 150i64;
        assert_eq!(Varint::from_int64(original).to_int64(), original);

        let original = 150i32;
        assert_eq!(Varint::from_int32(original).try_to_int32().unwrap(), original);

        let original = true;
        assert_eq!(Varint::from_bool(original).to_bool(), original);
    }

    #[test]
    fn test_narrowing_boundaries() {
        // Extremes of the 32-bit types survive a round trip.
        assert_eq!(Varint::from_uint32(u32::MAX).try_to_uint32().unwrap(), u32::MAX);
        assert_eq!(Varint::from_int32(i32::MIN).try_to_int32().unwrap(), i32::MIN);
        assert_eq!(Varint::from_int32(i32::MAX).try_to_int32().unwrap(), i32::MAX);
        assert_eq!(Varint::from_sint32(i32::MIN).try_to_sint32().unwrap(), i32::MIN);
        assert_eq!(Varint::from_sint32(i32::MAX).try_to_sint32().unwrap(), i32::MAX);

        // One step outside the 32-bit range must be rejected.
        assert!(Varint::from_uint64(u64::from(u32::MAX) + 1)
            .try_to_uint32()
            .is_err());
        assert!(Varint::from_int64(i64::from(i32::MAX) + 1)
            .try_to_int32()
            .is_err());
        assert!(Varint::from_int64(i64::from(i32::MIN) - 1)
            .try_to_int32()
            .is_err());
        assert!(Varint::from_sint64(i64::from(i32::MAX) + 1)
            .try_to_sint32()
            .is_err());
        assert!(Varint::from_sint64(i64::from(i32::MIN) - 1)
            .try_to_sint32()
            .is_err());
    }

    #[test]
    fn test_encode_varint() {
        let (bytes, count) = Varint::from_uint64(150).encode();
        assert_eq!(count, 2);
        assert_eq!(&bytes[..count], &[0x96, 0x01]);

        let (bytes, count) = Varint::from_uint64(127).encode();
        assert_eq!(count, 1);
        assert_eq!(&bytes[..count], &[0x7F]);

        let (bytes, count) = Varint::from_uint64(0).encode();
        assert_eq!(count, 1);
        assert_eq!(&bytes[..count], &[0x00]);

        let (bytes, count) = Varint::from_uint64(0x7FFFFFFFFFFFFFFF).encode();
        assert_eq!(count, 9);
        assert_eq!(&bytes[..count], &NINE_BYTE_ENCODING);

        let (bytes, count) = Varint::from_uint64(0xFFFFFFFFFFFFFFFF).encode();
        assert_eq!(count, MAX_VARINT_BYTES);
        assert_eq!(&bytes[..count], &TEN_BYTE_ENCODING);
    }

    #[test]
    fn test_varint_size_matches_encode() {
        // Values on both sides of each 7-bit group boundary, plus the extremes.
        let test_values = [
            0u64,
            1,
            127,
            128,
            16_383,
            16_384,
            0x7FFFFFFFFFFFFFFF,
            0xFFFFFFFFFFFFFFFF,
        ];

        for &value in &test_values {
            let varint = Varint::from_uint64(value);
            let (_, count) = varint.encode();
            assert_eq!(varint.varint_size(), count, "value = {value:#x}");
        }
    }

    #[test]
    fn test_write_varint() {
        let (bytes_written, buffer) = write_to_vec(150);
        assert_eq!(bytes_written, 2);
        assert_eq!(buffer, vec![0x96, 0x01]);

        let (bytes_written, buffer) = write_to_vec(127);
        assert_eq!(bytes_written, 1);
        assert_eq!(buffer, vec![0x7F]);

        let (bytes_written, buffer) = write_to_vec(0);
        assert_eq!(bytes_written, 1);
        assert_eq!(buffer, vec![0x00]);

        let (bytes_written, buffer) = write_to_vec(0x7FFFFFFFFFFFFFFF);
        assert_eq!(bytes_written, 9);
        assert_eq!(buffer, NINE_BYTE_ENCODING);

        let (bytes_written, buffer) = write_to_vec(0xFFFFFFFFFFFFFFFF);
        assert_eq!(bytes_written, MAX_VARINT_BYTES);
        assert_eq!(buffer, TEN_BYTE_ENCODING);
    }

    #[test]
    fn test_all_encoding_methods_consistency() {
        let test_values = [0u64, 1, 127, 128, 150, 255, 256, 65535, 0x7FFFFFFF];

        for &value in &test_values {
            let (array_bytes, array_count) = Varint::from_uint64(value).encode();
            let (vec_count, vec_buffer) = write_to_vec(value);

            assert_eq!(array_count, vec_count);
            assert_eq!(&array_bytes[..array_count], &vec_buffer[..]);
        }
    }
}
467
468            assert_eq!(array_count, vec_count);
469            assert_eq!(&array_bytes[..array_count], &vec_buffer[..]);
470        }
471    }
472}