protobuf_core/
wire_format.rs

1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Protocol Buffers wire format constants and definitions.
16//!
17//! This module provides the fundamental constants and types needed for
18//! implementing Protocol Buffers encoding and decoding according to the
19//! official wire format specification.
20
21use crate::field_number::FieldNumber;
22use ::std::convert::TryFrom;
23
/// The six wire types a Protocol Buffers tag can carry.
///
/// A wire type describes the shape of the payload that follows a tag, which
/// is what lets a parser step over fields it does not recognise. The explicit
/// discriminants are the numeric identifiers of each wire type, and
/// `#[repr(u8)]` stores the enum as a single byte.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum WireType {
    /// Variable-length integer payload: `int32`, `int64`, `uint32`, `uint64`,
    /// `sint32`, `sint64`, `bool` and `enum` values.
    Varint = 0,
    /// Fixed eight-byte payload: `fixed64`, `sfixed64` and `double` values.
    Int64 = 1,
    /// Length-prefixed payload: `string`, `bytes`, embedded messages and
    /// packed repeated fields.
    Len = 2,
    /// Marks the beginning of a group (groups are a deprecated feature).
    SGroup = 3,
    /// Marks the end of a group (groups are a deprecated feature).
    EGroup = 4,
    /// Fixed four-byte payload: `fixed32`, `sfixed32` and `float` values.
    Int32 = 5,
}
44
45impl From<WireType> for u8 {
46    #[inline]
47    fn from(wire_type: WireType) -> Self {
48        wire_type as u8
49    }
50}
51
52impl TryFrom<u8> for WireType {
53    type Error = crate::ProtobufError;
54
55    fn try_from(value: u8) -> Result<Self, Self::Error> {
56        match value {
57            0 => Ok(WireType::Varint),
58            1 => Ok(WireType::Int64),
59            2 => Ok(WireType::Len),
60            3 => Ok(WireType::SGroup),
61            4 => Ok(WireType::EGroup),
62            5 => Ok(WireType::Int32),
63            _ => Err(crate::ProtobufError::InvalidWireType { value }),
64        }
65    }
66}
67
68/// Maximum field number allowed in Protocol Buffers.
69///
70/// Field numbers must be in the range [1, 2^29 - 1].
71pub const MAX_FIELD_NUMBER: FieldNumber = FieldNumber::MAX;
72
73/// Minimum field number allowed in Protocol Buffers.
74pub const MIN_FIELD_NUMBER: FieldNumber = FieldNumber::MIN;
75
/// Upper bound on the size of a serialized message: 2 GiB (`2^31` bytes).
///
/// NOTE(review): the Protocol Buffers documentation states that a serialized
/// message must be *smaller* than 2 GiB; confirm that callers compare against
/// this constant accordingly.
pub const MAX_MESSAGE_SIZE: usize = 1 << 31;

/// Upper bound on the size of a single string/bytes field: 2 GiB (`2^31` bytes).
pub const MAX_STRING_SIZE: usize = 1 << 31;
81
/// Number of bits to shift a tag right to obtain its field number.
///
/// The field number occupies every bit of the tag above the wire type.
pub const FIELD_NUMBER_SHIFT: u32 = 3;

/// Mask selecting the wire type from a tag.
///
/// The wire type lives in the three least significant bits of the tag, i.e.
/// exactly the bits that [`FIELD_NUMBER_SHIFT`] discards.
pub const WIRE_TYPE_MASK: u32 = (1 << FIELD_NUMBER_SHIFT) - 1;
91
/// Upper bound on the encoded length of a varint, in bytes.
///
/// An encoded varint occupies between 1 and 10 bytes.
pub const MAX_VARINT_BYTES: usize = 10;

// Each encoded byte contributes 7 payload bits, so the largest value that
// fits in N bytes is 2^(7 * N) - 1. The constants below spell that out.

/// Largest value whose varint encoding fits in 1 byte (`2^7 - 1`).
pub const MAX_1_BYTE_VARINT: u64 = (1 << 7) - 1;

/// Largest value whose varint encoding fits in 2 bytes (`2^14 - 1`).
pub const MAX_2_BYTE_VARINT: u64 = (1 << (7 * 2)) - 1;

/// Largest value whose varint encoding fits in 3 bytes (`2^21 - 1`).
pub const MAX_3_BYTE_VARINT: u64 = (1 << (7 * 3)) - 1;

/// Largest value whose varint encoding fits in 4 bytes (`2^28 - 1`).
pub const MAX_4_BYTE_VARINT: u64 = (1 << (7 * 4)) - 1;

/// Largest value whose varint encoding fits in 5 bytes (`2^35 - 1`).
pub const MAX_5_BYTE_VARINT: u64 = (1 << (7 * 5)) - 1;

/// Largest value whose varint encoding fits in 6 bytes (`2^42 - 1`).
pub const MAX_6_BYTE_VARINT: u64 = (1 << (7 * 6)) - 1;

/// Largest value whose varint encoding fits in 7 bytes (`2^49 - 1`).
pub const MAX_7_BYTE_VARINT: u64 = (1 << (7 * 7)) - 1;

/// Largest value whose varint encoding fits in 8 bytes (`2^56 - 1`).
pub const MAX_8_BYTE_VARINT: u64 = (1 << (7 * 8)) - 1;

/// Largest value whose varint encoding fits in 9 bytes (`2^63 - 1`).
///
/// Values above this threshold need the full [`MAX_VARINT_BYTES`] bytes,
/// which makes it a convenient cut-off when sizing an encoding.
pub const MAX_9_BYTE_VARINT: u64 = (1 << (7 * 9)) - 1;
125
/// Flag bit marking that another varint byte follows.
///
/// This is the most significant bit of each encoded byte.
pub const VARINT_CONTINUATION_BIT: u8 = 1 << 7;

/// Mask selecting the data bits of a varint byte.
///
/// The seven low bits hold the payload, i.e. everything except
/// [`VARINT_CONTINUATION_BIT`].
pub const VARINT_PAYLOAD_MASK: u8 = !VARINT_CONTINUATION_BIT;
135
/// Byte width of a 32-bit fixed-width value.
///
/// Applies to the `fixed32`, `sfixed32` and `float` types. Expressed as the
/// size of `u32`, which is 4 bytes.
pub const FIXED32_BYTES: usize = ::std::mem::size_of::<u32>();

/// Byte width of a 64-bit fixed-width value.
///
/// Applies to the `fixed64`, `sfixed64` and `double` types. Expressed as the
/// size of `u64`, which is 8 bytes.
pub const FIXED64_BYTES: usize = ::std::mem::size_of::<u64>();
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Identifiers 0 through 5 convert to their variants; other bytes are
    /// rejected with an `InvalidWireType` error that echoes the input.
    #[test]
    fn test_wire_type_try_from_trait() {
        // Valid identifiers, paired with the variant each must produce.
        let valid = [
            (0u8, WireType::Varint),
            (1, WireType::Int64),
            (2, WireType::Len),
            (3, WireType::SGroup),
            (4, WireType::EGroup),
            (5, WireType::Int32),
        ];
        for &(raw, expected) in valid.iter() {
            assert_eq!(WireType::try_from(raw).unwrap(), expected);
        }

        // The first unassigned identifier and the largest possible byte.
        for &invalid in [6u8, 255].iter() {
            let result = WireType::try_from(invalid);
            assert!(result.is_err());
            match result {
                Err(crate::ProtobufError::InvalidWireType { value }) => {
                    assert_eq!(value, invalid);
                }
                _ => panic!("Expected InvalidWireType error"),
            }
        }
    }

    /// Each `MAX_N_BYTE_VARINT` must equal `2^(7 * N) - 1`, because every
    /// encoded byte contributes seven payload bits.
    #[test]
    fn test_varint_size_constants_mathematical() {
        // Ordered by byte count, starting at one byte.
        let by_byte_count = [
            MAX_1_BYTE_VARINT,
            MAX_2_BYTE_VARINT,
            MAX_3_BYTE_VARINT,
            MAX_4_BYTE_VARINT,
            MAX_5_BYTE_VARINT,
            MAX_6_BYTE_VARINT,
            MAX_7_BYTE_VARINT,
            MAX_8_BYTE_VARINT,
            MAX_9_BYTE_VARINT,
        ];

        for (bytes, &actual) in (1u32..).zip(by_byte_count.iter()) {
            let payload_bits = bytes * 7;
            let expected = (1u64 << payload_bits) - 1;
            assert_eq!(
                actual, expected,
                "MAX_{}_BYTE_VARINT should be 2^{} - 1 = {}",
                bytes, payload_bits, expected
            );
        }
    }
}