crous_core/wire.rs
//! Wire types for the Crous binary format.
//!
//! Each field in a Crous document is prefixed with a tag byte encoding:
//! - Low 4 bits: wire type (the physical encoding of the data)
//! - High 4 bits: flags (see the `flags` module for the bits currently defined;
//!   the rest are reserved)
//!
//! Wire types define *how* data is serialized on the wire, independent of
//! the logical schema type. This enables forward-compatible skipping of
//! unknown fields: a decoder that doesn't know a field's schema type can
//! still determine how many bytes to skip.
11
/// Wire type identifiers (low 4 bits of tag byte).
///
/// Design note: a 4-bit field allows 16 wire types. Eleven (`0x0`..=`0xA`)
/// are assigned below; the remaining five (`0xB`..=`0xF`) are reserved for
/// future use (e.g., decimal, timestamp, map) and are rejected by
/// [`WireType::from_tag`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum WireType {
    /// Null value: no payload.
    Null = 0x00,
    /// Boolean: 1-byte payload (0x00 = false, 0x01 = true).
    Bool = 0x01,
    /// Unsigned integer: LEB128 varint payload.
    VarUInt = 0x02,
    /// Signed integer: ZigZag + LEB128 varint payload.
    VarInt = 0x03,
    /// 64-bit fixed-width: 8-byte little-endian payload (for f64, fixed i64/u64).
    Fixed64 = 0x04,
    /// Length-delimited: varint length prefix + raw bytes (strings, binary blobs).
    LenDelimited = 0x05,
    /// Start of an object/map: followed by field entries until EndObject.
    StartObject = 0x06,
    /// End of an object/map: no payload.
    EndObject = 0x07,
    /// Start of an array: followed by elements until EndArray.
    StartArray = 0x08,
    /// End of an array: no payload.
    EndArray = 0x09,
    /// Reference to a previously-seen value (dedup): varint reference ID payload.
    Reference = 0x0A,
}

impl WireType {
    /// Parse a wire type from the low 4 bits of a tag byte.
    ///
    /// The high nibble (flags) is masked off and does not influence the
    /// result. Returns `None` when the low nibble is one of the unassigned
    /// values `0x0B..=0x0F`.
    ///
    /// This is a `const fn`, so it can be evaluated in constant contexts.
    pub const fn from_tag(tag: u8) -> Option<WireType> {
        match tag & 0x0F {
            0x00 => Some(WireType::Null),
            0x01 => Some(WireType::Bool),
            0x02 => Some(WireType::VarUInt),
            0x03 => Some(WireType::VarInt),
            0x04 => Some(WireType::Fixed64),
            0x05 => Some(WireType::LenDelimited),
            0x06 => Some(WireType::StartObject),
            0x07 => Some(WireType::EndObject),
            0x08 => Some(WireType::StartArray),
            0x09 => Some(WireType::EndArray),
            0x0A => Some(WireType::Reference),
            _ => None,
        }
    }

    /// Encode this wire type as a tag byte (flags in high nibble are zero).
    ///
    /// This is a `const fn`, so the result can be bound to a `const` item
    /// and used, for example, as a `match` pattern.
    pub const fn to_tag(self) -> u8 {
        self as u8
    }

    /// Encode this wire type with flags in the high nibble.
    ///
    /// `flags` is shifted left by four bits and OR-ed with the wire type's
    /// discriminant. Any bits of `flags` above its low nibble are shifted
    /// out of the byte and discarded.
    ///
    /// This is a `const fn`, so flagged tags can be precomputed as constants.
    pub const fn to_tag_with_flags(self, flags: u8) -> u8 {
        (flags << 4) | (self as u8)
    }
}
72
/// Flag values carried in the high 4 bits of a tag byte.
pub mod flags {
    /// Empty flag set: no bits raised.
    pub const NONE: u8 = 0b0000;
    /// Bit 0: the field is a string dictionary reference (optimization hint).
    pub const STRING_DICT_REF: u8 = 0b0001;
    /// Bit 1: the field has an inline schema annotation.
    pub const HAS_SCHEMA_ANNOTATION: u8 = 0b0010;
}
82
/// Compression type identifiers for block headers.
///
/// Citation: <https://facebook.github.io/zstd/> and <https://github.com/lz4/lz4>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum CompressionType {
    /// No compression.
    None = 0x00,
    /// Zstandard compression.
    Zstd = 0x01,
    /// Snappy compression.
    Snappy = 0x02,
    /// LZ4 block compression: https://github.com/lz4/lz4
    Lz4 = 0x03,
}

impl CompressionType {
    /// Decode a compression identifier from its byte value.
    ///
    /// Bytes `0x00..=0x03` map to the variant with that discriminant; every
    /// other byte yields `Option::None`.
    pub fn from_byte(b: u8) -> Option<Self> {
        // Indexed by discriminant, so the order must mirror the enum's
        // assigned values 0x00..=0x03.
        const BY_DISCRIMINANT: [CompressionType; 4] = [
            CompressionType::None,
            CompressionType::Zstd,
            CompressionType::Snappy,
            CompressionType::Lz4,
        ];
        BY_DISCRIMINANT.get(usize::from(b)).copied()
    }
}
106
/// Identifies the kind of a block.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum BlockType {
    /// Data block containing encoded values.
    Data = 0x01,
    /// Index block for random access.
    Index = 0x02,
    /// Schema block embedding type information.
    Schema = 0x03,
    /// String dictionary block for deduplication.
    StringDict = 0x04,
    /// File trailer/footer.
    Trailer = 0xFF,
}

impl BlockType {
    /// Decode a block type from its byte value.
    ///
    /// Returns the variant whose discriminant equals `b`, or `None` when no
    /// variant uses that value (note that `0x00` is not assigned).
    pub fn from_byte(b: u8) -> Option<Self> {
        // The discriminants are sparse (0x01..=0x04 plus 0xFF), so compare
        // against each known variant rather than indexing a table.
        [
            Self::Data,
            Self::Index,
            Self::Schema,
            Self::StringDict,
            Self::Trailer,
        ]
        .iter()
        .copied()
        .find(|kind| *kind as u8 == b)
    }
}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Every assigned tag value decodes to a wire type that encodes back to
    /// the same byte.
    #[test]
    fn wire_type_roundtrip() {
        (0x00..=0x0Au8).for_each(|raw| {
            let decoded = WireType::from_tag(raw).unwrap();
            assert_eq!(decoded.to_tag(), raw);
        });
    }

    /// Low-nibble values with no assigned wire type are rejected.
    #[test]
    fn unknown_wire_type() {
        for &unassigned in &[0x0B_u8, 0x0F] {
            assert!(WireType::from_tag(unassigned).is_none());
        }
    }

    /// Flags land in the high nibble and leave the wire type recoverable.
    #[test]
    fn tag_with_flags() {
        let encoded = WireType::VarUInt.to_tag_with_flags(flags::STRING_DICT_REF);
        let high_nibble = encoded >> 4;
        assert_eq!(encoded, 0x12); // 0x01 << 4 | 0x02
        assert_eq!(WireType::from_tag(encoded), Some(WireType::VarUInt));
        assert_eq!(high_nibble, flags::STRING_DICT_REF);
    }
}
161}