Skip to main content

crous_core/
wire.rs

//! Wire types for the Crous binary format.
//!
//! Each field in a Crous document is prefixed with a tag byte encoding:
//! - Low 4 bits: wire type (the physical encoding of the data)
//! - High 4 bits: flags (modifier bits; the ones currently defined are listed
//!   in the `flags` module, the remaining bits are reserved)
//!
//! Wire types define *how* data is serialized on the wire, independent of
//! the logical schema type. This enables forward-compatible skipping of
//! unknown fields: a decoder that doesn't know a field's schema type can
//! still determine how many bytes to skip.
11
/// Wire type identifiers (low 4 bits of tag byte).
///
/// Design note: 16 possible wire types (4 bits). We use 11 currently
/// (`0x00..=0x0A`) and reserve 5 (`0x0B..=0x0F`) for future use
/// (e.g., decimal, timestamp, map).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum WireType {
    /// Null value: no payload.
    Null = 0x00,
    /// Boolean: 1-byte payload (0x00 = false, 0x01 = true).
    Bool = 0x01,
    /// Unsigned integer: LEB128 varint payload.
    VarUInt = 0x02,
    /// Signed integer: ZigZag + LEB128 varint payload.
    VarInt = 0x03,
    /// 64-bit fixed-width: 8-byte little-endian payload (for f64, fixed i64/u64).
    Fixed64 = 0x04,
    /// Length-delimited: varint length prefix + raw bytes (strings, binary blobs).
    LenDelimited = 0x05,
    /// Start of an object/map: followed by field entries until EndObject.
    StartObject = 0x06,
    /// End of an object/map: no payload.
    EndObject = 0x07,
    /// Start of an array: followed by elements until EndArray.
    StartArray = 0x08,
    /// End of an array: no payload.
    EndArray = 0x09,
    /// Reference to a previously-seen value (dedup): varint reference ID payload.
    Reference = 0x0A,
}

impl WireType {
    /// Mask selecting a single nibble (4 bits) of a byte.
    const NIBBLE_MASK: u8 = 0x0F;
    /// Number of bits the flag nibble is shifted by inside a tag byte.
    const FLAG_SHIFT: u32 = 4;

    /// Parse a wire type from the low 4 bits of a tag byte.
    ///
    /// The high nibble (flags) is ignored. Returns `None` when the low nibble
    /// holds one of the currently unassigned values (`0x0B..=0x0F`).
    #[must_use]
    pub const fn from_tag(tag: u8) -> Option<WireType> {
        match tag & Self::NIBBLE_MASK {
            0x00 => Some(WireType::Null),
            0x01 => Some(WireType::Bool),
            0x02 => Some(WireType::VarUInt),
            0x03 => Some(WireType::VarInt),
            0x04 => Some(WireType::Fixed64),
            0x05 => Some(WireType::LenDelimited),
            0x06 => Some(WireType::StartObject),
            0x07 => Some(WireType::EndObject),
            0x08 => Some(WireType::StartArray),
            0x09 => Some(WireType::EndArray),
            0x0A => Some(WireType::Reference),
            _ => None,
        }
    }

    /// Encode this wire type as a tag byte (flags in high nibble are zero).
    #[must_use]
    pub const fn to_tag(self) -> u8 {
        self as u8
    }

    /// Encode this wire type with flags in the high nibble.
    ///
    /// `flags` is given un-shifted (e.g. `0x01`), and is moved into bits 4..=7
    /// of the result. Only the low 4 bits of `flags` fit into the tag byte;
    /// any higher bits are discarded. The mask makes that truncation explicit
    /// and yields the same byte as a plain `flags << 4` on `u8`.
    #[must_use]
    pub const fn to_tag_with_flags(self, flags: u8) -> u8 {
        ((flags & Self::NIBBLE_MASK) << Self::FLAG_SHIFT) | (self as u8)
    }
}
72
/// Flag bits stored in the high 4 bits of a tag byte.
///
/// The constants are expressed un-shifted, i.e. as the value of the nibble
/// itself; `WireType::to_tag_with_flags` shifts them into the high nibble.
pub mod flags {
    /// Empty flag set.
    pub const NONE: u8 = 0;
    /// Marks the field as a string dictionary reference (optimization hint).
    pub const STRING_DICT_REF: u8 = 1 << 0;
    /// Marks the field as carrying an inline schema annotation.
    pub const HAS_SCHEMA_ANNOTATION: u8 = 1 << 1;
}
82
/// Compression type identifiers for block headers.
/// Citation: https://facebook.github.io/zstd/ and https://github.com/lz4/lz4
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum CompressionType {
    /// No compression.
    None = 0x00,
    /// Zstandard compression: https://facebook.github.io/zstd/
    Zstd = 0x01,
    /// Snappy compression.
    Snappy = 0x02,
    /// LZ4 block compression: https://github.com/lz4/lz4
    Lz4 = 0x03,
}

impl CompressionType {
    /// Parse a compression type from its identifier byte.
    ///
    /// Returns `None` for any byte outside the assigned range `0x00..=0x03`.
    #[must_use]
    pub const fn from_byte(b: u8) -> Option<Self> {
        match b {
            0x00 => Some(Self::None),
            0x01 => Some(Self::Zstd),
            0x02 => Some(Self::Snappy),
            0x03 => Some(Self::Lz4),
            _ => None,
        }
    }
}
106
/// Block type identifiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum BlockType {
    /// Data block containing encoded values.
    Data = 0x01,
    /// Index block for random access.
    Index = 0x02,
    /// Schema block embedding type information.
    Schema = 0x03,
    /// String dictionary block for deduplication.
    StringDict = 0x04,
    /// File trailer/footer.
    Trailer = 0xFF,
}

impl BlockType {
    /// Parse a block type from its identifier byte.
    ///
    /// Returns `None` for every byte that is not an assigned identifier;
    /// note that `0x00` is not assigned to any block type.
    #[must_use]
    pub const fn from_byte(b: u8) -> Option<Self> {
        match b {
            0x01 => Some(Self::Data),
            0x02 => Some(Self::Index),
            0x03 => Some(Self::Schema),
            0x04 => Some(Self::StringDict),
            0xFF => Some(Self::Trailer),
            _ => None,
        }
    }
}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Every assigned wire type survives a `from_tag` -> `to_tag` round trip.
    #[test]
    fn wire_type_roundtrip() {
        for tag in 0x00..=0x0Au8 {
            let wt = WireType::from_tag(tag).unwrap();
            assert_eq!(wt.to_tag(), tag);
        }
    }

    /// All five reserved nibble values are rejected, not just the endpoints.
    #[test]
    fn unknown_wire_type() {
        for tag in 0x0B..=0x0Fu8 {
            assert!(
                WireType::from_tag(tag).is_none(),
                "reserved wire type {tag:#04x} must not decode"
            );
        }
    }

    /// `from_tag` looks only at the low nibble, whatever the flag bits are.
    #[test]
    fn from_tag_ignores_flag_nibble() {
        for flag_bits in 0x0..=0xFu8 {
            for low in 0x0..=0xFu8 {
                let tag = (flag_bits << 4) | low;
                assert_eq!(WireType::from_tag(tag), WireType::from_tag(low));
            }
        }
    }

    /// Flags land in the high nibble and can be recovered by shifting.
    #[test]
    fn tag_with_flags() {
        let tag = WireType::VarUInt.to_tag_with_flags(flags::STRING_DICT_REF);
        assert_eq!(tag, 0x12); // 0x01 << 4 | 0x02
        assert_eq!(WireType::from_tag(tag), Some(WireType::VarUInt));
        assert_eq!(tag >> 4, flags::STRING_DICT_REF);
    }

    /// Assigned compression identifiers round-trip; everything else is rejected.
    #[test]
    fn compression_type_roundtrip() {
        for b in 0x00..=0x03u8 {
            assert_eq!(CompressionType::from_byte(b).map(|c| c as u8), Some(b));
        }
        assert!(CompressionType::from_byte(0x04).is_none());
        assert!(CompressionType::from_byte(0xFF).is_none());
    }

    /// Assigned block identifiers round-trip; gaps in the numbering are rejected.
    #[test]
    fn block_type_roundtrip() {
        for &b in &[0x01u8, 0x02, 0x03, 0x04, 0xFF] {
            assert_eq!(BlockType::from_byte(b).map(|t| t as u8), Some(b));
        }
        assert!(BlockType::from_byte(0x00).is_none());
        assert!(BlockType::from_byte(0x05).is_none());
        assert!(BlockType::from_byte(0xFE).is_none());
    }
}