Skip to main content

cynos_jsonb/
binary.rs

//! Binary encoding and decoding for JSONB values.
//!
//! This module provides efficient binary serialization for `JsonbValue`.
//! The format is designed for compact storage and fast decoding.
//!
//! ## Encoding Format
//!
//! Each value is encoded as: `[type_tag: u8] [data...]`
//!
//! Type tags:
//! - 0x00: null
//! - 0x01: false
//! - 0x02: true
//! - 0x03: number (8 bytes, f64 little-endian)
//! - 0x04: string (varint length + UTF-8 bytes)
//! - 0x05: array (varint count + encoded elements)
//! - 0x06: object (varint count + sorted key-value pairs; each key is written
//!   as varint length + UTF-8 bytes, followed by its encoded value)
18
19use crate::value::{JsonbObject, JsonbValue};
20use alloc::string::String;
21use alloc::vec::Vec;
22
// Type tags: the first byte of every encoded value.

/// Tag for `null`; carries no payload.
const TAG_NULL: u8 = 0x00;
/// Tag for the boolean `false`; carries no payload.
const TAG_FALSE: u8 = 0x01;
/// Tag for the boolean `true`; carries no payload.
const TAG_TRUE: u8 = 0x02;
/// Tag for a number; followed by 8 bytes holding an `f64` in little-endian order.
const TAG_NUMBER: u8 = 0x03;
/// Tag for a string; followed by a varint byte length and the UTF-8 bytes.
const TAG_STRING: u8 = 0x04;
/// Tag for an array; followed by a varint element count and the encoded elements.
const TAG_ARRAY: u8 = 0x05;
/// Tag for an object; followed by a varint entry count and the key/value entries.
const TAG_OBJECT: u8 = 0x06;
30
/// Binary representation of a JSONB value.
///
/// This is an owned byte buffer holding one encoded value. Equality and
/// hashing are byte-wise over that buffer, so the type can be used as a map
/// key or stored in a set.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct JsonbBinary {
    /// The encoded bytes: a type tag followed by the tag-specific payload.
    data: Vec<u8>,
}
36
37impl JsonbBinary {
38    /// Creates a new JsonbBinary from raw bytes.
39    pub fn from_bytes(data: Vec<u8>) -> Self {
40        Self { data }
41    }
42
43    /// Returns the underlying bytes.
44    pub fn as_bytes(&self) -> &[u8] {
45        &self.data
46    }
47
48    /// Consumes self and returns the underlying bytes.
49    pub fn into_bytes(self) -> Vec<u8> {
50        self.data
51    }
52
53    /// Returns the size in bytes.
54    pub fn len(&self) -> usize {
55        self.data.len()
56    }
57
58    /// Returns true if empty.
59    pub fn is_empty(&self) -> bool {
60        self.data.is_empty()
61    }
62
63    /// Encodes a JsonbValue into binary format.
64    pub fn encode(value: &JsonbValue) -> Self {
65        let mut data = Vec::new();
66        encode_value(value, &mut data);
67        Self { data }
68    }
69
70    /// Decodes binary data into a JsonbValue.
71    pub fn decode(&self) -> JsonbValue {
72        let mut pos = 0;
73        decode_value(&self.data, &mut pos)
74    }
75}
76
/// Appends `value` to `out` as a variable-length integer.
///
/// The value is split into 7-bit groups, least-significant group first. Every
/// byte except the last has its high bit (`0x80`) set to signal that another
/// group follows. Zero is written as the single byte `0x00`.
fn encode_varint(value: usize, out: &mut Vec<u8>) {
    let mut remaining = value;
    // Emit continuation bytes while more than 7 significant bits are left.
    while remaining > 0x7F {
        out.push((remaining & 0x7F) as u8 | 0x80);
        remaining >>= 7;
    }
    // The final group fits in 7 bits, so the continuation bit stays clear.
    out.push(remaining as u8);
}
92
/// Decodes a varint from `data`, starting at `*pos`.
///
/// `*pos` is advanced past every byte that belongs to the varint: all bytes up
/// to and including the first one whose continuation bit (`0x80`) is clear, or
/// up to the end of `data` if the input is truncated.
///
/// The function never panics on malformed input:
/// - if `*pos` is already at or past the end of `data`, it returns `0` and
///   leaves `*pos` unchanged;
/// - if the input ends in the middle of a varint, the groups read so far are
///   returned;
/// - if the encoded number does not fit in `usize`, the result saturates to
///   `usize::MAX` so that callers' length/bounds checks reject it instead of
///   acting on silently truncated bits.
fn decode_varint(data: &[u8], pos: &mut usize) -> usize {
    let mut result = 0usize;
    let mut shift = 0u32;
    let mut overflowed = false;

    while let Some(&byte) = data.get(*pos) {
        *pos += 1;

        let group = (byte & 0x7F) as usize;
        // Zero groups never contribute bits, so over-long zero padding is harmless.
        if group != 0 {
            // `group << shift` is only representable when the shift is in range
            // and no set bit would be pushed past the top of `usize`.
            if shift >= usize::BITS || group > (usize::MAX >> shift) {
                overflowed = true;
            } else {
                result |= group << shift;
            }
        }

        if byte & 0x80 == 0 {
            break;
        }
        // Saturate so that absurdly long inputs cannot overflow the counter itself.
        shift = shift.saturating_add(7);
    }

    if overflowed {
        usize::MAX
    } else {
        result
    }
}
111
112/// Encodes a JsonbValue into the output buffer.
113fn encode_value(value: &JsonbValue, out: &mut Vec<u8>) {
114    match value {
115        JsonbValue::Null => {
116            out.push(TAG_NULL);
117        }
118        JsonbValue::Bool(false) => {
119            out.push(TAG_FALSE);
120        }
121        JsonbValue::Bool(true) => {
122            out.push(TAG_TRUE);
123        }
124        JsonbValue::Number(n) => {
125            out.push(TAG_NUMBER);
126            out.extend_from_slice(&n.to_le_bytes());
127        }
128        JsonbValue::String(s) => {
129            out.push(TAG_STRING);
130            encode_varint(s.len(), out);
131            out.extend_from_slice(s.as_bytes());
132        }
133        JsonbValue::Array(arr) => {
134            out.push(TAG_ARRAY);
135            encode_varint(arr.len(), out);
136            for item in arr {
137                encode_value(item, out);
138            }
139        }
140        JsonbValue::Object(obj) => {
141            out.push(TAG_OBJECT);
142            encode_varint(obj.len(), out);
143            for (key, val) in obj.iter() {
144                encode_varint(key.len(), out);
145                out.extend_from_slice(key.as_bytes());
146                encode_value(val, out);
147            }
148        }
149    }
150}
151
152/// Decodes a JsonbValue from the buffer.
153fn decode_value(data: &[u8], pos: &mut usize) -> JsonbValue {
154    if *pos >= data.len() {
155        return JsonbValue::Null;
156    }
157
158    let tag = data[*pos];
159    *pos += 1;
160
161    match tag {
162        TAG_NULL => JsonbValue::Null,
163        TAG_FALSE => JsonbValue::Bool(false),
164        TAG_TRUE => JsonbValue::Bool(true),
165        TAG_NUMBER => {
166            if *pos + 8 > data.len() {
167                return JsonbValue::Null;
168            }
169            let bytes: [u8; 8] = data[*pos..*pos + 8].try_into().unwrap_or([0; 8]);
170            *pos += 8;
171            JsonbValue::Number(f64::from_le_bytes(bytes))
172        }
173        TAG_STRING => {
174            let len = decode_varint(data, pos);
175            if *pos + len > data.len() {
176                return JsonbValue::Null;
177            }
178            let s = String::from_utf8_lossy(&data[*pos..*pos + len]).into_owned();
179            *pos += len;
180            JsonbValue::String(s)
181        }
182        TAG_ARRAY => {
183            let count = decode_varint(data, pos);
184            let mut arr = Vec::with_capacity(count);
185            for _ in 0..count {
186                arr.push(decode_value(data, pos));
187            }
188            JsonbValue::Array(arr)
189        }
190        TAG_OBJECT => {
191            let count = decode_varint(data, pos);
192            let mut obj = JsonbObject::with_capacity(count);
193            for _ in 0..count {
194                let key_len = decode_varint(data, pos);
195                if *pos + key_len > data.len() {
196                    break;
197                }
198                let key = String::from_utf8_lossy(&data[*pos..*pos + key_len]).into_owned();
199                *pos += key_len;
200                let val = decode_value(data, pos);
201                obj.insert(key, val);
202            }
203            JsonbValue::Object(obj)
204        }
205        _ => JsonbValue::Null,
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Encodes `value` and immediately decodes the resulting bytes.
    fn roundtrip(value: &JsonbValue) -> JsonbValue {
        JsonbBinary::encode(value).decode()
    }

    /// Asserts that `value` is unchanged by an encode/decode cycle.
    fn assert_roundtrip(value: JsonbValue) {
        assert_eq!(value, roundtrip(&value));
    }

    /// Decodes a hand-written byte sequence.
    fn decode_raw(bytes: Vec<u8>) -> JsonbValue {
        JsonbBinary::from_bytes(bytes).decode()
    }

    /// Builds an object value by inserting `entries` in the given order.
    fn object_of(entries: Vec<(&str, JsonbValue)>) -> JsonbValue {
        let mut obj = JsonbObject::new();
        for (key, val) in entries {
            obj.insert(key.into(), val);
        }
        JsonbValue::Object(obj)
    }

    /// Encodes `n` as a varint and decodes it again.
    /// Returns `(encoded length in bytes, decoded value)`.
    fn varint_roundtrip(n: usize) -> (usize, usize) {
        let mut buf = Vec::new();
        encode_varint(n, &mut buf);
        let mut pos = 0;
        let decoded = decode_varint(&buf, &mut pos);
        (buf.len(), decoded)
    }

    #[test]
    fn test_encode_decode_null() {
        assert_roundtrip(JsonbValue::Null);
    }

    #[test]
    fn test_encode_decode_bool() {
        assert_roundtrip(JsonbValue::Bool(true));
        assert_roundtrip(JsonbValue::Bool(false));
    }

    #[test]
    fn test_encode_decode_number() {
        assert_roundtrip(JsonbValue::Number(42.5));
        assert_roundtrip(JsonbValue::Number(-123.456));
    }

    #[test]
    fn test_encode_decode_string() {
        assert_roundtrip(JsonbValue::String("hello world".into()));
        assert_roundtrip(JsonbValue::String("".into()));
    }

    #[test]
    fn test_encode_decode_array() {
        assert_roundtrip(JsonbValue::Array(vec![
            JsonbValue::Number(1.0),
            JsonbValue::String("two".into()),
            JsonbValue::Bool(true),
        ]));
    }

    #[test]
    fn test_encode_decode_object() {
        assert_roundtrip(object_of(vec![
            ("name", JsonbValue::String("Alice".into())),
            ("age", JsonbValue::Number(25.0)),
            ("active", JsonbValue::Bool(true)),
        ]));
    }

    #[test]
    fn test_encode_decode_nested() {
        let address = object_of(vec![("city", JsonbValue::String("NYC".into()))]);
        let tags = JsonbValue::Array(vec![
            JsonbValue::String("admin".into()),
            JsonbValue::String("developer".into()),
        ]);

        assert_roundtrip(object_of(vec![
            ("name", JsonbValue::String("Alice".into())),
            ("address", address),
            ("tags", tags),
        ]));
    }

    #[test]
    fn test_varint_encoding() {
        // Small, larger and large numbers.
        for &n in [127usize, 300, 100000].iter() {
            let (_, decoded) = varint_roundtrip(n);
            assert_eq!(decoded, n);
        }
    }

    #[test]
    fn test_binary_roundtrip_complex() {
        let numbers = JsonbValue::Array(vec![
            JsonbValue::Number(1.0),
            JsonbValue::Number(2.0),
            JsonbValue::Number(3.0),
        ]);
        let nested = object_of(vec![("a", JsonbValue::Number(1.0))]);

        assert_roundtrip(object_of(vec![
            ("string", JsonbValue::String("hello".into())),
            ("number", JsonbValue::Number(42.5)),
            ("bool", JsonbValue::Bool(true)),
            ("null", JsonbValue::Null),
            ("array", numbers),
            ("nested", nested),
        ]));
    }

    // Edge case tests
    #[test]
    fn test_encode_decode_empty_array() {
        assert_roundtrip(JsonbValue::Array(vec![]));
    }

    #[test]
    fn test_encode_decode_empty_object() {
        assert_roundtrip(JsonbValue::Object(JsonbObject::new()));
    }

    #[test]
    fn test_encode_decode_unicode_string() {
        assert_roundtrip(JsonbValue::String("你好世界 🌍 émojis".into()));
    }

    #[test]
    fn test_encode_decode_unicode_keys() {
        assert_roundtrip(object_of(vec![
            ("名前", JsonbValue::String("田中".into())),
            ("年齢", JsonbValue::Number(25.0)),
            ("🔑", JsonbValue::String("emoji key".into())),
        ]));
    }

    #[test]
    fn test_encode_decode_special_numbers() {
        // Zero
        assert_roundtrip(JsonbValue::Number(0.0));

        // Negative zero: -0.0 and 0.0 are equal in f64 comparison
        let neg_zero = roundtrip(&JsonbValue::Number(-0.0));
        assert_eq!(neg_zero.as_f64(), Some(0.0));

        // Very large number
        assert_roundtrip(JsonbValue::Number(1e308));

        // Very small number
        assert_roundtrip(JsonbValue::Number(1e-308));

        // Infinity
        assert_roundtrip(JsonbValue::Number(f64::INFINITY));

        // NaN - special case, NaN != NaN
        let nan = roundtrip(&JsonbValue::Number(f64::NAN));
        assert!(nan.as_f64().unwrap().is_nan());
    }

    #[test]
    fn test_encode_decode_special_strings() {
        // Empty string
        assert_roundtrip(JsonbValue::String("".into()));

        // String with null bytes
        assert_roundtrip(JsonbValue::String("hello\0world".into()));

        // String with newlines and tabs
        assert_roundtrip(JsonbValue::String("line1\nline2\ttab".into()));

        // String with quotes
        assert_roundtrip(JsonbValue::String("say \"hello\"".into()));
    }

    #[test]
    fn test_decode_malformed_truncated_number() {
        // Number tag but only 2 payload bytes instead of 8
        let decoded = decode_raw(vec![TAG_NUMBER, 0x00, 0x00]);
        assert_eq!(decoded, JsonbValue::Null);
    }

    #[test]
    fn test_decode_malformed_truncated_string() {
        // String tag with length 10 but only 3 bytes of data
        let decoded = decode_raw(vec![TAG_STRING, 10, b'a', b'b', b'c']);
        assert_eq!(decoded, JsonbValue::Null);
    }

    #[test]
    fn test_decode_unknown_tag() {
        // 0xFF is not a known tag
        let decoded = decode_raw(vec![0xFF]);
        assert_eq!(decoded, JsonbValue::Null);
    }

    #[test]
    fn test_decode_empty_data() {
        let decoded = decode_raw(vec![]);
        assert_eq!(decoded, JsonbValue::Null);
    }

    #[test]
    fn test_deeply_nested_structure() {
        // Wrap the number in ten array layers: [[[[[[[[[[1]]]]]]]]]]
        let value = (0..10).fold(JsonbValue::Number(1.0), |inner, _| {
            JsonbValue::Array(vec![inner])
        });
        assert_roundtrip(value);
    }

    #[test]
    fn test_varint_edge_cases() {
        // Zero
        let (_, decoded) = varint_roundtrip(0);
        assert_eq!(decoded, 0);

        // Max single byte (127)
        let (encoded_len, decoded) = varint_roundtrip(127);
        assert_eq!(encoded_len, 1);
        assert_eq!(decoded, 127);

        // Min two bytes (128)
        let (encoded_len, decoded) = varint_roundtrip(128);
        assert_eq!(encoded_len, 2);
        assert_eq!(decoded, 128);

        // Large value
        let (_, decoded) = varint_roundtrip(usize::MAX >> 1);
        assert_eq!(decoded, usize::MAX >> 1);
    }
}