fsqlite_types/
serial_type.rs

/// Payload size, in bytes, of a record value with the given serial type.
///
/// Every value in a SQLite record is announced by a serial type (stored as a
/// varint) that determines both the kind of value and how many bytes of data
/// follow it:
///
/// | Serial type    | Payload bytes | Meaning                    |
/// |----------------|---------------|----------------------------|
/// | 0              | 0             | NULL                       |
/// | 1              | 1             | 8-bit signed integer       |
/// | 2              | 2             | 16-bit big-endian integer  |
/// | 3              | 3             | 24-bit big-endian integer  |
/// | 4              | 4             | 32-bit big-endian integer  |
/// | 5              | 6             | 48-bit big-endian integer  |
/// | 6              | 8             | 64-bit big-endian integer  |
/// | 7              | 8             | IEEE 754 float             |
/// | 8              | 0             | Integer constant 0         |
/// | 9              | 0             | Integer constant 1         |
/// | 10, 11         | —             | Reserved                   |
/// | N >= 12, even  | (N-12)/2      | BLOB of (N-12)/2 bytes     |
/// | N >= 13, odd   | (N-13)/2      | TEXT of (N-13)/2 bytes     |
///
/// Returns `None` for the reserved serial types 10 and 11, which have no
/// defined payload size.
#[allow(clippy::inline_always)]
#[inline(always)]
pub const fn serial_type_len(serial_type: u64) -> Option<u64> {
    let len = match serial_type {
        10 | 11 => return None,
        0 | 8 | 9 => 0,
        // Serial types 1 through 4 are exactly as wide as their own number.
        1..=4 => serial_type,
        5 => 6,
        6 | 7 => 8,
        // BLOB (even) and TEXT (odd) share one formula: for odd N, `N - 12` is
        // odd, so the flooring division already yields `(N - 13) / 2`.
        n => (n - 12) / 2,
    };
    Some(len)
}
41
/// Broad category of value that a record serial type stands for.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SerialTypeClass {
    /// Serial type 0: SQL NULL.
    Null,
    /// Serial types 1-6: signed integer stored in 1 to 8 bytes.
    Integer,
    /// Serial type 7: IEEE 754 double.
    Float,
    /// Serial type 8: the integer constant 0.
    Zero,
    /// Serial type 9: the integer constant 1.
    One,
    /// Serial types 10 and 11: reserved for future use.
    Reserved,
    /// Even serial types >= 12: BLOB of `(N-12)/2` bytes.
    Blob,
    /// Odd serial types >= 13: TEXT of `(N-13)/2` bytes.
    Text,
}
62
63/// Classify a serial type value.
64#[allow(clippy::inline_always)]
65#[inline(always)]
66pub const fn classify_serial_type(serial_type: u64) -> SerialTypeClass {
67    match serial_type {
68        0 => SerialTypeClass::Null,
69        1..=6 => SerialTypeClass::Integer,
70        7 => SerialTypeClass::Float,
71        8 => SerialTypeClass::Zero,
72        9 => SerialTypeClass::One,
73        10 | 11 => SerialTypeClass::Reserved,
74        n if n % 2 == 0 => SerialTypeClass::Blob,
75        _ => SerialTypeClass::Text,
76    }
77}
78
/// Pick the serial type that stores `value` in the fewest bytes.
///
/// The constants 0 and 1 get the dedicated zero-length serial types 8 and 9.
/// Every other value gets the narrowest signed integer type (1 through 6)
/// whose two's-complement range contains it.
#[allow(clippy::cast_sign_loss)]
pub const fn serial_type_for_integer(value: i64) -> u64 {
    match value {
        0 => return 8,
        1 => return 9,
        _ => {}
    }

    // Fold negatives onto the non-negative range with a bitwise NOT (-1 -> 0,
    // -128 -> 127, ...), so one set of upper bounds covers both signs.
    let magnitude = (if value < 0 { !value } else { value }) as u64;

    match magnitude {
        0..=0x7F => 1,
        0x80..=0x7FFF => 2,
        0x8000..=0x007F_FFFF => 3,
        0x0080_0000..=0x7FFF_FFFF => 4,
        0x8000_0000..=0x0000_7FFF_FFFF_FFFF => 5,
        _ => 6,
    }
}
108
/// Serial type describing a TEXT value that is `len` bytes long: `2 * len + 13`.
///
/// Both steps saturate, so an out-of-range `len` yields `u64::MAX` instead of
/// wrapping.
pub const fn serial_type_for_text(len: u64) -> u64 {
    let doubled = len.saturating_mul(2);
    doubled.saturating_add(13)
}
113
/// Serial type describing a BLOB value that is `len` bytes long: `2 * len + 12`.
///
/// Both steps saturate, so an out-of-range `len` yields `u64::MAX` instead of
/// wrapping. NOTE(review): that saturated result is odd, i.e. it no longer has
/// BLOB parity — presumably unreachable for real payload sizes; confirm with callers.
pub const fn serial_type_for_blob(len: u64) -> u64 {
    let doubled = len.saturating_mul(2);
    doubled.saturating_add(12)
}
118
119/// The sizes for serial types less than 128, matching C SQLite's
120/// `sqlite3SmallTypeSizes` lookup table.
121pub const SMALL_TYPE_SIZES: [u8; 128] = {
122    let mut table = [0u8; 128];
123    let mut i: usize = 0;
124    loop {
125        if i >= 128 {
126            break;
127        }
128        #[allow(clippy::cast_possible_truncation)]
129        let size = match serial_type_len(i as u64) {
130            Some(n) if n <= 255 => n as u8,
131            _ => 0,
132        };
133        table[i] = size;
134        i += 1;
135    }
136    table
137};
138
/// Decode one SQLite varint from the front of `buf`.
///
/// Returns `Some((value, bytes_consumed))`, or `None` when `buf` is empty or
/// ends before the varint is complete.
///
/// A SQLite varint occupies 1 to 9 bytes. In the first eight bytes the high
/// bit says whether another byte follows and the low seven bits carry data;
/// a ninth byte, when present, contributes all eight of its bits.
#[allow(clippy::inline_always)]
#[inline(always)]
pub fn read_varint(buf: &[u8]) -> Option<(u64, usize)> {
    // Fast paths for the one- and two-byte encodings, which cover the bulk of
    // varints found in typical records (small serial types, header sizes).
    match buf {
        [] => return None,
        [only, ..] if *only < 0x80 => return Some((u64::from(*only), 1)),
        [hi, lo, ..] if *lo < 0x80 => {
            return Some(((u64::from(*hi & 0x7F) << 7) | u64::from(*lo), 2));
        }
        _ => {}
    }

    // General case: fold in up to seven more 7-bit groups (bytes 2 through 8).
    let mut acc = u64::from(buf[0] & 0x7F);
    for (offset, &byte) in buf[1..].iter().take(7).enumerate() {
        acc = (acc << 7) | u64::from(byte & 0x7F);
        if byte < 0x80 {
            // `offset` counts from the second byte, hence the `+ 2`.
            return Some((acc, offset + 2));
        }
    }

    // Either the input ran out (no ninth byte, so `None`), or all eight
    // leading bytes had the continuation bit set and the ninth byte supplies
    // a full eight bits.
    buf.get(8).map(|&last| ((acc << 8) | u64::from(last), 9))
}
183
/// Number of bytes the SQLite varint encoding of `value` occupies (1 to 9).
///
/// Encodings of up to eight bytes carry seven payload bits per byte, so an
/// `n`-byte varint tops out at `2^(7n) - 1`; anything above `2^56 - 1` needs
/// the full nine bytes.
pub const fn varint_len(value: u64) -> usize {
    match value {
        0..=0x7F => 1,
        0x80..=0x3FFF => 2,
        0x4000..=0x001F_FFFF => 3,
        0x0020_0000..=0x0FFF_FFFF => 4,
        0x1000_0000..=0x07_FFFF_FFFF => 5,
        0x08_0000_0000..=0x03FF_FFFF_FFFF => 6,
        0x0400_0000_0000..=0x01_FFFF_FFFF_FFFF => 7,
        0x02_0000_0000_0000..=0xFF_FFFF_FFFF_FFFF => 8,
        _ => 9,
    }
}
206
207/// Write a varint to a byte buffer, returning the number of bytes written.
208///
209/// The buffer must have at least 9 bytes available.
210#[allow(clippy::cast_possible_truncation)]
211pub fn write_varint(buf: &mut [u8], value: u64) -> usize {
212    let len = varint_len(value);
213
214    if len == 1 {
215        buf[0] = value as u8;
216    } else if len == 9 {
217        // First 8 bytes: each has high bit set, carries 7 bits
218        let mut v = value >> 8;
219        for i in (0..8).rev() {
220            buf[i] = (v as u8 & 0x7F) | 0x80;
221            v >>= 7;
222        }
223        buf[8] = value as u8;
224    } else {
225        let mut v = value;
226        for i in (0..len).rev() {
227            if i == len - 1 {
228                buf[i] = v as u8 & 0x7F;
229            } else {
230                buf[i] = (v as u8 & 0x7F) | 0x80;
231            }
232            v >>= 7;
233        }
234    }
235
236    len
237}
238
239#[cfg(test)]
240mod tests {
241    use super::*;
242
243    #[test]
244    fn serial_type_sizes() {
245        assert_eq!(serial_type_len(0), Some(0)); // NULL
246        assert_eq!(serial_type_len(1), Some(1)); // 8-bit int
247        assert_eq!(serial_type_len(2), Some(2)); // 16-bit int
248        assert_eq!(serial_type_len(3), Some(3)); // 24-bit int
249        assert_eq!(serial_type_len(4), Some(4)); // 32-bit int
250        assert_eq!(serial_type_len(5), Some(6)); // 48-bit int
251        assert_eq!(serial_type_len(6), Some(8)); // 64-bit int
252        assert_eq!(serial_type_len(7), Some(8)); // float
253        assert_eq!(serial_type_len(8), Some(0)); // constant 0
254        assert_eq!(serial_type_len(9), Some(0)); // constant 1
255        assert_eq!(serial_type_len(10), None); // reserved
256        assert_eq!(serial_type_len(11), None); // reserved
257    }
258
259    #[test]
260    fn serial_type_blob_text() {
261        // Even >= 12 is BLOB
262        assert_eq!(serial_type_len(12), Some(0)); // empty blob
263        assert_eq!(serial_type_len(14), Some(1)); // 1-byte blob
264        assert_eq!(serial_type_len(20), Some(4)); // 4-byte blob
265
266        // Odd >= 13 is TEXT
267        assert_eq!(serial_type_len(13), Some(0)); // empty text
268        assert_eq!(serial_type_len(15), Some(1)); // 1-byte text
269        assert_eq!(serial_type_len(21), Some(4)); // 4-byte text
270    }
271
272    #[test]
273    fn classification() {
274        assert_eq!(classify_serial_type(0), SerialTypeClass::Null);
275        assert_eq!(classify_serial_type(1), SerialTypeClass::Integer);
276        assert_eq!(classify_serial_type(6), SerialTypeClass::Integer);
277        assert_eq!(classify_serial_type(7), SerialTypeClass::Float);
278        assert_eq!(classify_serial_type(8), SerialTypeClass::Zero);
279        assert_eq!(classify_serial_type(9), SerialTypeClass::One);
280        assert_eq!(classify_serial_type(10), SerialTypeClass::Reserved);
281        assert_eq!(classify_serial_type(11), SerialTypeClass::Reserved);
282        assert_eq!(classify_serial_type(12), SerialTypeClass::Blob);
283        assert_eq!(classify_serial_type(13), SerialTypeClass::Text);
284        assert_eq!(classify_serial_type(14), SerialTypeClass::Blob);
285        assert_eq!(classify_serial_type(15), SerialTypeClass::Text);
286    }
287
288    #[test]
289    fn serial_type_for_integers() {
290        assert_eq!(serial_type_for_integer(0), 8);
291        assert_eq!(serial_type_for_integer(1), 9);
292        assert_eq!(serial_type_for_integer(2), 1);
293        assert_eq!(serial_type_for_integer(127), 1);
294        assert_eq!(serial_type_for_integer(-1), 1);
295        assert_eq!(serial_type_for_integer(-128), 1);
296        assert_eq!(serial_type_for_integer(128), 2);
297        assert_eq!(serial_type_for_integer(32767), 2);
298        assert_eq!(serial_type_for_integer(32768), 3);
299        assert_eq!(serial_type_for_integer(8_388_607), 3);
300        assert_eq!(serial_type_for_integer(8_388_608), 4);
301        assert_eq!(serial_type_for_integer(2_147_483_647), 4);
302        assert_eq!(serial_type_for_integer(2_147_483_648), 5);
303        assert_eq!(serial_type_for_integer(i64::MAX), 6);
304        assert_eq!(serial_type_for_integer(i64::MIN), 6);
305    }
306
307    #[test]
308    fn serial_type_for_text_and_blob() {
309        assert_eq!(serial_type_for_text(0), 13);
310        assert_eq!(serial_type_for_text(1), 15);
311        assert_eq!(serial_type_for_text(5), 23);
312        assert_eq!(serial_type_for_blob(0), 12);
313        assert_eq!(serial_type_for_blob(1), 14);
314        assert_eq!(serial_type_for_blob(5), 22);
315    }
316
317    #[test]
318    fn small_type_sizes_table() {
319        assert_eq!(SMALL_TYPE_SIZES[0], 0);
320        assert_eq!(SMALL_TYPE_SIZES[1], 1);
321        assert_eq!(SMALL_TYPE_SIZES[2], 2);
322        assert_eq!(SMALL_TYPE_SIZES[3], 3);
323        assert_eq!(SMALL_TYPE_SIZES[4], 4);
324        assert_eq!(SMALL_TYPE_SIZES[5], 6);
325        assert_eq!(SMALL_TYPE_SIZES[6], 8);
326        assert_eq!(SMALL_TYPE_SIZES[7], 8);
327        assert_eq!(SMALL_TYPE_SIZES[8], 0);
328        assert_eq!(SMALL_TYPE_SIZES[9], 0);
329    }
330
331    #[test]
332    fn varint_roundtrip() {
333        let test_values: &[u64] = &[
334            0,
335            1,
336            127,
337            128,
338            0x3FFF,
339            0x4000,
340            0x001F_FFFF,
341            0x0020_0000,
342            0x0FFF_FFFF,
343            0x1000_0000,
344            u64::from(u32::MAX),
345            u64::MAX / 2,
346            u64::MAX,
347        ];
348
349        let mut buf = [0u8; 9];
350        for &value in test_values {
351            let written = write_varint(&mut buf, value);
352            let (decoded, consumed) = read_varint(&buf[..written]).unwrap();
353            assert_eq!(decoded, value, "roundtrip failed for {value}");
354            assert_eq!(written, consumed, "length mismatch for {value}");
355            assert_eq!(
356                written,
357                varint_len(value),
358                "varint_len mismatch for {value}"
359            );
360        }
361    }
362
363    #[test]
364    fn varint_single_byte() {
365        let mut buf = [0u8; 9];
366        assert_eq!(write_varint(&mut buf, 0), 1);
367        assert_eq!(buf[0], 0);
368
369        assert_eq!(write_varint(&mut buf, 127), 1);
370        assert_eq!(buf[0], 127);
371    }
372
373    #[test]
374    fn varint_two_bytes() {
375        let mut buf = [0u8; 9];
376        let written = write_varint(&mut buf, 128);
377        assert_eq!(written, 2);
378        let (value, consumed) = read_varint(&buf[..written]).unwrap();
379        assert_eq!(value, 128);
380        assert_eq!(consumed, 2);
381    }
382
383    #[test]
384    fn varint_nine_bytes_uses_full_8bit_last_byte() {
385        // Pick a value that requires 9 bytes and has a low byte with the high bit set (0xFF).
386        // If the 9th byte were incorrectly treated as 7-bit, this would not round-trip.
387        let value: u64 = (1u64 << 56) | 0xFF;
388
389        let mut buf = [0u8; 9];
390        let written = write_varint(&mut buf, value);
391        assert_eq!(written, 9);
392        assert_eq!(buf[8], 0xFF);
393
394        // The first 8 bytes must all have the continuation bit set.
395        assert!(buf[..8].iter().all(|b| b & 0x80 != 0));
396
397        let (decoded, consumed) = read_varint(&buf).unwrap();
398        assert_eq!(decoded, value);
399        assert_eq!(consumed, 9);
400    }
401
402    #[test]
403    fn read_varint_empty() {
404        assert!(read_varint(&[]).is_none());
405    }
406
407    // -----------------------------------------------------------------------
408    // bd-1y7b: §11.2 Varint Edge Cases
409    // -----------------------------------------------------------------------
410
411    const BEAD_ID: &str = "bd-1y7b";
412
413    /// Byte-length boundary values: (min_value, max_value, expected_bytes).
414    const BYTE_BOUNDARIES: [(u64, u64, usize); 9] = [
415        (0, 0x7F, 1),                                  // 1 byte: [0, 127]
416        (0x80, 0x3FFF, 2),                             // 2 bytes: [128, 16383]
417        (0x4000, 0x001F_FFFF, 3),                      // 3 bytes: [16384, 2097151]
418        (0x0020_0000, 0x0FFF_FFFF, 4),                 // 4 bytes: [2097152, 268435455]
419        (0x1000_0000, 0x07_FFFF_FFFF, 5),              // 5 bytes: [268435456, 34359738367]
420        (0x08_0000_0000, 0x03FF_FFFF_FFFF, 6),         // 6 bytes
421        (0x0400_0000_0000, 0x01_FFFF_FFFF_FFFF, 7),    // 7 bytes
422        (0x02_0000_0000_0000, 0xFF_FFFF_FFFF_FFFF, 8), // 8 bytes
423        (0x0100_0000_0000_0000, u64::MAX, 9),          // 9 bytes
424    ];
425
426    #[test]
427    fn test_varint_1byte_boundary() {
428        let mut buf = [0u8; 9];
429        for value in [0u64, 1, 42, 126, 127] {
430            let written = write_varint(&mut buf, value);
431            assert_eq!(
432                written, 1,
433                "bead_id={BEAD_ID} case=1byte_boundary value={value}"
434            );
435            let (decoded, consumed) = read_varint(&buf[..written]).unwrap();
436            assert_eq!(decoded, value);
437            assert_eq!(consumed, 1);
438        }
439    }
440
441    #[test]
442    fn test_varint_2byte_boundary() {
443        let mut buf = [0u8; 9];
444        // min 2-byte: 128
445        let written = write_varint(&mut buf, 128);
446        assert_eq!(written, 2, "bead_id={BEAD_ID} case=2byte_min");
447        assert_eq!(
448            &buf[..2],
449            [0x81, 0x00],
450            "bead_id={BEAD_ID} case=2byte_min_bytes"
451        );
452        let (decoded, _) = read_varint(&buf[..2]).unwrap();
453        assert_eq!(decoded, 128);
454
455        // max 2-byte: 16383
456        let written = write_varint(&mut buf, 16383);
457        assert_eq!(written, 2, "bead_id={BEAD_ID} case=2byte_max");
458        assert_eq!(
459            &buf[..2],
460            [0xFF, 0x7F],
461            "bead_id={BEAD_ID} case=2byte_max_bytes"
462        );
463        let (decoded, _) = read_varint(&buf[..2]).unwrap();
464        assert_eq!(decoded, 16383);
465    }
466
467    #[test]
468    fn test_varint_3byte_boundary() {
469        let mut buf = [0u8; 9];
470        let written = write_varint(&mut buf, 16384);
471        assert_eq!(written, 3, "bead_id={BEAD_ID} case=3byte_min");
472        let (decoded, consumed) = read_varint(&buf[..written]).unwrap();
473        assert_eq!(decoded, 16384);
474        assert_eq!(consumed, 3);
475
476        let written = write_varint(&mut buf, 2_097_151);
477        assert_eq!(written, 3, "bead_id={BEAD_ID} case=3byte_max");
478        let (decoded, _) = read_varint(&buf[..written]).unwrap();
479        assert_eq!(decoded, 2_097_151);
480    }
481
482    #[test]
483    fn test_varint_4byte_boundary() {
484        let mut buf = [0u8; 9];
485        let written = write_varint(&mut buf, 2_097_152);
486        assert_eq!(written, 4, "bead_id={BEAD_ID} case=4byte_min");
487        let (decoded, _) = read_varint(&buf[..written]).unwrap();
488        assert_eq!(decoded, 2_097_152);
489
490        let written = write_varint(&mut buf, 268_435_455);
491        assert_eq!(written, 4, "bead_id={BEAD_ID} case=4byte_max");
492        let (decoded, _) = read_varint(&buf[..written]).unwrap();
493        assert_eq!(decoded, 268_435_455);
494    }
495
496    #[test]
497    fn test_varint_5byte_boundary() {
498        let mut buf = [0u8; 9];
499        let written = write_varint(&mut buf, 268_435_456);
500        assert_eq!(written, 5, "bead_id={BEAD_ID} case=5byte_min");
501        let (decoded, _) = read_varint(&buf[..written]).unwrap();
502        assert_eq!(decoded, 268_435_456);
503
504        let written = write_varint(&mut buf, 34_359_738_367);
505        assert_eq!(written, 5, "bead_id={BEAD_ID} case=5byte_max");
506        let (decoded, _) = read_varint(&buf[..written]).unwrap();
507        assert_eq!(decoded, 34_359_738_367);
508    }
509
510    #[test]
511    fn test_varint_6byte_boundary() {
512        let mut buf = [0u8; 9];
513        let written = write_varint(&mut buf, 34_359_738_368);
514        assert_eq!(written, 6, "bead_id={BEAD_ID} case=6byte_min");
515        let (decoded, _) = read_varint(&buf[..written]).unwrap();
516        assert_eq!(decoded, 34_359_738_368);
517
518        let written = write_varint(&mut buf, 4_398_046_511_103);
519        assert_eq!(written, 6, "bead_id={BEAD_ID} case=6byte_max");
520        let (decoded, _) = read_varint(&buf[..written]).unwrap();
521        assert_eq!(decoded, 4_398_046_511_103);
522    }
523
524    #[test]
525    fn test_varint_7byte_boundary() {
526        let mut buf = [0u8; 9];
527        let written = write_varint(&mut buf, 4_398_046_511_104);
528        assert_eq!(written, 7, "bead_id={BEAD_ID} case=7byte_min");
529        let (decoded, _) = read_varint(&buf[..written]).unwrap();
530        assert_eq!(decoded, 4_398_046_511_104);
531
532        let written = write_varint(&mut buf, 562_949_953_421_311);
533        assert_eq!(written, 7, "bead_id={BEAD_ID} case=7byte_max");
534        let (decoded, _) = read_varint(&buf[..written]).unwrap();
535        assert_eq!(decoded, 562_949_953_421_311);
536    }
537
538    #[test]
539    fn test_varint_8byte_boundary() {
540        let mut buf = [0u8; 9];
541        let written = write_varint(&mut buf, 562_949_953_421_312);
542        assert_eq!(written, 8, "bead_id={BEAD_ID} case=8byte_min");
543        let (decoded, _) = read_varint(&buf[..written]).unwrap();
544        assert_eq!(decoded, 562_949_953_421_312);
545
546        let written = write_varint(&mut buf, 72_057_594_037_927_935);
547        assert_eq!(written, 8, "bead_id={BEAD_ID} case=8byte_max");
548        let (decoded, _) = read_varint(&buf[..written]).unwrap();
549        assert_eq!(decoded, 72_057_594_037_927_935);
550    }
551
552    #[test]
553    fn test_varint_9byte_full_u64() {
554        let mut buf = [0u8; 9];
555
556        // min 9-byte value
557        let min9 = 72_057_594_037_927_936u64; // 2^56
558        let written = write_varint(&mut buf, min9);
559        assert_eq!(written, 9, "bead_id={BEAD_ID} case=9byte_min");
560        let (decoded, consumed) = read_varint(&buf).unwrap();
561        assert_eq!(decoded, min9);
562        assert_eq!(consumed, 9);
563
564        // u64::MAX
565        let written = write_varint(&mut buf, u64::MAX);
566        assert_eq!(written, 9, "bead_id={BEAD_ID} case=9byte_max");
567        assert_eq!(
568            buf,
569            [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
570            "bead_id={BEAD_ID} case=9byte_max_bytes u64::MAX must be all-0xFF"
571        );
572        let (decoded, consumed) = read_varint(&buf).unwrap();
573        assert_eq!(decoded, u64::MAX);
574        assert_eq!(consumed, 9);
575    }
576
577    #[test]
578    fn test_varint_9th_byte_all_bits() {
579        // Verify the 9th byte contributes ALL 8 bits, not just 7.
580        // Value chosen so the 9th byte has its high bit set (0x80+).
581        let mut buf = [0u8; 9];
582
583        for low_byte in [0x80u8, 0xFF, 0xAB, 0xFE] {
584            let value = (1u64 << 56) | u64::from(low_byte);
585            let written = write_varint(&mut buf, value);
586            assert_eq!(written, 9);
587            assert_eq!(
588                buf[8], low_byte,
589                "bead_id={BEAD_ID} case=9th_byte_all_bits low={low_byte:#04x}"
590            );
591            // First 8 bytes must all have continuation bit set.
592            for (i, &b) in buf[..8].iter().enumerate() {
593                assert_ne!(
594                    b & 0x80,
595                    0,
596                    "bead_id={BEAD_ID} case=continuation_bit byte={i}"
597                );
598            }
599            let (decoded, consumed) = read_varint(&buf).unwrap();
600            assert_eq!(decoded, value);
601            assert_eq!(consumed, 9);
602        }
603    }
604
605    #[test]
606    fn test_varint_signed_negative_rowid() {
607        let mut buf = [0u8; 9];
608
609        // i64::MIN as u64 via two's complement = 0x8000_0000_0000_0000
610        #[allow(clippy::cast_sign_loss)]
611        let min_u64 = i64::MIN as u64;
612        assert_eq!(min_u64, 0x8000_0000_0000_0000);
613
614        let written = write_varint(&mut buf, min_u64);
615        assert_eq!(written, 9, "bead_id={BEAD_ID} case=i64_min_length");
616        let (decoded, _) = read_varint(&buf[..written]).unwrap();
617        assert_eq!(decoded, min_u64);
618
619        // Cast back to i64
620        #[allow(clippy::cast_possible_wrap)]
621        let signed = decoded as i64;
622        assert_eq!(signed, i64::MIN, "bead_id={BEAD_ID} case=i64_min_roundtrip");
623    }
624
625    #[test]
626    fn test_varint_signed_minus_one() {
627        let mut buf = [0u8; 9];
628
629        // -1i64 as u64 = u64::MAX
630        #[allow(clippy::cast_sign_loss)]
631        let minus_one_u64 = (-1i64) as u64;
632        assert_eq!(minus_one_u64, u64::MAX);
633
634        let written = write_varint(&mut buf, minus_one_u64);
635        assert_eq!(written, 9, "bead_id={BEAD_ID} case=minus_one_length");
636        let (decoded, _) = read_varint(&buf[..written]).unwrap();
637
638        #[allow(clippy::cast_possible_wrap)]
639        let signed = decoded as i64;
640        assert_eq!(signed, -1, "bead_id={BEAD_ID} case=minus_one_roundtrip");
641    }
642
643    #[test]
644    fn test_varint_not_protobuf() {
645        // SQLite varint for u64::MAX: exactly 9 bytes.
646        // Protobuf LEB128 for u64::MAX: 10 bytes (7 bits per byte).
647        let mut buf = [0u8; 9];
648        let sqlite_len = write_varint(&mut buf, u64::MAX);
649        assert_eq!(
650            sqlite_len, 9,
651            "bead_id={BEAD_ID} case=not_protobuf SQLite u64::MAX must be 9 bytes"
652        );
653
654        // Compute protobuf LEB128 length for u64::MAX.
655        let protobuf_len = leb128_len(u64::MAX);
656        assert_eq!(
657            protobuf_len, 10,
658            "bead_id={BEAD_ID} case=not_protobuf protobuf u64::MAX must be 10 bytes"
659        );
660
661        // Also verify a mid-range 9-byte value.
662        let value = 1u64 << 56;
663        let sqlite_len = write_varint(&mut buf, value);
664        assert_eq!(sqlite_len, 9);
665        let protobuf_len = leb128_len(value);
666        assert_eq!(protobuf_len, 9); // protobuf is also 9 for 2^56 (57 bits / 7 = 9 bytes)
667
668        // But the BYTE SEQUENCES differ. Encode both and compare.
669        let mut leb_buf = [0u8; 10];
670        let leb_n = leb128_encode(&mut leb_buf, value);
671        assert_ne!(
672            &buf[..sqlite_len],
673            &leb_buf[..leb_n],
674            "bead_id={BEAD_ID} case=not_protobuf byte sequences must differ for 2^56"
675        );
676    }
677
678    /// Protobuf LEB128 encoding length (for comparison — NOT used by SQLite).
679    fn leb128_len(mut v: u64) -> usize {
680        let mut len = 1;
681        while v >= 0x80 {
682            v >>= 7;
683            len += 1;
684        }
685        len
686    }
687
688    /// Protobuf LEB128 encode (for comparison — NOT used by SQLite).
689    fn leb128_encode(buf: &mut [u8], mut v: u64) -> usize {
690        let mut i = 0;
691        while v >= 0x80 {
692            #[allow(clippy::cast_possible_truncation)]
693            {
694                buf[i] = (v as u8 & 0x7F) | 0x80;
695            }
696            v >>= 7;
697            i += 1;
698        }
699        #[allow(clippy::cast_possible_truncation)]
700        {
701            buf[i] = v as u8;
702        }
703        i + 1
704    }
705
706    #[test]
707    fn test_varint_all_boundaries_roundtrip() {
708        let mut buf = [0u8; 9];
709        for &(min_val, max_val, expected_len) in &BYTE_BOUNDARIES {
710            // Test min value
711            let written = write_varint(&mut buf, min_val);
712            assert_eq!(
713                written, expected_len,
714                "bead_id={BEAD_ID} case=boundary_min value={min_val} expected_len={expected_len}"
715            );
716            let (decoded, consumed) = read_varint(&buf[..written]).unwrap();
717            assert_eq!(decoded, min_val);
718            assert_eq!(consumed, expected_len);
719
720            // Test max value
721            let written = write_varint(&mut buf, max_val);
722            assert_eq!(
723                written, expected_len,
724                "bead_id={BEAD_ID} case=boundary_max value={max_val} expected_len={expected_len}"
725            );
726            let (decoded, consumed) = read_varint(&buf[..written]).unwrap();
727            assert_eq!(decoded, max_val);
728            assert_eq!(consumed, expected_len);
729
730            // Test varint_len matches
731            assert_eq!(varint_len(min_val), expected_len);
732            assert_eq!(varint_len(max_val), expected_len);
733        }
734    }
735
736    #[test]
737    fn test_varint_canonical_encoding() {
738        // Verify encoder always produces minimal-length encoding.
739        // For each boundary, the value just below the min should encode shorter.
740        for &(min_val, _, expected_len) in &BYTE_BOUNDARIES {
741            if min_val == 0 {
742                continue;
743            }
744            let below = min_val - 1;
745            let mut buf = [0u8; 9];
746            let written = write_varint(&mut buf, below);
747            assert!(
748                written < expected_len,
749                "bead_id={BEAD_ID} case=canonical value={below} written={written} \
750                 must be < {expected_len}"
751            );
752        }
753    }
754
755    #[test]
756    fn test_varint_decode_from_longer_buffer() {
757        // Decoder must read exactly N bytes and leave trailing bytes untouched.
758        let mut buf = [0xCC_u8; 16]; // fill with sentinel
759        let written = write_varint(&mut buf, 128); // 2 bytes
760        assert_eq!(written, 2);
761
762        // Read from the full 16-byte buffer.
763        let (decoded, consumed) = read_varint(&buf).unwrap();
764        assert_eq!(decoded, 128);
765        assert_eq!(
766            consumed, 2,
767            "bead_id={BEAD_ID} case=longer_buffer decoder must stop at 2 bytes"
768        );
769        // Trailing bytes must be untouched.
770        assert!(
771            buf[2..].iter().all(|&b| b == 0xCC),
772            "bead_id={BEAD_ID} case=longer_buffer trailing bytes must be untouched"
773        );
774    }
775
776    #[test]
777    fn test_varint_decode_truncated_returns_none() {
778        // A multi-byte varint with insufficient bytes should return None.
779        let mut buf = [0u8; 9];
780        let written = write_varint(&mut buf, 128); // 2 bytes: [0x81, 0x00]
781        assert_eq!(written, 2);
782
783        // Only provide 1 byte of the 2-byte encoding.
784        assert!(
785            read_varint(&buf[..1]).is_none(),
786            "bead_id={BEAD_ID} case=truncated_2byte"
787        );
788
789        // 9-byte value with only 8 bytes available.
790        let written = write_varint(&mut buf, u64::MAX);
791        assert_eq!(written, 9);
792        assert!(
793            read_varint(&buf[..8]).is_none(),
794            "bead_id={BEAD_ID} case=truncated_9byte"
795        );
796    }
797
798    #[test]
799    fn test_varint_golden_vectors() {
800        // Golden byte sequences derived from C SQLite's sqlite3PutVarint.
801        let cases: &[(u64, &[u8])] = &[
802            (0, &[0x00]),
803            (1, &[0x01]),
804            (127, &[0x7F]),
805            (128, &[0x81, 0x00]),
806            (129, &[0x81, 0x01]),
807            (16383, &[0xFF, 0x7F]),
808            (16384, &[0x81, 0x80, 0x00]),
809            (
810                u64::MAX,
811                &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
812            ),
813        ];
814
815        let mut buf = [0u8; 9];
816        for &(value, expected_bytes) in cases {
817            let written = write_varint(&mut buf, value);
818            assert_eq!(
819                &buf[..written],
820                expected_bytes,
821                "bead_id={BEAD_ID} case=golden_vector value={value}"
822            );
823            let (decoded, consumed) = read_varint(expected_bytes).unwrap();
824            assert_eq!(decoded, value);
825            assert_eq!(consumed, expected_bytes.len());
826        }
827    }
828
829    #[test]
830    fn test_varint_i64_max_and_nearby() {
831        let mut buf = [0u8; 9];
832
833        // i64::MAX = 2^63 - 1 = 0x7FFF_FFFF_FFFF_FFFF
834        #[allow(clippy::cast_sign_loss)]
835        let i64_max_u = i64::MAX as u64;
836        let written = write_varint(&mut buf, i64_max_u);
837        assert_eq!(written, 9, "bead_id={BEAD_ID} case=i64_max");
838        let (decoded, _) = read_varint(&buf[..written]).unwrap();
839        assert_eq!(decoded, i64_max_u);
840
841        // i64::MAX + 1 (first "negative" rowid as u64) = 0x8000_0000_0000_0000
842        let written = write_varint(&mut buf, i64_max_u + 1);
843        assert_eq!(written, 9);
844        let (decoded, _) = read_varint(&buf[..written]).unwrap();
845        assert_eq!(decoded, i64_max_u + 1);
846    }
847
848    // ================================================================
849    // Property-based tests (bd-309f)
850    // ================================================================
851    use proptest::prelude::*;
852
853    proptest! {
854        /// Varint roundtrip: write then read recovers the original value.
855        #[test]
856        fn prop_varint_roundtrip(value: u64) {
857            let mut buf = [0u8; 9];
858            let written = write_varint(&mut buf, value);
859            let (decoded, consumed) = read_varint(&buf[..written]).unwrap();
860            prop_assert_eq!(decoded, value);
861            prop_assert_eq!(consumed, written);
862        }
863
864        /// varint_len matches actual bytes written by write_varint.
865        #[test]
866        fn prop_varint_len_matches_write(value: u64) {
867            let mut buf = [0u8; 9];
868            let written = write_varint(&mut buf, value);
869            prop_assert_eq!(varint_len(value), written);
870        }
871
872        /// Varint encoding is canonical: no leading zero-value continuation bytes
873        /// (i.e. shorter encodings don't decode to the same value).
874        #[test]
875        fn prop_varint_canonical(value: u64) {
876            let mut buf = [0u8; 9];
877            let written = write_varint(&mut buf, value);
878            // If more than 1 byte, removing the first byte should NOT decode
879            // to the same value (proves minimality).
880            if written > 1 {
881                if let Some((alt, _)) = read_varint(&buf[1..written]) {
882                    prop_assert_ne!(alt, value, "shorter encoding yields same value — not canonical");
883                }
884            }
885        }
886
887        /// serial_type_for_integer always returns a type whose class is
888        /// Integer, Zero, or One (never Blob, Text, etc.).
889        #[test]
890        fn prop_integer_serial_type_class(value: i64) {
891            let st = serial_type_for_integer(value);
892            let class = classify_serial_type(st);
893            prop_assert!(
894                matches!(class, SerialTypeClass::Integer | SerialTypeClass::Zero | SerialTypeClass::One),
895                "integer value {value} got unexpected class {class:?} for serial type {st}"
896            );
897        }
898
899        /// serial_type_for_integer returns a type whose content size fits the value.
900        #[test]
901        fn prop_integer_serial_type_fits(value: i64) {
902            let st = serial_type_for_integer(value);
903            if let Some(size) = serial_type_len(st) {
904                // Zero-length types are only valid for 0 and 1
905                if size == 0 {
906                    prop_assert!(value == 0 || value == 1);
907                }
908            }
909        }
910
911        /// serial_type_for_text produces odd types >= 13 that classify as Text.
912        #[test]
913        fn prop_text_serial_type(len in 0u64..=1_000_000) {
914            let st = serial_type_for_text(len);
915            prop_assert!(st >= 13, "text type {st} < 13");
916            prop_assert!(st % 2 == 1, "text type {st} is even");
917            prop_assert_eq!(classify_serial_type(st), SerialTypeClass::Text);
918            // Inverse: recover original length
919            prop_assert_eq!(serial_type_len(st), Some(len));
920        }
921
922        /// serial_type_for_blob produces even types >= 12 that classify as Blob.
923        #[test]
924        fn prop_blob_serial_type(len in 0u64..=1_000_000) {
925            let st = serial_type_for_blob(len);
926            prop_assert!(st >= 12, "blob type {st} < 12");
927            prop_assert!(st % 2 == 0, "blob type {st} is odd");
928            prop_assert_eq!(classify_serial_type(st), SerialTypeClass::Blob);
929            // Inverse: recover original length
930            prop_assert_eq!(serial_type_len(st), Some(len));
931        }
932
933        /// Classification is exhaustive and deterministic for arbitrary serial types.
934        #[test]
935        fn prop_classification_deterministic(st: u64) {
936            let class = classify_serial_type(st);
937            // Re-classify to confirm determinism
938            prop_assert_eq!(classify_serial_type(st), class);
939            // Verify consistency with serial_type_len
940            match class {
941                SerialTypeClass::Reserved => {
942                    prop_assert!(serial_type_len(st).is_none());
943                }
944                _ => {
945                    prop_assert!(serial_type_len(st).is_some());
946                }
947            }
948        }
949
950        /// SMALL_TYPE_SIZES matches serial_type_len for all indices 0..128.
951        #[test]
952        #[allow(clippy::cast_possible_truncation)]
953        fn prop_small_type_table_consistent(i in 0u64..128) {
954            let expected = match serial_type_len(i) {
955                Some(n) if n <= 255 => n as u8,
956                _ => 0,
957            };
958            prop_assert_eq!(SMALL_TYPE_SIZES[usize::try_from(i).unwrap()], expected);
959        }
960
961        /// Varint encoding uses at most 9 bytes and at least 1 byte.
962        #[test]
963        fn prop_varint_len_bounds(value: u64) {
964            let len = varint_len(value);
965            prop_assert!((1..=9).contains(&len), "varint_len({value}) = {len}");
966        }
967
968        /// For 9-byte varints, the first 8 bytes all have the continuation bit set.
969        #[test]
970        fn prop_nine_byte_varint_continuation_bits(value in 0x0100_0000_0000_0000u64..=u64::MAX) {
971            let mut buf = [0u8; 9];
972            let written = write_varint(&mut buf, value);
973            if written == 9 {
974                for (i, &byte) in buf[..8].iter().enumerate() {
975                    prop_assert!(byte & 0x80 != 0, "byte {i} missing continuation bit for value {value}");
976                }
977            }
978        }
979
980        /// read_varint on a truncated buffer returns None.
981        #[test]
982        fn prop_truncated_varint_returns_none(value: u64) {
983            let mut buf = [0u8; 9];
984            let written = write_varint(&mut buf, value);
985            if written > 1 {
986                // Truncate by removing the last byte
987                prop_assert!(read_varint(&buf[..written - 1]).is_none() ||
988                    read_varint(&buf[..written - 1]).unwrap().0 != value,
989                    "truncated buffer should not decode to original value");
990            }
991        }
992    }
993}
fsqlite_types/serial_type.rs

fsqlite_types/
serial_type.rs