rocksdb_fileformat/
types.rs

1// Copyright 2024 YaleDB Contributors
2// SPDX-License-Identifier: Apache-2.0
3
/// Magic number associated with the current (non-legacy) footer layout.
pub const ROCKSDB_MAGIC_NUMBER: u64 = 0x88e2_41b7_85f4_cff7;
/// Magic number associated with the legacy footer layout.
pub const LEGACY_MAGIC_NUMBER: u64 = 0xdb47_7524_8b80_fb57;

/// Upper bound, in bytes, on the encoded size of a single block handle.
pub const MAX_BLOCK_HANDLE_ENCODED_LENGTH: usize = 20;

/// Size in bytes of the current footer (evaluates to 53).
// NOTE(review): the breakdown below (1 checksum-type byte, room for two
// maximum-length block handles, a 4-byte format version, an 8-byte magic
// number) mirrors RocksDB's footer definition -- confirm against format.h
// before changing any term.
pub const ROCKSDB_FOOTER_SIZE: usize = 1 + 2 * MAX_BLOCK_HANDLE_ENCODED_LENGTH + 4 + 8;
/// Size in bytes of the legacy footer (evaluates to 48).
// NOTE(review): presumably room for two maximum-length block handles followed
// by the 8-byte magic number -- confirm against RocksDB's format.h.
pub const LEGACY_FOOTER_SIZE: usize = 2 * MAX_BLOCK_HANDLE_ENCODED_LENGTH + 8;

/// Default value for `WriteOptions::block_size`.
pub const DEFAULT_BLOCK_SIZE: usize = 4096;
/// Default value for `WriteOptions::block_restart_interval`.
pub const DEFAULT_BLOCK_RESTART_INTERVAL: usize = 16;
14
/// Block checksum algorithm selector.
///
/// The explicit discriminants are the raw `u8` tags accepted by
/// `TryFrom<u8>`; see `ChecksumType::calculate` for how each variant is
/// computed.
///
/// Reference:
/// https://github.com/facebook/rocksdb/blob/v10.5.1/include/rocksdb/table.h#L55
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumType {
    /// No checksum; `calculate` always yields 0.
    None = 0,
    /// CRC32C with the RocksDB masking step applied.
    CRC32c = 1,
    /// 32-bit xxHash with seed 0.
    Hash = 2,
    /// Lower 32 bits of 64-bit xxHash with seed 0.
    Hash64 = 3,
    /// XXH3-based checksum. Supported since RocksDB 6.27.
    XXH3 = 4,
}
25
26impl TryFrom<u8> for ChecksumType {
27    type Error = crate::error::Error;
28
29    fn try_from(value: u8) -> Result<Self, Self::Error> {
30        match value {
31            0 => Ok(ChecksumType::None),
32            1 => Ok(ChecksumType::CRC32c),
33            2 => Ok(ChecksumType::Hash),
34            3 => Ok(ChecksumType::Hash64),
35            4 => Ok(ChecksumType::XXH3),
36            _ => Err(crate::error::Error::UnsupportedChecksumType(value)),
37        }
38    }
39}
40
41impl ChecksumType {
42    /// Calculate checksum for the given data using RocksDB-compatible algorithms
43    pub fn calculate(self, data: &[u8]) -> u32 {
44        match self {
45            ChecksumType::None => 0,
46            ChecksumType::CRC32c => {
47                // Apply RocksDB CRC32c masking: rotate right by 15 bits and add constant
48                const MASK_DELTA: u32 = 0xa282ead8;
49                let crc = crc32c::crc32c(data);
50                ((crc >> 15) | (crc << 17)).wrapping_add(MASK_DELTA)
51            }
52            ChecksumType::Hash => {
53                use xxhash_rust::xxh32::xxh32;
54                xxh32(data, 0)
55            }
56            ChecksumType::Hash64 => {
57                use xxhash_rust::xxh64::xxh64;
58                (xxh64(data, 0) & 0xFFFFFFFF) as u32
59            }
60            ChecksumType::XXH3 => {
61                if data.is_empty() {
62                    // Special case for empty data
63                    0
64                } else {
65                    use xxhash_rust::xxh3::xxh3_64;
66                    // Compute XXH3 on all bytes except the last one
67                    let without_last = &data[..data.len() - 1];
68                    let v = (xxh3_64(without_last) & 0xFFFFFFFF) as u32;
69                    // Apply ModifyChecksumForLastByte with the last byte
70                    const RANDOM_PRIME: u32 = 0x6b9083d9;
71                    let last_byte = data[data.len() - 1] as u32;
72                    v ^ (last_byte.wrapping_mul(RANDOM_PRIME))
73                }
74            }
75        }
76    }
77}
78
/// Returns the least-significant 32 bits of `v`.
fn lower32_of64(v: u64) -> u32 {
    // Masking first makes the truncating cast lossless.
    (v & u64::from(u32::MAX)) as u32
}
83
/// Returns the most-significant 32 bits of `v`.
fn upper32_of64(v: u64) -> u32 {
    let shifted = v >> u32::BITS;
    // After the shift only 32 significant bits remain, so the cast is lossless.
    shifted as u32
}
88
89/// Calculate checksum modifier for context based on RocksDB implementation
90/// This provides additional entropy based on the file offset to prevent block reuse attacks
91pub fn checksum_modifier_for_context(base_context_checksum: u32, offset: u64) -> u32 {
92    if base_context_checksum == 0 {
93        0
94    } else {
95        base_context_checksum ^ (lower32_of64(offset).wrapping_add(upper32_of64(offset)))
96    }
97}
98
/// Block compression algorithm selector.
///
/// The explicit discriminants are the raw `u8` tags accepted by
/// `TryFrom<u8>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionType {
    /// Tag 0.
    None = 0,
    /// Tag 1.
    Snappy = 1,
    /// Tag 2.
    Zlib = 2,
    /// Tag 3.
    BZip2 = 3,
    /// Tag 4.
    LZ4 = 4,
    /// Tag 5.
    LZ4HC = 5,
    /// Tag 6.
    XPRESS = 6,
    /// Tag 7.
    ZSTD = 7,
}
110
111impl TryFrom<u8> for CompressionType {
112    type Error = crate::error::Error;
113
114    fn try_from(value: u8) -> Result<Self, Self::Error> {
115        match value {
116            0 => Ok(CompressionType::None),
117            1 => Ok(CompressionType::Snappy),
118            2 => Ok(CompressionType::Zlib),
119            3 => Ok(CompressionType::BZip2),
120            4 => Ok(CompressionType::LZ4),
121            5 => Ok(CompressionType::LZ4HC),
122            6 => Ok(CompressionType::XPRESS),
123            7 => Ok(CompressionType::ZSTD),
124            _ => Err(crate::error::Error::UnsupportedCompressionType(value)),
125        }
126    }
127}
128
/// Table format versions this crate recognises.
///
/// The explicit discriminants are the raw `u32` values accepted by
/// `TryFrom<u32>`. The derived ordering follows declaration order, so a
/// later version compares greater than an earlier one.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FormatVersion {
    /// Format version 5.
    V5 = 5,
    /// Format version 6.
    V6 = 6,
    /// Format version 7.
    V7 = 7,
}
135
136impl TryFrom<u32> for FormatVersion {
137    type Error = crate::error::Error;
138
139    fn try_from(value: u32) -> Result<Self, Self::Error> {
140        match value {
141            5 => Ok(FormatVersion::V5),
142            6 => Ok(FormatVersion::V6),
143            7 => Ok(FormatVersion::V7),
144            _ => Err(crate::error::Error::UnsupportedFormatVersion(value)),
145        }
146    }
147}
148
149/// Configuration options for SstFileWriter
150#[derive(Debug, Clone)]
151pub struct WriteOptions {
152    pub compression: CompressionType,
153    pub block_size: usize,
154    pub block_restart_interval: usize,
155    pub format_version: FormatVersion,
156    pub checksum_type: ChecksumType,
157}
158
159impl Default for WriteOptions {
160    fn default() -> Self {
161        WriteOptions {
162            compression: CompressionType::None,
163            block_size: DEFAULT_BLOCK_SIZE,
164            block_restart_interval: DEFAULT_BLOCK_RESTART_INTERVAL,
165            format_version: FormatVersion::V5,
166            checksum_type: ChecksumType::CRC32c,
167        }
168    }
169}
170
/// Settings that control how SST files are read.
#[derive(Debug, Clone)]
pub struct ReadOptions {
    /// When `true`, checksums are verified while reading the file.
    ///
    /// On by default, to protect data integrity across all format versions.
    pub verify_checksums: bool,
}
178
179impl Default for ReadOptions {
180    fn default() -> Self {
181        ReadOptions {
182            verify_checksums: true,
183        }
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// The modifier is 0 when the base is 0; otherwise it is the base XOR the
    /// wrapping sum of the two 32-bit halves of the offset.
    #[test]
    fn test_checksum_modifier_for_context() {
        // A zero base context checksum always produces 0.
        assert_eq!(checksum_modifier_for_context(0, 12345), 0);

        // (base, offset, lower 32 bits of offset, upper 32 bits of offset)
        let cases: [(u32, u64, u32, u32); 2] = [
            // Ordinary non-zero base.
            (0x12345678, 0x9ABCDEF012345678, 0x12345678, 0x9ABCDEF0),
            // Both halves are u32::MAX, so their sum wraps around.
            (0xFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF),
        ];

        for &(base, offset, low, high) in cases.iter() {
            let expected = base ^ low.wrapping_add(high);
            assert_eq!(checksum_modifier_for_context(base, offset), expected);
        }
    }

    /// Checks every checksum algorithm against values generated from real
    /// RocksDB `ComputeBuiltinChecksum`, so the Rust implementation matches
    /// RocksDB's behavior exactly.
    #[test]
    fn test_calculate_checksum_against_rocksdb() {
        // Column order of the expected values in `VECTORS`.
        const ALGORITHMS: [ChecksumType; 5] = [
            ChecksumType::None,
            ChecksumType::CRC32c,
            ChecksumType::Hash,
            ChecksumType::Hash64,
            ChecksumType::XXH3,
        ];

        // (case name, input bytes, expected checksum per entry of `ALGORITHMS`)
        const VECTORS: &[(&str, &[u8], [u32; 5])] = &[
            (
                "empty",
                &[],
                [0x00000000, 0xa282ead8, 0x02cc5d05, 0x51d8e999, 0x00000000],
            ),
            (
                "single_byte",
                &[0x41],
                [0x00000000, 0x3e60adb3, 0x10659a4d, 0xd095b684, 0x7762eedb],
            ),
            (
                "hello_world",
                b"Hello, World!",
                [0x00000000, 0xc3538582, 0x4007de50, 0x080fe47f, 0x368463b9],
            ),
            (
                "binary_data",
                &[0x00, 0x01, 0x02, 0x03, 0xff, 0xfe, 0xfd, 0xfc],
                [0x00000000, 0x78d58d88, 0x617c5b1f, 0xbfef626c, 0xdd655b30],
            ),
            (
                "repeated_x",
                &[0x58; 16],
                [0x00000000, 0x5e3ae519, 0xa1cd4bfc, 0x91542cc1, 0x2bbfd401],
            ),
            (
                "all_zeros",
                &[0x00; 16],
                [0x00000000, 0xd8576fb9, 0x8e022b3a, 0x16247c32, 0xc3ed6bc7],
            ),
            (
                "all_ones",
                &[0xff; 16],
                [0x00000000, 0x3aa4c936, 0xd85160aa, 0x6a57c444, 0xbfb5dfd3],
            ),
            (
                "ascii_sequence",
                b"0123456789ABCDEF",
                [0x00000000, 0x02925688, 0xf9f50986, 0xdd7aeaa6, 0xfe511f1c],
            ),
            (
                "longer_text",
                b"The quick brown fox jumps over the lazy dog",
                [0x00000000, 0xaa8b2f9c, 0xe85ea4de, 0x1fda71bc, 0xc02e563c],
            ),
        ];

        for (name, data, expected_by_algorithm) in VECTORS {
            for (checksum_type, expected) in ALGORITHMS.iter().zip(expected_by_algorithm.iter()) {
                let result = checksum_type.calculate(data);
                assert_eq!(
                    result, *expected,
                    "Failed for test case '{}' with checksum type {:?}. Expected 0x{:08x}, got 0x{:08x}",
                    name, checksum_type, expected, result
                );
            }
        }
    }
}