sochdb_storage/
validation.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! SSTable Validation Layer
16//!
17//! Implements defense-in-depth validation for memory-mapped files to prevent crashes
18//! from corrupted, truncated, or tampered files.
19//!
20//! ## Safety Guarantees
21//!
22//! 1. **Pre-mmap validation**: Verify file integrity before memory mapping
23//! 2. **Magic number check**: Ensure file is valid SSTable format
24//! 3. **Size validation**: Prevent reading beyond file boundaries
25//! 4. **Checksum verification**: Detect bit rot and tampering
26//!
27//! ## Formal Safety Invariant
28//!
29//! ∀p ∈ MappedPages: validate_before_mmap(file) = Ok ⟹ p.valid = true
30//!
31//! This establishes that all memory-mapped pages are valid before dereferencing.
32
33use blake3::Hasher;
34use std::fs::File;
35use std::io::{Read, Seek, SeekFrom};
36use std::path::Path;
37use sochdb_core::{Result, SochDBError};
38
/// Minimum valid SSTable size (header + at least one edge + footer)
/// Header (8 bytes magic) + Edge (128 bytes) + Footer (144 bytes) = 280 bytes
pub const MIN_SSTABLE_SIZE: u64 = 280;

/// SSTable magic number: the bytes of "AFFV2025" in ASCII, most significant byte first
/// (0x41 'A', 0x46 'F', 0x46 'F', 0x56 'V', 0x32 '2', 0x30 '0', 0x32 '2', 0x35 '5').
///
/// The validator decodes the stored value with `u64::from_le_bytes`, so the byte
/// sequence on disk is the reverse of that string.
pub const MAGIC_NUMBER: u64 = 0x4146465632303235;

/// Footer size in bytes
pub const FOOTER_SIZE: usize = 144;
48
49/// Validation error types
/// Structured description of why an SSTable file failed validation.
///
/// Each variant carries the values needed to render its `Display` message.
#[derive(Debug)]
pub enum ValidationError {
    /// The file is shorter than the minimum acceptable size.
    TooSmall {
        /// Observed file size in bytes.
        actual: u64,
        /// Smallest size in bytes that would have been accepted.
        minimum: u64,
    },
    /// The magic number read from the file differs from the expected one.
    BadMagic {
        /// Magic number the validator was configured to accept.
        expected: u64,
        /// Magic number actually found in the file.
        actual: u64,
    },
    /// The computed 32-byte checksum differs from the expected one.
    ChecksumMismatch {
        /// Checksum the caller expected.
        expected: [u8; 32],
        /// Checksum computed from the file contents.
        actual: [u8; 32],
    },
    /// An underlying I/O operation failed while validating.
    IoError(std::io::Error),
}
66
67impl std::fmt::Display for ValidationError {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        match self {
70            ValidationError::TooSmall { actual, minimum } => {
71                write!(
72                    f,
73                    "SSTable file too small: {} bytes (minimum: {})",
74                    actual, minimum
75                )
76            }
77            ValidationError::BadMagic { expected, actual } => {
78                write!(
79                    f,
80                    "Invalid magic number: {:#x} (expected: {:#x})",
81                    actual, expected
82                )
83            }
84            ValidationError::ChecksumMismatch { expected, actual } => {
85                write!(
86                    f,
87                    "Checksum mismatch: expected {}, got {}",
88                    hex::encode(expected),
89                    hex::encode(actual)
90                )
91            }
92            ValidationError::IoError(e) => write!(f, "I/O error during validation: {}", e),
93        }
94    }
95}
96
// Marker impl: only the trait's default methods are used, so `source()` is not
// overridden here.
impl std::error::Error for ValidationError {}
98
99impl From<std::io::Error> for ValidationError {
100    fn from(e: std::io::Error) -> Self {
101        ValidationError::IoError(e)
102    }
103}
104
/// SSTable validator for pre-mmap validation.
///
/// Holds the configuration used by the validation routines: which magic number to
/// accept and whether (and against what) to verify a full-file checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SSTableValidator {
    /// Expected magic number (default: MAGIC_NUMBER)
    pub expected_magic: u64,

    /// Whether to perform full file checksum (expensive, optional).
    ///
    /// Only has an effect when `expected_checksum` is also set.
    pub verify_full_checksum: bool,

    /// Expected file checksum (if known from metadata)
    pub expected_checksum: Option<[u8; 32]>,
}
116
117impl Default for SSTableValidator {
118    fn default() -> Self {
119        Self {
120            expected_magic: MAGIC_NUMBER,
121            verify_full_checksum: false,
122            expected_checksum: None,
123        }
124    }
125}
126
127impl SSTableValidator {
128    /// Create validator with full checksum verification enabled
129    pub fn with_checksum_verification(expected_checksum: [u8; 32]) -> Self {
130        Self {
131            expected_magic: MAGIC_NUMBER,
132            verify_full_checksum: true,
133            expected_checksum: Some(expected_checksum),
134        }
135    }
136
137    /// Validate SSTable file before memory mapping
138    ///
139    /// This performs comprehensive validation WITHOUT mmap to establish safety invariants:
140    /// 1. File size >= minimum (header + footer)
141    /// 2. Magic number matches expected value
142    /// 3. Footer is readable and well-formed
143    /// 4. Optional: Full file checksum (if verify_full_checksum = true)
144    ///
145    /// **Performance cost:** ~5-10ms for basic validation, ~50-100ms for full checksum
146    ///
147    /// **Safety benefit:** Prevents segfaults from corrupted/truncated files
148    pub fn validate_before_mmap(&self, file: &mut File) -> Result<()> {
149        // 1. Check file size >= minimum
150        let metadata = file.metadata()?;
151
152        let file_size = metadata.len();
153        if file_size < MIN_SSTABLE_SIZE {
154            return Err(SochDBError::Corruption(format!(
155                "SSTable file too small: {} bytes (minimum: {})",
156                file_size, MIN_SSTABLE_SIZE
157            )));
158        }
159
160        // 2. Read and verify magic number from footer (last bytes)
161        file.seek(SeekFrom::End(-(FOOTER_SIZE as i64)))?;
162
163        let mut footer_bytes = vec![0u8; FOOTER_SIZE];
164        file.read_exact(&mut footer_bytes)?;
165
166        // Extract magic number (first 8 bytes of footer)
167        let magic = u64::from_le_bytes(footer_bytes[0..8].try_into().unwrap());
168        if magic != self.expected_magic {
169            return Err(SochDBError::Corruption(format!(
170                "Invalid SSTable magic number: {:#x} (expected: {:#x})",
171                magic, self.expected_magic
172            )));
173        }
174
175        // 3. Verify footer structure integrity
176        // Extract num_entries (offset 56 in footer)
177        let num_entries = u64::from_le_bytes(footer_bytes[56..64].try_into().unwrap());
178
179        // Sanity check: num_entries should be reasonable
180        // Max entries in one SSTable: ~10M edges (each 128 bytes = 1.28GB file)
181        const MAX_REASONABLE_ENTRIES: u64 = 10_000_000;
182        if num_entries > MAX_REASONABLE_ENTRIES {
183            return Err(SochDBError::Corruption(format!(
184                "Unreasonable num_entries in footer: {} (max: {})",
185                num_entries, MAX_REASONABLE_ENTRIES
186            )));
187        }
188
189        // Verify file size matches expected content
190        // Minimum size check: footer + bloom + index + at least num_entries * 128
191        let min_expected_size = FOOTER_SIZE as u64 + num_entries * 128;
192        if file_size < min_expected_size {
193            return Err(SochDBError::Corruption(format!(
194                "File size {} too small for {} entries (expected >= {})",
195                file_size, num_entries, min_expected_size
196            )));
197        }
198
199        // 4. Optional: Verify full file checksum
200        if self.verify_full_checksum
201            && let Some(expected) = self.expected_checksum
202        {
203            let computed = self.compute_file_checksum(file)?;
204            if computed != expected {
205                return Err(SochDBError::Corruption(format!(
206                    "Checksum mismatch: expected {}, got {}",
207                    hex::encode(expected),
208                    hex::encode(computed)
209                )));
210            }
211        }
212
213        Ok(())
214    }
215
216    /// Compute BLAKE3 checksum of entire file
217    ///
218    /// **Performance:** O(file_size) - reads entire file once
219    /// For 1GB file: ~1 second on modern SSD
220    ///
221    /// **Use case:** One-time validation during SSTable open, or periodic integrity checks
222    fn compute_file_checksum(&self, file: &mut File) -> Result<[u8; 32]> {
223        // Seek to beginning
224        file.seek(SeekFrom::Start(0))?;
225
226        // Read file in chunks and hash
227        let mut hasher = Hasher::new();
228        let mut buffer = vec![0u8; 64 * 1024]; // 64KB chunks
229
230        loop {
231            let bytes_read = file.read(&mut buffer)?;
232
233            if bytes_read == 0 {
234                break;
235            }
236
237            hasher.update(&buffer[..bytes_read]);
238        }
239
240        let hash = hasher.finalize();
241        Ok(*hash.as_bytes())
242    }
243
244    /// Fast validation: only check magic number and file size
245    ///
246    /// **Performance:** O(1) - reads only footer
247    /// **Use case:** Production hot path where performance is critical
248    pub fn validate_fast(&self, file: &mut File) -> Result<()> {
249        // 1. Check file size
250        let metadata = file.metadata()?;
251
252        let file_size = metadata.len();
253        if file_size < MIN_SSTABLE_SIZE {
254            return Err(SochDBError::Corruption(format!(
255                "SSTable file too small: {} bytes",
256                file_size
257            )));
258        }
259
260        // 2. Verify magic number
261        file.seek(SeekFrom::End(-(FOOTER_SIZE as i64)))?;
262
263        let mut magic_bytes = [0u8; 8];
264        file.read_exact(&mut magic_bytes)?;
265
266        let magic = u64::from_le_bytes(magic_bytes);
267        if magic != self.expected_magic {
268            return Err(SochDBError::Corruption(format!(
269                "Invalid magic number: {:#x}",
270                magic
271            )));
272        }
273
274        Ok(())
275    }
276}
277
278/// Validate SSTable file at path (convenience function)
279///
280/// Performs fast validation (magic + size only) unless full_validation is true.
281pub fn validate_sstable_file<P: AsRef<Path>>(path: P, full_validation: bool) -> Result<()> {
282    let mut file = File::open(path.as_ref())?;
283
284    let validator = SSTableValidator::default();
285
286    if full_validation {
287        validator.validate_before_mmap(&mut file)
288    } else {
289        validator.validate_fast(&mut file)
290    }
291}
292
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Builds the bytes of a minimum-size file whose footer carries `magic` in its
    /// first 8 bytes and `num_entries` at footer offset 56; every other byte is zero.
    fn sstable_bytes(magic: u64, num_entries: u64) -> Vec<u8> {
        let mut content = vec![0u8; MIN_SSTABLE_SIZE as usize];
        let footer_offset = content.len() - FOOTER_SIZE;
        content[footer_offset..footer_offset + 8].copy_from_slice(&magic.to_le_bytes());
        content[footer_offset + 56..footer_offset + 64].copy_from_slice(&num_entries.to_le_bytes());
        content
    }

    /// Writes `content` to a temp file and reopens it for reading.
    ///
    /// The `NamedTempFile` is returned too so the file is not deleted while the test
    /// is still using the reopened handle.
    fn open_temp(content: &[u8]) -> (NamedTempFile, File) {
        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(content).unwrap();
        tmp.flush().unwrap();
        let file = File::open(tmp.path()).unwrap();
        (tmp, file)
    }

    #[test]
    fn test_validate_too_small() {
        // Only 100 bytes: below MIN_SSTABLE_SIZE
        let (_tmp, mut file) = open_temp(&[0u8; 100]);
        let validator = SSTableValidator::default();

        let result = validator.validate_fast(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("too small"));

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("too small"));
    }

    #[test]
    fn test_validate_bad_magic() {
        // Large enough to pass the size check, but with the wrong magic number
        let (_tmp, mut file) = open_temp(&sstable_bytes(0xDEADBEEF, 0));
        let validator = SSTableValidator::default();

        let result = validator.validate_fast(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("magic"));

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("magic"));
    }

    #[test]
    fn test_validate_correct_file() {
        // Minimal valid file: correct magic, one entry (144 + 128 <= 280 bytes)
        let (_tmp, mut file) = open_temp(&sstable_bytes(MAGIC_NUMBER, 1));
        let validator = SSTableValidator::default();

        assert!(validator.validate_fast(&mut file).is_ok());
        assert!(validator.validate_before_mmap(&mut file).is_ok());
    }

    #[test]
    fn test_validate_unreasonable_num_entries() {
        // One more than the 10_000_000 entry cap
        let (_tmp, mut file) = open_temp(&sstable_bytes(MAGIC_NUMBER, 10_000_001));
        let validator = SSTableValidator::default();

        // The fast path does not look at num_entries
        assert!(validator.validate_fast(&mut file).is_ok());

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Unreasonable num_entries"));
    }

    #[test]
    fn test_validate_size_entry_mismatch() {
        // Two entries need 144 + 2 * 128 = 400 bytes, but the file has only 280
        let (_tmp, mut file) = open_temp(&sstable_bytes(MAGIC_NUMBER, 2));
        let validator = SSTableValidator::default();

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("too small for"));
    }

    #[test]
    fn test_validate_full_checksum() {
        let content = sstable_bytes(MAGIC_NUMBER, 1);
        let (_tmp, mut file) = open_temp(&content);

        // Matching checksum passes
        let expected = *blake3::hash(&content).as_bytes();
        let validator = SSTableValidator::with_checksum_verification(expected);
        assert!(validator.validate_before_mmap(&mut file).is_ok());

        // Any other checksum is rejected
        let validator = SSTableValidator::with_checksum_verification([0u8; 32]);
        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Checksum mismatch"));
    }
}
362}