// sochdb_storage/validation.rs
use blake3::Hasher;
34use std::fs::File;
35use std::io::{Read, Seek, SeekFrom};
36use std::path::Path;
37use sochdb_core::{Result, SochDBError};
38
39pub const MIN_SSTABLE_SIZE: u64 = 280;
42
43pub const MAGIC_NUMBER: u64 = 0x4146465632303235;
45
46pub const FOOTER_SIZE: usize = 144;
48
49#[derive(Debug)]
51pub enum ValidationError {
52 TooSmall {
53 actual: u64,
54 minimum: u64,
55 },
56 BadMagic {
57 expected: u64,
58 actual: u64,
59 },
60 ChecksumMismatch {
61 expected: [u8; 32],
62 actual: [u8; 32],
63 },
64 IoError(std::io::Error),
65}
66
67impl std::fmt::Display for ValidationError {
68 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69 match self {
70 ValidationError::TooSmall { actual, minimum } => {
71 write!(
72 f,
73 "SSTable file too small: {} bytes (minimum: {})",
74 actual, minimum
75 )
76 }
77 ValidationError::BadMagic { expected, actual } => {
78 write!(
79 f,
80 "Invalid magic number: {:#x} (expected: {:#x})",
81 actual, expected
82 )
83 }
84 ValidationError::ChecksumMismatch { expected, actual } => {
85 write!(
86 f,
87 "Checksum mismatch: expected {}, got {}",
88 hex::encode(expected),
89 hex::encode(actual)
90 )
91 }
92 ValidationError::IoError(e) => write!(f, "I/O error during validation: {}", e),
93 }
94 }
95}
96
97impl std::error::Error for ValidationError {}
98
99impl From<std::io::Error> for ValidationError {
100 fn from(e: std::io::Error) -> Self {
101 ValidationError::IoError(e)
102 }
103}
104
105pub struct SSTableValidator {
107 pub expected_magic: u64,
109
110 pub verify_full_checksum: bool,
112
113 pub expected_checksum: Option<[u8; 32]>,
115}
116
117impl Default for SSTableValidator {
118 fn default() -> Self {
119 Self {
120 expected_magic: MAGIC_NUMBER,
121 verify_full_checksum: false,
122 expected_checksum: None,
123 }
124 }
125}
126
127impl SSTableValidator {
128 pub fn with_checksum_verification(expected_checksum: [u8; 32]) -> Self {
130 Self {
131 expected_magic: MAGIC_NUMBER,
132 verify_full_checksum: true,
133 expected_checksum: Some(expected_checksum),
134 }
135 }
136
137 pub fn validate_before_mmap(&self, file: &mut File) -> Result<()> {
149 let metadata = file.metadata()?;
151
152 let file_size = metadata.len();
153 if file_size < MIN_SSTABLE_SIZE {
154 return Err(SochDBError::Corruption(format!(
155 "SSTable file too small: {} bytes (minimum: {})",
156 file_size, MIN_SSTABLE_SIZE
157 )));
158 }
159
160 file.seek(SeekFrom::End(-(FOOTER_SIZE as i64)))?;
162
163 let mut footer_bytes = vec![0u8; FOOTER_SIZE];
164 file.read_exact(&mut footer_bytes)?;
165
166 let magic = u64::from_le_bytes(footer_bytes[0..8].try_into().unwrap());
168 if magic != self.expected_magic {
169 return Err(SochDBError::Corruption(format!(
170 "Invalid SSTable magic number: {:#x} (expected: {:#x})",
171 magic, self.expected_magic
172 )));
173 }
174
175 let num_entries = u64::from_le_bytes(footer_bytes[56..64].try_into().unwrap());
178
179 const MAX_REASONABLE_ENTRIES: u64 = 10_000_000;
182 if num_entries > MAX_REASONABLE_ENTRIES {
183 return Err(SochDBError::Corruption(format!(
184 "Unreasonable num_entries in footer: {} (max: {})",
185 num_entries, MAX_REASONABLE_ENTRIES
186 )));
187 }
188
189 let min_expected_size = FOOTER_SIZE as u64 + num_entries * 128;
192 if file_size < min_expected_size {
193 return Err(SochDBError::Corruption(format!(
194 "File size {} too small for {} entries (expected >= {})",
195 file_size, num_entries, min_expected_size
196 )));
197 }
198
199 if self.verify_full_checksum
201 && let Some(expected) = self.expected_checksum
202 {
203 let computed = self.compute_file_checksum(file)?;
204 if computed != expected {
205 return Err(SochDBError::Corruption(format!(
206 "Checksum mismatch: expected {}, got {}",
207 hex::encode(expected),
208 hex::encode(computed)
209 )));
210 }
211 }
212
213 Ok(())
214 }
215
216 fn compute_file_checksum(&self, file: &mut File) -> Result<[u8; 32]> {
223 file.seek(SeekFrom::Start(0))?;
225
226 let mut hasher = Hasher::new();
228 let mut buffer = vec![0u8; 64 * 1024]; loop {
231 let bytes_read = file.read(&mut buffer)?;
232
233 if bytes_read == 0 {
234 break;
235 }
236
237 hasher.update(&buffer[..bytes_read]);
238 }
239
240 let hash = hasher.finalize();
241 Ok(*hash.as_bytes())
242 }
243
244 pub fn validate_fast(&self, file: &mut File) -> Result<()> {
249 let metadata = file.metadata()?;
251
252 let file_size = metadata.len();
253 if file_size < MIN_SSTABLE_SIZE {
254 return Err(SochDBError::Corruption(format!(
255 "SSTable file too small: {} bytes",
256 file_size
257 )));
258 }
259
260 file.seek(SeekFrom::End(-(FOOTER_SIZE as i64)))?;
262
263 let mut magic_bytes = [0u8; 8];
264 file.read_exact(&mut magic_bytes)?;
265
266 let magic = u64::from_le_bytes(magic_bytes);
267 if magic != self.expected_magic {
268 return Err(SochDBError::Corruption(format!(
269 "Invalid magic number: {:#x}",
270 magic
271 )));
272 }
273
274 Ok(())
275 }
276}
277
278pub fn validate_sstable_file<P: AsRef<Path>>(path: P, full_validation: bool) -> Result<()> {
282 let mut file = File::open(path.as_ref())?;
283
284 let validator = SSTableValidator::default();
285
286 if full_validation {
287 validator.validate_before_mmap(&mut file)
288 } else {
289 validator.validate_fast(&mut file)
290 }
291}
292
293#[cfg(test)]
294mod tests {
295 use super::*;
296 use std::io::Write;
297 use tempfile::NamedTempFile;
298
299 #[test]
300 fn test_validate_too_small() {
301 let mut file = NamedTempFile::new().unwrap();
303 file.write_all(&[0u8; 100]).unwrap(); file.flush().unwrap();
305
306 let mut file = File::open(file.path()).unwrap();
307 let validator = SSTableValidator::default();
308
309 let result = validator.validate_fast(&mut file);
310 assert!(result.is_err());
311 assert!(result.unwrap_err().to_string().contains("too small"));
312 }
313
314 #[test]
315 fn test_validate_bad_magic() {
316 let mut file = NamedTempFile::new().unwrap();
318
319 let mut content = vec![0u8; MIN_SSTABLE_SIZE as usize];
321
322 let footer_offset = content.len() - FOOTER_SIZE;
324 let wrong_magic: u64 = 0xDEADBEEF;
325 content[footer_offset..footer_offset + 8].copy_from_slice(&wrong_magic.to_le_bytes());
326
327 file.write_all(&content).unwrap();
328 file.flush().unwrap();
329
330 let mut file = File::open(file.path()).unwrap();
331 let validator = SSTableValidator::default();
332
333 let result = validator.validate_fast(&mut file);
334 assert!(result.is_err());
335 assert!(result.unwrap_err().to_string().contains("magic"));
336 }
337
338 #[test]
339 fn test_validate_correct_file() {
340 let mut file = NamedTempFile::new().unwrap();
342
343 let mut content = vec![0u8; MIN_SSTABLE_SIZE as usize];
344
345 let footer_offset = content.len() - FOOTER_SIZE;
347 content[footer_offset..footer_offset + 8].copy_from_slice(&MAGIC_NUMBER.to_le_bytes());
348
349 let num_entries: u64 = 1;
351 content[footer_offset + 56..footer_offset + 64].copy_from_slice(&num_entries.to_le_bytes());
352
353 file.write_all(&content).unwrap();
354 file.flush().unwrap();
355
356 let mut file = File::open(file.path()).unwrap();
357 let validator = SSTableValidator::default();
358
359 let result = validator.validate_fast(&mut file);
360 assert!(result.is_ok());
361 }
362}