sochdb_storage/
validation.rs1use blake3::Hasher;
37use std::fs::File;
38use std::io::{Read, Seek, SeekFrom};
39use std::path::Path;
40use sochdb_core::{Result, SochDBError};
41
/// Smallest file length, in bytes, that the validators in this module accept.
///
/// Anything shorter is rejected before the footer is read.
pub const MIN_SSTABLE_SIZE: u64 = 280;

/// Magic value expected in the first eight bytes of the footer.
///
/// Read most-significant byte first, the digits are the ASCII codes of
/// `AFFV2025`. The validators decode the on-disk value as little-endian.
pub const MAGIC_NUMBER: u64 = 0x4146_4656_3230_3235;

/// Length of the footer in bytes; the validators seek this far back from the
/// end of the file to find it.
pub const FOOTER_SIZE: usize = 144;
51
/// Reasons an SSTable file can fail validation.
#[derive(Debug)]
pub enum ValidationError {
    /// The file is shorter than the smallest acceptable SSTable.
    TooSmall {
        /// Observed file length in bytes.
        actual: u64,
        /// Smallest acceptable length in bytes.
        minimum: u64,
    },
    /// The footer does not carry the expected magic value.
    BadMagic {
        /// Magic value the validator was looking for.
        expected: u64,
        /// Magic value actually found in the file.
        actual: u64,
    },
    /// The computed 32-byte digest differs from the expected one.
    ChecksumMismatch {
        /// Digest the caller expected.
        expected: [u8; 32],
        /// Digest computed from the file contents.
        actual: [u8; 32],
    },
    /// An underlying I/O operation failed.
    IoError(std::io::Error),
}
69
70impl std::fmt::Display for ValidationError {
71 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72 match self {
73 ValidationError::TooSmall { actual, minimum } => {
74 write!(
75 f,
76 "SSTable file too small: {} bytes (minimum: {})",
77 actual, minimum
78 )
79 }
80 ValidationError::BadMagic { expected, actual } => {
81 write!(
82 f,
83 "Invalid magic number: {:#x} (expected: {:#x})",
84 actual, expected
85 )
86 }
87 ValidationError::ChecksumMismatch { expected, actual } => {
88 write!(
89 f,
90 "Checksum mismatch: expected {}, got {}",
91 hex::encode(expected),
92 hex::encode(actual)
93 )
94 }
95 ValidationError::IoError(e) => write!(f, "I/O error during validation: {}", e),
96 }
97 }
98}
99
100impl std::error::Error for ValidationError {}
101
102impl From<std::io::Error> for ValidationError {
103 fn from(e: std::io::Error) -> Self {
104 ValidationError::IoError(e)
105 }
106}
107
/// Settings for validating an SSTable file.
///
/// All fields are public, so a validator can be built with a struct literal
/// as well as through its constructors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SSTableValidator {
    /// Magic value the first eight footer bytes must decode to
    /// (little-endian).
    pub expected_magic: u64,

    /// Requests a whole-file checksum comparison during
    /// `validate_before_mmap`. It only takes effect when `expected_checksum`
    /// is `Some`.
    pub verify_full_checksum: bool,

    /// 32-byte digest the whole-file hash is compared against. With `None`
    /// the comparison is skipped even if `verify_full_checksum` is `true`.
    pub expected_checksum: Option<[u8; 32]>,
}
119
120impl Default for SSTableValidator {
121 fn default() -> Self {
122 Self {
123 expected_magic: MAGIC_NUMBER,
124 verify_full_checksum: false,
125 expected_checksum: None,
126 }
127 }
128}
129
130impl SSTableValidator {
131 pub fn with_checksum_verification(expected_checksum: [u8; 32]) -> Self {
133 Self {
134 expected_magic: MAGIC_NUMBER,
135 verify_full_checksum: true,
136 expected_checksum: Some(expected_checksum),
137 }
138 }
139
140 pub fn validate_before_mmap(&self, file: &mut File) -> Result<()> {
152 let metadata = file.metadata()?;
154
155 let file_size = metadata.len();
156 if file_size < MIN_SSTABLE_SIZE {
157 return Err(SochDBError::Corruption(format!(
158 "SSTable file too small: {} bytes (minimum: {})",
159 file_size, MIN_SSTABLE_SIZE
160 )));
161 }
162
163 file.seek(SeekFrom::End(-(FOOTER_SIZE as i64)))?;
165
166 let mut footer_bytes = vec![0u8; FOOTER_SIZE];
167 file.read_exact(&mut footer_bytes)?;
168
169 let magic = u64::from_le_bytes(footer_bytes[0..8].try_into().unwrap());
171 if magic != self.expected_magic {
172 return Err(SochDBError::Corruption(format!(
173 "Invalid SSTable magic number: {:#x} (expected: {:#x})",
174 magic, self.expected_magic
175 )));
176 }
177
178 let num_entries = u64::from_le_bytes(footer_bytes[56..64].try_into().unwrap());
181
182 const MAX_REASONABLE_ENTRIES: u64 = 10_000_000;
185 if num_entries > MAX_REASONABLE_ENTRIES {
186 return Err(SochDBError::Corruption(format!(
187 "Unreasonable num_entries in footer: {} (max: {})",
188 num_entries, MAX_REASONABLE_ENTRIES
189 )));
190 }
191
192 let min_expected_size = FOOTER_SIZE as u64 + num_entries * 128;
195 if file_size < min_expected_size {
196 return Err(SochDBError::Corruption(format!(
197 "File size {} too small for {} entries (expected >= {})",
198 file_size, num_entries, min_expected_size
199 )));
200 }
201
202 if self.verify_full_checksum
204 && let Some(expected) = self.expected_checksum
205 {
206 let computed = self.compute_file_checksum(file)?;
207 if computed != expected {
208 return Err(SochDBError::Corruption(format!(
209 "Checksum mismatch: expected {}, got {}",
210 hex::encode(expected),
211 hex::encode(computed)
212 )));
213 }
214 }
215
216 Ok(())
217 }
218
219 fn compute_file_checksum(&self, file: &mut File) -> Result<[u8; 32]> {
226 file.seek(SeekFrom::Start(0))?;
228
229 let mut hasher = Hasher::new();
231 let mut buffer = vec![0u8; 64 * 1024]; loop {
234 let bytes_read = file.read(&mut buffer)?;
235
236 if bytes_read == 0 {
237 break;
238 }
239
240 hasher.update(&buffer[..bytes_read]);
241 }
242
243 let hash = hasher.finalize();
244 Ok(*hash.as_bytes())
245 }
246
247 pub fn validate_fast(&self, file: &mut File) -> Result<()> {
252 let metadata = file.metadata()?;
254
255 let file_size = metadata.len();
256 if file_size < MIN_SSTABLE_SIZE {
257 return Err(SochDBError::Corruption(format!(
258 "SSTable file too small: {} bytes",
259 file_size
260 )));
261 }
262
263 file.seek(SeekFrom::End(-(FOOTER_SIZE as i64)))?;
265
266 let mut magic_bytes = [0u8; 8];
267 file.read_exact(&mut magic_bytes)?;
268
269 let magic = u64::from_le_bytes(magic_bytes);
270 if magic != self.expected_magic {
271 return Err(SochDBError::Corruption(format!(
272 "Invalid magic number: {:#x}",
273 magic
274 )));
275 }
276
277 Ok(())
278 }
279}
280
281pub fn validate_sstable_file<P: AsRef<Path>>(path: P, full_validation: bool) -> Result<()> {
285 let mut file = File::open(path.as_ref())?;
286
287 let validator = SSTableValidator::default();
288
289 if full_validation {
290 validator.validate_before_mmap(&mut file)
291 } else {
292 validator.validate_fast(&mut file)
293 }
294}
295
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Builds a zero-filled image of exactly `MIN_SSTABLE_SIZE` bytes whose
    /// footer carries `magic` at offset 0 and `num_entries` at offset 56,
    /// both little-endian.
    fn sstable_image(magic: u64, num_entries: u64) -> Vec<u8> {
        let mut content = vec![0u8; MIN_SSTABLE_SIZE as usize];
        let footer_offset = content.len() - FOOTER_SIZE;
        content[footer_offset..footer_offset + 8].copy_from_slice(&magic.to_le_bytes());
        content[footer_offset + 56..footer_offset + 64]
            .copy_from_slice(&num_entries.to_le_bytes());
        content
    }

    /// Writes `content` to a fresh temporary file and reopens it read-only.
    ///
    /// The `NamedTempFile` guard is returned as well, because dropping it
    /// deletes the file; callers must keep it alive while using the handle.
    fn open_temp(content: &[u8]) -> (NamedTempFile, File) {
        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(content).unwrap();
        tmp.flush().unwrap();
        let handle = File::open(tmp.path()).unwrap();
        (tmp, handle)
    }

    #[test]
    fn test_validate_too_small() {
        let (_guard, mut file) = open_temp(&[0u8; 100]);
        let validator = SSTableValidator::default();

        let result = validator.validate_fast(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("too small"));
    }

    #[test]
    fn test_validate_bad_magic() {
        let (_guard, mut file) = open_temp(&sstable_image(0xDEADBEEF, 0));
        let validator = SSTableValidator::default();

        let result = validator.validate_fast(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("magic"));
    }

    #[test]
    fn test_validate_correct_file() {
        let (_guard, mut file) = open_temp(&sstable_image(MAGIC_NUMBER, 1));
        let validator = SSTableValidator::default();

        let result = validator.validate_fast(&mut file);
        assert!(result.is_ok());
    }

    // The tests below exercise `validate_before_mmap`, the only path that
    // reads the entry count and the checksum.

    #[test]
    fn test_validate_before_mmap_too_small() {
        let (_guard, mut file) = open_temp(&[0u8; 100]);
        let validator = SSTableValidator::default();

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("too small"));
    }

    #[test]
    fn test_validate_before_mmap_bad_magic() {
        let (_guard, mut file) = open_temp(&sstable_image(0xDEADBEEF, 0));
        let validator = SSTableValidator::default();

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("magic"));
    }

    #[test]
    fn test_validate_before_mmap_correct_file() {
        // One entry needs 144 + 128 = 272 bytes, which fits in 280.
        let (_guard, mut file) = open_temp(&sstable_image(MAGIC_NUMBER, 1));
        let validator = SSTableValidator::default();

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_before_mmap_unreasonable_entries() {
        let (_guard, mut file) = open_temp(&sstable_image(MAGIC_NUMBER, 10_000_001));
        let validator = SSTableValidator::default();

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Unreasonable num_entries"));
    }

    #[test]
    fn test_validate_before_mmap_file_too_small_for_entries() {
        // Two entries need 144 + 256 = 400 bytes; the image is only 280.
        let (_guard, mut file) = open_temp(&sstable_image(MAGIC_NUMBER, 2));
        let validator = SSTableValidator::default();

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("too small for"));
    }

    #[test]
    fn test_validate_before_mmap_checksum_match() {
        let content = sstable_image(MAGIC_NUMBER, 1);
        let mut hasher = Hasher::new();
        hasher.update(&content);
        let digest = *hasher.finalize().as_bytes();

        let (_guard, mut file) = open_temp(&content);
        let validator = SSTableValidator::with_checksum_verification(digest);

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_before_mmap_checksum_mismatch() {
        let (_guard, mut file) = open_temp(&sstable_image(MAGIC_NUMBER, 1));
        let validator = SSTableValidator::with_checksum_verification([0u8; 32]);

        let result = validator.validate_before_mmap(&mut file);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("Checksum mismatch"));
    }
}