use byteorder::{ByteOrder, LittleEndian};

use crate::block_storage::BlockCompression;
use crate::{Result, SochDBError};

55#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
57#[repr(u8)]
58pub enum FormatVersion {
59 V1 = 1,
61 V2 = 2,
63}
64
65impl FormatVersion {
66 pub const CURRENT: FormatVersion = FormatVersion::V2;
68
69 pub fn from_byte(b: u8) -> Option<Self> {
71 match b {
72 1 => Some(FormatVersion::V1),
73 2 => Some(FormatVersion::V2),
74 _ => None,
75 }
76 }
77
78 pub fn header_size(&self) -> usize {
80 match self {
81 FormatVersion::V1 => V1_HEADER_SIZE,
82 FormatVersion::V2 => V2_HEADER_SIZE,
83 }
84 }
85}
86
/// Magic bytes that open a V1 block header.
const V1_MAGIC: [u8; 4] = *b"TBLK";
/// Magic bytes that open a V2 block header.
const V2_MAGIC: [u8; 4] = *b"TBL2";

/// Serialized V1 header length in bytes:
/// magic (4) + compression (1) + three little-endian `u32` fields (12).
const V1_HEADER_SIZE: usize = 17;
/// Serialized V2 header length in bytes.
///
/// The V2 reader and writer in this file only touch the first 19 bytes
/// (magic, version, compression, flags and three `u32` fields); the last two
/// bytes are written as zero and are not interpreted when parsing.
const V2_HEADER_SIZE: usize = 21;

/// Boolean block attributes packed into a single header byte.
///
/// Only the low four bits are used; the remaining bits are ignored when
/// decoding and always cleared when encoding.
#[derive(Debug, Clone, Copy, Default)]
pub struct BlockFlags {
    /// Stored in bit `0x01`.
    pub encrypted: bool,
    /// Stored in bit `0x02`.
    pub extended_checksum: bool,
    /// Stored in bit `0x04`.
    pub spanning: bool,
    /// Stored in bit `0x08`.
    pub has_metadata: bool,
}

impl BlockFlags {
    const ENCRYPTED_BIT: u8 = 0x01;
    const EXTENDED_CHECKSUM_BIT: u8 = 0x02;
    const SPANNING_BIT: u8 = 0x04;
    const HAS_METADATA_BIT: u8 = 0x08;

    /// Packs the flags into their single-byte representation.
    pub fn to_byte(&self) -> u8 {
        let bit = |set: bool, mask: u8| if set { mask } else { 0 };

        bit(self.encrypted, Self::ENCRYPTED_BIT)
            | bit(self.extended_checksum, Self::EXTENDED_CHECKSUM_BIT)
            | bit(self.spanning, Self::SPANNING_BIT)
            | bit(self.has_metadata, Self::HAS_METADATA_BIT)
    }

    /// Unpacks flags from a header byte; bits outside the low nibble are ignored.
    pub fn from_byte(b: u8) -> Self {
        let has = |mask: u8| (b & mask) != 0;

        Self {
            encrypted: has(Self::ENCRYPTED_BIT),
            extended_checksum: has(Self::EXTENDED_CHECKSUM_BIT),
            spanning: has(Self::SPANNING_BIT),
            has_metadata: has(Self::HAS_METADATA_BIT),
        }
    }
}

138#[derive(Debug, Clone)]
147pub struct V1Header {
148 pub compression: BlockCompression,
149 pub original_size: u32,
150 pub compressed_size: u32,
151 pub checksum: u32,
152}
153
154impl V1Header {
155 pub fn from_bytes(buf: &[u8]) -> Result<Self> {
157 if buf.len() < V1_HEADER_SIZE {
158 return Err(SochDBError::InvalidData(format!(
159 "V1 header too short: {} < {}",
160 buf.len(),
161 V1_HEADER_SIZE
162 )));
163 }
164
165 if &buf[0..4] != V1_MAGIC.as_slice() {
166 return Err(SochDBError::InvalidData(format!(
167 "Invalid V1 magic: {:?}",
168 &buf[0..4]
169 )));
170 }
171
172 Ok(Self {
173 compression: BlockCompression::from_byte(buf[4]),
174 original_size: LittleEndian::read_u32(&buf[5..9]),
175 compressed_size: LittleEndian::read_u32(&buf[9..13]),
176 checksum: LittleEndian::read_u32(&buf[13..17]),
177 })
178 }
179
180 pub fn to_bytes(&self) -> [u8; V1_HEADER_SIZE] {
182 let mut buf = [0u8; V1_HEADER_SIZE];
183 buf[0..4].copy_from_slice(&V1_MAGIC);
184 buf[4] = self.compression.to_byte();
185 LittleEndian::write_u32(&mut buf[5..9], self.original_size);
186 LittleEndian::write_u32(&mut buf[9..13], self.compressed_size);
187 LittleEndian::write_u32(&mut buf[13..17], self.checksum);
188 buf
189 }
190
191 pub fn upgrade_to_v2(&self) -> V2Header {
193 V2Header {
194 format_version: FormatVersion::V2,
195 compression: self.compression,
196 flags: BlockFlags::default(),
197 original_size: self.original_size,
198 compressed_size: self.compressed_size,
199 checksum: self.checksum,
200 }
201 }
202}
203
204#[derive(Debug, Clone)]
216pub struct V2Header {
217 pub format_version: FormatVersion,
218 pub compression: BlockCompression,
219 pub flags: BlockFlags,
220 pub original_size: u32,
221 pub compressed_size: u32,
222 pub checksum: u32,
223}
224
225impl V2Header {
226 pub fn from_bytes(buf: &[u8]) -> Result<Self> {
228 if buf.len() < V2_HEADER_SIZE {
229 return Err(SochDBError::InvalidData(format!(
230 "V2 header too short: {} < {}",
231 buf.len(),
232 V2_HEADER_SIZE
233 )));
234 }
235
236 if &buf[0..4] != V2_MAGIC.as_slice() {
237 return Err(SochDBError::InvalidData(format!(
238 "Invalid V2 magic: {:?}",
239 &buf[0..4]
240 )));
241 }
242
243 let format_version = FormatVersion::from_byte(buf[4]).ok_or_else(|| {
244 SochDBError::InvalidData(format!("Unknown format version: {}", buf[4]))
245 })?;
246
247 Ok(Self {
248 format_version,
249 compression: BlockCompression::from_byte(buf[5]),
250 flags: BlockFlags::from_byte(buf[6]),
251 original_size: LittleEndian::read_u32(&buf[7..11]),
252 compressed_size: LittleEndian::read_u32(&buf[11..15]),
253 checksum: LittleEndian::read_u32(&buf[15..19]),
254 })
255 }
256
257 pub fn to_bytes(&self) -> [u8; V2_HEADER_SIZE] {
259 let mut buf = [0u8; V2_HEADER_SIZE];
260 buf[0..4].copy_from_slice(&V2_MAGIC);
261 buf[4] = self.format_version as u8;
262 buf[5] = self.compression.to_byte();
263 buf[6] = self.flags.to_byte();
264 LittleEndian::write_u32(&mut buf[7..11], self.original_size);
265 LittleEndian::write_u32(&mut buf[11..15], self.compressed_size);
266 LittleEndian::write_u32(&mut buf[15..19], self.checksum);
267 buf
269 }
270
271 pub fn downgrade_to_v1(&self) -> V1Header {
273 V1Header {
274 compression: self.compression,
275 original_size: self.original_size,
276 compressed_size: self.compressed_size,
277 checksum: self.checksum,
278 }
279 }
280}
281
282#[derive(Debug, Clone)]
284pub enum BlockHeader {
285 V1(V1Header),
286 V2(V2Header),
287}
288
289impl BlockHeader {
290 pub fn from_bytes(buf: &[u8]) -> Result<Self> {
292 if buf.len() < 4 {
293 return Err(SochDBError::InvalidData(
294 "Buffer too short for magic detection".to_string(),
295 ));
296 }
297
298 let magic = &buf[0..4];
299
300 if magic == V1_MAGIC.as_slice() {
301 Ok(BlockHeader::V1(V1Header::from_bytes(buf)?))
302 } else if magic == V2_MAGIC.as_slice() {
303 Ok(BlockHeader::V2(V2Header::from_bytes(buf)?))
304 } else {
305 Err(SochDBError::InvalidData(format!(
306 "Unknown block magic: {:?}",
307 magic
308 )))
309 }
310 }
311
312 pub fn version(&self) -> FormatVersion {
314 match self {
315 BlockHeader::V1(_) => FormatVersion::V1,
316 BlockHeader::V2(_) => FormatVersion::V2,
317 }
318 }
319
320 pub fn header_size(&self) -> usize {
322 self.version().header_size()
323 }
324
325 pub fn compression(&self) -> BlockCompression {
327 match self {
328 BlockHeader::V1(h) => h.compression,
329 BlockHeader::V2(h) => h.compression,
330 }
331 }
332
333 pub fn original_size(&self) -> u32 {
335 match self {
336 BlockHeader::V1(h) => h.original_size,
337 BlockHeader::V2(h) => h.original_size,
338 }
339 }
340
341 pub fn compressed_size(&self) -> u32 {
343 match self {
344 BlockHeader::V1(h) => h.compressed_size,
345 BlockHeader::V2(h) => h.compressed_size,
346 }
347 }
348
349 pub fn checksum(&self) -> u32 {
351 match self {
352 BlockHeader::V1(h) => h.checksum,
353 BlockHeader::V2(h) => h.checksum,
354 }
355 }
356
357 pub fn flags(&self) -> BlockFlags {
359 match self {
360 BlockHeader::V1(_) => BlockFlags::default(),
361 BlockHeader::V2(h) => h.flags,
362 }
363 }
364
365 pub fn upgrade(&self) -> Self {
367 match self {
368 BlockHeader::V1(h) => BlockHeader::V2(h.upgrade_to_v2()),
369 BlockHeader::V2(_) => self.clone(),
370 }
371 }
372
373 pub fn to_bytes(&self) -> Vec<u8> {
375 match self {
376 BlockHeader::V1(h) => h.to_bytes().to_vec(),
377 BlockHeader::V2(h) => h.to_bytes().to_vec(),
378 }
379 }
380
381 pub fn is_current(&self) -> bool {
383 self.version() == FormatVersion::CURRENT
384 }
385
386 pub fn needs_migration(&self) -> bool {
388 self.version() < FormatVersion::CURRENT
389 }
390}
391
392#[derive(Debug, Clone)]
394pub struct MigratableBlock {
395 pub header: BlockHeader,
396 pub data: Vec<u8>,
397}
398
399impl MigratableBlock {
400 pub fn new(
402 data: Vec<u8>,
403 compression: BlockCompression,
404 original_size: u32,
405 compressed_size: u32,
406 checksum: u32,
407 ) -> Self {
408 Self {
409 header: BlockHeader::V2(V2Header {
410 format_version: FormatVersion::CURRENT,
411 compression,
412 flags: BlockFlags::default(),
413 original_size,
414 compressed_size,
415 checksum,
416 }),
417 data,
418 }
419 }
420
421 pub fn with_flags(mut self, flags: BlockFlags) -> Self {
423 if let BlockHeader::V2(ref mut h) = self.header {
424 h.flags = flags;
425 }
426 self
427 }
428
429 pub fn from_bytes(buf: &[u8]) -> Result<Self> {
431 let header = BlockHeader::from_bytes(buf)?;
432 let header_size = header.header_size();
433 let data_size = header.compressed_size() as usize;
434
435 if buf.len() < header_size + data_size {
436 return Err(SochDBError::InvalidData(format!(
437 "Block buffer too short: {} < {}",
438 buf.len(),
439 header_size + data_size
440 )));
441 }
442
443 Ok(Self {
444 header,
445 data: buf[header_size..header_size + data_size].to_vec(),
446 })
447 }
448
449 pub fn to_bytes(&self) -> Vec<u8> {
451 let header_bytes = self.header.to_bytes();
452 let mut result = Vec::with_capacity(header_bytes.len() + self.data.len());
453 result.extend_from_slice(&header_bytes);
454 result.extend_from_slice(&self.data);
455 result
456 }
457
458 pub fn migrate(&mut self) {
460 self.header = self.header.upgrade();
461 }
462
463 pub fn needs_migration(&self) -> bool {
465 self.header.needs_migration()
466 }
467
468 pub fn verify_checksum(&self) -> Result<()> {
470 let computed = crc32fast::hash(&self.data);
471 let stored = self.header.checksum();
472
473 if computed != stored {
474 return Err(SochDBError::DataCorruption {
475 details: format!(
476 "Checksum mismatch: computed {} != stored {}",
477 computed, stored
478 ),
479 location: "block data".to_string(),
480 hint: "Block may be corrupted, try restoring from backup".to_string(),
481 });
482 }
483
484 Ok(())
485 }
486}
487
488#[derive(Debug, Default)]
490pub struct MigrationStats {
491 pub blocks_read: u64,
492 pub blocks_migrated: u64,
493 pub v1_blocks_found: u64,
494 pub v2_blocks_found: u64,
495 pub checksum_failures: u64,
496}
497
498impl MigrationStats {
499 pub fn record_read(&mut self, version: FormatVersion) {
500 self.blocks_read += 1;
501 match version {
502 FormatVersion::V1 => self.v1_blocks_found += 1,
503 FormatVersion::V2 => self.v2_blocks_found += 1,
504 }
505 }
506
507 pub fn record_migration(&mut self) {
508 self.blocks_migrated += 1;
509 }
510
511 pub fn record_checksum_failure(&mut self) {
512 self.checksum_failures += 1;
513 }
514
515 pub fn migration_progress(&self) -> f64 {
517 if self.v1_blocks_found == 0 {
518 100.0
519 } else {
520 (self.blocks_migrated as f64 / self.v1_blocks_found as f64) * 100.0
521 }
522 }
523}
524
525pub struct FormatMigrator {
527 stats: MigrationStats,
528 verify_checksums: bool,
529}
530
531impl FormatMigrator {
532 pub fn new() -> Self {
533 Self {
534 stats: MigrationStats::default(),
535 verify_checksums: true,
536 }
537 }
538
539 pub fn with_checksum_verification(mut self, verify: bool) -> Self {
541 self.verify_checksums = verify;
542 self
543 }
544
545 pub fn migrate_block(&mut self, block: &mut MigratableBlock) -> Result<bool> {
547 self.stats.record_read(block.header.version());
548
549 if self.verify_checksums
550 && let Err(e) = block.verify_checksum()
551 {
552 self.stats.record_checksum_failure();
553 return Err(e);
554 }
555
556 if block.needs_migration() {
557 block.migrate();
558 self.stats.record_migration();
559 Ok(true)
560 } else {
561 Ok(false)
562 }
563 }
564
565 pub fn migrate_blocks(&mut self, blocks: &mut [MigratableBlock]) -> Result<usize> {
567 let mut migrated = 0;
568 for block in blocks {
569 if self.migrate_block(block)? {
570 migrated += 1;
571 }
572 }
573 Ok(migrated)
574 }
575
576 pub fn stats(&self) -> &MigrationStats {
578 &self.stats
579 }
580}
581
582impl Default for FormatMigrator {
583 fn default() -> Self {
584 Self::new()
585 }
586}
587
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_v1_header_roundtrip() {
        let header = V1Header {
            compression: BlockCompression::None,
            original_size: 1024,
            compressed_size: 1024,
            checksum: 0xDEADBEEF,
        };

        let bytes = header.to_bytes();
        assert_eq!(bytes.len(), V1_HEADER_SIZE);

        let parsed = V1Header::from_bytes(&bytes).unwrap();
        assert_eq!(parsed.compression, BlockCompression::None);
        assert_eq!(parsed.original_size, 1024);
        assert_eq!(parsed.compressed_size, 1024);
        assert_eq!(parsed.checksum, 0xDEADBEEF);
    }

    #[test]
    fn test_v2_header_roundtrip() {
        let header = V2Header {
            format_version: FormatVersion::V2,
            compression: BlockCompression::Lz4,
            flags: BlockFlags {
                encrypted: true,
                extended_checksum: false,
                spanning: true,
                has_metadata: false,
            },
            original_size: 2048,
            compressed_size: 1500,
            checksum: 0xCAFEBABE,
        };

        let bytes = header.to_bytes();
        assert_eq!(bytes.len(), V2_HEADER_SIZE);

        let parsed = V2Header::from_bytes(&bytes).unwrap();
        assert_eq!(parsed.format_version, FormatVersion::V2);
        assert_eq!(parsed.compression, BlockCompression::Lz4);
        assert!(parsed.flags.encrypted);
        assert!(!parsed.flags.extended_checksum);
        assert!(parsed.flags.spanning);
        assert!(!parsed.flags.has_metadata);
        assert_eq!(parsed.original_size, 2048);
        assert_eq!(parsed.compressed_size, 1500);
        assert_eq!(parsed.checksum, 0xCAFEBABE);
    }

    #[test]
    fn test_version_detection() {
        // A buffer starting with the V1 magic is detected as V1.
        let v1_header = V1Header {
            compression: BlockCompression::None,
            original_size: 100,
            compressed_size: 100,
            checksum: 0x12345678,
        };
        let v1_bytes = v1_header.to_bytes();

        let detected = BlockHeader::from_bytes(&v1_bytes).unwrap();
        assert_eq!(detected.version(), FormatVersion::V1);

        // A buffer starting with the V2 magic is detected as V2.
        let v2_header = V2Header {
            format_version: FormatVersion::V2,
            compression: BlockCompression::Zstd,
            flags: BlockFlags::default(),
            original_size: 200,
            compressed_size: 150,
            checksum: 0x87654321,
        };
        let v2_bytes = v2_header.to_bytes();

        let detected = BlockHeader::from_bytes(&v2_bytes).unwrap();
        assert_eq!(detected.version(), FormatVersion::V2);
    }

    #[test]
    fn test_v1_to_v2_upgrade() {
        let v1_header = V1Header {
            compression: BlockCompression::Lz4,
            original_size: 500,
            compressed_size: 300,
            checksum: 0xABCDEF00,
        };

        let v2_header = v1_header.upgrade_to_v2();

        assert_eq!(v2_header.format_version, FormatVersion::V2);
        assert_eq!(v2_header.compression, BlockCompression::Lz4);
        assert_eq!(v2_header.original_size, 500);
        assert_eq!(v2_header.compressed_size, 300);
        assert_eq!(v2_header.checksum, 0xABCDEF00);
        // Upgraded headers start with every flag cleared.
        assert!(!v2_header.flags.encrypted);
    }

    #[test]
    fn test_block_migration() {
        let data = b"Hello, SochDB!";
        let checksum = crc32fast::hash(data);

        let v1_header = V1Header {
            compression: BlockCompression::None,
            original_size: data.len() as u32,
            compressed_size: data.len() as u32,
            checksum,
        };

        let mut buf = v1_header.to_bytes().to_vec();
        buf.extend_from_slice(data);

        let mut block = MigratableBlock::from_bytes(&buf).unwrap();
        assert!(block.needs_migration());
        assert_eq!(block.header.version(), FormatVersion::V1);

        block.migrate();
        assert!(!block.needs_migration());
        assert_eq!(block.header.version(), FormatVersion::V2);

        // Migration rewrites only the header; the payload and checksum still match.
        assert_eq!(block.data, data);
        block.verify_checksum().unwrap();
    }

    #[test]
    fn test_format_migrator() {
        let data1 = b"Block one data";
        let data2 = b"Block two data";

        let mut blocks: Vec<MigratableBlock> = vec![
            MigratableBlock {
                header: BlockHeader::V1(V1Header {
                    compression: BlockCompression::None,
                    original_size: data1.len() as u32,
                    compressed_size: data1.len() as u32,
                    checksum: crc32fast::hash(data1),
                }),
                data: data1.to_vec(),
            },
            MigratableBlock {
                header: BlockHeader::V1(V1Header {
                    compression: BlockCompression::None,
                    original_size: data2.len() as u32,
                    compressed_size: data2.len() as u32,
                    checksum: crc32fast::hash(data2),
                }),
                data: data2.to_vec(),
            },
        ];

        let mut migrator = FormatMigrator::new();
        let migrated = migrator.migrate_blocks(&mut blocks).unwrap();

        assert_eq!(migrated, 2);
        assert_eq!(migrator.stats().blocks_read, 2);
        assert_eq!(migrator.stats().blocks_migrated, 2);
        assert_eq!(migrator.stats().v1_blocks_found, 2);

        for block in &blocks {
            assert_eq!(block.header.version(), FormatVersion::V2);
        }
    }

    #[test]
    fn test_checksum_verification_failure() {
        let data = b"Test data";
        let block = MigratableBlock {
            header: BlockHeader::V1(V1Header {
                compression: BlockCompression::None,
                original_size: data.len() as u32,
                compressed_size: data.len() as u32,
                // Deliberately not the CRC-32 of `data`.
                checksum: 0xBADBAD,
            }),
            data: data.to_vec(),
        };

        let result = block.verify_checksum();
        assert!(result.is_err());
    }

    #[test]
    fn test_block_flags() {
        let flags = BlockFlags {
            encrypted: true,
            extended_checksum: true,
            spanning: false,
            has_metadata: true,
        };

        let byte = flags.to_byte();
        let parsed = BlockFlags::from_byte(byte);

        assert!(parsed.encrypted);
        assert!(parsed.extended_checksum);
        assert!(!parsed.spanning);
        assert!(parsed.has_metadata);
    }

    #[test]
    fn test_migration_progress() {
        let mut stats = MigrationStats::default();

        // With no V1 blocks recorded there is nothing left to migrate.
        assert_eq!(stats.migration_progress(), 100.0);

        stats.v1_blocks_found = 10;
        stats.blocks_migrated = 5;
        assert_eq!(stats.migration_progress(), 50.0);

        stats.blocks_migrated = 10;
        assert_eq!(stats.migration_progress(), 100.0);
    }

    #[test]
    fn test_block_complete_roundtrip() {
        let data = b"Complete block test with some data";
        let checksum = crc32fast::hash(data);

        let block = MigratableBlock::new(
            data.to_vec(),
            BlockCompression::None,
            data.len() as u32,
            data.len() as u32,
            checksum,
        );

        let bytes = block.to_bytes();

        let parsed = MigratableBlock::from_bytes(&bytes).unwrap();

        assert_eq!(parsed.header.version(), FormatVersion::V2);
        assert_eq!(parsed.data, data);
        parsed.verify_checksum().unwrap();
    }

    #[test]
    fn test_block_with_flags() {
        let data = b"Encrypted data";
        let checksum = crc32fast::hash(data);

        let block = MigratableBlock::new(
            data.to_vec(),
            BlockCompression::Zstd,
            data.len() as u32,
            data.len() as u32,
            checksum,
        )
        .with_flags(BlockFlags {
            encrypted: true,
            extended_checksum: false,
            spanning: false,
            has_metadata: true,
        });

        let bytes = block.to_bytes();
        let parsed = MigratableBlock::from_bytes(&bytes).unwrap();

        assert!(parsed.header.flags().encrypted);
        assert!(parsed.header.flags().has_metadata);
        assert!(!parsed.header.flags().spanning);
    }

    #[test]
    fn test_unknown_magic_error() {
        let bad_magic = b"XXXX\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
        let result = BlockHeader::from_bytes(bad_magic);
        assert!(result.is_err());
    }

    #[test]
    fn test_buffer_too_short_error() {
        let short_buf = b"TBL";
        let result = BlockHeader::from_bytes(short_buf);
        assert!(result.is_err());
    }

    #[test]
    fn test_truncated_block_error() {
        // The header declares ten payload bytes but only three follow it.
        let header = V1Header {
            compression: BlockCompression::None,
            original_size: 10,
            compressed_size: 10,
            checksum: 0,
        };

        let mut buf = header.to_bytes().to_vec();
        buf.extend_from_slice(b"abc");

        assert!(MigratableBlock::from_bytes(&buf).is_err());
    }
}