Skip to main content

sochdb_storage/
upgrade_contract.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! # Upgrade Compatibility Contract
19//!
20//! Manages versioned file formats and safe upgrade paths:
21//! - Versioned magic numbers for all persisted formats
22//! - Forward/backward compatibility policies
23//! - Migration orchestration (N → N+1 only)
24//! - Downgrade behavior specification
25//!
26//! ## Design Principles
27//!
28//! 1. **Explicit Versioning**: All formats have magic + version in header
29//! 2. **Safe Upgrades**: Migrations are atomic with rollback capability
30//! 3. **No Silent Corruption**: Incompatible formats fail loudly
31//! 4. **Document Downgrades**: Usually "not supported" but explicit
32
33use std::collections::HashMap;
34use std::fmt;
35
36use sochdb_core::SochDBError;
37
/// Eight-byte magic value that identifies a SochDB file header.
///
/// The bytes are ASCII `SOCHDB` followed by `0x00 0x01`.
pub const SOCHDB_MAGIC: [u8; 8] = [b'S', b'O', b'C', b'H', b'D', b'B', 0x00, 0x01];
40
/// Kinds of persisted file that carry a versioned header.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FormatType {
    /// Write-Ahead Log segment
    WalSegment,
    /// Data page file
    DataPage,
    /// Manifest/catalog
    Manifest,
    /// HNSW vector index
    HnswIndex,
    /// SSTable (sorted string table)
    Sstable,
    /// Checkpoint file
    Checkpoint,
    /// Backup archive
    BackupArchive,
}

impl FormatType {
    /// Every variant, in ascending [`type_id`](Self::type_id) order.
    ///
    /// Drives the reverse lookup in [`from_type_id`](Self::from_type_id); a new
    /// variant must be added here as well as to `type_id`.
    const ALL: [FormatType; 7] = [
        FormatType::WalSegment,
        FormatType::DataPage,
        FormatType::Manifest,
        FormatType::HnswIndex,
        FormatType::Sstable,
        FormatType::Checkpoint,
        FormatType::BackupArchive,
    ];

    /// Returns the one-byte identifier for this format type.
    pub fn type_id(&self) -> u8 {
        match *self {
            Self::WalSegment => 0x01,
            Self::DataPage => 0x02,
            Self::Manifest => 0x03,
            Self::HnswIndex => 0x04,
            Self::Sstable => 0x05,
            Self::Checkpoint => 0x06,
            Self::BackupArchive => 0x07,
        }
    }

    /// Maps a one-byte identifier back to its format type.
    ///
    /// Returns `None` when `id` does not belong to any known variant.
    pub fn from_type_id(id: u8) -> Option<Self> {
        Self::ALL.iter().copied().find(|kind| kind.type_id() == id)
    }

    /// Returns a human-readable label for this format type.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::WalSegment => "WAL Segment",
            Self::DataPage => "Data Page",
            Self::Manifest => "Manifest",
            Self::HnswIndex => "HNSW Index",
            Self::Sstable => "SSTable",
            Self::Checkpoint => "Checkpoint",
            Self::BackupArchive => "Backup Archive",
        }
    }
}
100
/// Format version expressed as `major.minor`.
///
/// The derived ordering compares `major` first and then `minor`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FormatVersion {
    /// Major component; versions with different majors are never directly compatible.
    pub major: u16,
    /// Minor component; only compared between versions that share a major.
    pub minor: u16,
}

impl FormatVersion {
    /// Creates a version from its major and minor components.
    pub const fn new(major: u16, minor: u16) -> Self {
        Self { major, minor }
    }

    /// Returns `true` when `self` is compatible with `other`.
    ///
    /// That is the case when both share the same major version and
    /// `self.minor >= other.minor`.
    pub fn is_compatible_with(&self, other: &FormatVersion) -> bool {
        self.major == other.major && self.minor >= other.minor
    }

    /// Returns `true` when an upgrade from `other` to `self` is supported.
    ///
    /// Two shapes are accepted:
    /// - same major, with `self.minor >= other.minor`;
    /// - the next major at minor zero, i.e. `N.x → (N+1).0`.
    ///
    /// Everything else (downgrades, skipped majors, `N.x → (N+1).y` with
    /// `y != 0`) is rejected.
    pub fn can_upgrade_from(&self, other: &FormatVersion) -> bool {
        if self.major == other.major {
            return self.minor >= other.minor;
        }
        // `other` may come straight from an on-disk header, so its major can be
        // `u16::MAX`. `checked_add` avoids the overflow panic (debug) or the
        // wrap-around to major 0 (release) that a plain `+ 1` would cause.
        self.minor == 0 && other.major.checked_add(1) == Some(self.major)
    }

    /// Serializes the version as 4 bytes: little-endian `major`, then little-endian `minor`.
    pub fn to_bytes(&self) -> [u8; 4] {
        let [major_lo, major_hi] = self.major.to_le_bytes();
        let [minor_lo, minor_hi] = self.minor.to_le_bytes();
        [major_lo, major_hi, minor_lo, minor_hi]
    }

    /// Parses a version from the first 4 bytes of `buf` (layout as in [`to_bytes`](Self::to_bytes)).
    ///
    /// Returns `None` when `buf` holds fewer than 4 bytes; extra bytes are ignored.
    pub fn from_bytes(buf: &[u8]) -> Option<Self> {
        match buf {
            [major_lo, major_hi, minor_lo, minor_hi, ..] => Some(Self {
                major: u16::from_le_bytes([*major_lo, *major_hi]),
                minor: u16::from_le_bytes([*minor_lo, *minor_hi]),
            }),
            _ => None,
        }
    }
}

impl fmt::Display for FormatVersion {
    /// Formats the version as `major.minor`, e.g. `1.0`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}.{}", self.major, self.minor)
    }
}
157
158/// Current format versions
159pub mod current_versions {
160    use super::*;
161
162    pub const WAL_SEGMENT: FormatVersion = FormatVersion::new(1, 0);
163    pub const DATA_PAGE: FormatVersion = FormatVersion::new(1, 0);
164    pub const MANIFEST: FormatVersion = FormatVersion::new(1, 0);
165    pub const HNSW_INDEX: FormatVersion = FormatVersion::new(1, 0);
166    pub const SSTABLE: FormatVersion = FormatVersion::new(1, 0);
167    pub const CHECKPOINT: FormatVersion = FormatVersion::new(1, 0);
168    pub const BACKUP_ARCHIVE: FormatVersion = FormatVersion::new(1, 0);
169}
170
171/// File header with magic and version
172#[derive(Debug, Clone)]
173pub struct FileHeader {
174    /// Magic bytes (8)
175    pub magic: [u8; 8],
176    /// Format type (1 byte)
177    pub format_type: FormatType,
178    /// Format version (4 bytes)
179    pub version: FormatVersion,
180    /// Feature flags (4 bytes)
181    pub feature_flags: u32,
182    /// Reserved for future use (15 bytes)
183    pub reserved: [u8; 15],
184}
185
186impl FileHeader {
187    /// Header size in bytes
188    pub const SIZE: usize = 32;
189
190    /// Create a new header for a format type
191    pub fn new(format_type: FormatType, version: FormatVersion) -> Self {
192        Self {
193            magic: SOCHDB_MAGIC,
194            format_type,
195            version,
196            feature_flags: 0,
197            reserved: [0; 15],
198        }
199    }
200
201    /// Serialize to bytes
202    pub fn to_bytes(&self) -> [u8; Self::SIZE] {
203        let mut buf = [0u8; Self::SIZE];
204        buf[0..8].copy_from_slice(&self.magic);
205        buf[8] = self.format_type.type_id();
206        buf[9..13].copy_from_slice(&self.version.to_bytes());
207        buf[13..17].copy_from_slice(&self.feature_flags.to_le_bytes());
208        // reserved stays zero
209        buf
210    }
211
212    /// Parse from bytes
213    pub fn from_bytes(buf: &[u8]) -> Result<Self, VersionError> {
214        if buf.len() < Self::SIZE {
215            return Err(VersionError::InvalidHeader("Header too short".to_string()));
216        }
217
218        let mut magic = [0u8; 8];
219        magic.copy_from_slice(&buf[0..8]);
220
221        if magic != SOCHDB_MAGIC {
222            return Err(VersionError::InvalidMagic {
223                expected: SOCHDB_MAGIC,
224                found: magic,
225            });
226        }
227
228        let format_type = FormatType::from_type_id(buf[8])
229            .ok_or_else(|| VersionError::UnknownFormatType(buf[8]))?;
230
231        let version = FormatVersion::from_bytes(&buf[9..13])
232            .ok_or_else(|| VersionError::InvalidHeader("Invalid version bytes".to_string()))?;
233
234        let feature_flags = u32::from_le_bytes([buf[13], buf[14], buf[15], buf[16]]);
235
236        Ok(Self {
237            magic,
238            format_type,
239            version,
240            feature_flags,
241            reserved: [0; 15],
242        })
243    }
244
245    /// Check compatibility with expected type and version
246    pub fn check_compatibility(
247        &self,
248        expected_type: FormatType,
249        current_version: FormatVersion,
250    ) -> Result<CompatibilityResult, VersionError> {
251        if self.format_type != expected_type {
252            return Err(VersionError::TypeMismatch {
253                expected: expected_type,
254                found: self.format_type,
255            });
256        }
257
258        if self.version == current_version {
259            Ok(CompatibilityResult::Exact)
260        } else if current_version.is_compatible_with(&self.version) {
261            Ok(CompatibilityResult::BackwardCompatible {
262                file_version: self.version,
263                current_version,
264            })
265        } else if current_version.can_upgrade_from(&self.version) {
266            Ok(CompatibilityResult::NeedsMigration {
267                from: self.version,
268                to: current_version,
269            })
270        } else {
271            Err(VersionError::Incompatible {
272                file_version: self.version,
273                current_version,
274            })
275        }
276    }
277
278    /// Parse and validate a file header in a single fail-fast step.
279    ///
280    /// This is the canonical entry point for *opening* any persisted SochDB
281    /// file that uses the unified [`FileHeader`] contract. It guarantees that:
282    ///
283    /// 1. The magic bytes match [`SOCHDB_MAGIC`] (else the file is not a SochDB
284    ///    file, or is truncated/corrupt).
285    /// 2. The on-disk [`FormatType`] matches what the caller expects (else the
286    ///    caller is reading the wrong kind of file).
287    /// 3. The on-disk version is compatible with `current_version` (else the
288    ///    file was written by an incompatible — usually newer — release).
289    ///
290    /// Unlike [`from_bytes`](Self::from_bytes) + [`check_compatibility`], any
291    /// failure here is mapped to [`SochDBError::Corruption`] so callers across
292    /// the workspace can propagate a single, clear, fail-fast error instead of
293    /// silently misinterpreting bytes. A [`CompatibilityResult::NeedsMigration`]
294    /// outcome is returned as `Ok` so callers can run the migration pipeline;
295    /// an outright incompatible version is an error.
296    pub fn validate(
297        bytes: &[u8],
298        expected_type: FormatType,
299        current_version: FormatVersion,
300    ) -> Result<(Self, CompatibilityResult), SochDBError> {
301        let header = Self::from_bytes(bytes).map_err(SochDBError::from)?;
302        let compat = header
303            .check_compatibility(expected_type, current_version)
304            .map_err(SochDBError::from)?;
305        Ok((header, compat))
306    }
307}
308
309impl From<VersionError> for SochDBError {
310    /// All format-version violations are surfaced as corruption so that opening
311    /// an incompatible or malformed file fails fast with a clear, actionable
312    /// message rather than risking silent data misinterpretation.
313    fn from(err: VersionError) -> Self {
314        SochDBError::Corruption(format!("on-disk format contract violation: {err}"))
315    }
316}
317
318/// Compatibility check result
319#[derive(Debug, Clone)]
320pub enum CompatibilityResult {
321    /// Exact version match
322    Exact,
323    /// File version is older but readable
324    BackwardCompatible {
325        file_version: FormatVersion,
326        current_version: FormatVersion,
327    },
328    /// Migration required before use
329    NeedsMigration {
330        from: FormatVersion,
331        to: FormatVersion,
332    },
333}
334
335/// Version-related errors
336#[derive(Debug, Clone)]
337pub enum VersionError {
338    /// Invalid magic bytes
339    InvalidMagic { expected: [u8; 8], found: [u8; 8] },
340    /// Unknown format type
341    UnknownFormatType(u8),
342    /// Format type mismatch
343    TypeMismatch {
344        expected: FormatType,
345        found: FormatType,
346    },
347    /// Version incompatible
348    Incompatible {
349        file_version: FormatVersion,
350        current_version: FormatVersion,
351    },
352    /// Invalid header
353    InvalidHeader(String),
354    /// Migration failed
355    MigrationFailed {
356        from: FormatVersion,
357        to: FormatVersion,
358        reason: String,
359    },
360    /// Downgrade not supported
361    DowngradeNotSupported {
362        from: FormatVersion,
363        to: FormatVersion,
364    },
365}
366
367impl fmt::Display for VersionError {
368    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
369        match self {
370            VersionError::InvalidMagic { expected, found } => {
371                write!(
372                    f,
373                    "Invalid magic: expected {:?}, found {:?}",
374                    expected, found
375                )
376            }
377            VersionError::UnknownFormatType(id) => {
378                write!(f, "Unknown format type: 0x{:02x}", id)
379            }
380            VersionError::TypeMismatch { expected, found } => {
381                write!(
382                    f,
383                    "Format type mismatch: expected {}, found {}",
384                    expected.name(),
385                    found.name()
386                )
387            }
388            VersionError::Incompatible {
389                file_version,
390                current_version,
391            } => {
392                write!(
393                    f,
394                    "Incompatible version: file is {}, current is {}",
395                    file_version, current_version
396                )
397            }
398            VersionError::InvalidHeader(msg) => {
399                write!(f, "Invalid header: {}", msg)
400            }
401            VersionError::MigrationFailed { from, to, reason } => {
402                write!(f, "Migration from {} to {} failed: {}", from, to, reason)
403            }
404            VersionError::DowngradeNotSupported { from, to } => {
405                write!(f, "Downgrade from {} to {} is not supported", from, to)
406            }
407        }
408    }
409}
410
411impl std::error::Error for VersionError {}
412
413/// Migration step
414pub trait Migration: Send + Sync {
415    /// Source version
416    fn from_version(&self) -> FormatVersion;
417    /// Target version
418    fn to_version(&self) -> FormatVersion;
419    /// Migrate data (returns new data)
420    fn migrate(&self, data: &[u8]) -> Result<Vec<u8>, VersionError>;
421    /// Check if migration is reversible
422    fn is_reversible(&self) -> bool;
423    /// Reverse migration (if reversible)
424    fn reverse(&self, data: &[u8]) -> Result<Vec<u8>, VersionError>;
425}
426
427/// Migration registry
428pub struct MigrationRegistry {
429    /// Registered migrations by format type
430    migrations: HashMap<FormatType, Vec<Box<dyn Migration>>>,
431}
432
433impl MigrationRegistry {
434    /// Create a new migration registry
435    pub fn new() -> Self {
436        Self {
437            migrations: HashMap::new(),
438        }
439    }
440
441    /// Register a migration
442    pub fn register(&mut self, format_type: FormatType, migration: Box<dyn Migration>) {
443        self.migrations
444            .entry(format_type)
445            .or_insert_with(Vec::new)
446            .push(migration);
447    }
448
449    /// Find migration path from one version to another
450    pub fn find_path(
451        &self,
452        format_type: FormatType,
453        from: FormatVersion,
454        to: FormatVersion,
455    ) -> Option<Vec<&dyn Migration>> {
456        let migrations = self.migrations.get(&format_type)?;
457
458        // Simple linear search for now (could use graph algorithm for complex paths)
459        let mut path = Vec::new();
460        let mut current = from;
461
462        while current < to {
463            let next = migrations
464                .iter()
465                .find(|m| m.from_version() == current && m.to_version() > current)?;
466            path.push(next.as_ref());
467            current = next.to_version();
468        }
469
470        if current == to { Some(path) } else { None }
471    }
472
473    /// Execute migration path
474    pub fn execute_path(
475        &self,
476        path: &[&dyn Migration],
477        data: &[u8],
478    ) -> Result<Vec<u8>, VersionError> {
479        let mut current_data = data.to_vec();
480        for migration in path {
481            current_data = migration.migrate(&current_data)?;
482        }
483        Ok(current_data)
484    }
485}
486
487impl Default for MigrationRegistry {
488    fn default() -> Self {
489        Self::new()
490    }
491}
492
493/// Upgrade policy configuration
494#[derive(Debug, Clone)]
495pub struct UpgradePolicy {
496    /// Allow automatic minor version upgrades
497    pub auto_minor_upgrade: bool,
498    /// Allow automatic major version upgrades
499    pub auto_major_upgrade: bool,
500    /// Create backup before migration
501    pub backup_before_migration: bool,
502    /// Supported upgrade paths
503    pub supported_paths: Vec<(FormatVersion, FormatVersion)>,
504}
505
506impl Default for UpgradePolicy {
507    fn default() -> Self {
508        Self {
509            auto_minor_upgrade: true,
510            auto_major_upgrade: false, // Require explicit action
511            backup_before_migration: true,
512            supported_paths: Vec::new(),
513        }
514    }
515}
516
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a buffer of header size that starts with the valid magic and is
    /// zero everywhere else.
    fn bytes_with_valid_magic() -> [u8; FileHeader::SIZE] {
        let mut bytes = [0u8; FileHeader::SIZE];
        bytes[0..8].copy_from_slice(&SOCHDB_MAGIC);
        bytes
    }

    #[test]
    fn test_format_version_compatibility() {
        let v1_0 = FormatVersion::new(1, 0);
        let v1_1 = FormatVersion::new(1, 1);
        let v2_0 = FormatVersion::new(2, 0);

        // Same version is compatible
        assert!(v1_0.is_compatible_with(&v1_0));

        // Newer minor is compatible with older
        assert!(v1_1.is_compatible_with(&v1_0));

        // Older minor is not compatible with newer
        assert!(!v1_0.is_compatible_with(&v1_1));

        // Different major is not compatible
        assert!(!v2_0.is_compatible_with(&v1_0));
    }

    #[test]
    fn test_upgrade_paths() {
        let v1_0 = FormatVersion::new(1, 0);
        let v1_1 = FormatVersion::new(1, 1);
        let v2_0 = FormatVersion::new(2, 0);

        // Can upgrade within same major
        assert!(v1_1.can_upgrade_from(&v1_0));

        // Cannot "upgrade" to an older minor
        assert!(!v1_0.can_upgrade_from(&v1_1));

        // Can upgrade to next major.0
        assert!(v2_0.can_upgrade_from(&v1_1));

        // Cannot skip major versions
        let v3_0 = FormatVersion::new(3, 0);
        assert!(!v3_0.can_upgrade_from(&v1_0));
    }

    #[test]
    fn test_file_header_roundtrip() {
        let mut header = FileHeader::new(FormatType::WalSegment, FormatVersion::new(1, 2));
        header.feature_flags = 0xDEAD_BEEF;

        let bytes = header.to_bytes();
        assert_eq!(bytes.len(), FileHeader::SIZE);

        let parsed = FileHeader::from_bytes(&bytes).unwrap();

        assert_eq!(parsed.magic, SOCHDB_MAGIC);
        assert_eq!(parsed.format_type, FormatType::WalSegment);
        assert_eq!(parsed.version, FormatVersion::new(1, 2));
        assert_eq!(parsed.feature_flags, 0xDEAD_BEEF);
        assert_eq!(parsed.reserved, [0u8; 15]);
    }

    #[test]
    fn test_header_too_short() {
        let bytes = [0u8; FileHeader::SIZE - 1];

        let result = FileHeader::from_bytes(&bytes);
        assert!(matches!(result, Err(VersionError::InvalidHeader(_))));
    }

    #[test]
    fn test_header_invalid_magic() {
        let mut bytes = [0u8; FileHeader::SIZE];
        bytes[0..8].copy_from_slice(b"INVALID!");

        let result = FileHeader::from_bytes(&bytes);
        assert!(matches!(result, Err(VersionError::InvalidMagic { .. })));
    }

    #[test]
    fn test_header_unknown_format_type() {
        let mut bytes = bytes_with_valid_magic();
        bytes[8] = 0xFF;

        let result = FileHeader::from_bytes(&bytes);
        assert!(matches!(result, Err(VersionError::UnknownFormatType(0xFF))));
    }

    #[test]
    fn test_compatibility_check() {
        let header = FileHeader::new(FormatType::Manifest, FormatVersion::new(1, 0));

        // Exact match
        let result = header
            .check_compatibility(FormatType::Manifest, FormatVersion::new(1, 0))
            .unwrap();
        assert!(matches!(result, CompatibilityResult::Exact));

        // Backward compatible
        let result = header
            .check_compatibility(FormatType::Manifest, FormatVersion::new(1, 1))
            .unwrap();
        assert!(matches!(
            result,
            CompatibilityResult::BackwardCompatible { .. }
        ));

        // Needs migration
        let result = header
            .check_compatibility(FormatType::Manifest, FormatVersion::new(2, 0))
            .unwrap();
        assert!(matches!(result, CompatibilityResult::NeedsMigration { .. }));
    }

    #[test]
    fn test_compatibility_check_rejections() {
        // Wrong format type is rejected before versions are compared.
        let header = FileHeader::new(FormatType::Manifest, FormatVersion::new(1, 0));
        let result = header.check_compatibility(FormatType::Sstable, FormatVersion::new(1, 0));
        assert!(matches!(result, Err(VersionError::TypeMismatch { .. })));

        // File written at a newer minor than the current code understands.
        let newer = FileHeader::new(FormatType::Manifest, FormatVersion::new(1, 1));
        let result = newer.check_compatibility(FormatType::Manifest, FormatVersion::new(1, 0));
        assert!(matches!(result, Err(VersionError::Incompatible { .. })));
    }

    #[test]
    fn test_validate_accepts_exact_match() {
        let header = FileHeader::new(FormatType::Sstable, FormatVersion::new(1, 0));
        let bytes = header.to_bytes();

        let (parsed, compat) =
            FileHeader::validate(&bytes, FormatType::Sstable, FormatVersion::new(1, 0))
                .expect("exact-version header must validate");
        assert_eq!(parsed.format_type, FormatType::Sstable);
        assert!(matches!(compat, CompatibilityResult::Exact));
    }

    #[test]
    fn test_validate_rejects_bad_magic_as_corruption() {
        let mut bytes = [0u8; FileHeader::SIZE];
        bytes[0..8].copy_from_slice(b"NOTSOCH!");

        let err = FileHeader::validate(&bytes, FormatType::WalSegment, FormatVersion::new(1, 0))
            .expect_err("bad magic must fail fast");
        assert!(matches!(err, SochDBError::Corruption(_)));
    }

    #[test]
    fn test_validate_rejects_wrong_format_type_as_corruption() {
        // Header written for a WAL segment, but caller expects an SSTable.
        let header = FileHeader::new(FormatType::WalSegment, FormatVersion::new(1, 0));
        let bytes = header.to_bytes();

        let err = FileHeader::validate(&bytes, FormatType::Sstable, FormatVersion::new(1, 0))
            .expect_err("wrong format type must fail fast");
        assert!(matches!(err, SochDBError::Corruption(_)));
    }

    #[test]
    fn test_validate_rejects_incompatible_future_version() {
        // File written by a hypothetical future major release.
        let header = FileHeader::new(FormatType::DataPage, FormatVersion::new(3, 0));
        let bytes = header.to_bytes();

        // Current code only understands major version 1.
        let err = FileHeader::validate(&bytes, FormatType::DataPage, FormatVersion::new(1, 0))
            .expect_err("incompatible future version must fail fast");
        assert!(matches!(err, SochDBError::Corruption(_)));
    }

    #[test]
    fn test_validate_accepts_older_minor_as_backward_compatible() {
        // File at 1.0, current code at 1.1 — readable without migration.
        let header = FileHeader::new(FormatType::Manifest, FormatVersion::new(1, 0));
        let bytes = header.to_bytes();

        let (_parsed, compat) =
            FileHeader::validate(&bytes, FormatType::Manifest, FormatVersion::new(1, 1))
                .expect("older-minor header must validate");
        assert!(matches!(
            compat,
            CompatibilityResult::BackwardCompatible { .. }
        ));
    }

    #[test]
    fn test_validate_allows_previous_major_via_migration() {
        // File at 1.0, current code at 2.0 — an N → N+1 migration is allowed.
        let header = FileHeader::new(FormatType::Manifest, FormatVersion::new(1, 0));
        let bytes = header.to_bytes();

        let (_parsed, compat) =
            FileHeader::validate(&bytes, FormatType::Manifest, FormatVersion::new(2, 0))
                .expect("migratable header must validate");
        assert!(matches!(compat, CompatibilityResult::NeedsMigration { .. }));
    }
}