kstone_core/
layout.rs

//! Single-file database layout
//!
//! File structure:
//! ```text
//! [Header (4KB)] [WAL Ring] [Manifest Ring] [SST Heap]
//! ```
//!
//! - Header: Database metadata and region offsets
//! - WAL Ring: Circular buffer for the write-ahead log
//! - Manifest Ring: Circular buffer for metadata/catalog
//! - SST Heap: Variable-size extents for SST blocks
12
13use bytes::{Bytes, BytesMut, BufMut};
14use crate::{Error, Result};
15
/// Fundamental unit of I/O: one 4 KiB block.
pub const BLOCK_SIZE: usize = 1 << 12;

/// Size of the file header; the header occupies exactly one block.
pub const HEADER_SIZE: usize = BLOCK_SIZE;

/// Magic number identifying a database file: the ASCII tag "KSTN" read as a
/// big-endian `u32` (0x4B53544E).
pub const DB_MAGIC: u32 = u32::from_be_bytes(*b"KSTN");

/// Current file format version.
pub const DB_VERSION: u32 = 1;

/// Default size of the WAL ring (64 MiB).
pub const DEFAULT_WAL_SIZE: u64 = 64 << 20;

/// Default size of the manifest ring (4 MiB).
pub const DEFAULT_MANIFEST_SIZE: u64 = 4 << 20;
33
34/// File layout header
35///
36/// Format (big-endian for magic, little-endian for rest):
37/// ```text
38/// [magic(4)] [version(4)]
39/// [wal_offset(8)] [wal_size(8)]
40/// [manifest_offset(8)] [manifest_size(8)]
41/// [sst_offset(8)] [sst_size(8)]
42/// [reserved(3968)]
43/// [crc32c(4)]
44/// ```
45#[derive(Debug, Clone)]
46pub struct FileHeader {
47    pub magic: u32,
48    pub version: u32,
49    pub wal_region: Region,
50    pub manifest_region: Region,
51    pub sst_region: Region,
52}
53
/// Region in the file: the half-open byte range `[offset, offset + size)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Region {
    /// Byte offset from start of file
    pub offset: u64,
    /// Size in bytes
    pub size: u64,
}

impl Region {
    /// Create a region starting at `offset` and spanning `size` bytes.
    pub fn new(offset: u64, size: u64) -> Self {
        Self { offset, size }
    }

    /// Exclusive end offset of the region (`offset + size`).
    ///
    /// Saturates at `u64::MAX` instead of overflowing, so a region built from
    /// untrusted on-disk values can never panic (debug) or wrap around to a
    /// small offset (release).
    pub fn end(&self) -> u64 {
        self.offset.saturating_add(self.size)
    }

    /// Returns `true` if `offset` lies inside `[self.offset, self.offset + self.size)`.
    ///
    /// An empty region (`size == 0`) contains nothing.
    pub fn contains(&self, offset: u64) -> bool {
        // Compare the distance from the region start against the size rather
        // than computing the end, so the check cannot overflow.
        offset
            .checked_sub(self.offset)
            .map_or(false, |distance| distance < self.size)
    }
}
76
77impl FileHeader {
78    /// Create a new file header with default sizes
79    pub fn new() -> Self {
80        let wal_offset = HEADER_SIZE as u64;
81        let wal_size = DEFAULT_WAL_SIZE;
82
83        let manifest_offset = wal_offset + wal_size;
84        let manifest_size = DEFAULT_MANIFEST_SIZE;
85
86        let sst_offset = manifest_offset + manifest_size;
87        let sst_size = 0; // Will grow as needed
88
89        Self {
90            magic: DB_MAGIC,
91            version: DB_VERSION,
92            wal_region: Region::new(wal_offset, wal_size),
93            manifest_region: Region::new(manifest_offset, manifest_size),
94            sst_region: Region::new(sst_offset, sst_size),
95        }
96    }
97
98    /// Create with custom region sizes
99    pub fn with_sizes(wal_size: u64, manifest_size: u64) -> Self {
100        let wal_offset = HEADER_SIZE as u64;
101        let manifest_offset = wal_offset + wal_size;
102        let sst_offset = manifest_offset + manifest_size;
103
104        Self {
105            magic: DB_MAGIC,
106            version: DB_VERSION,
107            wal_region: Region::new(wal_offset, wal_size),
108            manifest_region: Region::new(manifest_offset, manifest_size),
109            sst_region: Region::new(sst_offset, 0),
110        }
111    }
112
113    /// Serialize header to bytes
114    pub fn encode(&self) -> Bytes {
115        let mut buf = BytesMut::with_capacity(HEADER_SIZE);
116
117        // Magic and version
118        buf.put_u32(self.magic); // big-endian for magic
119        buf.put_u32_le(self.version);
120
121        // WAL region
122        buf.put_u64_le(self.wal_region.offset);
123        buf.put_u64_le(self.wal_region.size);
124
125        // Manifest region
126        buf.put_u64_le(self.manifest_region.offset);
127        buf.put_u64_le(self.manifest_region.size);
128
129        // SST region
130        buf.put_u64_le(self.sst_region.offset);
131        buf.put_u64_le(self.sst_region.size);
132
133        // Reserved space (zero-filled)
134        let used = 4 + 4 + 8 + 8 + 8 + 8 + 8 + 8; // 56 bytes
135        let reserved = HEADER_SIZE - used - 4; // -4 for CRC at end
136        buf.put_bytes(0, reserved);
137
138        // CRC32C of header (excluding CRC field itself)
139        let crc = crate::types::checksum::compute(&buf);
140        buf.put_u32_le(crc);
141
142        buf.freeze()
143    }
144
145    /// Deserialize header from bytes
146    pub fn decode(data: &[u8]) -> Result<Self> {
147        if data.len() < HEADER_SIZE {
148            return Err(Error::Corruption("Header too short".to_string()));
149        }
150
151        // Verify magic
152        let magic = u32::from_be_bytes([data[0], data[1], data[2], data[3]]);
153        if magic != DB_MAGIC {
154            return Err(Error::Corruption(format!("Invalid magic: 0x{:08X}", magic)));
155        }
156
157        // Verify CRC
158        let expected_crc = u32::from_le_bytes([
159            data[HEADER_SIZE - 4],
160            data[HEADER_SIZE - 3],
161            data[HEADER_SIZE - 2],
162            data[HEADER_SIZE - 1],
163        ]);
164
165        let actual_crc = crate::types::checksum::compute(&data[..HEADER_SIZE - 4]);
166        if expected_crc != actual_crc {
167            return Err(Error::ChecksumMismatch);
168        }
169
170        // Parse fields
171        let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
172
173        let wal_offset = u64::from_le_bytes([
174            data[8], data[9], data[10], data[11],
175            data[12], data[13], data[14], data[15],
176        ]);
177        let wal_size = u64::from_le_bytes([
178            data[16], data[17], data[18], data[19],
179            data[20], data[21], data[22], data[23],
180        ]);
181
182        let manifest_offset = u64::from_le_bytes([
183            data[24], data[25], data[26], data[27],
184            data[28], data[29], data[30], data[31],
185        ]);
186        let manifest_size = u64::from_le_bytes([
187            data[32], data[33], data[34], data[35],
188            data[36], data[37], data[38], data[39],
189        ]);
190
191        let sst_offset = u64::from_le_bytes([
192            data[40], data[41], data[42], data[43],
193            data[44], data[45], data[46], data[47],
194        ]);
195        let sst_size = u64::from_le_bytes([
196            data[48], data[49], data[50], data[51],
197            data[52], data[53], data[54], data[55],
198        ]);
199
200        Ok(Self {
201            magic,
202            version,
203            wal_region: Region::new(wal_offset, wal_size),
204            manifest_region: Region::new(manifest_offset, manifest_size),
205            sst_region: Region::new(sst_offset, sst_size),
206        })
207    }
208}
209
210impl Default for FileHeader {
211    fn default() -> Self {
212        Self::new()
213    }
214}
215
#[cfg(test)]
mod tests {
    use super::*;

    const MIB: u64 = 1024 * 1024;

    /// A default header survives an encode/decode round trip.
    #[test]
    fn test_header_encode_decode() {
        let header = FileHeader::new();
        let encoded = header.encode();

        assert_eq!(encoded.len(), HEADER_SIZE);

        let decoded = FileHeader::decode(&encoded).unwrap();
        assert_eq!(decoded.magic, DB_MAGIC);
        assert_eq!(decoded.version, DB_VERSION);
        assert_eq!(decoded.wal_region, header.wal_region);
        assert_eq!(decoded.manifest_region, header.manifest_region);
        assert_eq!(decoded.sst_region, header.sst_region);
    }

    /// Custom sizes lay the regions out back to back after the header.
    #[test]
    fn test_header_with_custom_sizes() {
        let header = FileHeader::with_sizes(128 * MIB, 8 * MIB);

        assert_eq!(header.wal_region.offset, HEADER_SIZE as u64);
        assert_eq!(header.wal_region.size, 128 * MIB);

        assert_eq!(header.manifest_region.offset, HEADER_SIZE as u64 + 128 * MIB);
        assert_eq!(header.manifest_region.size, 8 * MIB);

        let expected_sst_offset = HEADER_SIZE as u64 + 128 * MIB + 8 * MIB;
        assert_eq!(header.sst_region.offset, expected_sst_offset);
        assert_eq!(header.sst_region.size, 0);
    }

    /// Non-default sizes also survive an encode/decode round trip.
    #[test]
    fn test_custom_sizes_round_trip() {
        let header = FileHeader::with_sizes(128 * MIB, 8 * MIB);
        let decoded = FileHeader::decode(&header.encode()).unwrap();

        assert_eq!(decoded.wal_region, header.wal_region);
        assert_eq!(decoded.manifest_region, header.manifest_region);
        assert_eq!(decoded.sst_region, header.sst_region);
    }

    /// `contains` treats the region as the half-open range [offset, end).
    #[test]
    fn test_region_contains() {
        let region = Region::new(1000, 500);

        assert!(!region.contains(999));
        assert!(region.contains(1000));
        assert!(region.contains(1250));
        assert!(region.contains(1499));
        assert!(!region.contains(1500));
    }

    /// A buffer with the wrong magic is rejected as corruption.
    #[test]
    fn test_invalid_magic() {
        let mut data = vec![0u8; HEADER_SIZE];
        data[0..4].copy_from_slice(&0x12345678u32.to_be_bytes());

        let result = FileHeader::decode(&data);
        assert!(matches!(result, Err(Error::Corruption(_))));
    }

    /// Input shorter than one header block is rejected as corruption.
    #[test]
    fn test_header_too_short() {
        let encoded = FileHeader::new().encode();

        let result = FileHeader::decode(&encoded[..HEADER_SIZE - 1]);
        assert!(matches!(result, Err(Error::Corruption(_))));

        let result = FileHeader::decode(&[]);
        assert!(matches!(result, Err(Error::Corruption(_))));
    }

    /// Damaging the stored checksum is detected.
    #[test]
    fn test_corrupted_checksum() {
        let header = FileHeader::new();
        let mut encoded = header.encode().to_vec();

        // Corrupt the CRC
        encoded[HEADER_SIZE - 1] ^= 0xFF;

        let result = FileHeader::decode(&encoded);
        assert!(matches!(result, Err(Error::ChecksumMismatch)));
    }

    /// Damaging a covered field (not the checksum itself) is detected.
    #[test]
    fn test_corrupted_payload() {
        let mut encoded = FileHeader::new().encode().to_vec();

        // Byte 8 is the first byte of the WAL offset field.
        encoded[8] ^= 0xFF;

        let result = FileHeader::decode(&encoded);
        assert!(matches!(result, Err(Error::ChecksumMismatch)));
    }
}