Skip to main content

oximedia_cache/
cache_serialization.rs

//! Cache state serialization and restoration.
//!
//! Provides a binary serializer / deserializer so a cache can be persisted
//! to disk on shutdown and quickly restored on startup — avoiding a cold-start
//! penalty where all traffic misses until the cache is re-warmed organically.
//!
//! # Wire format
//!
//! The on-disk format is a simple length-prefixed binary encoding; no external
//! serialization dependency is needed.
//!
//! ```text
//! ┌─────────────────────────────────────────────────────┐
//! │ MAGIC   8 bytes   "OXICACHE"                        │
//! │ VERSION 2 bytes   u16 LE = 1                        │
//! │ FLAGS   2 bytes   u16 LE (reserved, must be 0)      │
//! │ N       4 bytes   u32 LE number of entries          │
//! │ ── per entry ──────────────────────────────────────  │
//! │   key_len   4 bytes  u32 LE                         │
//! │   key       key_len bytes UTF-8                     │
//! │   val_len   4 bytes  u32 LE                         │
//! │   value     val_len bytes raw bytes                 │
//! │   priority  4 bytes  u32 LE                         │
//! │   ttl_secs  8 bytes  u64 LE (0 = no TTL)           │
//! └─────────────────────────────────────────────────────┘
//! ```
//!
//! The format is intentionally simple and forward-compatible: writers always
//! set FLAGS to 0, while readers ignore unknown flag bits and any trailing
//! data that follows the last entry.
30
31use std::io::{self, Read, Write};
32use thiserror::Error;
33
34// ── Magic + version ───────────────────────────────────────────────────────────
35
/// Eight-byte signature that opens every OXICACHE stream.
const MAGIC: &[u8; 8] = b"OXICACHE";
/// Revision number of the wire format stored in the stream header.
const FORMAT_VERSION: u16 = 1;
38
39// ── Errors ────────────────────────────────────────────────────────────────────
40
41/// Errors returned by serialization / deserialization functions.
42#[derive(Debug, Error)]
43pub enum SerializeError {
44    /// An I/O error occurred while reading or writing.
45    #[error("I/O error: {0}")]
46    Io(#[from] io::Error),
47
48    /// The magic header did not match the expected value.
49    #[error("invalid magic header: expected 'OXICACHE', got {0:?}")]
50    InvalidMagic([u8; 8]),
51
52    /// The format version is not supported.
53    #[error("unsupported format version {0}; expected {FORMAT_VERSION}")]
54    UnsupportedVersion(u16),
55
56    /// A key could not be decoded as valid UTF-8.
57    #[error("key at entry {0} is not valid UTF-8: {1}")]
58    InvalidKeyUtf8(usize, std::string::FromUtf8Error),
59
60    /// An entry's data length field exceeds the configured safety limit.
61    #[error("entry {index} value length {actual} exceeds safety limit {limit}")]
62    ValueTooLarge {
63        /// Entry index in the stream.
64        index: usize,
65        /// Value length reported in the stream.
66        actual: u32,
67        /// Configured safety limit.
68        limit: u32,
69    },
70}
71
72// ── CacheRecord ───────────────────────────────────────────────────────────────
73
/// One cache entry together with its retention metadata.
///
/// Records are the unit of serialization; a cache snapshot is simply a
/// sequence of them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheRecord {
    /// Key under which the entry is stored.
    pub key: String,
    /// Opaque payload bytes.
    pub value: Vec<u8>,
    /// Time-to-live in seconds; `0` marks an entry without a TTL.
    pub ttl_secs: u64,
    /// Priority tag; larger values mark entries that matter more on restore.
    pub priority: u32,
}

impl CacheRecord {
    /// Build a record from `key` and `value` with no TTL (`ttl_secs == 0`)
    /// and priority `0`.
    pub fn new(key: impl Into<String>, value: Vec<u8>) -> Self {
        let key = key.into();
        Self {
            key,
            value,
            ttl_secs: 0,
            priority: 0,
        }
    }

    /// Return the record with its TTL replaced by `ttl_secs` (in seconds);
    /// `0` means no TTL.
    pub fn with_ttl(self, ttl_secs: u64) -> Self {
        Self { ttl_secs, ..self }
    }

    /// Return the record with its priority tag replaced by `priority`.
    pub fn with_priority(self, priority: u32) -> Self {
        Self { priority, ..self }
    }
}
114
115// ── Serializer ────────────────────────────────────────────────────────────────
116
117/// Write a collection of [`CacheRecord`]s to `writer` in the OXICACHE binary
118/// format.
119///
120/// # Errors
121///
122/// Returns [`SerializeError::Io`] on any I/O failure.
123pub fn serialize<W: Write>(writer: &mut W, records: &[CacheRecord]) -> Result<(), SerializeError> {
124    // Magic + version + flags.
125    writer.write_all(MAGIC)?;
126    writer.write_all(&FORMAT_VERSION.to_le_bytes())?;
127    let flags: u16 = 0;
128    writer.write_all(&flags.to_le_bytes())?;
129
130    // Entry count.
131    let n = records.len() as u32;
132    writer.write_all(&n.to_le_bytes())?;
133
134    for rec in records {
135        let key_bytes = rec.key.as_bytes();
136        writer.write_all(&(key_bytes.len() as u32).to_le_bytes())?;
137        writer.write_all(key_bytes)?;
138
139        writer.write_all(&(rec.value.len() as u32).to_le_bytes())?;
140        writer.write_all(&rec.value)?;
141
142        writer.write_all(&rec.priority.to_le_bytes())?;
143        writer.write_all(&rec.ttl_secs.to_le_bytes())?;
144    }
145
146    Ok(())
147}
148
149// ── Deserializer ──────────────────────────────────────────────────────────────
150
/// Safety limits applied while reading a snapshot.
#[derive(Debug, Clone)]
pub struct DeserializeConfig {
    /// Largest value size, in bytes, that will be accepted.  A record that
    /// declares a bigger value is rejected with
    /// [`SerializeError::ValueTooLarge`].
    ///
    /// Default: 512 MiB.
    pub max_value_bytes: u32,
    /// Upper bound on the number of records returned.  Records past this
    /// count are dropped without an error.
    ///
    /// Default: `u32::MAX` (no limit).
    pub max_records: u32,
}

impl Default for DeserializeConfig {
    /// 512 MiB per value and no cap on the number of records.
    fn default() -> Self {
        const MIB: u32 = 1024 * 1024;
        DeserializeConfig {
            max_records: u32::MAX,
            max_value_bytes: 512 * MIB,
        }
    }
}
174
175/// Read [`CacheRecord`]s from `reader` using the default [`DeserializeConfig`].
176///
177/// See [`deserialize_with_config`] for a version with explicit limits.
178pub fn deserialize<R: Read>(reader: &mut R) -> Result<Vec<CacheRecord>, SerializeError> {
179    deserialize_with_config(reader, &DeserializeConfig::default())
180}
181
182/// Read [`CacheRecord`]s from `reader` with an explicit [`DeserializeConfig`].
183///
184/// # Errors
185///
186/// * [`SerializeError::InvalidMagic`] — magic header mismatch.
187/// * [`SerializeError::UnsupportedVersion`] — version field is not 1.
188/// * [`SerializeError::InvalidKeyUtf8`] — key bytes are not valid UTF-8.
189/// * [`SerializeError::ValueTooLarge`] — value exceeds `config.max_value_bytes`.
190/// * [`SerializeError::Io`] — any underlying I/O failure.
191pub fn deserialize_with_config<R: Read>(
192    reader: &mut R,
193    config: &DeserializeConfig,
194) -> Result<Vec<CacheRecord>, SerializeError> {
195    // Magic.
196    let mut magic = [0u8; 8];
197    reader.read_exact(&mut magic)?;
198    if &magic != MAGIC {
199        return Err(SerializeError::InvalidMagic(magic));
200    }
201
202    // Version.
203    let mut ver_buf = [0u8; 2];
204    reader.read_exact(&mut ver_buf)?;
205    let version = u16::from_le_bytes(ver_buf);
206    if version != FORMAT_VERSION {
207        return Err(SerializeError::UnsupportedVersion(version));
208    }
209
210    // Flags (ignored for now, reserved for future use).
211    let mut flags_buf = [0u8; 2];
212    reader.read_exact(&mut flags_buf)?;
213    // Flags consumed but not acted upon.
214
215    // Entry count.
216    let mut n_buf = [0u8; 4];
217    reader.read_exact(&mut n_buf)?;
218    let n = u32::from_le_bytes(n_buf);
219
220    let to_read = n.min(config.max_records);
221    let mut records = Vec::with_capacity(to_read as usize);
222
223    for idx in 0..n as usize {
224        // key_len + key
225        let mut klen_buf = [0u8; 4];
226        reader.read_exact(&mut klen_buf)?;
227        let key_len = u32::from_le_bytes(klen_buf) as usize;
228        let mut key_bytes = vec![0u8; key_len];
229        reader.read_exact(&mut key_bytes)?;
230        let key =
231            String::from_utf8(key_bytes).map_err(|e| SerializeError::InvalidKeyUtf8(idx, e))?;
232
233        // val_len + value
234        let mut vlen_buf = [0u8; 4];
235        reader.read_exact(&mut vlen_buf)?;
236        let val_len = u32::from_le_bytes(vlen_buf);
237        if val_len > config.max_value_bytes {
238            return Err(SerializeError::ValueTooLarge {
239                index: idx,
240                actual: val_len,
241                limit: config.max_value_bytes,
242            });
243        }
244        let mut value = vec![0u8; val_len as usize];
245        reader.read_exact(&mut value)?;
246
247        // priority
248        let mut prio_buf = [0u8; 4];
249        reader.read_exact(&mut prio_buf)?;
250        let priority = u32::from_le_bytes(prio_buf);
251
252        // ttl_secs
253        let mut ttl_buf = [0u8; 8];
254        reader.read_exact(&mut ttl_buf)?;
255        let ttl_secs = u64::from_le_bytes(ttl_buf);
256
257        if (idx as u32) < config.max_records {
258            records.push(CacheRecord {
259                key,
260                value,
261                ttl_secs,
262                priority,
263            });
264        }
265        // Records beyond max_records: we already read the bytes above; they
266        // are discarded here.
267    }
268
269    Ok(records)
270}
271
272// ── Convenience: file-based helpers ───────────────────────────────────────────
273
274/// Persist `records` to the file at `path`, creating or truncating it.
275///
276/// Equivalent to opening the file and calling [`serialize`].
277pub fn save_to_file(path: &std::path::Path, records: &[CacheRecord]) -> Result<(), SerializeError> {
278    let mut file = std::fs::File::create(path)?;
279    serialize(&mut file, records)
280}
281
282/// Restore records from the file at `path`.
283///
284/// Equivalent to opening the file and calling [`deserialize`].
285pub fn load_from_file(path: &std::path::Path) -> Result<Vec<CacheRecord>, SerializeError> {
286    let mut file = std::fs::File::open(path)?;
287    deserialize(&mut file)
288}
289
290/// Restore records from the file at `path` with explicit limits.
291pub fn load_from_file_with_config(
292    path: &std::path::Path,
293    config: &DeserializeConfig,
294) -> Result<Vec<CacheRecord>, SerializeError> {
295    let mut file = std::fs::File::open(path)?;
296    deserialize_with_config(&mut file, config)
297}
298
299// ── Tests ─────────────────────────────────────────────────────────────────────
300
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Serialize `records` into memory and decode them again with the
    /// default configuration.
    fn roundtrip(records: &[CacheRecord]) -> Vec<CacheRecord> {
        let mut buf = Vec::new();
        serialize(&mut buf, records).expect("serialize should succeed");
        let mut cursor = Cursor::new(&buf);
        deserialize(&mut cursor).expect("deserialize should succeed")
    }

    /// Build a valid header (magic, version, zero flags) announcing `n` records.
    fn header(n: u32) -> Vec<u8> {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&FORMAT_VERSION.to_le_bytes());
        buf.extend_from_slice(&0u16.to_le_bytes());
        buf.extend_from_slice(&n.to_le_bytes());
        buf
    }

    // 1. Empty snapshot round-trips cleanly
    #[test]
    fn test_empty_roundtrip() {
        let records: Vec<CacheRecord> = Vec::new();
        let restored = roundtrip(&records);
        assert!(restored.is_empty());
    }

    // 2. Single record round-trips correctly
    #[test]
    fn test_single_record_roundtrip() {
        let records = vec![CacheRecord::new("key-001", b"hello world".to_vec())];
        let restored = roundtrip(&records);
        assert_eq!(restored.len(), 1);
        assert_eq!(restored[0].key, "key-001");
        assert_eq!(restored[0].value, b"hello world");
        assert_eq!(restored[0].ttl_secs, 0);
        assert_eq!(restored[0].priority, 0);
    }

    // 3. Multiple records preserve order and content
    #[test]
    fn test_multiple_records_roundtrip() {
        let records: Vec<CacheRecord> = (0..20u32)
            .map(|i| {
                CacheRecord::new(format!("seg-{i:04}"), vec![i as u8; 128])
                    .with_ttl(300)
                    .with_priority(i % 5)
            })
            .collect();
        let restored = roundtrip(&records);
        assert_eq!(restored.len(), records.len());
        for (orig, rest) in records.iter().zip(restored.iter()) {
            assert_eq!(orig, rest);
        }
    }

    // 4. TTL and priority survive round-trip
    #[test]
    fn test_ttl_and_priority_roundtrip() {
        let rec = CacheRecord::new("manifest.m3u8", b"#EXTM3U".to_vec())
            .with_ttl(30)
            .with_priority(10);
        let restored = roundtrip(std::slice::from_ref(&rec));
        assert_eq!(restored[0].ttl_secs, 30);
        assert_eq!(restored[0].priority, 10);
    }

    // 5. Binary values (non-UTF8 payload) round-trip correctly
    #[test]
    fn test_binary_value_roundtrip() {
        let value: Vec<u8> = (0u8..=255).collect();
        let records = vec![CacheRecord::new("binary", value.clone())];
        let restored = roundtrip(&records);
        assert_eq!(restored[0].value, value);
    }

    // 6. Unicode key round-trips correctly
    #[test]
    fn test_unicode_key_roundtrip() {
        let records = vec![CacheRecord::new("媒体-segment-001", vec![1, 2, 3])];
        let restored = roundtrip(&records);
        assert_eq!(restored[0].key, "媒体-segment-001");
    }

    // 7. Invalid magic header returns error
    #[test]
    fn test_invalid_magic() {
        let garbage = b"GARBAGE_HEADER_DATA";
        let mut cursor = Cursor::new(garbage);
        let result = deserialize(&mut cursor);
        assert!(
            matches!(result, Err(SerializeError::InvalidMagic(_))),
            "expected InvalidMagic, got {result:?}"
        );
    }

    // 8. Wrong version returns error
    #[test]
    fn test_wrong_version() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&9999u16.to_le_bytes()); // bad version
        buf.extend_from_slice(&0u16.to_le_bytes()); // flags
        buf.extend_from_slice(&0u32.to_le_bytes()); // 0 records
        let mut cursor = Cursor::new(&buf);
        let result = deserialize(&mut cursor);
        assert!(
            matches!(result, Err(SerializeError::UnsupportedVersion(9999))),
            "expected UnsupportedVersion"
        );
    }

    // 9. max_records limit in DeserializeConfig
    #[test]
    fn test_max_records_limit() {
        let records: Vec<CacheRecord> = (0..10u32)
            .map(|i| CacheRecord::new(format!("k{i}"), vec![i as u8]))
            .collect();
        let mut buf = Vec::new();
        serialize(&mut buf, &records).expect("ok");
        let config = DeserializeConfig {
            max_records: 3,
            ..Default::default()
        };
        let mut cursor = Cursor::new(&buf);
        let restored = deserialize_with_config(&mut cursor, &config).expect("ok");
        assert_eq!(restored.len(), 3, "only 3 records should be restored");
        // The records kept are the first ones in stream order.
        assert_eq!(restored[0].key, "k0");
        assert_eq!(restored[2].key, "k2");
    }

    // 10. max_value_bytes limit rejects oversized records
    #[test]
    fn test_max_value_bytes_rejected() {
        let records = vec![CacheRecord::new("big", vec![0u8; 1024])];
        let mut buf = Vec::new();
        serialize(&mut buf, &records).expect("ok");
        let config = DeserializeConfig {
            max_value_bytes: 128, // smaller than 1024
            ..Default::default()
        };
        let mut cursor = Cursor::new(&buf);
        let result = deserialize_with_config(&mut cursor, &config);
        assert!(
            matches!(result, Err(SerializeError::ValueTooLarge { .. })),
            "expected ValueTooLarge"
        );
    }

    // 11. File-based save/load round-trip
    #[test]
    fn test_file_save_load_roundtrip() {
        // Include the process id so concurrent test runs sharing the same
        // temp directory do not overwrite each other's file.
        let file_name = format!(
            "oximedia_cache_test_serialization_{}.bin",
            std::process::id()
        );
        let path = std::env::temp_dir().join(file_name);
        let records = vec![
            CacheRecord::new("segment-1", b"data1".to_vec()).with_ttl(60),
            CacheRecord::new("segment-2", b"data2".to_vec()).with_priority(5),
        ];
        let outcome = save_to_file(&path, &records).and_then(|()| load_from_file(&path));
        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&path);
        let restored = outcome.expect("save and load should succeed");
        assert_eq!(restored.len(), 2);
        assert_eq!(restored[0].key, "segment-1");
        assert_eq!(restored[1].priority, 5);
    }

    // 12. Empty key is valid
    #[test]
    fn test_empty_key_roundtrip() {
        let records = vec![CacheRecord::new("", b"value".to_vec())];
        let restored = roundtrip(&records);
        assert_eq!(restored[0].key, "");
    }

    // 13. Empty value is valid
    #[test]
    fn test_empty_value_roundtrip() {
        let records = vec![CacheRecord::new("empty-val", Vec::new())];
        let restored = roundtrip(&records);
        assert!(restored[0].value.is_empty());
    }

    // 14. Serialized bytes start with magic
    #[test]
    fn test_serialized_magic_prefix() {
        let mut buf = Vec::new();
        serialize(&mut buf, &[]).expect("ok");
        assert_eq!(&buf[..8], MAGIC);
    }

    // 15. CacheRecord builder API
    #[test]
    fn test_cache_record_builder() {
        let rec = CacheRecord::new("k", vec![1, 2])
            .with_ttl(120)
            .with_priority(7);
        assert_eq!(rec.key, "k");
        assert_eq!(rec.ttl_secs, 120);
        assert_eq!(rec.priority, 7);
    }

    // 16. Non-UTF-8 key bytes are reported with the entry index
    #[test]
    fn test_invalid_key_utf8() {
        let mut buf = header(1);
        buf.extend_from_slice(&2u32.to_le_bytes()); // key_len
        buf.extend_from_slice(&[0xFF, 0xFE]); // not valid UTF-8
        let mut cursor = Cursor::new(&buf);
        let result = deserialize(&mut cursor);
        assert!(
            matches!(result, Err(SerializeError::InvalidKeyUtf8(0, _))),
            "expected InvalidKeyUtf8 for entry 0, got {result:?}"
        );
    }

    // 17. A stream that ends before the announced records is an I/O error
    #[test]
    fn test_truncated_stream_is_io_error() {
        let buf = header(1); // announces one record but carries none
        let mut cursor = Cursor::new(&buf);
        let result = deserialize(&mut cursor);
        assert!(
            matches!(
                &result,
                Err(SerializeError::Io(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof
            ),
            "expected UnexpectedEof, got {result:?}"
        );
    }
}
493}