Skip to main content

claw_core/
cof.rs

1use crate::error::CoreError;
2use crate::object::TypeTag;
3
/// Magic prefix that opens every encoded object: the ASCII bytes `CLW1`.
const MAGIC: &[u8; 4] = b"CLW1";
/// Current Claw Object Format version written by this crate.
pub const COF_VERSION: u8 = 0x01;
/// Oldest Claw Object Format version this crate may read or migrate.
pub const MIN_READABLE_COF_VERSION: u8 = 0x01;
/// Mask of the header flag bits the decoder accepts: bit 0 (compressed) and
/// bit 1 (encrypted). `cof_decode` rejects a header with any other bit set.
const KNOWN_FLAG_BITS: u8 = 0x03;
10
/// Read/write compatibility class for a COF version byte.
///
/// Produced by [`classify_cof_version`], which compares the byte against
/// [`COF_VERSION`] and [`MIN_READABLE_COF_VERSION`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CofVersionSupport {
    /// The version equals [`COF_VERSION`]: the crate's native read/write format.
    Native,
    /// The version is older than [`COF_VERSION`] but not older than
    /// [`MIN_READABLE_COF_VERSION`], i.e. within the migration window.
    ReadViaMigration,
    /// The version is greater than [`COF_VERSION`]: newer than this crate understands.
    UnsupportedFuture,
    /// The version is below [`MIN_READABLE_COF_VERSION`], the supported migration floor.
    UnsupportedPast,
}
23
24/// Planned handling for an observed COF version.
25#[derive(Debug, Clone, Copy, PartialEq, Eq)]
26pub struct CofMigrationPlan {
27    source_version: u8,
28    target_version: u8,
29    support: CofVersionSupport,
30}
31
32impl CofMigrationPlan {
33    /// Version byte found in the encoded object.
34    pub fn source_version(self) -> u8 {
35        self.source_version
36    }
37
38    /// Version byte this crate writes by default.
39    pub fn target_version(self) -> u8 {
40        self.target_version
41    }
42
43    /// Compatibility class for the source version.
44    pub fn support(self) -> CofVersionSupport {
45        self.support
46    }
47
48    /// Return true when the object can be read by this crate.
49    pub fn can_read(self) -> bool {
50        matches!(
51            self.support,
52            CofVersionSupport::Native | CofVersionSupport::ReadViaMigration
53        )
54    }
55
56    /// Return true when writing this source version is supported.
57    pub fn can_write_source_version(self) -> bool {
58        matches!(self.support, CofVersionSupport::Native)
59    }
60
61    /// Return true when decoding requires an explicit migration step.
62    pub fn requires_migration(self) -> bool {
63        matches!(self.support, CofVersionSupport::ReadViaMigration)
64    }
65}
66
67/// Classify a COF version byte for migration and compatibility checks.
68pub fn classify_cof_version(version: u8) -> CofVersionSupport {
69    if version == COF_VERSION {
70        CofVersionSupport::Native
71    } else if version > COF_VERSION {
72        CofVersionSupport::UnsupportedFuture
73    } else if version >= MIN_READABLE_COF_VERSION {
74        CofVersionSupport::ReadViaMigration
75    } else {
76        CofVersionSupport::UnsupportedPast
77    }
78}
79
80/// Return the migration plan for an observed COF version.
81///
82/// v0.1 writes only the native COF version. The plan API is intentionally
83/// present before v2 exists so future readers can add old-version migrators
84/// without changing the public compatibility contract.
85pub fn cof_migration_plan(version: u8) -> CofMigrationPlan {
86    CofMigrationPlan {
87        source_version: version,
88        target_version: COF_VERSION,
89        support: classify_cof_version(version),
90    }
91}
92
93/// Return the COF version byte after validating the magic prefix.
94pub fn cof_version(data: &[u8]) -> Result<u8, CoreError> {
95    if data.len() < 5 {
96        return Err(CoreError::Deserialization(
97            "data too short for COF version".into(),
98        ));
99    }
100    if &data[..4] != MAGIC {
101        return Err(CoreError::InvalidMagic);
102    }
103    Ok(data[4])
104}
105
/// Compression marker byte carried in the COF header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum Compression {
    /// The payload is stored as-is.
    None = 0x00,
    /// The payload is a zstd-compressed stream.
    Zstd = 0x01,
}

impl Compression {
    /// Look up the marker whose discriminant equals `v`, if there is one.
    fn from_u8(v: u8) -> Option<Self> {
        [Self::None, Self::Zstd]
            .iter()
            .copied()
            .find(|marker| *marker as u8 == v)
    }
}
125
/// Flag byte of a COF header, wrapped so the individual bits can be queried.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CofFlags(u8);

impl CofFlags {
    /// Bit 0: the payload is compressed.
    const COMPRESSED_BIT: u8 = 0x01;
    /// Bit 1: the payload is encrypted.
    const ENCRYPTED_BIT: u8 = 0x02;

    /// Assemble the flag byte from the compression and encryption booleans.
    pub fn new(compressed: bool, encrypted: bool) -> Self {
        let compressed_bit = if compressed { Self::COMPRESSED_BIT } else { 0 };
        let encrypted_bit = if encrypted { Self::ENCRYPTED_BIT } else { 0 };
        Self(compressed_bit | encrypted_bit)
    }

    /// Raw flag byte exactly as it is written into the header.
    pub fn bits(&self) -> u8 {
        let Self(raw) = *self;
        raw
    }

    /// Whether bit 0 (compressed) is set.
    pub fn is_compressed(&self) -> bool {
        (self.0 & Self::COMPRESSED_BIT) == Self::COMPRESSED_BIT
    }

    /// Whether bit 1 (encrypted) is set.
    pub fn is_encrypted(&self) -> bool {
        (self.0 & Self::ENCRYPTED_BIT) == Self::ENCRYPTED_BIT
    }
}
158
/// Append `value` to `buf` as a little-endian base-128 varint: seven payload
/// bits per byte, with the high bit set on every byte except the last.
fn encode_uvarint(mut value: u64, buf: &mut Vec<u8>) {
    // Every group that still has higher bits after it carries the continuation bit.
    while value >= 0x80 {
        buf.push((value as u8 & 0x7F) | 0x80);
        value >>= 7;
    }
    // Final group: fits in seven bits, so the continuation bit stays clear.
    buf.push(value as u8);
}
172
173fn decode_uvarint(data: &[u8], pos: &mut usize) -> Result<u64, CoreError> {
174    let mut result: u64 = 0;
175    let mut shift = 0u32;
176    loop {
177        if *pos >= data.len() {
178            return Err(CoreError::Deserialization(
179                "unexpected end of uvarint".into(),
180            ));
181        }
182        let byte = data[*pos];
183        *pos += 1;
184        result |= ((byte & 0x7F) as u64) << shift;
185        if byte & 0x80 == 0 {
186            break;
187        }
188        shift += 7;
189        if shift >= 64 {
190            return Err(CoreError::Deserialization("uvarint overflow".into()));
191        }
192    }
193    Ok(result)
194}
195
196/// Encode a payload into COF v1 format.
197/// Format: [4B magic][1B version][1B type_tag][1B flags][1B compression][uvarint uncompressed_len][payload][4B CRC32]
198pub fn cof_encode(type_tag: TypeTag, payload: &[u8]) -> Result<Vec<u8>, CoreError> {
199    let compression = if payload.len() > 64 {
200        Compression::Zstd
201    } else {
202        Compression::None
203    };
204
205    let compressed = match compression {
206        Compression::None => payload.to_vec(),
207        Compression::Zstd => {
208            zstd::encode_all(payload, 3).map_err(|e| CoreError::Compression(e.to_string()))?
209        }
210    };
211
212    let flags = CofFlags::new(
213        compression != Compression::None,
214        false, // encryption flag set at higher layer
215    );
216
217    let mut buf = Vec::with_capacity(4 + 4 + compressed.len() + 10 + 4);
218
219    // Header
220    buf.extend_from_slice(MAGIC);
221    buf.push(COF_VERSION);
222    buf.push(type_tag as u8);
223    buf.push(flags.bits());
224    buf.push(compression as u8);
225
226    // Uncompressed length
227    encode_uvarint(payload.len() as u64, &mut buf);
228
229    // Payload
230    buf.extend_from_slice(&compressed);
231
232    // CRC32 of uncompressed payload (little endian) per spec
233    let crc = crc32fast::hash(payload);
234    buf.extend_from_slice(&crc.to_le_bytes());
235
236    Ok(buf)
237}
238
239/// Decode COF v1 format, returning (TypeTag, decompressed payload).
240pub fn cof_decode(data: &[u8]) -> Result<(TypeTag, Vec<u8>), CoreError> {
241    if data.len() < 12 {
242        return Err(CoreError::Deserialization("data too short for COF".into()));
243    }
244
245    // Check magic
246    if &data[..4] != MAGIC {
247        return Err(CoreError::InvalidMagic);
248    }
249
250    // Version
251    let version = data[4];
252    if !matches!(classify_cof_version(version), CofVersionSupport::Native) {
253        return Err(CoreError::UnsupportedVersion(version));
254    }
255
256    // Type tag
257    let type_tag = TypeTag::from_u8(data[5]).ok_or(CoreError::UnknownTypeTag(data[5]))?;
258
259    let flags = data[6];
260    if flags & !KNOWN_FLAG_BITS != 0 {
261        return Err(CoreError::Deserialization(format!(
262            "unknown COF flags: 0x{flags:02x}"
263        )));
264    }
265
266    // Compression
267    let compression = Compression::from_u8(data[7])
268        .ok_or_else(|| CoreError::Deserialization(format!("unknown compression: {}", data[7])))?;
269
270    // Uncompressed length
271    let mut pos = 8;
272    let uncompressed_len = decode_uvarint(data, &mut pos)? as usize;
273
274    // CRC32 check: last 4 bytes
275    if data.len() < pos + 4 {
276        return Err(CoreError::Deserialization(
277            "data too short for CRC32".into(),
278        ));
279    }
280    let crc_offset = data.len() - 4;
281    let expected_crc = u32::from_le_bytes([
282        data[crc_offset],
283        data[crc_offset + 1],
284        data[crc_offset + 2],
285        data[crc_offset + 3],
286    ]);
287
288    // Compressed payload
289    let compressed = &data[pos..crc_offset];
290
291    // Decompress
292    let payload = match compression {
293        Compression::None => compressed.to_vec(),
294        Compression::Zstd => {
295            zstd::decode_all(compressed).map_err(|e| CoreError::Decompression(e.to_string()))?
296        }
297    };
298
299    if payload.len() != uncompressed_len {
300        return Err(CoreError::Deserialization(format!(
301            "COF length mismatch: header says {uncompressed_len}, decoded {}",
302            payload.len()
303        )));
304    };
305
306    // CRC32 of uncompressed payload per spec
307    let actual_crc = crc32fast::hash(&payload);
308    if expected_crc != actual_crc {
309        return Err(CoreError::Crc32Mismatch {
310            expected: expected_crc,
311            actual: actual_crc,
312        });
313    }
314
315    Ok((type_tag, payload))
316}
317
318/// Decode COF bytes and return the compatibility plan used for decoding.
319///
320/// Native v1 objects decode directly. Older readable versions will use this
321/// entry point when a migrator is added; until then no older version exists in
322/// the supported range. Future and unsupported-past versions fail closed.
323pub fn cof_decode_with_migration(
324    data: &[u8],
325) -> Result<(TypeTag, Vec<u8>, CofMigrationPlan), CoreError> {
326    let version = cof_version(data)?;
327    let plan = cof_migration_plan(version);
328    match plan.support {
329        CofVersionSupport::Native => {
330            let (type_tag, payload) = cof_decode(data)?;
331            Ok((type_tag, payload, plan))
332        }
333        CofVersionSupport::ReadViaMigration => Err(CoreError::UnsupportedVersion(version)),
334        CofVersionSupport::UnsupportedFuture | CofVersionSupport::UnsupportedPast => {
335            Err(CoreError::UnsupportedVersion(version))
336        }
337    }
338}
339
340/// Peek at the type tag from COF-encoded data without fully decoding.
341///
342/// This is useful when the raw COF bytes will be forwarded over the wire
343/// (e.g., pack uploads) and only the type tag is needed for metadata.
344pub fn cof_peek_type_tag(data: &[u8]) -> Result<TypeTag, CoreError> {
345    if data.len() < 8 {
346        return Err(CoreError::Deserialization(
347            "data too short for COF header".into(),
348        ));
349    }
350    if &data[..4] != MAGIC {
351        return Err(CoreError::InvalidMagic);
352    }
353    let version = data[4];
354    if !matches!(classify_cof_version(version), CofVersionSupport::Native) {
355        return Err(CoreError::UnsupportedVersion(version));
356    }
357    TypeTag::from_u8(data[5]).ok_or(CoreError::UnknownTypeTag(data[5]))
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `payload` under the Blob tag; an encoder failure aborts the test.
    fn blob(payload: &[u8]) -> Vec<u8> {
        cof_encode(TypeTag::Blob, payload).unwrap()
    }

    /// Yield the tag for every byte value in `0x01..=0x0C`.
    fn tags() -> impl Iterator<Item = TypeTag> {
        (0x01..=0x0Cu8).map(|raw| TypeTag::from_u8(raw).unwrap())
    }

    // Payload at or below the 64-byte threshold: stored uncompressed.
    #[test]
    fn roundtrip_no_compression() {
        let input = b"short";
        let (tag, output) = cof_decode(&blob(input)).unwrap();
        assert_eq!(tag, TypeTag::Blob);
        assert_eq!(output, input);
    }

    // Payload above the 64-byte threshold: goes through zstd.
    #[test]
    fn roundtrip_with_compression() {
        let input = vec![b'a'; 1000];
        let wire = cof_encode(TypeTag::Tree, &input).unwrap();
        let (tag, output) = cof_decode(&wire).unwrap();
        assert_eq!(tag, TypeTag::Tree);
        assert_eq!(output, input);
    }

    #[test]
    fn crc_corruption_detected() {
        let mut wire = blob(b"test data");
        // Invert every bit of the byte in the middle, which lies in the payload area.
        let victim = wire.len() / 2;
        wire[victim] = !wire[victim];
        assert!(cof_decode(&wire).is_err());
    }

    #[test]
    fn invalid_magic_rejected() {
        let mut wire = blob(b"test");
        wire[0] = b'X';
        assert!(matches!(cof_decode(&wire), Err(CoreError::InvalidMagic)));
    }

    #[test]
    fn future_version_rejected_but_classified_for_migration() {
        let future = COF_VERSION + 1;
        let mut wire = blob(b"test");
        wire[4] = future;
        assert_eq!(
            classify_cof_version(future),
            CofVersionSupport::UnsupportedFuture
        );
        assert!(matches!(
            cof_decode(&wire),
            Err(CoreError::UnsupportedVersion(seen)) if seen == future
        ));
    }

    #[test]
    fn migration_plan_describes_native_read_write() {
        let native = cof_migration_plan(COF_VERSION);
        assert_eq!(native.source_version(), COF_VERSION);
        assert_eq!(native.target_version(), COF_VERSION);
        assert_eq!(native.support(), CofVersionSupport::Native);
        assert!(native.can_read());
        assert!(native.can_write_source_version());
        assert!(!native.requires_migration());
    }

    #[test]
    fn migration_plan_rejects_future_versions() {
        let future = cof_migration_plan(COF_VERSION + 1);
        assert_eq!(future.support(), CofVersionSupport::UnsupportedFuture);
        assert!(!future.can_read());
        assert!(!future.can_write_source_version());
    }

    #[test]
    fn decode_with_migration_returns_native_plan() {
        let wire = blob(b"migration plan");
        let (tag, output, plan) = cof_decode_with_migration(&wire).unwrap();
        assert_eq!(tag, TypeTag::Blob);
        assert_eq!(output, b"migration plan");
        assert_eq!(plan.support(), CofVersionSupport::Native);
    }

    #[test]
    fn unknown_flags_are_rejected() {
        let mut wire = blob(b"test");
        // Offset 6 is the flag byte; 0x80 lies outside the known flag bits.
        wire[6] = 0x80;
        assert!(matches!(
            cof_decode(&wire),
            Err(CoreError::Deserialization(_))
        ));
    }

    #[test]
    fn unknown_compression_marker_is_rejected() {
        let mut wire = blob(b"test");
        // Offset 7 is the compression marker byte.
        wire[7] = 0xff;
        assert!(matches!(
            cof_decode(&wire),
            Err(CoreError::Deserialization(_))
        ));
    }

    #[test]
    fn length_mismatch_or_payload_mutation_is_rejected() {
        let mut wire = blob(b"test");
        // Offset 8 is the first byte of the uncompressed-length varint.
        wire[8] = wire[8].saturating_add(1);
        assert!(cof_decode(&wire).is_err());
    }

    #[test]
    fn all_type_tags_roundtrip() {
        for tag in tags() {
            let text = format!("payload for {}", tag.name());
            let wire = cof_encode(tag, text.as_bytes()).unwrap();
            let (seen_tag, seen_payload) = cof_decode(&wire).unwrap();
            assert_eq!(seen_tag, tag);
            assert_eq!(seen_payload, text.as_bytes());
        }
    }

    #[test]
    fn peek_type_tag_matches_decode() {
        for tag in tags() {
            let wire = cof_encode(tag, b"hello world").unwrap();
            assert_eq!(cof_peek_type_tag(&wire).unwrap(), tag);
        }
    }

    #[test]
    fn peek_type_tag_rejects_short_data() {
        let too_short = [0u8; 4];
        assert!(cof_peek_type_tag(&too_short).is_err());
    }

    #[test]
    fn peek_type_tag_rejects_bad_magic() {
        let mut wire = blob(b"test");
        wire[0] = b'X';
        assert!(matches!(
            cof_peek_type_tag(&wire),
            Err(CoreError::InvalidMagic)
        ));
    }
}