// cjc-snap 0.1.10
//
// Binary serialization for CJC values
// Documentation
//! File persistence for CJC Snap — save/load values to disk.
//!
//! ## File Format (.snap) — v1
//!
//! ```text
//! Offset  Size  Description
//! 0       4     Magic: "CJCS" (0x43, 0x4A, 0x43, 0x53)
//! 4       4     Version: 1 (u32 LE)
//! 8       32    SHA-256 content hash
//! 40      8     Data length (u64 LE)
//! 48      N     Snap-encoded data bytes (v1 tag+data)
//! ```
//!
//! ## File Format (.snap) — v2
//!
//! ```text
//! Offset  Size  Description
//! 0       4     Magic: "CJCS" (0x43, 0x4A, 0x43, 0x53)
//! 4       4     Version: 2 (u32 LE)
//! 8       32    SHA-256 content hash
//! 40      8     Data length (u64 LE)
//! 48      N     Snap-encoded data bytes (v2: CJS\x01 + version + flags + payload)
//! ```

use cjc_runtime::Value;
use crate::{snap, snap_v2, restore_v2};

/// Four-byte signature at the start of every CJC Snap file: ASCII `CJCS`.
pub const MAGIC: [u8; 4] = *b"CJCS";

/// Header version number for the legacy (v1) file format.
pub const VERSION_V1: u32 = 1;

/// Header version number for the v2 file format (new tags, v2 payload header).
pub const VERSION_V2: u32 = 2;

/// File format version treated as current; presently the v1 format.
pub const VERSION: u32 = VERSION_V1;

/// Total header length in bytes, spelled out field by field:
/// magic (4) + version (4) + content hash (32) + data length (8) = 48.
const HEADER_SIZE: usize = 4 + 4 + 32 + 8;

/// Encode `value` with [`snap`] and write it to `path` under a v1 header.
///
/// The file starts with the fixed header (magic, version, content hash,
/// payload length) followed by the encoded bytes, and can be read back with
/// [`snap_load`].
///
/// # Errors
///
/// Returns a `snap_save: …` message if the file cannot be written.
pub fn snap_save(value: &Value, path: &str) -> Result<(), String> {
    let encoded = snap(value);
    let (hash, payload) = (&encoded.content_hash, &encoded.data);
    write_snap_file(hash, payload, VERSION_V1, path)
}

/// Encode `value` with [`snap_v2`] and write it to `path` under a v2 header.
///
/// Identical to [`snap_save`] except that the payload comes from the v2
/// encoder and the header's version field is set to [`VERSION_V2`].
///
/// # Errors
///
/// Returns a `snap_save: …` message if the file cannot be written.
pub fn snap_save_v2(value: &Value, path: &str) -> Result<(), String> {
    let encoded = snap_v2(value);
    let (hash, payload) = (&encoded.content_hash, &encoded.data);
    write_snap_file(hash, payload, VERSION_V2, path)
}

/// Write a snap file with the given header fields.
fn write_snap_file(
    content_hash: &[u8; 32],
    data: &[u8],
    version: u32,
    path: &str,
) -> Result<(), String> {
    let data_len = data.len() as u64;
    let mut file_bytes = Vec::with_capacity(HEADER_SIZE + data.len());

    // Magic
    file_bytes.extend_from_slice(&MAGIC);
    // Version
    file_bytes.extend_from_slice(&version.to_le_bytes());
    // Content hash
    file_bytes.extend_from_slice(content_hash);
    // Data length
    file_bytes.extend_from_slice(&data_len.to_le_bytes());
    // Data
    file_bytes.extend_from_slice(data);

    std::fs::write(path, &file_bytes)
        .map_err(|e| format!("snap_save: {}", e))
}

/// Load a CJC value from a `.snap` file.
///
/// Auto-detects v1 and v2 file format versions.
/// Validates magic bytes, version, and SHA-256 hash integrity.
/// Returns the decoded value or a descriptive error.
pub fn snap_load(path: &str) -> Result<Value, String> {
    let file_bytes = std::fs::read(path)
        .map_err(|e| format!("snap_load: {}", e))?;

    if file_bytes.len() < HEADER_SIZE {
        return Err(format!(
            "snap_load: file too small ({} bytes, need at least {})",
            file_bytes.len(),
            HEADER_SIZE
        ));
    }

    // Validate magic
    if file_bytes[0..4] != MAGIC {
        return Err(format!(
            "snap_load: invalid magic bytes {:02x}{:02x}{:02x}{:02x} (expected CJCS)",
            file_bytes[0], file_bytes[1], file_bytes[2], file_bytes[3]
        ));
    }

    // Read version
    let version = u32::from_le_bytes(file_bytes[4..8].try_into().unwrap());
    if version != VERSION_V1 && version != VERSION_V2 {
        return Err(format!(
            "snap_load: unsupported version {} (expected {} or {})",
            version, VERSION_V1, VERSION_V2
        ));
    }

    // Extract hash
    let mut content_hash = [0u8; 32];
    content_hash.copy_from_slice(&file_bytes[8..40]);

    // Extract data length and data
    let data_len = u64::from_le_bytes(file_bytes[40..48].try_into().unwrap()) as usize;
    if file_bytes.len() < HEADER_SIZE + data_len {
        return Err(format!(
            "snap_load: truncated file (header says {} data bytes, file has {})",
            data_len,
            file_bytes.len() - HEADER_SIZE
        ));
    }

    let data = file_bytes[HEADER_SIZE..HEADER_SIZE + data_len].to_vec();

    // Reconstruct blob and restore (auto-detects v1/v2 payload format)
    let blob = crate::SnapBlob { content_hash, data };
    restore_v2(&blob).map_err(|e| format!("snap_load: {}", e))
}

#[cfg(test)]
mod tests {
    use super::*;
    use cjc_runtime::{Tensor, SparseCsr};
    use std::rc::Rc;

    /// Scratch `.snap` file in the OS temp directory, deleted on drop.
    ///
    /// Cleanup lives in `Drop` so the file is removed even when an assertion
    /// panics part-way through a test. The process id is part of the file
    /// name so concurrent test runs do not collide.
    struct ScratchFile {
        path: String,
    }

    impl ScratchFile {
        fn new(name: &str) -> Self {
            let file_name = format!("__test_persist_{}_{}.snap", std::process::id(), name);
            let path = std::env::temp_dir()
                .join(file_name)
                .to_string_lossy()
                .into_owned();
            ScratchFile { path }
        }

        fn path(&self) -> &str {
            &self.path
        }
    }

    impl Drop for ScratchFile {
        fn drop(&mut self) {
            // Best effort: the file may never have been created.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    #[test]
    fn test_save_load_int() {
        let file = ScratchFile::new("int");
        snap_save(&Value::Int(42), file.path()).unwrap();
        let loaded = snap_load(file.path()).unwrap();
        assert!(matches!(loaded, Value::Int(42)));
    }

    #[test]
    fn test_save_load_string() {
        let file = ScratchFile::new("string");
        snap_save(&Value::String(Rc::new("hello CJC".into())), file.path()).unwrap();
        let loaded = snap_load(file.path()).unwrap();
        match loaded {
            Value::String(s) => assert_eq!(s.as_str(), "hello CJC"),
            _ => panic!("expected String"),
        }
    }

    #[test]
    fn test_save_load_tensor() {
        let file = ScratchFile::new("tensor");
        let t = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]).unwrap();
        snap_save(&Value::Tensor(t), file.path()).unwrap();
        let loaded = snap_load(file.path()).unwrap();
        match loaded {
            Value::Tensor(t) => {
                assert_eq!(t.shape(), &[2, 3]);
                assert_eq!(t.to_vec(), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
            }
            _ => panic!("expected Tensor"),
        }
    }

    #[test]
    fn test_save_load_v2_int() {
        let file = ScratchFile::new("v2_int");
        snap_save_v2(&Value::Int(99), file.path()).unwrap();
        let loaded = snap_load(file.path()).unwrap();
        assert!(matches!(loaded, Value::Int(99)));
    }

    #[test]
    fn test_save_load_v2_tensor() {
        let file = ScratchFile::new("v2_tensor");
        let t = Tensor::from_vec(vec![1.0, 2.0, 3.0], &[3]).unwrap();
        snap_save_v2(&Value::Tensor(t), file.path()).unwrap();
        let loaded = snap_load(file.path()).unwrap();
        match loaded {
            Value::Tensor(t) => {
                assert_eq!(t.shape(), &[3]);
                assert_eq!(t.to_vec(), vec![1.0, 2.0, 3.0]);
            }
            _ => panic!("expected Tensor"),
        }
    }

    #[test]
    fn test_save_load_v2_sparse() {
        let file = ScratchFile::new("v2_sparse");
        let sparse = SparseCsr {
            nrows: 2,
            ncols: 3,
            row_offsets: vec![0, 1, 3],
            col_indices: vec![0, 1, 2],
            values: vec![1.0, 2.0, 3.0],
        };
        snap_save_v2(&Value::SparseTensor(sparse), file.path()).unwrap();
        let loaded = snap_load(file.path()).unwrap();
        match loaded {
            Value::SparseTensor(s) => {
                assert_eq!(s.nrows, 2);
                assert_eq!(s.ncols, 3);
                assert_eq!(s.values, vec![1.0, 2.0, 3.0]);
            }
            _ => panic!("expected SparseTensor"),
        }
    }

    #[test]
    fn test_bad_magic() {
        let file = ScratchFile::new("bad_magic");
        let mut bytes = vec![0u8; 48];
        bytes[0..4].copy_from_slice(b"XXXX");
        std::fs::write(file.path(), &bytes).unwrap();
        let result = snap_load(file.path());
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("invalid magic"));
    }

    #[test]
    fn test_truncated_file() {
        let file = ScratchFile::new("truncated");
        std::fs::write(file.path(), b"CJC").unwrap();
        let result = snap_load(file.path());
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("too small"));
    }

    /// A complete header that promises more payload bytes than the file holds.
    #[test]
    fn test_truncated_payload() {
        let file = ScratchFile::new("truncated_payload");
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&MAGIC);
        bytes.extend_from_slice(&VERSION_V1.to_le_bytes());
        bytes.extend_from_slice(&[0u8; 32]);
        bytes.extend_from_slice(&10u64.to_le_bytes());
        std::fs::write(file.path(), &bytes).unwrap();
        let result = snap_load(file.path());
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("truncated file"));
    }

    #[test]
    fn test_bad_version() {
        let file = ScratchFile::new("bad_version");
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&MAGIC);
        bytes.extend_from_slice(&99u32.to_le_bytes());
        bytes.extend_from_slice(&[0u8; 40]);
        std::fs::write(file.path(), &bytes).unwrap();
        let result = snap_load(file.path());
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("unsupported version"));
    }

    #[test]
    fn test_missing_file() {
        let result = snap_load("__nonexistent_file_12345.snap");
        assert!(result.is_err());
    }

    #[test]
    fn test_roundtrip_array() {
        let file = ScratchFile::new("array");
        let val = Value::Array(Rc::new(vec![
            Value::Int(1),
            Value::Float(2.5),
            Value::Bool(true),
        ]));
        snap_save(&val, file.path()).unwrap();
        let loaded = snap_load(file.path()).unwrap();
        // Check the elements too, not just the outer variant.
        match loaded {
            Value::Array(items) => {
                assert_eq!(items.len(), 3);
                assert!(matches!(items[0], Value::Int(1)));
                assert!(matches!(items[1], Value::Float(x) if x == 2.5));
                assert!(matches!(items[2], Value::Bool(true)));
            }
            _ => panic!("expected Array"),
        }
    }
}