heddle-objects 0.2.0

An AI-native version control system
Documentation
// SPDX-License-Identifier: Apache-2.0
use super::{ObjectType, varint};
use crate::{
    object::{ChangeId, ContentHash},
    store::{Result, StoreError, compression::CompressionConfig},
};

pub const PACK_CHECKSUM_LEN: usize = 32;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum PackObjectId {
    Hash(ContentHash),
    ChangeId(ChangeId),
}

impl PackObjectId {
    pub fn encode_tagged(self, buf: &mut Vec<u8>) {
        match self {
            Self::Hash(hash) => {
                buf.push(0);
                buf.extend_from_slice(hash.as_bytes());
            }
            Self::ChangeId(change_id) => {
                buf.push(1);
                buf.extend_from_slice(change_id.as_bytes());
            }
        }
    }

    pub fn decode_tagged(data: &[u8]) -> Result<(Self, usize)> {
        let Some(tag) = data.first().copied() else {
            return Err(StoreError::InvalidObject(
                "missing pack object id tag".to_string(),
            ));
        };
        match tag {
            0 => {
                if data.len() < 33 {
                    return Err(StoreError::InvalidObject(
                        "hash pack object id truncated".to_string(),
                    ));
                }
                let hash = ContentHash::from_bytes(data[1..33].try_into().map_err(|_| {
                    StoreError::InvalidObject("invalid hash id length".to_string())
                })?);
                Ok((Self::Hash(hash), 33))
            }
            1 => {
                if data.len() < 17 {
                    return Err(StoreError::InvalidObject(
                        "change id pack object id truncated".to_string(),
                    ));
                }
                let change_id = ChangeId::from_bytes(data[1..17].try_into().map_err(|_| {
                    StoreError::InvalidObject("invalid change id length".to_string())
                })?);
                Ok((Self::ChangeId(change_id), 17))
            }
            _ => Err(StoreError::InvalidObject(format!(
                "unknown pack object id tag {tag}"
            ))),
        }
    }
}

#[derive(Debug, Clone)]
pub struct PackObjectRecord {
    pub id: PackObjectId,
    pub obj_type: ObjectType,
    pub data: Vec<u8>,
    pub delta_base: Option<PackObjectId>,
    pub path_hint: Option<String>,
}

#[derive(Debug, Clone, Copy)]
pub struct PackContainerSpec {
    pub magic: &'static [u8; 4],
    pub version: u32,
}

#[derive(Debug, Clone)]
pub struct PackEntryHeader {
    pub id: PackObjectId,
    pub obj_type: ObjectType,
    pub uncompressed_size: usize,
    pub compressed_size: usize,
    pub delta_base: Option<PackObjectId>,
    pub header_len: usize,
}

pub fn write_container_header(buf: &mut Vec<u8>, spec: PackContainerSpec, count: u64) {
    buf.extend_from_slice(spec.magic);
    buf.extend_from_slice(&spec.version.to_be_bytes());
    buf.extend_from_slice(&count.to_be_bytes());
}

pub fn verify_container(data: &[u8], spec: PackContainerSpec) -> Result<(u64, usize, usize)> {
    if data.len() < 16 + PACK_CHECKSUM_LEN {
        return Err(StoreError::InvalidObject("Pack too short".to_string()));
    }
    if &data[..4] != spec.magic {
        return Err(StoreError::InvalidObject("Invalid pack magic".to_string()));
    }
    let version = u32::from_be_bytes([data[4], data[5], data[6], data[7]]);
    if version != spec.version {
        return Err(StoreError::InvalidObject(format!(
            "Unsupported pack version: {}",
            version
        )));
    }

    let content_end = data.len() - PACK_CHECKSUM_LEN;
    let content = &data[..content_end];
    let stored_checksum = &data[content_end..];
    let computed_checksum = blake3::hash(content);
    if computed_checksum.as_bytes() != stored_checksum {
        return Err(StoreError::InvalidObject(
            "Pack checksum mismatch".to_string(),
        ));
    }

    let count = u64::from_be_bytes([
        data[8], data[9], data[10], data[11], data[12], data[13], data[14], data[15],
    ]);
    Ok((count, 16, content_end))
}

pub fn append_container_checksum(buf: &mut Vec<u8>) {
    let checksum = blake3::hash(buf);
    buf.extend_from_slice(checksum.as_bytes());
}

pub fn encode_tagged_entry(
    buf: &mut Vec<u8>,
    record: &PackObjectRecord,
    stored_type: ObjectType,
    compressed: &[u8],
) -> Result<()> {
    encode_tagged_entry_parts(
        buf,
        record.id,
        stored_type,
        record.data.len(),
        record.delta_base,
        compressed,
    )
}

pub fn encode_tagged_entry_parts(
    buf: &mut Vec<u8>,
    id: PackObjectId,
    stored_type: ObjectType,
    uncompressed_size: usize,
    delta_base: Option<PackObjectId>,
    compressed: &[u8],
) -> Result<()> {
    id.encode_tagged(buf);
    varint::encode_type_and_size(stored_type, uncompressed_size as u64, buf);
    varint::encode_varint(compressed.len() as u64, buf);
    if stored_type == ObjectType::Delta {
        let Some(base) = delta_base else {
            return Err(StoreError::InvalidObject(
                "Delta entry missing base id".to_string(),
            ));
        };
        base.encode_tagged(buf);
    }
    buf.extend_from_slice(compressed);
    Ok(())
}

pub fn decode_tagged_entry_header(data: &[u8]) -> Result<PackEntryHeader> {
    let (id, id_len) = PackObjectId::decode_tagged(data)?;
    let (obj_type, uncompressed_size, type_len) = varint::decode_type_and_size(&data[id_len..])
        .ok_or_else(|| StoreError::InvalidObject("Truncated type+size varint".to_string()))?;
    let varint_start = id_len + type_len;
    let (compressed_size, comp_len) = varint::decode_varint(&data[varint_start..])
        .ok_or_else(|| StoreError::InvalidObject("Truncated compressed_size varint".to_string()))?;
    let mut header_len = varint_start + comp_len;

    let delta_base = if obj_type == ObjectType::Delta {
        let (base, base_len) = PackObjectId::decode_tagged(&data[header_len..])?;
        header_len += base_len;
        Some(base)
    } else {
        None
    };

    Ok(PackEntryHeader {
        id,
        obj_type,
        uncompressed_size: uncompressed_size as usize,
        compressed_size: compressed_size as usize,
        delta_base,
        header_len,
    })
}

pub fn try_decode_tagged_entry_header(data: &[u8]) -> Result<Option<PackEntryHeader>> {
    let Some(tag) = data.first().copied() else {
        return Ok(None);
    };

    let (id, id_len) =
        match tag {
            0 => {
                if data.len() < 33 {
                    return Ok(None);
                }
                let hash = ContentHash::from_bytes(data[1..33].try_into().map_err(|_| {
                    StoreError::InvalidObject("invalid hash id length".to_string())
                })?);
                (PackObjectId::Hash(hash), 33)
            }
            1 => {
                if data.len() < 17 {
                    return Ok(None);
                }
                let change_id = ChangeId::from_bytes(data[1..17].try_into().map_err(|_| {
                    StoreError::InvalidObject("invalid change id length".to_string())
                })?);
                (PackObjectId::ChangeId(change_id), 17)
            }
            _ => {
                return Err(StoreError::InvalidObject(format!(
                    "unknown pack object id tag {tag}"
                )));
            }
        };

    let Some((obj_type, uncompressed_size, type_len)) =
        varint::decode_type_and_size(&data[id_len..])
    else {
        return Ok(None);
    };
    let varint_start = id_len + type_len;
    let Some((compressed_size, comp_len)) = varint::decode_varint(&data[varint_start..]) else {
        return Ok(None);
    };
    let mut header_len = varint_start + comp_len;

    let delta_base = if obj_type == ObjectType::Delta {
        let Some(base_tag) = data.get(header_len).copied() else {
            return Ok(None);
        };
        let (base, base_len) = match base_tag {
            0 => {
                let end = header_len + 33;
                if data.len() < end {
                    return Ok(None);
                }
                let hash = ContentHash::from_bytes(data[header_len + 1..end].try_into().map_err(
                    |_| StoreError::InvalidObject("invalid hash id length".to_string()),
                )?);
                (PackObjectId::Hash(hash), 33)
            }
            1 => {
                let end = header_len + 17;
                if data.len() < end {
                    return Ok(None);
                }
                let change_id =
                    ChangeId::from_bytes(data[header_len + 1..end].try_into().map_err(|_| {
                        StoreError::InvalidObject("invalid change id length".to_string())
                    })?);
                (PackObjectId::ChangeId(change_id), 17)
            }
            _ => {
                return Err(StoreError::InvalidObject(format!(
                    "unknown pack object id tag {base_tag}"
                )));
            }
        };
        header_len += base_len;
        Some(base)
    } else {
        None
    };

    Ok(Some(PackEntryHeader {
        id,
        obj_type,
        uncompressed_size: uncompressed_size as usize,
        compressed_size: compressed_size as usize,
        delta_base,
        header_len,
    }))
}

pub fn compress_pack_payload(data: &[u8], config: &CompressionConfig) -> Result<Vec<u8>> {
    if !config.enabled || data.len() < config.min_size {
        return Ok(data.to_vec());
    }
    #[cfg(feature = "zstd")]
    {
        match zstd::encode_all(data, config.level) {
            Ok(compressed) if compressed.len() < data.len() => Ok(compressed),
            _ => Ok(data.to_vec()),
        }
    }
    #[cfg(not(feature = "zstd"))]
    {
        let _ = config;
        Ok(data.to_vec())
    }
}

pub fn decompress_pack_payload(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    #[cfg(feature = "zstd")]
    {
        use std::io::Read;
        let mut decoder = zstd::stream::read::Decoder::new(data)
            .map_err(|e| StoreError::InvalidObject(format!("zstd decode init failed: {e}")))?;
        let capacity = if expected_size > 0 {
            expected_size
        } else {
            data.len() * 2
        };
        let mut buf = Vec::with_capacity(capacity);
        decoder
            .read_to_end(&mut buf)
            .map_err(|e| StoreError::InvalidObject(format!("zstd decompression failed: {e}")))?;
        Ok(buf)
    }
    #[cfg(not(feature = "zstd"))]
    {
        let _ = expected_size;
        Ok(data.to_vec())
    }
}

pub fn has_zstd_magic(data: &[u8]) -> bool {
    data.len() >= 4 && data[..4] == [0x28, 0xB5, 0x2F, 0xFD]
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tagged_pack_object_ids_round_trip() {
        let ids = [
            PackObjectId::Hash(ContentHash::compute(b"hash-object")),
            PackObjectId::ChangeId(ChangeId::generate()),
        ];

        for id in ids {
            let mut encoded = Vec::new();
            id.encode_tagged(&mut encoded);
            let (decoded, consumed) = PackObjectId::decode_tagged(&encoded).unwrap();
            assert_eq!(decoded, id);
            assert_eq!(consumed, encoded.len());
        }
    }

    #[test]
    fn tagged_entry_header_round_trips_mixed_identity() {
        let record = PackObjectRecord {
            id: PackObjectId::ChangeId(ChangeId::generate()),
            obj_type: ObjectType::State,
            data: vec![1, 2, 3, 4, 5],
            delta_base: None,
            path_hint: None,
        };

        let mut encoded = Vec::new();
        encode_tagged_entry(&mut encoded, &record, record.obj_type, &record.data).unwrap();
        let decoded = decode_tagged_entry_header(&encoded).unwrap();

        assert_eq!(decoded.id, record.id);
        assert_eq!(decoded.obj_type, ObjectType::State);
        assert_eq!(decoded.uncompressed_size, 5);
        assert_eq!(decoded.compressed_size, 5);
        assert_eq!(decoded.delta_base, None);
    }
}