atx_reader 0.1.0

Parser and decoder for Apple .atx texture archives (AAPL container with ASTC payload), as produced by tools like Cellebrite UFED iOS exports.
Documentation
use crate::error::{AtxError, Result};

/// 8-byte container signature: `b"AAPL\r\n\x1a\n"`.
///
/// [`AtxContainer::parse`] rejects any input that does not begin with
/// exactly these bytes.
pub const AAPL_MAGIC: &[u8; 8] = b"AAPL\r\n\x1a\n";

/// `HEAD` chunk tag — texture metadata block, decoded by
/// [`AtxContainer::header`].
pub const HEAD_TAG: &[u8; 4] = b"HEAD";

/// `FILL` chunk tag — zero-padding for GPU alignment.
pub const FILL_TAG: &[u8; 4] = b"FILL";

/// `astc` chunk tag — raw ASTC payload, optionally swizzled.
///
/// Besides being matched by the chunk walker, this tag is the needle for the
/// byte-search fallback in [`AtxContainer::texture_payload`].
pub const ASTC_TAG: &[u8; 4] = b"astc";

/// `LZFS` chunk tag — LZFSE-compressed ASTC payload (`bvx2` stream).
///
/// [`AtxContainer::texture_payload`] looks for this tag before `astc`.
pub const LZFS_TAG: &[u8; 4] = b"LZFS";

/// Texture payload variants exposed by [`AtxContainer::texture_payload`].
///
/// Both variants borrow from the container's raw bytes. When the chunk is
/// found by the chunk walker, the slice starts after the 4-byte inner-size
/// field at the front of the chunk payload. When the `astc` tag is found by
/// byte search instead, the slice starts 8 bytes after the tag and runs to
/// the end of the input.
#[derive(Debug, Clone, Copy)]
pub enum TexturePayload<'a> {
    /// Raw ASTC blocks. The high-level decoder treats these as Apple's
    /// macro-tiled Morton layout.
    Astc(&'a [u8]),
    /// LZFSE-compressed stream (starts with `bvx2`/`bvxn` magic). The
    /// high-level decoder decompresses and treats the result as linear
    /// row-major ASTC blocks.
    Lzfse(&'a [u8]),
}

/// Byte offsets, relative to the start of the `HEAD` chunk payload, of the
/// fields this crate decodes. Every field except `UUID` is read as a
/// little-endian `u32`; `UUID` is a 16-byte blob.
mod head_off {
    /// Flags word.
    pub const FLAGS: usize = 0x00;
    /// Texture width.
    pub const WIDTH: usize = 0x18;
    /// Texture height.
    pub const HEIGHT: usize = 0x1C;
    /// Texture depth.
    pub const DEPTH: usize = 0x20;
    /// Array layer count.
    pub const ARRAY_LAYERS: usize = 0x28;
    /// Mipmap level count.
    pub const MIPMAP_COUNT: usize = 0x2C;
    /// 16-byte identity blob.
    pub const UUID: usize = 0x3C;
    /// First pixel-format discriminator.
    pub const FORMAT_CODE_A: usize = 0x4C;
    /// Second pixel-format discriminator.
    pub const FORMAT_CODE_B: usize = 0x50;
    /// Smallest payload length that still contains every field above:
    /// the last field's offset plus its 4-byte width (`0x54`).
    pub const MIN_LEN: usize = FORMAT_CODE_B + 4;
}

/// Parsed metadata from the `HEAD` chunk.
///
/// Produced by [`AtxContainer::header`]. All `u32` fields are decoded as
/// little-endian from fixed offsets inside the `HEAD` payload; no value is
/// validated beyond the payload being long enough to contain it.
#[derive(Debug, Clone, Copy)]
pub struct AtxHeader {
    /// Flags word at HEAD payload offset `0x00`.
    pub flags: u32,
    /// Texture width, read from HEAD payload offset `0x18`.
    pub width: u32,
    /// Texture height, read from HEAD payload offset `0x1C`.
    pub height: u32,
    /// Texture depth, read from HEAD payload offset `0x20`.
    pub depth: u32,
    /// Array layer count, read from HEAD payload offset `0x28`.
    pub array_layers: u32,
    /// Mipmap level count, read from HEAD payload offset `0x2C`.
    pub mipmap_count: u32,
    /// Per-texture identity (16 bytes). Probably the source MTLTexture's hash.
    pub uuid: [u8; 16],
    /// First pixel-format discriminator at HEAD payload offset `0x4C`.
    ///
    /// The exact semantics are proprietary. For the verified Cellebrite
    /// sample this is `3` and corresponds to ASTC 4×4 alongside [`Self::format_code_b`].
    pub format_code_a: u32,
    /// Second pixel-format discriminator at HEAD payload offset `0x50`.
    pub format_code_b: u32,
}

/// Borrowed description of a single chunk inside an AAPL container.
///
/// On disk a chunk is laid out as `[u32 LE size][4-byte tag][payload]`.
#[derive(Debug, Clone, Copy)]
pub struct AtxChunk<'a> {
    /// The four tag bytes exactly as they appear in the file.
    pub tag: [u8; 4],
    /// Position of the chunk's size field within the container's raw bytes.
    pub offset: usize,
    /// Payload bytes; the slice length equals the chunk's size field.
    pub payload: &'a [u8],
}

impl<'a> AtxChunk<'a> {
    /// Render the tag as text for diagnostics.
    ///
    /// Returns the tag bytes as a `&str` when they are valid UTF-8, and the
    /// placeholder `"????"` otherwise.
    pub fn tag_str(&self) -> &str {
        match core::str::from_utf8(&self.tag) {
            Ok(text) => text,
            Err(_) => "????",
        }
    }
}

/// A zero-copy view over a `.atx` file's bytes.
///
/// `parse` validates the AAPL signature; nothing further is parsed eagerly.
/// Use [`AtxContainer::header`], [`AtxContainer::chunks`], or
/// [`AtxContainer::astc_payload`] to extract the parts you need.
#[derive(Debug, Clone, Copy)]
pub struct AtxContainer<'a> {
    raw: &'a [u8],
}

impl<'a> AtxContainer<'a> {
    /// Validate the AAPL signature and wrap the bytes for further parsing.
    pub fn parse(raw: &'a [u8]) -> Result<Self> {
        if raw.len() < AAPL_MAGIC.len() {
            return Err(AtxError::TooShort {
                needed: AAPL_MAGIC.len(),
                got: raw.len(),
            });
        }
        if &raw[..AAPL_MAGIC.len()] != AAPL_MAGIC {
            return Err(AtxError::BadMagic);
        }
        Ok(Self { raw })
    }

    /// Raw container bytes.
    pub fn raw(&self) -> &'a [u8] {
        self.raw
    }

    /// Iterate over well-formed chunks starting just after the magic.
    ///
    /// The walker stops as soon as it encounters a non-ASCII-alphanumeric
    /// tag — in practice this means after `HEAD` and `FILL` it bails, since
    /// the `astc` payload is preceded by a few bytes of alignment padding.
    /// Use [`AtxContainer::astc_payload`] to retrieve the ASTC blocks
    /// regardless of the walker's stopping point.
    pub fn chunks(&self) -> ChunkIter<'a> {
        ChunkIter {
            raw: self.raw,
            offset: AAPL_MAGIC.len(),
        }
    }

    /// Find the first chunk with the given tag.
    pub fn find_chunk(&self, tag: &[u8; 4]) -> Option<AtxChunk<'a>> {
        self.chunks().find(|c| &c.tag == tag)
    }

    /// Parse the `HEAD` chunk into a typed view.
    pub fn header(&self) -> Result<AtxHeader> {
        let head = self
            .find_chunk(HEAD_TAG)
            .ok_or(AtxError::ChunkNotFound("HEAD"))?;
        if head.payload.len() < head_off::MIN_LEN {
            return Err(AtxError::BadChunk {
                tag: "HEAD".into(),
                offset: head.offset,
            });
        }
        let p = head.payload;
        let read_u32 = |off: usize| u32::from_le_bytes(p[off..off + 4].try_into().unwrap());
        let mut uuid = [0u8; 16];
        uuid.copy_from_slice(&p[head_off::UUID..head_off::UUID + 16]);
        Ok(AtxHeader {
            flags: read_u32(head_off::FLAGS),
            width: read_u32(head_off::WIDTH),
            height: read_u32(head_off::HEIGHT),
            depth: read_u32(head_off::DEPTH),
            array_layers: read_u32(head_off::ARRAY_LAYERS),
            mipmap_count: read_u32(head_off::MIPMAP_COUNT),
            uuid,
            format_code_a: read_u32(head_off::FORMAT_CODE_A),
            format_code_b: read_u32(head_off::FORMAT_CODE_B),
        })
    }

    /// Locate the texture payload — either raw ASTC blocks (`astc` chunk)
    /// or an LZFSE stream (`LZFS` chunk). Prefers an `LZFS` chunk when both
    /// are present (none of the observed samples carry both).
    pub fn texture_payload(&self) -> Result<TexturePayload<'a>> {
        if let Some(c) = self.find_chunk(LZFS_TAG) {
            return Ok(TexturePayload::Lzfse(skip_inner_size(c.payload)));
        }
        if let Some(c) = self.find_chunk(ASTC_TAG) {
            return Ok(TexturePayload::Astc(skip_inner_size(c.payload)));
        }
        // Fall back to byte-search for `astc` for samples where alignment
        // padding hides the chunk from the walker.
        if let Some(off) = find_subslice(self.raw, ASTC_TAG) {
            let payload_start = off + 8;
            if payload_start <= self.raw.len() {
                return Ok(TexturePayload::Astc(&self.raw[payload_start..]));
            }
        }
        Err(AtxError::PayloadNotFound)
    }

    /// Return a slice over the raw ASTC blocks. Convenience wrapper around
    /// [`Self::texture_payload`] for callers that know the file is the
    /// uncompressed `astc` variant.
    pub fn astc_payload(&self) -> Result<&'a [u8]> {
        match self.texture_payload()? {
            TexturePayload::Astc(p) => Ok(p),
            TexturePayload::Lzfse(_) => Err(AtxError::PayloadNotFound),
        }
    }
}

/// Drop the 4-byte inner-size header that every variable payload chunk in
/// AAPL appears to carry between the tag and the real data.
///
/// Returns the bytes after the first four, or an empty slice when `payload`
/// holds fewer than four bytes.
fn skip_inner_size(payload: &[u8]) -> &[u8] {
    payload.get(4..).unwrap_or(&[])
}

/// Iterator over the chunks that follow the AAPL magic.
///
/// Each chunk is expected to be laid out as
/// `[u32 LE size][4-byte tag][size bytes of payload]`.
#[derive(Debug)]
pub struct ChunkIter<'a> {
    raw: &'a [u8],
    offset: usize,
}

impl<'a> Iterator for ChunkIter<'a> {
    type Item = AtxChunk<'a>;

    /// Yield the chunk at the current offset and advance past it.
    ///
    /// Returns `None` when fewer than 8 header bytes remain, when the tag
    /// contains a byte that is not ASCII alphanumeric, or when the declared
    /// size would run past the end of the input. The offset is left
    /// untouched in those cases, so later calls keep returning `None`.
    fn next(&mut self) -> Option<Self::Item> {
        let start = self.offset;
        let body_start = start + 8;

        // 8-byte chunk header: size field followed by the tag.
        let header = self.raw.get(start..body_start)?;
        let size = u32::from_le_bytes([header[0], header[1], header[2], header[3]]) as usize;
        let tag = [header[4], header[5], header[6], header[7]];
        if tag.iter().any(|b| !b.is_ascii_alphanumeric()) {
            return None;
        }

        let body_end = body_start.checked_add(size)?;
        let payload = self.raw.get(body_start..body_end)?;

        self.offset = body_end;
        Some(AtxChunk {
            tag,
            offset: start,
            payload,
        })
    }
}

/// Return the index of the first occurrence of `needle` in `haystack`.
///
/// Yields `None` when `needle` is empty, longer than `haystack`, or simply
/// not present.
fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let needle_len = needle.len();
    if needle_len == 0 || needle_len > haystack.len() {
        return None;
    }
    // Every start index that leaves room for a full-length match.
    let last_start = haystack.len() - needle_len;
    (0..=last_start).find(|&start| haystack[start..].starts_with(needle))
}

// Unit tests built on small hand-assembled containers; no sample files needed.
#[cfg(test)]
mod tests {
    use super::*;

    // Input shorter than the 8-byte signature must be reported as TooShort.
    #[test]
    fn rejects_short_input() {
        assert!(matches!(
            AtxContainer::parse(b"AAPL"),
            Err(AtxError::TooShort { .. })
        ));
    }

    // Long enough, but the first four bytes differ from the signature.
    #[test]
    fn rejects_bad_magic() {
        assert!(matches!(
            AtxContainer::parse(b"NOPE\r\n\x1a\nrest"),
            Err(AtxError::BadMagic)
        ));
    }

    // A container holding a single minimum-size HEAD chunk decodes its
    // width/height and is seen by the walker as exactly one chunk.
    #[test]
    fn walks_minimal_head_chunk() {
        // magic + [size=84][HEAD][84 zero bytes], i.e. exactly `head_off::MIN_LEN`
        let mut buf = Vec::from(AAPL_MAGIC.as_slice());
        buf.extend_from_slice(&84u32.to_le_bytes());
        buf.extend_from_slice(HEAD_TAG);
        buf.resize(buf.len() + 84, 0);
        // width @ payload+0x18, height @ +0x1C
        let payload_start = AAPL_MAGIC.len() + 8;
        buf[payload_start + 0x18..payload_start + 0x1C].copy_from_slice(&100u32.to_le_bytes());
        buf[payload_start + 0x1C..payload_start + 0x20].copy_from_slice(&200u32.to_le_bytes());

        let c = AtxContainer::parse(&buf).expect("parse");
        let h = c.header().expect("header");
        assert_eq!((h.width, h.height), (100, 200));
        let chunks: Vec<_> = c.chunks().collect();
        assert_eq!(chunks.len(), 1);
        assert_eq!(&chunks[0].tag, HEAD_TAG);
    }

    // The walker cannot parse the zero padding as a chunk, so the payload
    // must be found via the byte-search fallback.
    #[test]
    fn finds_astc_payload_after_padding() {
        let mut buf = Vec::from(AAPL_MAGIC.as_slice());
        // Some arbitrary padding bytes (not a real chunk)
        buf.extend_from_slice(&[0u8; 16]);
        // astc marker + 4-byte inner header + 32 bytes of "data"
        buf.extend_from_slice(ASTC_TAG);
        buf.extend_from_slice(&0u32.to_le_bytes());
        buf.extend_from_slice(&[0xAB; 32]);

        let c = AtxContainer::parse(&buf).expect("parse");
        let payload = c.astc_payload().expect("astc");
        assert_eq!(payload.len(), 32);
        assert!(payload.iter().all(|&b| b == 0xAB));
    }

    // A walkable LZFS chunk is reported as the Lzfse variant with the
    // 4-byte inner-size field stripped from the front.
    #[test]
    fn detects_lzfs_chunk_as_lzfse_payload() {
        // magic + [size=12][LZFS][inner_size=8][8 bytes lzfse data]
        let mut buf = Vec::from(AAPL_MAGIC.as_slice());
        buf.extend_from_slice(&12u32.to_le_bytes());
        buf.extend_from_slice(LZFS_TAG);
        buf.extend_from_slice(&8u32.to_le_bytes());
        buf.extend_from_slice(&[0xCD; 8]);

        let c = AtxContainer::parse(&buf).expect("parse");
        match c.texture_payload().expect("payload") {
            TexturePayload::Lzfse(p) => assert_eq!(p, &[0xCD; 8]),
            other => panic!("expected lzfse, got {other:?}"),
        }
    }
}