Skip to main content

agentic_codebase/types/
header.rs

//! File header for the `.acb` binary format.
//!
//! The header is exactly 128 bytes and appears at the start of every `.acb` file.
//! It stores the magic bytes, format version, unit/edge counts, section offsets,
//! and build metadata (repository hash and compilation timestamp).
5
6use std::io::{Read, Write};
7
8use super::error::{AcbError, AcbResult};
9use super::{ACB_MAGIC, FORMAT_VERSION};
10
/// Header of an `.acb` file. Fixed size: 128 bytes.
///
/// Every multi-byte integer is serialized little-endian. On-disk layout:
/// - 0x00: magic [u8; 4]
/// - 0x04: version u32
/// - 0x08: dimension u32
/// - 0x0C: language_count u32
/// - 0x10: unit_count u64
/// - 0x18: edge_count u64
/// - 0x20: unit_table_offset u64
/// - 0x28: edge_table_offset u64
/// - 0x30: string_pool_offset u64
/// - 0x38: feature_vec_offset u64
/// - 0x40: temporal_offset u64
/// - 0x48: index_offset u64
/// - 0x50: repo_hash [u8; 32]
/// - 0x70: compiled_at u64
/// - 0x78: _reserved [u8; 8]
///
/// All fields are plain integers or byte arrays, so the type is `Copy` and
/// supports total equality (`Eq`) in addition to `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FileHeader {
    /// Magic bytes: must be [0x41, 0x43, 0x44, 0x42] ("ACDB").
    pub magic: [u8; 4],

    /// Format version (currently 1).
    pub version: u32,

    /// Feature vector dimensionality.
    pub dimension: u32,

    /// Number of supported languages in this file.
    pub language_count: u32,

    /// Total number of code units.
    pub unit_count: u64,

    /// Total number of edges.
    pub edge_count: u64,

    /// Byte offset to code unit table.
    pub unit_table_offset: u64,

    /// Byte offset to edge table.
    pub edge_table_offset: u64,

    /// Byte offset to string pool.
    pub string_pool_offset: u64,

    /// Byte offset to feature vectors.
    pub feature_vec_offset: u64,

    /// Byte offset to temporal block.
    pub temporal_offset: u64,

    /// Byte offset to index block.
    pub index_offset: u64,

    /// Repository root path hash (for cache validation).
    pub repo_hash: [u8; 32],

    /// Compilation timestamp (Unix epoch microseconds).
    pub compiled_at: u64,

    /// Reserved for future use.
    pub _reserved: [u8; 8],
}
76
/// Size in bytes of the serialized file header (128).
///
/// Written as the sum of the serialized field widths so the figure can be
/// checked against the `FileHeader` layout at a glance.
pub const HEADER_SIZE: usize = 4 // magic
    + 3 * 4 // version, dimension, language_count
    + 8 * 8 // unit_count, edge_count and the six section offsets
    + 32 // repo_hash
    + 8 // compiled_at
    + 8; // _reserved
79
80impl FileHeader {
81    /// Create a new header with sensible defaults and the given dimension.
82    pub fn new(dimension: u32) -> Self {
83        Self {
84            magic: ACB_MAGIC,
85            version: FORMAT_VERSION,
86            dimension,
87            language_count: 0,
88            unit_count: 0,
89            edge_count: 0,
90            unit_table_offset: HEADER_SIZE as u64,
91            edge_table_offset: HEADER_SIZE as u64,
92            string_pool_offset: 0,
93            feature_vec_offset: 0,
94            temporal_offset: 0,
95            index_offset: 0,
96            repo_hash: [0u8; 32],
97            compiled_at: crate::types::now_micros(),
98            _reserved: [0u8; 8],
99        }
100    }
101
102    /// Write the header to a byte writer (little-endian).
103    pub fn write_to(&self, w: &mut impl Write) -> AcbResult<()> {
104        w.write_all(&self.magic)?;
105        w.write_all(&self.version.to_le_bytes())?;
106        w.write_all(&self.dimension.to_le_bytes())?;
107        w.write_all(&self.language_count.to_le_bytes())?;
108        w.write_all(&self.unit_count.to_le_bytes())?;
109        w.write_all(&self.edge_count.to_le_bytes())?;
110        w.write_all(&self.unit_table_offset.to_le_bytes())?;
111        w.write_all(&self.edge_table_offset.to_le_bytes())?;
112        w.write_all(&self.string_pool_offset.to_le_bytes())?;
113        w.write_all(&self.feature_vec_offset.to_le_bytes())?;
114        w.write_all(&self.temporal_offset.to_le_bytes())?;
115        w.write_all(&self.index_offset.to_le_bytes())?;
116        w.write_all(&self.repo_hash)?;
117        w.write_all(&self.compiled_at.to_le_bytes())?;
118        w.write_all(&self._reserved)?;
119        Ok(())
120    }
121
122    /// Read a header from a byte reader (little-endian).
123    ///
124    /// # Errors
125    ///
126    /// - `AcbError::InvalidMagic` if magic bytes don't match.
127    /// - `AcbError::UnsupportedVersion` if version is not recognized.
128    /// - `AcbError::Io` on read failure.
129    pub fn read_from(r: &mut impl Read) -> AcbResult<Self> {
130        let mut magic = [0u8; 4];
131        r.read_exact(&mut magic)?;
132        if magic != ACB_MAGIC {
133            return Err(AcbError::InvalidMagic);
134        }
135
136        let version = read_u32(r)?;
137        if version > FORMAT_VERSION {
138            return Err(AcbError::UnsupportedVersion(version));
139        }
140
141        let dimension = read_u32(r)?;
142        let language_count = read_u32(r)?;
143        let unit_count = read_u64(r)?;
144        let edge_count = read_u64(r)?;
145        let unit_table_offset = read_u64(r)?;
146        let edge_table_offset = read_u64(r)?;
147        let string_pool_offset = read_u64(r)?;
148        let feature_vec_offset = read_u64(r)?;
149        let temporal_offset = read_u64(r)?;
150        let index_offset = read_u64(r)?;
151
152        let mut repo_hash = [0u8; 32];
153        r.read_exact(&mut repo_hash)?;
154
155        let compiled_at = read_u64(r)?;
156
157        let mut reserved = [0u8; 8];
158        r.read_exact(&mut reserved)?;
159
160        Ok(Self {
161            magic,
162            version,
163            dimension,
164            language_count,
165            unit_count,
166            edge_count,
167            unit_table_offset,
168            edge_table_offset,
169            string_pool_offset,
170            feature_vec_offset,
171            temporal_offset,
172            index_offset,
173            repo_hash,
174            compiled_at,
175            _reserved: reserved,
176        })
177    }
178
179    /// Serialize the header to a 128-byte array.
180    pub fn to_bytes(&self) -> [u8; HEADER_SIZE] {
181        let mut buf = [0u8; HEADER_SIZE];
182        let mut cursor = std::io::Cursor::new(&mut buf[..]);
183        // write_to only fails on I/O, and Cursor<&mut [u8]> can't fail for 128 bytes
184        self.write_to(&mut cursor)
185            .expect("header write to fixed buffer");
186        buf
187    }
188
189    /// Deserialize a header from a 128-byte slice.
190    pub fn from_bytes(data: &[u8; HEADER_SIZE]) -> AcbResult<Self> {
191        let mut cursor = std::io::Cursor::new(&data[..]);
192        Self::read_from(&mut cursor)
193    }
194}
195
196// ---------------------------------------------------------------------------
197// Helpers
198// ---------------------------------------------------------------------------
199
200fn read_u32(r: &mut impl Read) -> AcbResult<u32> {
201    let mut buf = [0u8; 4];
202    r.read_exact(&mut buf)?;
203    Ok(u32::from_le_bytes(buf))
204}
205
206fn read_u64(r: &mut impl Read) -> AcbResult<u64> {
207    let mut buf = [0u8; 8];
208    r.read_exact(&mut buf)?;
209    Ok(u64::from_le_bytes(buf))
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built header must survive serialization to its fixed-size
    /// byte form and back without any field changing.
    #[test]
    fn header_roundtrip() {
        let original = FileHeader::new(256);

        let encoded = original.to_bytes();
        assert_eq!(encoded.len(), HEADER_SIZE);

        let decoded = FileHeader::from_bytes(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}