plotnik_bytecode/bytecode/
header.rs

//! Bytecode file header (64 bytes).
//!
//! v3 layout: section offsets are not stored in the file; they are computed
//! from the header's blob sizes and element counts, with every section
//! aligned to SECTION_ALIGN (64 bytes).
//! Section order: Header → StringBlob → RegexBlob → StringTable → RegexTable →
//! NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers → TypeNames →
//! Entrypoints → Transitions
7
8use super::{MAGIC, SECTION_ALIGN, VERSION};
9
/// File header - first 64 bytes of the bytecode file.
///
/// v3 layout (section offsets are not stored; they are computed from the
/// sizes and counts below):
/// - bytes 0-23: identity and sizes (magic, version, checksum, total_size,
///   str_blob_size, regex_blob_size)
/// - bytes 24-43: element counts (10 × u16) — order matches section order
/// - bytes 44-63: reserved
///
/// Fields are declared in serialized order. `#[repr(C, align(64))]` pins that
/// order in memory and pads/aligns the struct to 64 bytes; the size is
/// checked at compile time right below the definition.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C, align(64))]
pub struct Header {
    // Bytes 0-23: identity and sizes (4 magic bytes + 5 × u32)
    /// Magic bytes: b"PTKQ"
    pub magic: [u8; 4],
    /// Format version (currently 3)
    pub version: u32,
    /// CRC32 checksum of everything after the header
    pub checksum: u32,
    /// Total file size in bytes
    pub total_size: u32,
    /// Size of the string blob in bytes.
    pub str_blob_size: u32,
    /// Size of the regex blob in bytes.
    pub regex_blob_size: u32,

    // Bytes 24-43: element counts (10 × u16) — order matches section order
    /// Element count for the StringTable section.
    pub str_table_count: u16,
    /// Element count for the RegexTable section.
    pub regex_table_count: u16,
    /// Element count for the NodeTypes section.
    pub node_types_count: u16,
    /// Element count for the NodeFields section.
    pub node_fields_count: u16,
    /// Element count for the Trivia section.
    pub trivia_count: u16,
    /// Element count for the TypeDefs section.
    pub type_defs_count: u16,
    /// Element count for the TypeMembers section.
    pub type_members_count: u16,
    /// Element count for the TypeNames section.
    pub type_names_count: u16,
    /// Element count for the Entrypoints section.
    pub entrypoints_count: u16,
    /// Element count for the Transitions section.
    pub transitions_count: u16,

    // Bytes 44-63: reserved (public for cross-crate struct initialization)
    /// Reserved bytes; copied verbatim by `from_bytes` / `to_bytes` and
    /// zeroed by `Default`.
    pub _reserved: [u8; 20],
}

// Compile-time guard: the in-memory header must be exactly 64 bytes.
const _: () = assert!(std::mem::size_of::<Header>() == 64);
50
51impl Default for Header {
52    fn default() -> Self {
53        Self {
54            magic: MAGIC,
55            version: VERSION,
56            checksum: 0,
57            total_size: 0,
58            str_blob_size: 0,
59            regex_blob_size: 0,
60            str_table_count: 0,
61            regex_table_count: 0,
62            node_types_count: 0,
63            node_fields_count: 0,
64            trivia_count: 0,
65            type_defs_count: 0,
66            type_members_count: 0,
67            type_names_count: 0,
68            entrypoints_count: 0,
69            transitions_count: 0,
70            _reserved: [0; 20],
71        }
72    }
73}
74
/// Computed section offsets derived from header counts.
///
/// Produced by `Header::compute_offsets`; each field is the offset at which
/// the corresponding section starts. The header itself occupies the first
/// 64 bytes, so the first section begins right after it.
///
/// Order: StringBlob → RegexBlob → StringTable → RegexTable → NodeTypes →
/// NodeFields → Trivia → TypeDefs → TypeMembers → TypeNames → Entrypoints → Transitions
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct SectionOffsets {
    /// Start of the StringBlob section.
    pub str_blob: u32,
    /// Start of the RegexBlob section.
    pub regex_blob: u32,
    /// Start of the StringTable section.
    pub str_table: u32,
    /// Start of the RegexTable section.
    pub regex_table: u32,
    /// Start of the NodeTypes section.
    pub node_types: u32,
    /// Start of the NodeFields section.
    pub node_fields: u32,
    /// Start of the Trivia section.
    pub trivia: u32,
    /// Start of the TypeDefs section.
    pub type_defs: u32,
    /// Start of the TypeMembers section.
    pub type_members: u32,
    /// Start of the TypeNames section.
    pub type_names: u32,
    /// Start of the Entrypoints section.
    pub entrypoints: u32,
    /// Start of the Transitions section.
    pub transitions: u32,
}
94
95impl Header {
96    /// Decode header from 64 bytes.
97    pub fn from_bytes(bytes: &[u8]) -> Self {
98        assert!(bytes.len() >= 64, "header too short");
99
100        let mut reserved = [0u8; 20];
101        reserved.copy_from_slice(&bytes[44..64]);
102
103        Self {
104            magic: [bytes[0], bytes[1], bytes[2], bytes[3]],
105            version: u32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]),
106            checksum: u32::from_le_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]),
107            total_size: u32::from_le_bytes([bytes[12], bytes[13], bytes[14], bytes[15]]),
108            str_blob_size: u32::from_le_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]),
109            regex_blob_size: u32::from_le_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]),
110            str_table_count: u16::from_le_bytes([bytes[24], bytes[25]]),
111            regex_table_count: u16::from_le_bytes([bytes[26], bytes[27]]),
112            node_types_count: u16::from_le_bytes([bytes[28], bytes[29]]),
113            node_fields_count: u16::from_le_bytes([bytes[30], bytes[31]]),
114            trivia_count: u16::from_le_bytes([bytes[32], bytes[33]]),
115            type_defs_count: u16::from_le_bytes([bytes[34], bytes[35]]),
116            type_members_count: u16::from_le_bytes([bytes[36], bytes[37]]),
117            type_names_count: u16::from_le_bytes([bytes[38], bytes[39]]),
118            entrypoints_count: u16::from_le_bytes([bytes[40], bytes[41]]),
119            transitions_count: u16::from_le_bytes([bytes[42], bytes[43]]),
120            _reserved: reserved,
121        }
122    }
123
124    /// Encode header to 64 bytes.
125    pub fn to_bytes(&self) -> [u8; 64] {
126        let mut bytes = [0u8; 64];
127        bytes[0..4].copy_from_slice(&self.magic);
128        bytes[4..8].copy_from_slice(&self.version.to_le_bytes());
129        bytes[8..12].copy_from_slice(&self.checksum.to_le_bytes());
130        bytes[12..16].copy_from_slice(&self.total_size.to_le_bytes());
131        bytes[16..20].copy_from_slice(&self.str_blob_size.to_le_bytes());
132        bytes[20..24].copy_from_slice(&self.regex_blob_size.to_le_bytes());
133        bytes[24..26].copy_from_slice(&self.str_table_count.to_le_bytes());
134        bytes[26..28].copy_from_slice(&self.regex_table_count.to_le_bytes());
135        bytes[28..30].copy_from_slice(&self.node_types_count.to_le_bytes());
136        bytes[30..32].copy_from_slice(&self.node_fields_count.to_le_bytes());
137        bytes[32..34].copy_from_slice(&self.trivia_count.to_le_bytes());
138        bytes[34..36].copy_from_slice(&self.type_defs_count.to_le_bytes());
139        bytes[36..38].copy_from_slice(&self.type_members_count.to_le_bytes());
140        bytes[38..40].copy_from_slice(&self.type_names_count.to_le_bytes());
141        bytes[40..42].copy_from_slice(&self.entrypoints_count.to_le_bytes());
142        bytes[42..44].copy_from_slice(&self.transitions_count.to_le_bytes());
143        bytes[44..64].copy_from_slice(&self._reserved);
144        bytes
145    }
146
147    pub fn validate_magic(&self) -> bool {
148        self.magic == MAGIC
149    }
150
151    pub fn validate_version(&self) -> bool {
152        self.version == VERSION
153    }
154
155    /// Compute section offsets from counts and blob sizes.
156    ///
157    /// Section order (all 64-byte aligned):
158    /// Header → StringBlob → RegexBlob → StringTable → RegexTable →
159    /// NodeTypes → NodeFields → Trivia → TypeDefs → TypeMembers →
160    /// TypeNames → Entrypoints → Transitions
161    pub fn compute_offsets(&self) -> SectionOffsets {
162        let align = SECTION_ALIGN as u32;
163
164        // Blobs first (right after header)
165        let str_blob = align; // 64
166        let regex_blob = align_up(str_blob + self.str_blob_size, align);
167
168        // Tables after blobs
169        let str_table = align_up(regex_blob + self.regex_blob_size, align);
170        let str_table_size = (self.str_table_count as u32 + 1) * 4;
171
172        let regex_table = align_up(str_table + str_table_size, align);
173        let regex_table_size = (self.regex_table_count as u32 + 1) * 8;
174
175        // Symbol sections
176        let node_types = align_up(regex_table + regex_table_size, align);
177        let node_types_size = self.node_types_count as u32 * 4;
178
179        let node_fields = align_up(node_types + node_types_size, align);
180        let node_fields_size = self.node_fields_count as u32 * 4;
181
182        let trivia = align_up(node_fields + node_fields_size, align);
183        let trivia_size = self.trivia_count as u32 * 2;
184
185        // Type metadata
186        let type_defs = align_up(trivia + trivia_size, align);
187        let type_defs_size = self.type_defs_count as u32 * 4;
188
189        let type_members = align_up(type_defs + type_defs_size, align);
190        let type_members_size = self.type_members_count as u32 * 4;
191
192        let type_names = align_up(type_members + type_members_size, align);
193        let type_names_size = self.type_names_count as u32 * 4;
194
195        // Entry points and instructions
196        let entrypoints = align_up(type_names + type_names_size, align);
197        let entrypoints_size = self.entrypoints_count as u32 * 8;
198
199        let transitions = align_up(entrypoints + entrypoints_size, align);
200
201        SectionOffsets {
202            str_blob,
203            regex_blob,
204            str_table,
205            regex_table,
206            node_types,
207            node_fields,
208            trivia,
209            type_defs,
210            type_members,
211            type_names,
212            entrypoints,
213            transitions,
214        }
215    }
216}
217
/// Round `value` up to the next multiple of `align`.
///
/// A value that is already a multiple of `align` is returned unchanged.
/// `align` must be a non-zero power of two, because the rounding is done
/// with a bit mask; this is checked in debug builds.
///
/// The rounded result must fit in `u32`: a `value` above the largest
/// multiple of `align` representable in `u32` overflows the addition.
fn align_up(value: u32, align: u32) -> u32 {
    debug_assert!(align.is_power_of_two(), "alignment must be a power of two");
    // Add `align - 1` as a single term. Writing `value + align - 1` evaluates
    // `value + align` first, which overflows for the largest aligned value
    // even though its rounded result is representable.
    let mask = align - 1;
    (value + mask) & !mask
}