1use std::io;
7use std::ops::Deref;
8use std::path::Path;
9
10use super::aligned_vec::AlignedVec;
11use super::header::{Header, SectionOffsets};
12use super::ids::{StringId, TypeId};
13use super::instructions::{Call, Match, Opcode, Return, Trampoline};
14use super::sections::{FieldSymbol, NodeSymbol, TriviaEntry};
15use super::type_meta::{TypeData, TypeDef, TypeKind, TypeMember, TypeName};
16use super::{Entrypoint, STEP_SIZE, VERSION};
17
/// Decodes the little-endian `u16` stored in `bytes` at `offset`.
///
/// # Panics
///
/// Panics if `offset + 1` is not a valid index into `bytes`.
#[inline]
fn read_u16_le(bytes: &[u8], offset: usize) -> u16 {
    // Least-significant byte comes first in little-endian order.
    let low = u16::from(bytes[offset]);
    let high = u16::from(bytes[offset + 1]);
    (high << 8) | low
}
23
/// Decodes the little-endian `u32` stored in `bytes` at `offset`.
///
/// # Panics
///
/// Panics if fewer than four bytes are available starting at `offset`.
#[inline]
fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
    let window = &bytes[offset..][..4];
    // Walk from the most-significant (last) byte down to the first one,
    // shifting the accumulator up by one byte each step.
    window
        .iter()
        .rev()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
34
35pub enum ByteStorage {
42 Static(&'static [u8]),
44 Aligned(AlignedVec),
46}
47
48impl Deref for ByteStorage {
49 type Target = [u8];
50
51 fn deref(&self) -> &Self::Target {
52 match self {
53 ByteStorage::Static(s) => s,
54 ByteStorage::Aligned(v) => v,
55 }
56 }
57}
58
59impl std::fmt::Debug for ByteStorage {
60 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 match self {
62 ByteStorage::Static(s) => f.debug_tuple("Static").field(&s.len()).finish(),
63 ByteStorage::Aligned(v) => f.debug_tuple("Aligned").field(&v.len()).finish(),
64 }
65 }
66}
67
68impl ByteStorage {
69 pub fn from_static(bytes: &'static [u8]) -> Self {
76 assert!(
77 (bytes.as_ptr() as usize).is_multiple_of(64),
78 "static bytes must be 64-byte aligned; use include_query_aligned! macro"
79 );
80 Self::Static(bytes)
81 }
82
83 pub fn from_aligned(vec: AlignedVec) -> Self {
85 Self::Aligned(vec)
86 }
87
88 pub fn copy_from_slice(bytes: &[u8]) -> Self {
92 Self::Aligned(AlignedVec::copy_from_slice(bytes))
93 }
94
95 pub fn from_file(path: impl AsRef<Path>) -> io::Result<Self> {
97 Ok(Self::Aligned(AlignedVec::from_file(path)?))
98 }
99}
100
101#[derive(Clone, Copy, Debug)]
103pub enum Instruction<'a> {
104 Match(Match<'a>),
105 Call(Call),
106 Return(Return),
107 Trampoline(Trampoline),
108}
109
110impl<'a> Instruction<'a> {
111 #[inline]
113 pub fn from_bytes(bytes: &'a [u8]) -> Self {
114 debug_assert!(bytes.len() >= 8, "instruction too short");
115
116 let opcode = Opcode::from_u8(bytes[0] & 0xF);
117 match opcode {
118 Opcode::Call => {
119 let arr: [u8; 8] = bytes[..8].try_into().unwrap();
120 Self::Call(Call::from_bytes(arr))
121 }
122 Opcode::Return => {
123 let arr: [u8; 8] = bytes[..8].try_into().unwrap();
124 Self::Return(Return::from_bytes(arr))
125 }
126 Opcode::Trampoline => {
127 let arr: [u8; 8] = bytes[..8].try_into().unwrap();
128 Self::Trampoline(Trampoline::from_bytes(arr))
129 }
130 _ => Self::Match(Match::from_bytes(bytes)),
131 }
132 }
133}
134
/// Errors reported while constructing a [`Module`].
#[derive(Debug, thiserror::Error)]
pub enum ModuleError {
    /// The header's magic check (`Header::validate_magic`) failed.
    #[error("invalid magic: expected PTKQ")]
    InvalidMagic,
    /// The header's version check (`Header::validate_version`) failed; carries
    /// the version found in the header.
    #[error("unsupported version: {0} (expected {VERSION})")]
    UnsupportedVersion(u32),
    /// The input is shorter than the 64 bytes needed to read a header; carries
    /// the actual length.
    #[error("file too small: {0} bytes (minimum 64)")]
    FileTooSmall(usize),
    /// The total size recorded in the header differs from the number of bytes
    /// supplied.
    #[error("size mismatch: header says {header} bytes, got {actual}")]
    SizeMismatch { header: u32, actual: usize },
    /// An I/O error, converted automatically from [`io::Error`].
    #[error("io error: {0}")]
    Io(#[from] io::Error),
}
149
150#[derive(Debug)]
155pub struct Module {
156 storage: ByteStorage,
157 header: Header,
158 offsets: SectionOffsets,
160}
161
162impl Module {
163 pub fn from_aligned(vec: AlignedVec) -> Result<Self, ModuleError> {
167 Self::from_storage(ByteStorage::from_aligned(vec))
168 }
169
170 pub fn from_static(bytes: &'static [u8]) -> Result<Self, ModuleError> {
182 Self::from_storage(ByteStorage::from_static(bytes))
183 }
184
185 pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ModuleError> {
189 Self::from_storage(ByteStorage::from_file(&path)?)
190 }
191
192 pub fn load(bytes: &[u8]) -> Result<Self, ModuleError> {
196 Self::from_storage(ByteStorage::copy_from_slice(bytes))
197 }
198
199 #[deprecated(since = "0.1.0", note = "use `Module::from_aligned` for AlignedVec or `Module::load` for copying")]
201 pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
202 Self::load(&bytes)
203 }
204
205 fn from_storage(storage: ByteStorage) -> Result<Self, ModuleError> {
207 if storage.len() < 64 {
208 return Err(ModuleError::FileTooSmall(storage.len()));
209 }
210
211 let header = Header::from_bytes(&storage[..64]);
212
213 if !header.validate_magic() {
214 return Err(ModuleError::InvalidMagic);
215 }
216 if !header.validate_version() {
217 return Err(ModuleError::UnsupportedVersion(header.version));
218 }
219 if header.total_size as usize != storage.len() {
220 return Err(ModuleError::SizeMismatch {
221 header: header.total_size,
222 actual: storage.len(),
223 });
224 }
225
226 let offsets = header.compute_offsets();
228
229 Ok(Self {
230 storage,
231 header,
232 offsets,
233 })
234 }
235
236 pub fn header(&self) -> &Header {
238 &self.header
239 }
240
241 pub fn offsets(&self) -> &SectionOffsets {
243 &self.offsets
244 }
245
246 pub fn bytes(&self) -> &[u8] {
248 &self.storage
249 }
250
251 #[inline]
253 pub fn decode_step(&self, step: u16) -> Instruction<'_> {
254 let offset = self.offsets.transitions as usize + (step as usize) * STEP_SIZE;
255 Instruction::from_bytes(&self.storage[offset..])
256 }
257
258 pub fn strings(&self) -> StringsView<'_> {
260 StringsView {
261 blob: &self.storage[self.offsets.str_blob as usize..],
262 table: self.string_table_slice(),
263 }
264 }
265
266 pub fn node_types(&self) -> SymbolsView<'_, NodeSymbol> {
268 let offset = self.offsets.node_types as usize;
269 let count = self.header.node_types_count as usize;
270 SymbolsView {
271 bytes: &self.storage[offset..offset + count * 4],
272 count,
273 _marker: std::marker::PhantomData,
274 }
275 }
276
277 pub fn node_fields(&self) -> SymbolsView<'_, FieldSymbol> {
279 let offset = self.offsets.node_fields as usize;
280 let count = self.header.node_fields_count as usize;
281 SymbolsView {
282 bytes: &self.storage[offset..offset + count * 4],
283 count,
284 _marker: std::marker::PhantomData,
285 }
286 }
287
288 pub fn trivia(&self) -> TriviaView<'_> {
290 let offset = self.offsets.trivia as usize;
291 let count = self.header.trivia_count as usize;
292 TriviaView {
293 bytes: &self.storage[offset..offset + count * 2],
294 count,
295 }
296 }
297
298 pub fn regexes(&self) -> RegexView<'_> {
300 RegexView {
301 blob: &self.storage[self.offsets.regex_blob as usize..],
302 table: self.regex_table_slice(),
303 }
304 }
305
306 pub fn types(&self) -> TypesView<'_> {
308 let defs_offset = self.offsets.type_defs as usize;
309 let defs_count = self.header.type_defs_count as usize;
310 let members_offset = self.offsets.type_members as usize;
311 let members_count = self.header.type_members_count as usize;
312 let names_offset = self.offsets.type_names as usize;
313 let names_count = self.header.type_names_count as usize;
314
315 TypesView {
316 defs_bytes: &self.storage[defs_offset..defs_offset + defs_count * 4],
317 members_bytes: &self.storage[members_offset..members_offset + members_count * 4],
318 names_bytes: &self.storage[names_offset..names_offset + names_count * 4],
319 defs_count,
320 members_count,
321 names_count,
322 }
323 }
324
325 pub fn entrypoints(&self) -> EntrypointsView<'_> {
327 let offset = self.offsets.entrypoints as usize;
328 let count = self.header.entrypoints_count as usize;
329 EntrypointsView {
330 bytes: &self.storage[offset..offset + count * 8],
331 count,
332 }
333 }
334
335 fn string_table_slice(&self) -> &[u8] {
338 let offset = self.offsets.str_table as usize;
339 let count = self.header.str_table_count as usize;
340 &self.storage[offset..offset + (count + 1) * 4]
341 }
342
343 fn regex_table_slice(&self) -> &[u8] {
346 let offset = self.offsets.regex_table as usize;
347 let count = self.header.regex_table_count as usize;
348 &self.storage[offset..offset + (count + 1) * 8]
349 }
350}
351
352pub struct StringsView<'a> {
354 blob: &'a [u8],
355 table: &'a [u8],
356}
357
358impl<'a> StringsView<'a> {
359 pub fn get(&self, id: StringId) -> &'a str {
361 self.get_by_index(id.get() as usize)
362 }
363
364 pub fn get_by_index(&self, idx: usize) -> &'a str {
369 let start = read_u32_le(self.table, idx * 4) as usize;
370 let end = read_u32_le(self.table, (idx + 1) * 4) as usize;
371 std::str::from_utf8(&self.blob[start..end]).expect("invalid UTF-8 in string table")
372 }
373}
374
375pub struct SymbolsView<'a, T> {
377 bytes: &'a [u8],
378 count: usize,
379 _marker: std::marker::PhantomData<T>,
380}
381
382impl<'a> SymbolsView<'a, NodeSymbol> {
383 pub fn get(&self, idx: usize) -> NodeSymbol {
385 assert!(idx < self.count, "node symbol index out of bounds");
386 let offset = idx * 4;
387 NodeSymbol::new(
388 read_u16_le(self.bytes, offset),
389 StringId::new(read_u16_le(self.bytes, offset + 2)),
390 )
391 }
392
393 pub fn len(&self) -> usize {
395 self.count
396 }
397
398 pub fn is_empty(&self) -> bool {
400 self.count == 0
401 }
402}
403
404impl<'a> SymbolsView<'a, FieldSymbol> {
405 pub fn get(&self, idx: usize) -> FieldSymbol {
407 assert!(idx < self.count, "field symbol index out of bounds");
408 let offset = idx * 4;
409 FieldSymbol::new(
410 read_u16_le(self.bytes, offset),
411 StringId::new(read_u16_le(self.bytes, offset + 2)),
412 )
413 }
414
415 pub fn len(&self) -> usize {
417 self.count
418 }
419
420 pub fn is_empty(&self) -> bool {
422 self.count == 0
423 }
424}
425
426pub struct TriviaView<'a> {
428 bytes: &'a [u8],
429 count: usize,
430}
431
432impl<'a> TriviaView<'a> {
433 pub fn get(&self, idx: usize) -> TriviaEntry {
435 assert!(idx < self.count, "trivia index out of bounds");
436 TriviaEntry::new(read_u16_le(self.bytes, idx * 2))
437 }
438
439 pub fn len(&self) -> usize {
441 self.count
442 }
443
444 pub fn is_empty(&self) -> bool {
446 self.count == 0
447 }
448
449 pub fn contains(&self, node_type: u16) -> bool {
451 (0..self.count).any(|i| self.get(i).node_type == node_type)
452 }
453}
454
455pub struct RegexView<'a> {
460 blob: &'a [u8],
461 table: &'a [u8],
462}
463
464impl<'a> RegexView<'a> {
465 const ENTRY_SIZE: usize = 8;
467
468 pub fn get_by_index(&self, idx: usize) -> &'a [u8] {
473 let entry_offset = idx * Self::ENTRY_SIZE;
474 let next_entry_offset = (idx + 1) * Self::ENTRY_SIZE;
475
476 let start = read_u32_le(self.table, entry_offset + 4) as usize;
477 let end = read_u32_le(self.table, next_entry_offset + 4) as usize;
478 &self.blob[start..end]
479 }
480
481 pub fn get_string_id(&self, idx: usize) -> super::StringId {
485 let entry_offset = idx * Self::ENTRY_SIZE;
486 let string_id = read_u16_le(self.table, entry_offset);
487 super::StringId::new(string_id)
488 }
489}
490
491pub struct TypesView<'a> {
498 defs_bytes: &'a [u8],
499 members_bytes: &'a [u8],
500 names_bytes: &'a [u8],
501 defs_count: usize,
502 members_count: usize,
503 names_count: usize,
504}
505
506impl<'a> TypesView<'a> {
507 pub fn get_def(&self, idx: usize) -> TypeDef {
509 assert!(idx < self.defs_count, "type def index out of bounds");
510 let offset = idx * 4;
511 TypeDef::from_bytes(&self.defs_bytes[offset..])
512 }
513
514 pub fn get(&self, id: TypeId) -> Option<TypeDef> {
516 let idx = id.0 as usize;
517 if idx < self.defs_count {
518 Some(self.get_def(idx))
519 } else {
520 None
521 }
522 }
523
524 pub fn get_member(&self, idx: usize) -> TypeMember {
526 assert!(idx < self.members_count, "type member index out of bounds");
527 let offset = idx * 4;
528 TypeMember::new(
529 StringId::new(read_u16_le(self.members_bytes, offset)),
530 TypeId(read_u16_le(self.members_bytes, offset + 2)),
531 )
532 }
533
534 pub fn get_name(&self, idx: usize) -> TypeName {
536 assert!(idx < self.names_count, "type name index out of bounds");
537 let offset = idx * 4;
538 TypeName::new(
539 StringId::new(read_u16_le(self.names_bytes, offset)),
540 TypeId(read_u16_le(self.names_bytes, offset + 2)),
541 )
542 }
543
544 pub fn defs_count(&self) -> usize {
546 self.defs_count
547 }
548
549 pub fn members_count(&self) -> usize {
551 self.members_count
552 }
553
554 pub fn names_count(&self) -> usize {
556 self.names_count
557 }
558
559 pub fn members_of(&self, def: &TypeDef) -> impl Iterator<Item = TypeMember> + '_ {
561 let (start, count) = match def.classify() {
562 TypeData::Composite {
563 member_start,
564 member_count,
565 ..
566 } => (member_start as usize, member_count as usize),
567 _ => (0, 0),
568 };
569 (0..count).map(move |i| self.get_member(start + i))
570 }
571
572 pub fn unwrap_optional(&self, type_id: TypeId) -> (TypeId, bool) {
575 let Some(type_def) = self.get(type_id) else {
576 return (type_id, false);
577 };
578 match type_def.classify() {
579 TypeData::Wrapper {
580 kind: TypeKind::Optional,
581 inner,
582 } => (inner, true),
583 _ => (type_id, false),
584 }
585 }
586}
587
588pub struct EntrypointsView<'a> {
590 bytes: &'a [u8],
591 count: usize,
592}
593
594impl<'a> EntrypointsView<'a> {
595 pub fn get(&self, idx: usize) -> Entrypoint {
597 assert!(idx < self.count, "entrypoint index out of bounds");
598 let offset = idx * 8;
599 Entrypoint::from_bytes(&self.bytes[offset..])
600 }
601
602 pub fn len(&self) -> usize {
604 self.count
605 }
606
607 pub fn is_empty(&self) -> bool {
609 self.count == 0
610 }
611
612 pub fn find_by_name(&self, name: &str, strings: &StringsView<'_>) -> Option<Entrypoint> {
614 (0..self.count)
615 .map(|i| self.get(i))
616 .find(|e| strings.get(e.name()) == name)
617 }
618}