1use std::io;
7use std::ops::Deref;
8use std::path::Path;
9
10use super::aligned_vec::AlignedVec;
11use super::header::{Header, SectionOffsets};
12use super::ids::{StringId, TypeId};
13use super::instructions::{Call, Match, Opcode, Return, Trampoline};
14use super::sections::{FieldSymbol, NodeSymbol, TriviaEntry};
15use super::type_meta::{TypeData, TypeDef, TypeKind, TypeMember, TypeName};
16use super::{Entrypoint, STEP_SIZE, VERSION};
17
/// Reads a little-endian `u16` from `bytes`, starting at `offset`.
///
/// # Panics
///
/// Panics if `offset` or `offset + 1` is out of bounds for `bytes`.
#[inline]
fn read_u16_le(bytes: &[u8], offset: usize) -> u16 {
    // Least-significant byte comes first in little-endian layout.
    let low = u16::from(bytes[offset]);
    let high = u16::from(bytes[offset + 1]);
    low | (high << 8)
}
23
/// Reads a little-endian `u32` from `bytes`, starting at `offset`.
///
/// # Panics
///
/// Panics if any of the four bytes at `offset..offset + 4` is out of bounds.
#[inline]
fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
    // Bytes are fetched in ascending order, least-significant first.
    let byte_at = |i: usize| u32::from(bytes[offset + i]);
    byte_at(0) | (byte_at(1) << 8) | (byte_at(2) << 16) | (byte_at(3) << 24)
}
34
35pub enum ByteStorage {
42 Static(&'static [u8]),
44 Aligned(AlignedVec),
46}
47
48impl Deref for ByteStorage {
49 type Target = [u8];
50
51 fn deref(&self) -> &Self::Target {
52 match self {
53 ByteStorage::Static(s) => s,
54 ByteStorage::Aligned(v) => v,
55 }
56 }
57}
58
59impl std::fmt::Debug for ByteStorage {
60 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 match self {
62 ByteStorage::Static(s) => f.debug_tuple("Static").field(&s.len()).finish(),
63 ByteStorage::Aligned(v) => f.debug_tuple("Aligned").field(&v.len()).finish(),
64 }
65 }
66}
67
68impl ByteStorage {
69 pub fn from_static(bytes: &'static [u8]) -> Self {
76 assert!(
77 (bytes.as_ptr() as usize).is_multiple_of(64),
78 "static bytes must be 64-byte aligned; use include_query_aligned! macro"
79 );
80 Self::Static(bytes)
81 }
82
83 pub fn from_aligned(vec: AlignedVec) -> Self {
85 Self::Aligned(vec)
86 }
87
88 pub fn copy_from_slice(bytes: &[u8]) -> Self {
92 Self::Aligned(AlignedVec::copy_from_slice(bytes))
93 }
94
95 pub fn from_file(path: impl AsRef<Path>) -> io::Result<Self> {
97 Ok(Self::Aligned(AlignedVec::from_file(path)?))
98 }
99}
100
101#[derive(Clone, Copy, Debug)]
103pub enum Instruction<'a> {
104 Match(Match<'a>),
105 Call(Call),
106 Return(Return),
107 Trampoline(Trampoline),
108}
109
110impl<'a> Instruction<'a> {
111 #[inline]
113 pub fn from_bytes(bytes: &'a [u8]) -> Self {
114 debug_assert!(bytes.len() >= 8, "instruction too short");
115
116 let opcode = Opcode::from_u8(bytes[0] & 0xF);
117 match opcode {
118 Opcode::Call => {
119 let arr: [u8; 8] = bytes[..8].try_into().unwrap();
120 Self::Call(Call::from_bytes(arr))
121 }
122 Opcode::Return => {
123 let arr: [u8; 8] = bytes[..8].try_into().unwrap();
124 Self::Return(Return::from_bytes(arr))
125 }
126 Opcode::Trampoline => {
127 let arr: [u8; 8] = bytes[..8].try_into().unwrap();
128 Self::Trampoline(Trampoline::from_bytes(arr))
129 }
130 _ => Self::Match(Match::from_bytes(bytes)),
131 }
132 }
133}
134
135#[derive(Debug, thiserror::Error)]
137pub enum ModuleError {
138 #[error("invalid magic: expected PTKQ")]
139 InvalidMagic,
140 #[error("unsupported version: {0} (expected {VERSION})")]
141 UnsupportedVersion(u32),
142 #[error("file too small: {0} bytes (minimum 64)")]
143 FileTooSmall(usize),
144 #[error("size mismatch: header says {header} bytes, got {actual}")]
145 SizeMismatch { header: u32, actual: usize },
146 #[error("io error: {0}")]
147 Io(#[from] io::Error),
148}
149
150#[derive(Debug)]
155pub struct Module {
156 storage: ByteStorage,
157 header: Header,
158 offsets: SectionOffsets,
160}
161
162impl Module {
163 pub fn from_aligned(vec: AlignedVec) -> Result<Self, ModuleError> {
167 Self::from_storage(ByteStorage::from_aligned(vec))
168 }
169
170 pub fn from_static(bytes: &'static [u8]) -> Result<Self, ModuleError> {
182 Self::from_storage(ByteStorage::from_static(bytes))
183 }
184
185 pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ModuleError> {
189 Self::from_storage(ByteStorage::from_file(&path)?)
190 }
191
192 pub fn load(bytes: &[u8]) -> Result<Self, ModuleError> {
196 Self::from_storage(ByteStorage::copy_from_slice(bytes))
197 }
198
199 #[deprecated(
201 since = "0.1.0",
202 note = "use `Module::from_aligned` for AlignedVec or `Module::load` for copying"
203 )]
204 pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, ModuleError> {
205 Self::load(&bytes)
206 }
207
208 fn from_storage(storage: ByteStorage) -> Result<Self, ModuleError> {
210 if storage.len() < 64 {
211 return Err(ModuleError::FileTooSmall(storage.len()));
212 }
213
214 let header = Header::from_bytes(&storage[..64]);
215
216 if !header.validate_magic() {
217 return Err(ModuleError::InvalidMagic);
218 }
219 if !header.validate_version() {
220 return Err(ModuleError::UnsupportedVersion(header.version));
221 }
222 if header.total_size as usize != storage.len() {
223 return Err(ModuleError::SizeMismatch {
224 header: header.total_size,
225 actual: storage.len(),
226 });
227 }
228
229 let offsets = header.compute_offsets();
231
232 Ok(Self {
233 storage,
234 header,
235 offsets,
236 })
237 }
238
239 pub fn header(&self) -> &Header {
241 &self.header
242 }
243
244 pub fn offsets(&self) -> &SectionOffsets {
246 &self.offsets
247 }
248
249 pub fn bytes(&self) -> &[u8] {
251 &self.storage
252 }
253
254 #[inline]
256 pub fn decode_step(&self, step: u16) -> Instruction<'_> {
257 let offset = self.offsets.transitions as usize + (step as usize) * STEP_SIZE;
258 Instruction::from_bytes(&self.storage[offset..])
259 }
260
261 pub fn strings(&self) -> StringsView<'_> {
263 StringsView {
264 blob: &self.storage[self.offsets.str_blob as usize..],
265 table: self.string_table_slice(),
266 }
267 }
268
269 pub fn node_types(&self) -> SymbolsView<'_, NodeSymbol> {
271 let offset = self.offsets.node_types as usize;
272 let count = self.header.node_types_count as usize;
273 SymbolsView {
274 bytes: &self.storage[offset..offset + count * 4],
275 count,
276 _marker: std::marker::PhantomData,
277 }
278 }
279
280 pub fn node_fields(&self) -> SymbolsView<'_, FieldSymbol> {
282 let offset = self.offsets.node_fields as usize;
283 let count = self.header.node_fields_count as usize;
284 SymbolsView {
285 bytes: &self.storage[offset..offset + count * 4],
286 count,
287 _marker: std::marker::PhantomData,
288 }
289 }
290
291 pub fn trivia(&self) -> TriviaView<'_> {
293 let offset = self.offsets.trivia as usize;
294 let count = self.header.trivia_count as usize;
295 TriviaView {
296 bytes: &self.storage[offset..offset + count * 2],
297 count,
298 }
299 }
300
301 pub fn regexes(&self) -> RegexView<'_> {
303 RegexView {
304 blob: &self.storage[self.offsets.regex_blob as usize..],
305 table: self.regex_table_slice(),
306 }
307 }
308
309 pub fn types(&self) -> TypesView<'_> {
311 let defs_offset = self.offsets.type_defs as usize;
312 let defs_count = self.header.type_defs_count as usize;
313 let members_offset = self.offsets.type_members as usize;
314 let members_count = self.header.type_members_count as usize;
315 let names_offset = self.offsets.type_names as usize;
316 let names_count = self.header.type_names_count as usize;
317
318 TypesView {
319 defs_bytes: &self.storage[defs_offset..defs_offset + defs_count * 4],
320 members_bytes: &self.storage[members_offset..members_offset + members_count * 4],
321 names_bytes: &self.storage[names_offset..names_offset + names_count * 4],
322 defs_count,
323 members_count,
324 names_count,
325 }
326 }
327
328 pub fn entrypoints(&self) -> EntrypointsView<'_> {
330 let offset = self.offsets.entrypoints as usize;
331 let count = self.header.entrypoints_count as usize;
332 EntrypointsView {
333 bytes: &self.storage[offset..offset + count * 8],
334 count,
335 }
336 }
337
338 fn string_table_slice(&self) -> &[u8] {
341 let offset = self.offsets.str_table as usize;
342 let count = self.header.str_table_count as usize;
343 &self.storage[offset..offset + (count + 1) * 4]
344 }
345
346 fn regex_table_slice(&self) -> &[u8] {
349 let offset = self.offsets.regex_table as usize;
350 let count = self.header.regex_table_count as usize;
351 &self.storage[offset..offset + (count + 1) * 8]
352 }
353}
354
/// Borrowed view over a module's strings.
pub struct StringsView<'a> {
    /// Bytes of the string blob; string contents are sliced out of this.
    blob: &'a [u8],
    /// Table of little-endian `u32` offsets into `blob`, 4 bytes per entry.
    table: &'a [u8],
}
360
361impl<'a> StringsView<'a> {
362 pub fn get(&self, id: StringId) -> &'a str {
364 self.get_by_index(id.get() as usize)
365 }
366
367 pub fn get_by_index(&self, idx: usize) -> &'a str {
372 let start = read_u32_le(self.table, idx * 4) as usize;
373 let end = read_u32_le(self.table, (idx + 1) * 4) as usize;
374 std::str::from_utf8(&self.blob[start..end]).expect("invalid UTF-8 in string table")
375 }
376}
377
/// Borrowed view over a table of symbols of type `T`.
pub struct SymbolsView<'a, T> {
    /// Raw bytes of the table.
    bytes: &'a [u8],
    /// Number of entries in the table.
    count: usize,
    /// Records the entry type without storing a value of it.
    _marker: std::marker::PhantomData<T>,
}
384
385impl<'a> SymbolsView<'a, NodeSymbol> {
386 pub fn get(&self, idx: usize) -> NodeSymbol {
388 assert!(idx < self.count, "node symbol index out of bounds");
389 let offset = idx * 4;
390 NodeSymbol::new(
391 read_u16_le(self.bytes, offset),
392 StringId::new(read_u16_le(self.bytes, offset + 2)),
393 )
394 }
395
396 pub fn len(&self) -> usize {
398 self.count
399 }
400
401 pub fn is_empty(&self) -> bool {
403 self.count == 0
404 }
405}
406
407impl<'a> SymbolsView<'a, FieldSymbol> {
408 pub fn get(&self, idx: usize) -> FieldSymbol {
410 assert!(idx < self.count, "field symbol index out of bounds");
411 let offset = idx * 4;
412 FieldSymbol::new(
413 read_u16_le(self.bytes, offset),
414 StringId::new(read_u16_le(self.bytes, offset + 2)),
415 )
416 }
417
418 pub fn len(&self) -> usize {
420 self.count
421 }
422
423 pub fn is_empty(&self) -> bool {
425 self.count == 0
426 }
427}
428
/// Borrowed view over a module's trivia entries.
pub struct TriviaView<'a> {
    /// Raw bytes of the trivia table.
    bytes: &'a [u8],
    /// Number of entries in the table.
    count: usize,
}
434
435impl<'a> TriviaView<'a> {
436 pub fn get(&self, idx: usize) -> TriviaEntry {
438 assert!(idx < self.count, "trivia index out of bounds");
439 TriviaEntry::new(read_u16_le(self.bytes, idx * 2))
440 }
441
442 pub fn len(&self) -> usize {
444 self.count
445 }
446
447 pub fn is_empty(&self) -> bool {
449 self.count == 0
450 }
451
452 pub fn contains(&self, node_type: u16) -> bool {
454 (0..self.count).any(|i| self.get(i).node_type == node_type)
455 }
456}
457
/// Borrowed view over a module's regex data.
pub struct RegexView<'a> {
    /// Bytes of the regex blob; per-regex data is sliced out of this.
    blob: &'a [u8],
    /// Regex table, 8 bytes per entry.
    table: &'a [u8],
}
466
467impl<'a> RegexView<'a> {
468 const ENTRY_SIZE: usize = 8;
470
471 pub fn get_by_index(&self, idx: usize) -> &'a [u8] {
476 let entry_offset = idx * Self::ENTRY_SIZE;
477 let next_entry_offset = (idx + 1) * Self::ENTRY_SIZE;
478
479 let start = read_u32_le(self.table, entry_offset + 4) as usize;
480 let end = read_u32_le(self.table, next_entry_offset + 4) as usize;
481 &self.blob[start..end]
482 }
483
484 pub fn get_string_id(&self, idx: usize) -> super::StringId {
488 let entry_offset = idx * Self::ENTRY_SIZE;
489 let string_id = read_u16_le(self.table, entry_offset);
490 super::StringId::new(string_id)
491 }
492}
493
/// Borrowed view over a module's type definitions, members and names.
pub struct TypesView<'a> {
    /// Raw bytes of the type-definition table.
    defs_bytes: &'a [u8],
    /// Raw bytes of the type-member table.
    members_bytes: &'a [u8],
    /// Raw bytes of the type-name table.
    names_bytes: &'a [u8],
    /// Number of type definitions.
    defs_count: usize,
    /// Number of type members.
    members_count: usize,
    /// Number of type names.
    names_count: usize,
}
508
509impl<'a> TypesView<'a> {
510 pub fn get_def(&self, idx: usize) -> TypeDef {
512 assert!(idx < self.defs_count, "type def index out of bounds");
513 let offset = idx * 4;
514 TypeDef::from_bytes(&self.defs_bytes[offset..])
515 }
516
517 pub fn get(&self, id: TypeId) -> Option<TypeDef> {
519 let idx = id.0 as usize;
520 if idx < self.defs_count {
521 Some(self.get_def(idx))
522 } else {
523 None
524 }
525 }
526
527 pub fn get_member(&self, idx: usize) -> TypeMember {
529 assert!(idx < self.members_count, "type member index out of bounds");
530 let offset = idx * 4;
531 TypeMember::new(
532 StringId::new(read_u16_le(self.members_bytes, offset)),
533 TypeId(read_u16_le(self.members_bytes, offset + 2)),
534 )
535 }
536
537 pub fn get_name(&self, idx: usize) -> TypeName {
539 assert!(idx < self.names_count, "type name index out of bounds");
540 let offset = idx * 4;
541 TypeName::new(
542 StringId::new(read_u16_le(self.names_bytes, offset)),
543 TypeId(read_u16_le(self.names_bytes, offset + 2)),
544 )
545 }
546
547 pub fn defs_count(&self) -> usize {
549 self.defs_count
550 }
551
552 pub fn members_count(&self) -> usize {
554 self.members_count
555 }
556
557 pub fn names_count(&self) -> usize {
559 self.names_count
560 }
561
562 pub fn members_of(&self, def: &TypeDef) -> impl Iterator<Item = TypeMember> + '_ {
564 let (start, count) = match def.classify() {
565 TypeData::Composite {
566 member_start,
567 member_count,
568 ..
569 } => (member_start as usize, member_count as usize),
570 _ => (0, 0),
571 };
572 (0..count).map(move |i| self.get_member(start + i))
573 }
574
575 pub fn unwrap_optional(&self, type_id: TypeId) -> (TypeId, bool) {
578 let Some(type_def) = self.get(type_id) else {
579 return (type_id, false);
580 };
581 match type_def.classify() {
582 TypeData::Wrapper {
583 kind: TypeKind::Optional,
584 inner,
585 } => (inner, true),
586 _ => (type_id, false),
587 }
588 }
589}
590
/// Borrowed view over a module's entrypoint table.
pub struct EntrypointsView<'a> {
    /// Raw bytes of the entrypoint table.
    bytes: &'a [u8],
    /// Number of entrypoints in the table.
    count: usize,
}
596
597impl<'a> EntrypointsView<'a> {
598 pub fn get(&self, idx: usize) -> Entrypoint {
600 assert!(idx < self.count, "entrypoint index out of bounds");
601 let offset = idx * 8;
602 Entrypoint::from_bytes(&self.bytes[offset..])
603 }
604
605 pub fn len(&self) -> usize {
607 self.count
608 }
609
610 pub fn is_empty(&self) -> bool {
612 self.count == 0
613 }
614
615 pub fn find_by_name(&self, name: &str, strings: &StringsView<'_>) -> Option<Entrypoint> {
617 (0..self.count)
618 .map(|i| self.get(i))
619 .find(|e| strings.get(e.name()) == name)
620 }
621}