plotnik_lib/ir/
compiled.rs

1//! Compiled query container and buffer.
2//!
3//! The compiled query lives in a single contiguous allocation—cache-friendly,
4//! zero fragmentation, portable to WASM. See ADR-0004 for format details.
5
6use std::alloc::{Layout, alloc, dealloc};
7use std::fmt::Write;
8use std::ptr;
9
10use super::{
11    EffectOp, Entrypoint, NodeFieldId, NodeTypeId, Slice, StringId, StringRef, Transition,
12    TransitionId, TypeDef, TypeMember,
13};
14
/// Buffer alignment for cache-line efficiency.
///
/// Used as the alignment of every `Layout` built for a [`CompiledQueryBuffer`], so it
/// must remain a non-zero power of two (a requirement of `Layout::from_size_align`).
pub const BUFFER_ALIGN: usize = 64;

/// Magic bytes identifying a compiled query file.
pub const MAGIC: [u8; 4] = *b"PLNK";

/// Current format version.
pub const FORMAT_VERSION: u32 = 1;
23
24/// Aligned buffer for compiled query data.
25///
26/// Allocated via `Layout::from_size_align(len, BUFFER_ALIGN)`. Standard `Box<[u8]>`
27/// won't work—it assumes 1-byte alignment and corrupts `dealloc`.
28pub struct CompiledQueryBuffer {
29    ptr: *mut u8,
30    len: usize,
31    /// `true` if allocated, `false` if mmap'd or external.
32    owned: bool,
33}
34
35impl CompiledQueryBuffer {
36    /// Allocate a new buffer with 64-byte alignment.
37    pub fn allocate(len: usize) -> Self {
38        if len == 0 {
39            return Self {
40                ptr: ptr::null_mut(),
41                len: 0,
42                owned: true,
43            };
44        }
45
46        let layout = Layout::from_size_align(len, BUFFER_ALIGN).expect("invalid layout");
47
48        // SAFETY: layout is non-zero size, properly aligned
49        let ptr = unsafe { alloc(layout) };
50        if ptr.is_null() {
51            std::alloc::handle_alloc_error(layout);
52        }
53
54        Self {
55            ptr,
56            len,
57            owned: true,
58        }
59    }
60
61    /// Create a view into external memory (mmap'd or borrowed).
62    ///
63    /// # Safety
64    /// - `ptr` must be valid for reads of `len` bytes
65    /// - `ptr` must be aligned to `BUFFER_ALIGN`
66    /// - The backing memory must outlive the returned buffer
67    pub unsafe fn from_external(ptr: *mut u8, len: usize) -> Self {
68        debug_assert!(
69            (ptr as usize).is_multiple_of(BUFFER_ALIGN),
70            "buffer must be 64-byte aligned"
71        );
72        Self {
73            ptr,
74            len,
75            owned: false,
76        }
77    }
78
79    /// Returns a pointer to the buffer start.
80    #[inline]
81    pub fn as_ptr(&self) -> *const u8 {
82        self.ptr
83    }
84
85    /// Returns a mutable pointer to the buffer start.
86    #[inline]
87    pub fn as_mut_ptr(&mut self) -> *mut u8 {
88        self.ptr
89    }
90
91    /// Returns the buffer length in bytes.
92    #[inline]
93    pub fn len(&self) -> usize {
94        self.len
95    }
96
97    /// Returns true if the buffer is empty.
98    #[inline]
99    pub fn is_empty(&self) -> bool {
100        self.len == 0
101    }
102
103    /// Returns the buffer as a byte slice.
104    #[inline]
105    pub fn as_slice(&self) -> &[u8] {
106        if self.ptr.is_null() {
107            &[]
108        } else {
109            // SAFETY: ptr is valid for len bytes if non-null
110            unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
111        }
112    }
113
114    /// Returns the buffer as a mutable byte slice.
115    #[inline]
116    pub fn as_mut_slice(&mut self) -> &mut [u8] {
117        if self.ptr.is_null() {
118            &mut []
119        } else {
120            // SAFETY: ptr is valid for len bytes if non-null, and we have &mut self
121            unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
122        }
123    }
124}
125
126impl Drop for CompiledQueryBuffer {
127    fn drop(&mut self) {
128        if self.owned && !self.ptr.is_null() {
129            let layout = Layout::from_size_align(self.len, BUFFER_ALIGN)
130                .expect("layout was valid at allocation");
131            // SAFETY: ptr was allocated with this exact layout
132            unsafe { dealloc(self.ptr, layout) };
133        }
134    }
135}
136
137// SAFETY: The buffer is just raw bytes, safe to send across threads
138unsafe impl Send for CompiledQueryBuffer {}
139unsafe impl Sync for CompiledQueryBuffer {}
140
141/// A compiled query ready for execution.
142///
143/// Contains a single contiguous buffer with all segments, plus offset indices
144/// for O(1) access to each segment.
145pub struct CompiledQuery {
146    buffer: CompiledQueryBuffer,
147    // Segment offsets (byte offsets into buffer)
148    successors_offset: u32,
149    effects_offset: u32,
150    negated_fields_offset: u32,
151    string_refs_offset: u32,
152    string_bytes_offset: u32,
153    type_defs_offset: u32,
154    type_members_offset: u32,
155    entrypoints_offset: u32,
156    trivia_kinds_offset: u32, // 0 = no trivia kinds
157    // Segment counts (number of elements)
158    transition_count: u32,
159    successor_count: u32,
160    effect_count: u32,
161    negated_field_count: u16,
162    string_ref_count: u16,
163    type_def_count: u16,
164    type_member_count: u16,
165    entrypoint_count: u16,
166    trivia_kind_count: u16,
167}
168
169impl CompiledQuery {
170    /// Creates a new compiled query from pre-built components.
171    ///
172    /// This is typically called by the emitter after layout computation.
173    #[allow(clippy::too_many_arguments)]
174    pub fn new(
175        buffer: CompiledQueryBuffer,
176        successors_offset: u32,
177        effects_offset: u32,
178        negated_fields_offset: u32,
179        string_refs_offset: u32,
180        string_bytes_offset: u32,
181        type_defs_offset: u32,
182        type_members_offset: u32,
183        entrypoints_offset: u32,
184        trivia_kinds_offset: u32,
185        transition_count: u32,
186        successor_count: u32,
187        effect_count: u32,
188        negated_field_count: u16,
189        string_ref_count: u16,
190        type_def_count: u16,
191        type_member_count: u16,
192        entrypoint_count: u16,
193        trivia_kind_count: u16,
194    ) -> Self {
195        Self {
196            buffer,
197            successors_offset,
198            effects_offset,
199            negated_fields_offset,
200            string_refs_offset,
201            string_bytes_offset,
202            type_defs_offset,
203            type_members_offset,
204            entrypoints_offset,
205            trivia_kinds_offset,
206            transition_count,
207            successor_count,
208            effect_count,
209            negated_field_count,
210            string_ref_count,
211            type_def_count,
212            type_member_count,
213            entrypoint_count,
214            trivia_kind_count,
215        }
216    }
217
218    /// Returns the transitions segment.
219    #[inline]
220    pub fn transitions(&self) -> &[Transition] {
221        // Transitions start at offset 0
222        // SAFETY: buffer is properly aligned, transitions are at offset 0
223        unsafe {
224            std::slice::from_raw_parts(
225                self.buffer.as_ptr() as *const Transition,
226                self.transition_count as usize,
227            )
228        }
229    }
230
231    /// Returns the successors segment.
232    #[inline]
233    pub fn successors(&self) -> &[TransitionId] {
234        // SAFETY: offset is aligned to 4
235        unsafe {
236            std::slice::from_raw_parts(
237                self.buffer.as_ptr().add(self.successors_offset as usize) as *const TransitionId,
238                self.successor_count as usize,
239            )
240        }
241    }
242
243    /// Returns the effects segment.
244    #[inline]
245    pub fn effects(&self) -> &[EffectOp] {
246        // SAFETY: offset is aligned to 2
247        unsafe {
248            std::slice::from_raw_parts(
249                self.buffer.as_ptr().add(self.effects_offset as usize) as *const EffectOp,
250                self.effect_count as usize,
251            )
252        }
253    }
254
255    /// Returns the negated fields segment.
256    #[inline]
257    pub fn negated_fields(&self) -> &[NodeFieldId] {
258        // SAFETY: offset is aligned to 2
259        unsafe {
260            std::slice::from_raw_parts(
261                self.buffer
262                    .as_ptr()
263                    .add(self.negated_fields_offset as usize) as *const NodeFieldId,
264                self.negated_field_count as usize,
265            )
266        }
267    }
268
269    /// Returns the string refs segment.
270    #[inline]
271    pub fn string_refs(&self) -> &[StringRef] {
272        // SAFETY: offset is aligned to 4
273        unsafe {
274            std::slice::from_raw_parts(
275                self.buffer.as_ptr().add(self.string_refs_offset as usize) as *const StringRef,
276                self.string_ref_count as usize,
277            )
278        }
279    }
280
281    /// Returns the raw string bytes.
282    #[inline]
283    pub fn string_bytes(&self) -> &[u8] {
284        let end = if self.type_defs_offset > 0 {
285            self.type_defs_offset as usize
286        } else {
287            self.buffer.len()
288        };
289        let start = self.string_bytes_offset as usize;
290        &self.buffer.as_slice()[start..end]
291    }
292
293    /// Returns the type definitions segment.
294    #[inline]
295    pub fn type_defs(&self) -> &[TypeDef] {
296        // SAFETY: offset is aligned to 4
297        unsafe {
298            std::slice::from_raw_parts(
299                self.buffer.as_ptr().add(self.type_defs_offset as usize) as *const TypeDef,
300                self.type_def_count as usize,
301            )
302        }
303    }
304
305    /// Returns the type members segment.
306    #[inline]
307    pub fn type_members(&self) -> &[TypeMember] {
308        // SAFETY: offset is aligned to 2
309        unsafe {
310            std::slice::from_raw_parts(
311                self.buffer.as_ptr().add(self.type_members_offset as usize) as *const TypeMember,
312                self.type_member_count as usize,
313            )
314        }
315    }
316
317    /// Returns the entrypoints segment.
318    #[inline]
319    pub fn entrypoints(&self) -> &[Entrypoint] {
320        // SAFETY: offset is aligned to 4
321        unsafe {
322            std::slice::from_raw_parts(
323                self.buffer.as_ptr().add(self.entrypoints_offset as usize) as *const Entrypoint,
324                self.entrypoint_count as usize,
325            )
326        }
327    }
328
329    /// Returns the trivia kinds segment (node types to skip).
330    #[inline]
331    pub fn trivia_kinds(&self) -> &[NodeTypeId] {
332        if self.trivia_kinds_offset == 0 {
333            return &[];
334        }
335        // SAFETY: offset is aligned to 2
336        unsafe {
337            std::slice::from_raw_parts(
338                self.buffer.as_ptr().add(self.trivia_kinds_offset as usize) as *const NodeTypeId,
339                self.trivia_kind_count as usize,
340            )
341        }
342    }
343
344    /// Returns a transition by ID.
345    #[inline]
346    pub fn transition(&self, id: TransitionId) -> &Transition {
347        &self.transitions()[id as usize]
348    }
349
350    /// Returns a view of a transition with resolved slices.
351    #[inline]
352    pub fn transition_view(&self, id: TransitionId) -> TransitionView<'_> {
353        TransitionView {
354            query: self,
355            raw: self.transition(id),
356        }
357    }
358
359    /// Resolves a string ID to its UTF-8 content.
360    #[inline]
361    pub fn string(&self, id: StringId) -> &str {
362        let refs = self.string_refs();
363        let string_ref = &refs[id as usize];
364        let bytes = self.string_bytes();
365        let start = string_ref.offset as usize;
366        let end = start + string_ref.len as usize;
367        // SAFETY: emitter ensures valid UTF-8
368        unsafe { std::str::from_utf8_unchecked(&bytes[start..end]) }
369    }
370
371    /// Resolves a slice of effects.
372    #[inline]
373    pub fn resolve_effects(&self, slice: Slice<EffectOp>) -> &[EffectOp] {
374        let effects = self.effects();
375        let start = slice.start_index() as usize;
376        let end = start + slice.len() as usize;
377        &effects[start..end]
378    }
379
380    /// Resolves a slice of negated fields.
381    #[inline]
382    pub fn resolve_negated_fields(&self, slice: Slice<NodeFieldId>) -> &[NodeFieldId] {
383        let fields = self.negated_fields();
384        let start = slice.start_index() as usize;
385        let end = start + slice.len() as usize;
386        &fields[start..end]
387    }
388
389    /// Resolves a slice of type members.
390    #[inline]
391    pub fn resolve_type_members(&self, slice: Slice<TypeMember>) -> &[TypeMember] {
392        let members = self.type_members();
393        let start = slice.start_index() as usize;
394        let end = start + slice.len() as usize;
395        &members[start..end]
396    }
397
398    /// Resolves successors for a transition by ID, handling both inline and spilled cases.
399    #[inline]
400    pub fn resolve_successors_by_id(&self, id: TransitionId) -> &[TransitionId] {
401        let transition = self.transition(id);
402        if transition.has_inline_successors() {
403            // Return from transitions segment - inline data is part of the transition
404            let count = transition.successor_count as usize;
405            &self.transitions()[id as usize].successor_data[..count]
406        } else {
407            let start = transition.spilled_successors_index() as usize;
408            let count = transition.successor_count as usize;
409            &self.successors()[start..start + count]
410        }
411    }
412
413    /// Returns the number of transitions.
414    #[inline]
415    pub fn transition_count(&self) -> u32 {
416        self.transition_count
417    }
418
419    /// Returns the number of entrypoints.
420    #[inline]
421    pub fn entrypoint_count(&self) -> u16 {
422        self.entrypoint_count
423    }
424
425    /// Returns the raw buffer for serialization.
426    #[inline]
427    pub fn buffer(&self) -> &CompiledQueryBuffer {
428        &self.buffer
429    }
430
431    /// Returns offset metadata for serialization.
432    pub fn offsets(&self) -> CompiledQueryOffsets {
433        CompiledQueryOffsets {
434            successors_offset: self.successors_offset,
435            effects_offset: self.effects_offset,
436            negated_fields_offset: self.negated_fields_offset,
437            string_refs_offset: self.string_refs_offset,
438            string_bytes_offset: self.string_bytes_offset,
439            type_defs_offset: self.type_defs_offset,
440            type_members_offset: self.type_members_offset,
441            entrypoints_offset: self.entrypoints_offset,
442            trivia_kinds_offset: self.trivia_kinds_offset,
443        }
444    }
445
446    /// Dumps the compiled query in human-readable format for debugging.
447    pub fn dump(&self) -> String {
448        let mut out = String::new();
449
450        // Header
451        writeln!(out, "CompiledQuery {{").unwrap();
452        writeln!(out, "  buffer_len: {}", self.buffer.len()).unwrap();
453        writeln!(out, "  transitions: {}", self.transition_count).unwrap();
454        writeln!(out, "  successors: {} (spilled)", self.successor_count).unwrap();
455        writeln!(out, "  effects: {}", self.effect_count).unwrap();
456        writeln!(out, "  strings: {}", self.string_ref_count).unwrap();
457        writeln!(out, "  type_defs: {}", self.type_def_count).unwrap();
458        writeln!(out, "  entrypoints: {}", self.entrypoint_count).unwrap();
459        writeln!(out).unwrap();
460
461        // Entrypoints
462        writeln!(out, "  Entrypoints:").unwrap();
463        for ep in self.entrypoints() {
464            let name = self.string(ep.name_id());
465            writeln!(
466                out,
467                "    {} -> T{} (type {})",
468                name,
469                ep.target(),
470                ep.result_type()
471            )
472            .unwrap();
473        }
474        writeln!(out).unwrap();
475
476        // Transitions
477        writeln!(out, "  Transitions:").unwrap();
478        for i in 0..self.transition_count {
479            let view = self.transition_view(i);
480            write!(out, "    T{}: ", i).unwrap();
481
482            // Matcher
483            match view.matcher() {
484                super::Matcher::Epsilon => write!(out, "ε").unwrap(),
485                super::Matcher::Node { kind, field, .. } => {
486                    write!(out, "Node({})", kind).unwrap();
487                    if let Some(f) = field {
488                        write!(out, " field={}", f).unwrap();
489                    }
490                }
491                super::Matcher::Anonymous { kind, field, .. } => {
492                    write!(out, "Anon({})", kind).unwrap();
493                    if let Some(f) = field {
494                        write!(out, " field={}", f).unwrap();
495                    }
496                }
497                super::Matcher::Wildcard => write!(out, "_").unwrap(),
498            }
499
500            // Nav
501            let nav = view.nav();
502            if !nav.is_stay() {
503                write!(out, " nav={:?}", nav.kind).unwrap();
504                if nav.level > 0 {
505                    write!(out, "({})", nav.level).unwrap();
506                }
507            }
508
509            // Ref marker
510            match view.ref_marker() {
511                super::RefTransition::None => {}
512                super::RefTransition::Enter(id) => write!(out, " Enter({})", id).unwrap(),
513                super::RefTransition::Exit(id) => write!(out, " Exit({})", id).unwrap(),
514            }
515
516            // Effects
517            let effects = view.effects();
518            if !effects.is_empty() {
519                write!(out, " [").unwrap();
520                for (j, eff) in effects.iter().enumerate() {
521                    if j > 0 {
522                        write!(out, ", ").unwrap();
523                    }
524                    match eff {
525                        EffectOp::CaptureNode => write!(out, "Capture").unwrap(),
526                        EffectOp::ClearCurrent => write!(out, "Clear").unwrap(),
527                        EffectOp::StartArray => write!(out, "StartArr").unwrap(),
528                        EffectOp::PushElement => write!(out, "Push").unwrap(),
529                        EffectOp::EndArray => write!(out, "EndArr").unwrap(),
530                        EffectOp::StartObject => write!(out, "StartObj").unwrap(),
531                        EffectOp::EndObject => write!(out, "EndObj").unwrap(),
532                        EffectOp::Field(id) => write!(out, "Field({})", self.string(*id)).unwrap(),
533                        EffectOp::StartVariant(id) => {
534                            write!(out, "Var({})", self.string(*id)).unwrap()
535                        }
536                        EffectOp::EndVariant => write!(out, "EndVar").unwrap(),
537                        EffectOp::ToString => write!(out, "ToStr").unwrap(),
538                    }
539                }
540                write!(out, "]").unwrap();
541            }
542
543            // Successors
544            let succs = view.successors();
545            if !succs.is_empty() {
546                write!(out, " -> [").unwrap();
547                for (j, s) in succs.iter().enumerate() {
548                    if j > 0 {
549                        write!(out, ", ").unwrap();
550                    }
551                    write!(out, "T{}", s).unwrap();
552                }
553                write!(out, "]").unwrap();
554            }
555
556            writeln!(out).unwrap();
557        }
558
559        // Strings
560        if self.string_ref_count > 0 {
561            writeln!(out).unwrap();
562            writeln!(out, "  Strings:").unwrap();
563            for i in 0..self.string_ref_count {
564                let s = self.string(i);
565                writeln!(out, "    S{}: {:?}", i, s).unwrap();
566            }
567        }
568
569        // Types
570        if self.type_def_count > 0 {
571            writeln!(out).unwrap();
572            writeln!(out, "  Types:").unwrap();
573            for (i, td) in self.type_defs().iter().enumerate() {
574                let type_id = i as u16 + super::TYPE_COMPOSITE_START;
575                let name = if td.name != super::STRING_NONE {
576                    self.string(td.name)
577                } else {
578                    "<anon>"
579                };
580                write!(out, "    Ty{}: {} {:?}", type_id, name, td.kind).unwrap();
581                if td.is_wrapper() {
582                    if let Some(inner) = td.inner_type() {
583                        write!(out, " inner=Ty{}", inner).unwrap();
584                    }
585                } else if let Some(members) = td.members_slice() {
586                    let resolved = self.resolve_type_members(members);
587                    write!(out, " {{").unwrap();
588                    for (j, m) in resolved.iter().enumerate() {
589                        if j > 0 {
590                            write!(out, ", ").unwrap();
591                        }
592                        write!(out, "{}: Ty{}", self.string(m.name), m.ty).unwrap();
593                    }
594                    write!(out, "}}").unwrap();
595                }
596                writeln!(out).unwrap();
597            }
598        }
599
600        writeln!(out, "}}").unwrap();
601        out
602    }
603}
604
/// Offset metadata extracted from CompiledQuery.
///
/// Every field is a byte offset into the query buffer for the segment of the same name.
/// The struct is plain data, so it can be copied and compared freely (for example to
/// check that offsets survive a serialization round trip).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CompiledQueryOffsets {
    pub successors_offset: u32,
    pub effects_offset: u32,
    pub negated_fields_offset: u32,
    pub string_refs_offset: u32,
    pub string_bytes_offset: u32,
    pub type_defs_offset: u32,
    pub type_members_offset: u32,
    pub entrypoints_offset: u32,
    /// 0 means the query has no trivia kinds.
    pub trivia_kinds_offset: u32,
}
618
619/// A view of a transition with resolved slices.
620///
621/// Hides offset arithmetic and inline/spilled distinction from callers.
622pub struct TransitionView<'a> {
623    query: &'a CompiledQuery,
624    raw: &'a Transition,
625}
626
627impl<'a> TransitionView<'a> {
628    /// Returns the raw transition.
629    #[inline]
630    pub fn raw(&self) -> &'a Transition {
631        self.raw
632    }
633
634    /// Returns resolved successor IDs.
635    #[inline]
636    pub fn successors(&self) -> &'a [TransitionId] {
637        if self.raw.has_inline_successors() {
638            let count = self.raw.successor_count as usize;
639            &self.raw.successor_data[..count]
640        } else {
641            let start = self.raw.spilled_successors_index() as usize;
642            let count = self.raw.successor_count as usize;
643            &self.query.successors()[start..start + count]
644        }
645    }
646
647    /// Returns resolved effect operations.
648    #[inline]
649    pub fn effects(&self) -> &'a [EffectOp] {
650        self.query.resolve_effects(self.raw.effects())
651    }
652
653    /// Returns the matcher.
654    #[inline]
655    pub fn matcher(&self) -> &super::Matcher {
656        &self.raw.matcher
657    }
658
659    /// Returns a view of the matcher with resolved slices.
660    #[inline]
661    pub fn matcher_view(&self) -> MatcherView<'a> {
662        MatcherView {
663            query: self.query,
664            raw: &self.raw.matcher,
665        }
666    }
667
668    /// Returns the navigation instruction.
669    #[inline]
670    pub fn nav(&self) -> super::Nav {
671        self.raw.nav
672    }
673
674    /// Returns the ref transition marker.
675    #[inline]
676    pub fn ref_marker(&self) -> super::RefTransition {
677        self.raw.ref_marker
678    }
679}
680
681/// A view of a matcher with resolved slices.
682pub struct MatcherView<'a> {
683    query: &'a CompiledQuery,
684    raw: &'a super::Matcher,
685}
686
687impl<'a> MatcherView<'a> {
688    /// Returns the raw matcher.
689    #[inline]
690    pub fn raw(&self) -> &'a super::Matcher {
691        self.raw
692    }
693
694    /// Returns resolved negated fields.
695    #[inline]
696    pub fn negated_fields(&self) -> &'a [NodeFieldId] {
697        self.query.resolve_negated_fields(self.raw.negated_fields())
698    }
699
700    /// Returns the matcher kind.
701    #[inline]
702    pub fn kind(&self) -> super::MatcherKind {
703        self.raw.kind()
704    }
705}
706
/// Aligns an offset up to the given alignment.
///
/// Returns the smallest multiple of `align` that is greater than or equal to `offset`.
/// `align` must be a non-zero power of two, because the rounding is done with a bit mask.
///
/// # Panics
/// Panics if `align` is not a power of two, or if the rounded offset does not fit in a
/// `u32`. Previously these cases produced a silently wrong offset in release builds.
#[inline]
pub const fn align_up(offset: u32, align: u32) -> u32 {
    assert!(align.is_power_of_two(), "alignment must be a power of two");

    let mask = align - 1;
    match offset.checked_add(mask) {
        Some(bumped) => bumped & !mask,
        None => panic!("aligned offset overflows u32"),
    }
}
712
#[cfg(test)]
mod tests {
    use super::*;

    /// An allocated buffer starts on a `BUFFER_ALIGN` boundary and reports its size.
    #[test]
    fn buffer_alignment() {
        let buffer = CompiledQueryBuffer::allocate(128);
        let address = buffer.as_ptr() as usize;
        assert_eq!(address % BUFFER_ALIGN, 0);
        assert_eq!(buffer.len(), 128);
    }

    /// A zero-length allocation is empty and exposes no bytes.
    #[test]
    fn buffer_empty() {
        let buffer = CompiledQueryBuffer::allocate(0);
        assert!(buffer.is_empty());
        let no_bytes: &[u8] = &[];
        assert_eq!(buffer.as_slice(), no_bytes);
    }

    /// `align_up` rounds to the next multiple and leaves aligned offsets unchanged.
    #[test]
    fn align_up_values() {
        // (offset, align, expected)
        let cases = [
            (0, 4, 0),
            (1, 4, 4),
            (4, 4, 4),
            (5, 4, 8),
            (63, 64, 64),
            (64, 64, 64),
            (65, 64, 128),
        ];
        for (offset, align, expected) in cases {
            assert_eq!(align_up(offset, align), expected);
        }
    }
}