Skip to main content

syntaqlite_syntax/
ast.rs

1// Copyright 2025 The syntaqlite Authors. All rights reserved.
2// Licensed under the Apache License, Version 2.0.
3
4use std::marker::PhantomData;
5
6use crate::parser::AnyParsedStatement;
7
8// ── Public API ───────────────────────────────────────────────────────────────
9
10/// Trait for AST node views that can be materialized from arena IDs.
11///
12/// Implemented by generated node wrappers and used by generic traversals such
13/// as [`TypedNodeList`].
14pub trait GrammarNodeType<'a>: Sized {
15    /// Resolve `id` to `Self`, or `None` if null, invalid, or tag mismatch.
16    fn from_result(stmt_result: &'a AnyParsedStatement<'a>, id: AnyNodeId) -> Option<Self>;
17}
18
19/// Trait for token enums that support typed <-> raw conversion.
20///
21/// Enables tokenizer/parser code that is generic over a grammar's token type.
22pub trait GrammarTokenType: Sized + Clone + Copy + std::fmt::Debug + Into<u32> {
23    /// Convert a type-erased [`AnyTokenType`] into this grammar's typed token
24    /// variant, or `None` if the ordinal is out of range.
25    fn from_token_type(raw: AnyTokenType) -> Option<Self>;
26}
27
/// A token kind with the grammar erased, stored as its raw ordinal.
///
/// Intended for grammar-agnostic code paths that cannot name a concrete
/// token enum.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct AnyTokenType(pub(crate) u32);
34
35impl AnyTokenType {
36    /// Construct from a raw token-type ordinal.
37    ///
38    /// This does not validate that `v` is a known token for any particular
39    /// grammar. Prefer typed token enums when available.
40    pub fn from_raw(v: u32) -> Self {
41        AnyTokenType(v)
42    }
43}
44
45impl From<AnyTokenType> for u32 {
46    fn from(t: AnyTokenType) -> u32 {
47        t.0
48    }
49}
50
51impl GrammarTokenType for AnyTokenType {
52    fn from_token_type(raw: AnyTokenType) -> Option<Self> {
53        Some(raw)
54    }
55}
56
/// An AST node tag with the grammar erased, stored as its raw ordinal.
///
/// Intended for grammar-agnostic AST introspection.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct AnyNodeTag(pub(crate) u32);
63
64impl From<AnyNodeTag> for u32 {
65    fn from(t: AnyNodeTag) -> u32 {
66        t.0
67    }
68}
69
70impl AnyNodeTag {
71    /// Construct from a raw node tag ordinal.
72    ///
73    /// This does not validate that `v` is a known tag for any particular
74    /// grammar. Prefer typed tags when available.
75    pub fn from_raw(v: u32) -> Self {
76        AnyNodeTag(v)
77    }
78}
79
/// A handle to a node in the parser arena that carries no lifetime.
///
/// Keep one of these when node identity must be stored independently of a
/// borrowed AST view.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct AnyNodeId(pub(crate) u32);
86
87impl AnyNodeId {
88    /// Sentinel value representing a missing/null node.
89    pub(crate) const NULL: AnyNodeId = AnyNodeId(0xFFFF_FFFF);
90
91    /// Returns `true` if this is the null sentinel.
92    pub fn is_null(&self) -> bool {
93        self.0 == Self::NULL.0
94    }
95}
96
97/// Grammar-agnostic node view.
98///
99/// Useful for tooling that traverses trees without generated node enums.
100#[derive(Clone, Copy)]
101pub struct AnyNode<'a> {
102    pub(crate) id: AnyNodeId,
103    pub(crate) stmt_result: &'a AnyParsedStatement<'a>,
104}
105
106impl<'a> GrammarNodeType<'a> for AnyNode<'a> {
107    fn from_result(stmt_result: &'a AnyParsedStatement<'a>, id: AnyNodeId) -> Option<Self> {
108        stmt_result.node_ptr(id)?; // validate the node exists
109        Some(AnyNode { id, stmt_result })
110    }
111}
112
113impl std::fmt::Debug for AnyNode<'_> {
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        f.debug_struct("Node").field("id", &self.id).finish()
116    }
117}
118
119impl std::fmt::Display for AnyNode<'_> {
120    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
121        let mut buf = String::new();
122        self.stmt_result.dump_node(self.id, &mut buf, 0);
123        f.write_str(&buf)
124    }
125}
126
127/// Typed read-only view over a list node in the arena.
128///
129/// Used throughout generated AST APIs for child collections.
130#[derive(Clone)]
131pub struct TypedNodeList<'a, G: crate::grammar::TypedGrammar, T> {
132    raw: &'a RawNodeList,
133    stmt_result: &'a AnyParsedStatement<'a>,
134    id: AnyNodeId,
135    _phantom: PhantomData<fn() -> (G, T)>,
136}
137
138// Manual Copy impl: all fields are Copy regardless of G or T.
139// `derive(Copy)` would add a spurious `G: Copy` bound via PhantomData,
140// which would propagate to every generated list alias.
141impl<G: crate::grammar::TypedGrammar, T: Clone> Copy for TypedNodeList<'_, G, T> {}
142
143impl<G: crate::grammar::TypedGrammar, T> std::fmt::Debug for TypedNodeList<'_, G, T> {
144    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
145        f.debug_struct("TypedNodeList")
146            .field("len", &self.raw.children().len())
147            .finish()
148    }
149}
150
151impl<G: crate::grammar::TypedGrammar, T> TypedNodeList<'_, G, T> {
152    /// The arena node ID of this list, as the grammar's typed node ID.
153    pub fn node_id(&self) -> G::NodeId {
154        G::NodeId::from(self.id)
155    }
156
157    /// Number of children.
158    pub fn len(&self) -> usize {
159        self.raw.children().len()
160    }
161
162    /// Whether this list has no children.
163    pub fn is_empty(&self) -> bool {
164        self.raw.children().is_empty()
165    }
166}
167
168impl<'a, G: crate::grammar::TypedGrammar, T: GrammarNodeType<'a>> TypedNodeList<'a, G, T> {
169    /// Get a child by index, or `None` if out of bounds or unresolvable.
170    pub fn get(&self, index: usize) -> Option<T> {
171        let id = *self.raw.children().get(index)?;
172        T::from_result(self.stmt_result, id)
173    }
174
175    /// Iterate over children. Unresolvable IDs are silently skipped.
176    pub fn iter(&self) -> impl Iterator<Item = T> + 'a {
177        let stmt_result = self.stmt_result;
178        let children = self.raw.children();
179        children
180            .iter()
181            .filter_map(move |&id| T::from_result(stmt_result, id))
182    }
183}
184
185/// Trait for typed node IDs generated per AST node kind.
186///
187/// IDs are cheap, storable handles that can later be resolved against a parse
188/// result back into typed node views.
189pub trait TypedNodeId: Copy + Into<AnyNodeId> {
190    /// The typed view produced when this ID is resolved against an arena.
191    type Node<'a>: GrammarNodeType<'a>;
192}
193
194/// Reflected field value extracted from a node.
195///
196/// Used by grammar-agnostic AST tooling built on
197/// [`AnyParsedStatement::extract_fields`](crate::parser::AnyParsedStatement::extract_fields).
198#[derive(Clone, Copy, Debug)]
199pub enum FieldValue<'a> {
200    /// A child node reference.
201    NodeId(AnyNodeId),
202    /// A source text span — a subslice of the original source string.
203    Span(&'a str),
204    /// A boolean flag.
205    Bool(bool),
206    /// A compact bitfield of flags.
207    Flags(u8),
208    /// An enum discriminant.
209    Enum(u32),
210}
211
212/// Compact reflected field collection for one AST node.
213///
214/// Returned by [`AnyParsedStatement::extract_fields`](crate::parser::AnyParsedStatement::extract_fields)
215/// and indexable via `fields[idx]`.
216pub struct NodeFields<'a> {
217    buf: [std::mem::MaybeUninit<FieldValue<'a>>; 16],
218    len: usize,
219}
220
221impl<'a> NodeFields<'a> {
222    /// Create an empty `NodeFields`.
223    pub(crate) fn new() -> Self {
224        Self {
225            buf: [const { std::mem::MaybeUninit::uninit() }; 16],
226            len: 0,
227        }
228    }
229
230    /// Append a field value.
231    ///
232    /// # Panics
233    /// Panics if more than 16 fields are pushed.
234    pub(crate) fn push(&mut self, val: FieldValue<'a>) {
235        assert!(self.len < 16, "NodeFields overflow: more than 16 fields");
236        self.buf[self.len] = std::mem::MaybeUninit::new(val);
237        self.len += 1;
238    }
239
240    /// Number of fields.
241    pub fn len(&self) -> usize {
242        self.len
243    }
244
245    /// Whether there are no fields.
246    pub fn is_empty(&self) -> bool {
247        self.len == 0
248    }
249}
250
251impl<'a> std::ops::Index<usize> for NodeFields<'a> {
252    type Output = FieldValue<'a>;
253
254    fn index(&self, idx: usize) -> &FieldValue<'a> {
255        assert!(
256            idx < self.len,
257            "field index {} out of bounds (len={})",
258            idx,
259            self.len
260        );
261        // SAFETY: buf[..len] are all initialised via `push`.
262        unsafe { self.buf[idx].assume_init_ref() }
263    }
264}
265
266impl std::fmt::Debug for NodeFields<'_> {
267    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
268        let mut list = f.debug_list();
269        for i in 0..self.len {
270            list.entry(&self[i]);
271        }
272        list.finish()
273    }
274}
275
276// ── Crate-internal ───────────────────────────────────────────────────────────
277
278/// Blanket [`GrammarNodeType`] impl for [`TypedNodeList`] — resolves the ID as a list node.
279impl<'a, G: crate::grammar::TypedGrammar, T> GrammarNodeType<'a> for TypedNodeList<'a, G, T> {
280    fn from_result(stmt_result: &'a AnyParsedStatement<'a>, id: AnyNodeId) -> Option<Self> {
281        let raw = stmt_result.resolve_list(id)?;
282        Some(TypedNodeList {
283            raw,
284            stmt_result,
285            id,
286            _phantom: PhantomData,
287        })
288    }
289}
290
/// Declares the type tag of a `#[repr(C)]` arena node struct; each such
/// struct implements this trait.
///
/// # Safety
/// `TAG` must equal the value of the `tag` field that the C parser writes
/// into the first `u32` of the implementing struct.
pub(crate) unsafe trait ArenaNode {
    /// Tag value identifying this node struct.
    const TAG: u32;
}
299
300// ── serde::Serialize (feature = "serde") ─────────────────────────────────────
301
302#[cfg(feature = "serde")]
303mod serde_impl {
304    use super::{AnyNode, FieldValue, GrammarNodeType};
305    use crate::grammar::{FieldKind, FieldMeta};
306    use crate::parser::AnyParsedStatement;
307
308    /// Serializes an AST node to the JSON equivalent of the text dump format.
309    ///
310    /// Regular nodes become `{ "type": "NodeName", "field1": value, ... }`.
311    /// List nodes become `{ "type": "ListName", "count": N, "children": [...] }`.
312    /// Field values mirror the dump: spans as strings, bools as booleans,
313    /// enums as their display-name strings, flags as arrays of active names,
314    /// absent nodes/spans as `null`.
315    impl serde::Serialize for AnyNode<'_> {
316        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
317            use serde::ser::SerializeMap;
318
319            let stmt = self.stmt_result;
320            let grammar = &stmt.grammar;
321            let id = self.id;
322
323            let Some((tag, fields)) = stmt.extract_fields(id) else {
324                return serializer.serialize_none();
325            };
326
327            let name = grammar.node_name(tag);
328
329            if grammar.is_list(tag) {
330                // { "type": "ListName", "count": N, "children": [...] }
331                let raw_children = stmt.list_children(id).unwrap_or(&[]);
332                let children: Vec<AnyNode<'_>> = raw_children
333                    .iter()
334                    .filter(|id| !id.is_null())
335                    .filter_map(|&id| AnyNode::from_result(stmt, id))
336                    .collect();
337                let mut map = serializer.serialize_map(Some(3))?;
338                map.serialize_entry("type", name)?;
339                map.serialize_entry("count", &children.len())?;
340                map.serialize_entry("children", &children)?;
341                map.end()
342            } else {
343                // { "type": "NodeName", "field1": value1, ... }
344                let metas: Vec<FieldMeta<'static>> = grammar.field_meta(tag).collect();
345                let field_count = metas.len().min(fields.len());
346                let mut map = serializer.serialize_map(Some(1 + field_count))?;
347                map.serialize_entry("type", name)?;
348                for i in 0..field_count {
349                    let meta = &metas[i];
350                    let value = &fields[i];
351                    map.serialize_entry(meta.name(), &FieldValueSerializer { meta, value, stmt })?;
352                }
353                map.end()
354            }
355        }
356    }
357
358    /// Serializes the value side of a single field — the right-hand side of
359    /// `"field_name": <this>`.
360    struct FieldValueSerializer<'a, 'b> {
361        meta: &'b FieldMeta<'static>,
362        value: &'b FieldValue<'a>,
363        stmt: &'b AnyParsedStatement<'a>,
364    }
365
366    impl serde::Serialize for FieldValueSerializer<'_, '_> {
367        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
368            match (self.meta.kind(), self.value) {
369                // Node field: recurse, or null if absent.
370                (FieldKind::NodeId, FieldValue::NodeId(id)) => {
371                    if id.is_null() {
372                        serializer.serialize_none()
373                    } else {
374                        match AnyNode::from_result(self.stmt, *id) {
375                            Some(node) => node.serialize(serializer),
376                            None => serializer.serialize_none(),
377                        }
378                    }
379                }
380                // Span: text string, or null for an absent/empty span.
381                (FieldKind::Span, FieldValue::Span(text)) => {
382                    if text.is_empty() {
383                        serializer.serialize_none()
384                    } else {
385                        serializer.serialize_str(text)
386                    }
387                }
388                // Bool: plain boolean.
389                (FieldKind::Bool, FieldValue::Bool(b)) => serializer.serialize_bool(*b),
390                // Enum: display-name string, or null if no display name.
391                (FieldKind::Enum, FieldValue::Enum(discriminant)) => {
392                    match self.meta.display_name(*discriminant as usize) {
393                        Some(s) => serializer.serialize_str(s),
394                        None => serializer.serialize_none(),
395                    }
396                }
397                // Flags: array of active flag-name strings (empty array when none set).
398                (FieldKind::Flags, FieldValue::Flags(bits)) => {
399                    use serde::ser::SerializeSeq;
400                    let bits = *bits;
401                    let active: Vec<&'static str> = (0..self.meta.display_count())
402                        .filter(|&i| bits & (1u8 << i) != 0)
403                        .filter_map(|i| self.meta.display_name(i))
404                        .collect();
405                    let mut seq = serializer.serialize_seq(Some(active.len()))?;
406                    for s in &active {
407                        seq.serialize_element(s)?;
408                    }
409                    seq.end()
410                }
411                // Shouldn't occur (kind/value mismatch would be a codegen bug).
412                _ => serializer.serialize_none(),
413            }
414        }
415    }
416}
417
418// ── ffi ───────────────────────────────────────────────────────────────────────
419
420pub(crate) use ffi::{CNodeList as RawNodeList, CSourceSpan as SourceSpan};
421
422mod ffi {
423    use crate::ast::AnyNodeId;
424
425    /// A source byte range within the parser's source buffer.
426    ///
427    /// Mirrors the C `SyntaqliteSpan` layout: `offset` and `length` in bytes.
428    /// Used in generated node structs for token-valued fields (identifiers, literals).
429    #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
430    #[repr(C)]
431    pub(crate) struct CSourceSpan {
432        pub(crate) offset: u32,
433        pub(crate) length: u16,
434    }
435
436    #[expect(dead_code)]
437    impl CSourceSpan {
438        /// Returns `true` if the span covers zero bytes.
439        pub(crate) fn is_empty(self) -> bool {
440            self.length == 0
441        }
442
443        /// Slice the span out of the given source string.
444        pub(crate) fn as_str(self, source: &str) -> &str {
445            let start = self.offset as usize;
446            let end = start + self.length as usize;
447            &source[start..end]
448        }
449    }
450
451    /// List node header — `tag` + `count`, followed by `count` child [`AnyNodeId`]s
452    /// in trailing data. The parser arena guarantees this contiguous layout.
453    #[derive(Debug)]
454    #[repr(C)]
455    pub(crate) struct CNodeList {
456        pub(crate) tag: u32,
457        pub(crate) count: u32,
458    }
459
460    impl CNodeList {
461        /// The child node IDs stored after this header in the arena.
462        pub(crate) fn children(&self) -> &[AnyNodeId] {
463            // SAFETY: The arena allocates list nodes as { tag, count, children[count] }
464            // contiguously, so `count` u32 values immediately follow this header.
465            // CNodeList is only constructed from valid arena pointers (validated tag).
466            // AnyNodeId is #[repr(transparent)] over u32, so &[AnyNodeId] is
467            // layout-compatible with &[u32].
468            unsafe {
469                let base = std::ptr::from_ref::<CNodeList>(self)
470                    .add(1)
471                    .cast::<AnyNodeId>();
472                std::slice::from_raw_parts(base, self.count as usize)
473            }
474        }
475    }
476}