gibberish_core/
node.rs

1use std::{fmt::Debug, ops::Range};
2
3use crate::{expected::ExpectedData, lang::CompiledLang, vec::RawVec};
4
5use super::{err::ParseError, lang::Lang};
6use ansi_term::Colour::{Blue, Green, Red};
7
/// Half-open `start..end` range of byte offsets into the source text.
pub type Span = Range<usize>;
9
/// A single token of the source text, tagged with a token kind of language `L`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Lexeme<L: Lang> {
    /// Range of the source text this lexeme covers.
    pub span: Span,
    /// Token kind, as defined by the language `L`.
    pub kind: L::Token,
    /// Owned copy of the text of this lexeme.
    pub text: String,
}
16
/// Plain-data, C-layout description of a lexeme: a numeric token kind plus the
/// `start..end` offsets it covers in the source text.
///
/// Field order is part of the `#[repr(C)]` layout and must not be changed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct LexemeData {
    /// Numeric token kind.
    pub kind: usize,
    /// Offset of the first byte covered by the lexeme.
    pub start: usize,
    /// Offset one past the last byte covered by the lexeme.
    pub end: usize,
}
24
25impl Lexeme<CompiledLang> {
26    pub fn from_data(value: LexemeData, src: &str) -> Self {
27        Lexeme {
28            span: value.start..value.end,
29            kind: value.kind as u32,
30            text: src[value.start..value.end].to_string(),
31        }
32    }
33}
34
/// An interior tree node: a syntax kind plus its ordered child nodes.
#[derive(Debug)]
pub struct Group<L: Lang> {
    /// Syntax kind of this group, as defined by the language `L`.
    pub kind: L::Syntax,
    /// Child nodes, in order.
    pub children: Vec<Node<L>>,
}
40
/// One element of the tree: a group, a single lexeme, or a parse error.
#[derive(Debug)]
pub enum Node<L: Lang> {
    /// An interior node that owns child nodes.
    Group(Group<L>),
    /// A leaf holding a single lexeme.
    Lexeme(Lexeme<L>),
    /// A parse error stored in place in the tree.
    Err(ParseError<L>),
}
47
48impl<L: Lang> Node<L> {
49    pub fn push_tok(&mut self, lexeme: Lexeme<L>) {
50        let Node::Group(Group { children, .. }) = self else {
51            panic!("Expected a group")
52        };
53        children.push(Node::Lexeme(lexeme))
54    }
55
56    fn debug_at(&self, offset: usize, errors: bool, tokens: bool, lang: &L) {
57        fn print_offset(n: usize) {
58            for _ in 0..n {
59                print!("  ");
60            }
61        }
62        match self {
63            Node::Group(Group { kind, children }) => {
64                print_offset(offset);
65                println!("{}", Green.paint(lang.syntax_name(kind)));
66                for child in children.iter() {
67                    child.debug_at(offset + 1, errors, tokens, lang);
68                }
69            }
70            Node::Lexeme(lexeme) => {
71                if tokens {
72                    print_offset(offset);
73                    println!(
74                        "{}: {:?}",
75                        Blue.paint(lang.token_name(&lexeme.kind)),
76                        lexeme.text
77                    )
78                }
79            }
80            Node::Err(err_group) => {
81                if errors {
82                    print_offset(offset);
83                    err_group.debug_at(offset, lang)
84                }
85            }
86        }
87    }
88
89    pub fn as_group(&self) -> &Group<L> {
90        let Node::Group(group) = self else {
91            panic!("Expected a group");
92        };
93        group
94    }
95
96    pub fn debug_print(&self, errors: bool, tokens: bool, lang: &L) {
97        self.debug_at(0, errors, tokens, lang);
98    }
99
100    pub fn name(&self) -> L::Syntax {
101        match self {
102            Node::Group(Group { kind, .. }) => kind.clone(),
103            Node::Lexeme(_) => panic!("Lexeme has no name"),
104            Node::Err(_) => panic!("ErrGroup has no name"),
105        }
106    }
107
108    pub fn green_children(&self) -> impl Iterator<Item = &Group<L>> {
109        match self {
110            Node::Group(Group { children, .. }) => children.iter().filter_map(|it| match it {
111                Node::Group(group) => Some(group),
112                Node::Lexeme(_) => None,
113                Node::Err(_) => None,
114            }),
115            Node::Lexeme(_) => panic!("Lexeme has no children"),
116            Node::Err(_) => panic!("ErrGroup has no children"),
117        }
118    }
119
120    pub fn at_offset(&self, offset: usize) -> Option<&Node<L>> {
121        match self {
122            Node::Group(group) => group.children.iter().find_map(|it| it.at_offset(offset)),
123            Node::Lexeme(Lexeme { span, .. }) if span.start <= offset && offset <= span.end => {
124                Some(self)
125            }
126            Node::Err(err) if err.span().start <= offset && offset <= err.span().end => Some(self),
127            _ => None,
128        }
129    }
130}
131
132impl<L: Lang> Group<L> {
133    pub fn name(&self) -> L::Syntax {
134        self.kind.clone()
135    }
136
137    pub fn green_children(&self) -> impl Iterator<Item = &Group<L>> {
138        self.children.iter().filter_map(|it| match it {
139            Node::Group(group) => Some(group),
140            Node::Lexeme(_) => None,
141            Node::Err(_) => None,
142        })
143    }
144
145    pub fn green_node_by_name(&self, name: L::Syntax) -> Option<&Group<L>> {
146        self.green_children().find(|it| it.kind == name)
147    }
148
149    pub fn lexeme_by_kind(&self, name: L::Token) -> Option<&Lexeme<L>> {
150        self.children.iter().find_map(|it| {
151            if let Node::Lexeme(l) = it
152                && l.kind == name
153            {
154                Some(l)
155            } else {
156                None
157            }
158        })
159    }
160}
161
162impl<L: Lang> ParseError<L> {
163    fn debug_at(&self, offset: usize, lang: &L) {
164        // NOTE: Only works when called by outer 'debug_at'
165        match self {
166            ParseError::MissingError { expected, .. } => {
167                let expected = expected
168                    .iter()
169                    .map(|it| it.debug_name(lang))
170                    .collect::<Vec<_>>()
171                    .join(",");
172                println!("Missing: {expected}");
173            }
174            ParseError::Unexpected { actual, .. } => {
175                println!("Unexpected:");
176                for token in actual {
177                    for _ in 0..offset {
178                        print!("  ");
179                    }
180                    println!(
181                        "  {}: {:?}",
182                        Red.paint(lang.token_name(&token.kind)),
183                        token.text
184                    )
185                }
186            }
187        }
188    }
189}
190
191impl<L: Lang> Node<L> {
192    /// Iterate over all `Lexeme`s inside this node (DFS, left-to-right).
193    pub fn lexemes(&self) -> LexemeIter<'_, L> {
194        LexemeIter { stack: vec![self] }
195    }
196
197    pub fn errors(&self) -> ErrorIter<'_, L> {
198        ErrorIter {
199            stack: vec![self],
200            offset: 0,
201        }
202    }
203
204    pub fn start_offset(&self) -> usize {
205        match self {
206            Node::Group(group) => group.start_offset(),
207            Node::Lexeme(lexeme) => lexeme.span.start,
208            Node::Err(parse_error) => parse_error.start(),
209        }
210    }
211
212    pub fn end_offset(&self) -> usize {
213        match self {
214            Node::Group(group) => group.end_offset(),
215            Node::Lexeme(lexeme) => lexeme.span.end,
216            Node::Err(parse_error) => parse_error
217                .actual()
218                .last()
219                .map(|it| it.span.end)
220                .unwrap_or(parse_error.start()),
221        }
222    }
223
224    pub fn span(&self) -> Span {
225        self.start_offset()..self.end_offset()
226    }
227
228    pub fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
229        match self {
230            Node::Group(group) => group.fmt(f),
231            Node::Lexeme(lexeme) => write!(f, "{}", &lexeme.text),
232            Node::Err(parse_error) => {
233                for lexeme in parse_error.actual() {
234                    write!(f, "{}", &lexeme.text)?
235                }
236                Ok(())
237            }
238        }
239    }
240}
241
242impl<L: Lang> Group<L> {
243    pub fn errors(&self) -> ErrorIter<'_, L> {
244        let mut stack = vec![];
245        for child in self.children.iter().rev() {
246            stack.push(child);
247        }
248        ErrorIter { stack, offset: 0 }
249    }
250
251    pub fn start_offset(&self) -> usize {
252        if let Some(first) = self.children.first() {
253            first.start_offset()
254        } else {
255            0
256        }
257    }
258
259    pub fn end_offset(&self) -> usize {
260        if let Some(first) = self.children.last() {
261            first.end_offset()
262        } else {
263            0
264        }
265    }
266
267    pub fn span(&self) -> Span {
268        self.start_offset()..self.end_offset()
269    }
270
271    pub fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272        for child in self.children.iter() {
273            child.fmt(f)?
274        }
275        Ok(())
276    }
277
278    pub fn lexemes(&self) -> impl Iterator<Item = &Lexeme<L>> {
279        self.children.iter().flat_map(|it| it.lexemes())
280    }
281}
282
283pub struct LexemeIter<'a, L: Lang> {
284    stack: Vec<&'a Node<L>>,
285}
286
287impl<'a, L: Lang> Iterator for LexemeIter<'a, L> {
288    type Item = &'a Lexeme<L>;
289
290    fn next(&mut self) -> Option<Self::Item> {
291        while let Some(node) = self.stack.pop() {
292            match node {
293                Node::Lexeme(l) => return Some(l),
294                Node::Group(g) => {
295                    // push children in reverse so we visit in original order
296                    for child in g.children.iter().rev() {
297                        self.stack.push(child);
298                    }
299                }
300                Node::Err(_) => {
301                    // ParseError contents are not part of the tree proper; skip.
302                }
303            }
304        }
305        None
306    }
307}
308
309pub struct ErrorIter<'a, L: Lang> {
310    stack: Vec<&'a Node<L>>,
311    offset: usize,
312}
313
314impl<'a, L: Lang> Iterator for ErrorIter<'a, L> {
315    type Item = (usize, &'a ParseError<L>);
316
317    fn next(&mut self) -> Option<Self::Item> {
318        while let Some(node) = self.stack.pop() {
319            match node {
320                Node::Lexeme(l) => {
321                    self.offset = l.span.end;
322                }
323                Node::Group(g) => {
324                    // push children in reverse so we visit in original order
325                    for child in g.children.iter().rev() {
326                        self.stack.push(child);
327                    }
328                }
329                Node::Err(e) => return Some((self.offset, e)),
330            }
331        }
332        None
333    }
334}
335
/// C-layout, plain-data form of a tree node, decoded by `Node::from_data`.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct NodeData {
    /// Variant tag read by `Node::from_data`: 0 = lexeme, 1 = group,
    /// 2 = unexpected-tokens error, 3 = missing error.
    kind: u32,
    /// Syntax kind of the group; only read when `kind == 1`.
    group_kind: u32,
    /// Variant-specific data; which union field is read depends on `kind`.
    payload: NodeDataPayload,
}
343
/// Payload of a [`NodeData`]. `Node::from_data` picks the field to read from
/// `NodeData::kind`; reading any field is `unsafe` because this is a union.
#[derive(Clone, Copy)]
#[repr(C)]
pub union NodeDataPayload {
    /// Read when `kind == 0`: the single lexeme of a leaf node.
    pub lexeme: LexemeData,
    /// Read when `kind == 1`: the children of a group.
    pub node_vec: RawVec<NodeData>,
    /// Read when `kind == 2`: the tokens of an "unexpected" error.
    pub lexeme_vec: RawVec<LexemeData>,
    /// Read when `kind == 3`: the expectations of a "missing" error.
    pub expected_vec: RawVec<ExpectedData>,
}
352
353impl Node<CompiledLang> {
354    pub fn from_data(value: NodeData, src: &str, offset: &mut usize) -> Self {
355        match value.kind {
356            0 => {
357                let payload = unsafe { value.payload.lexeme };
358                *offset = payload.end;
359                Node::Lexeme(Lexeme {
360                    span: payload.start..payload.end,
361                    kind: payload.kind as u32,
362                    text: src[payload.start..payload.end].to_string(),
363                })
364            }
365
366            1 => unsafe {
367                let children = Vec::from(value.payload.node_vec);
368                Node::Group(Group {
369                    kind: value.group_kind,
370                    children: children
371                        .into_iter()
372                        .map(|it| Node::from_data(it, src, offset))
373                        .collect(),
374                })
375            },
376            2 => unsafe {
377                let tokens = Vec::from(value.payload.lexeme_vec)
378                    .into_iter()
379                    .map(|it| Lexeme::from_data(it, src))
380                    .collect::<Vec<_>>();
381                if let Some(last) = tokens.last() {
382                    *offset = last.span.end;
383                }
384
385                Node::Err(ParseError::Unexpected {
386                    actual: tokens,
387                    start: *offset,
388                })
389            },
390            3 => unsafe {
391                let expected = Vec::from(value.payload.expected_vec);
392                Node::Err(ParseError::MissingError {
393                    start: *offset,
394                    expected: expected.into_iter().map(|it| it.into()).collect(),
395                })
396            },
397            id => panic!("Unexpected node id '{id}'"),
398        }
399    }
400}