lady_deirdre/syntax/
observer.rs

1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation       //
3// technology.                                                                //
4//                                                                            //
5// This work is proprietary software with source-available code.              //
6//                                                                            //
7// To copy, use, distribute, or contribute to this work, you must agree to    //
8// the terms of the General License Agreement:                                //
9//                                                                            //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md           //
11//                                                                            //
12// The agreement grants a Basic Commercial License, allowing you to use       //
13// this work in non-commercial and limited commercial products with a total   //
14// gross revenue cap. To remove this commercial limit for one of your         //
15// products, you must acquire a Full Commercial License.                      //
16//                                                                            //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions.             //
19// Contributions are governed by the "Contributions" section of the General   //
20// License Agreement.                                                         //
21//                                                                            //
22// Copying the work in parts is strictly forbidden, except as permitted       //
23// under the General License Agreement.                                       //
24//                                                                            //
25// If you do not or cannot agree to the terms of this Agreement,              //
26// do not use this work.                                                      //
27//                                                                            //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid.                         //
30//                                                                            //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин).                 //
32// All rights reserved.                                                       //
33////////////////////////////////////////////////////////////////////////////////
34
35use std::{marker::PhantomData, mem::replace};
36
37use crate::{
38    arena::{Entry, EntryIndex, Id, Identifiable},
39    lexis::{Length, Site, SiteRef, Token, TokenCount, TokenCursor, TokenRef},
40    report::ld_unreachable,
41    syntax::{ErrorRef, Node, NodeRef, NodeRule, SyntaxError, SyntaxSession},
42};
43
44/// An object that tracks syntax grammar parser steps.
45///
46/// By supplying a reference of this object into
47/// the [ImmutableSyntaxTree::parse_with_observer](crate::syntax::ImmutableSyntaxTree::parse_with_observer)
48/// function, the function will report any interactions of the programming
49/// language syntax parser with the [SyntaxSession] interface during the syntax
50/// tree parsing process.
51///
52/// In particular, the [DebugObserver] implementation of this trait prints
53/// such interactions to the stdio for debugging purposes.
54pub trait Observer {
55    /// Specifies a type of the Node that is currently being parsed.
56    type Node: Node;
57
58    /// The parser reported consuming of a token from the token input stream.
59    ///
60    /// The `token` parameter specifies a token instance that has been consumed.
61    ///
62    /// The `token_ref` parameter specifies a [TokenRef] reference to the input
63    /// stream of the consumed token.
64    fn read_token(&mut self, token: <Self::Node as Node>::Token, token_ref: TokenRef);
65
66    /// The parser reported the beginning of a syntax parsing rule.
67    ///
68    /// The `rule` parameter specifies the rule that the parser begins to parse.
69    ///
70    /// The `node_ref` parameter specifies a [NodeRef] reference in the final
71    /// syntax tree of the node that will be created as a product of this rule.
72    fn enter_rule(&mut self, rule: NodeRule, node_ref: NodeRef);
73
74    /// The parser reported the finishing of a syntax parsing rule.
75    ///
76    /// The `rule` parameter specifies the rule that the parser finishes parsing.
77    ///
78    /// The `node_ref` parameter specifies a [NodeRef] reference in the final
79    /// syntax tree of the node that the parser created as a product of this rule.
80    fn leave_rule(&mut self, rule: NodeRule, node_ref: NodeRef);
81
82    /// The parser reported sibling node lifting to the current node.
83    ///
84    /// The `node_ref` parameter specifies a [NodeRef] reference of the syntax
85    /// tree's node that has been lifted.
86    ///
87    /// For details, see the [Left Recursion](SyntaxSession#left-recursion)
88    /// section of the syntax parsing process specification.
89    fn lift_node(&mut self, node_ref: NodeRef);
90
91    /// The parser reported a syntax error occurred during the rule parsing.
92    ///
93    /// The `error_ref` parameter specifies an [ErrorRef] reference
94    /// of the syntax tree's syntax error.
95    fn syntax_error(&mut self, error_ref: ErrorRef);
96}
97
98/// An [observer](Observer) that prints parsing steps to stdin.
99///
100/// You can provide this function to
101/// the [ImmutableSyntaxTree::parse_with_observer](crate::syntax::ImmutableSyntaxTree::parse_with_observer)
102/// to test particular syntax parser steps against an arbitrary token input.
103///
104/// Alternatively, consider using [Node::debug] where you can use arbitrary text
105/// instead of a predefined token stream.
106pub struct DebugObserver<N: Node> {
107    depth: usize,
108    _phantom: PhantomData<N>,
109}
110
111impl<N: Node> Default for DebugObserver<N> {
112    #[inline(always)]
113    fn default() -> Self {
114        Self {
115            depth: 0,
116            _phantom: PhantomData,
117        }
118    }
119}
120
121impl<N: Node> Observer for DebugObserver<N> {
122    type Node = N;
123
124    fn read_token(&mut self, token: <Self::Node as Node>::Token, _token_ref: TokenRef) {
125        let indent = self.indent();
126        let name = token.name().unwrap_or("?");
127
128        println!("{indent} ${name}");
129    }
130
131    fn enter_rule(&mut self, rule: NodeRule, _node_ref: NodeRef) {
132        let indent = self.indent();
133        let name = N::rule_name(rule).unwrap_or("?");
134
135        println!("{indent} {name} {{");
136
137        self.depth += 1;
138    }
139
140    fn leave_rule(&mut self, rule: NodeRule, _node_ref: NodeRef) {
141        self.depth = self.depth.checked_sub(1).unwrap_or_default();
142
143        let indent = self.indent();
144        let name = N::rule_name(rule).unwrap_or("?");
145
146        println!("{indent} }} {name}");
147    }
148
149    fn lift_node(&mut self, _node_ref: NodeRef) {
150        let indent = self.indent();
151        println!("{indent} --- lift ---",);
152    }
153
154    fn syntax_error(&mut self, _error_ref: ErrorRef) {
155        let indent = self.indent();
156        println!("{indent} --- error ---",);
157    }
158}
159
160impl<N: Node> DebugObserver<N> {
161    #[inline(always)]
162    fn indent(&self) -> String {
163        "    ".repeat(self.depth)
164    }
165}
166
167/// A default implementation of the [Observer] interface, which is a noop.
168#[repr(transparent)]
169pub struct VoidObserver<N: Node>(PhantomData<N>);
170
171impl<N: Node> Default for VoidObserver<N> {
172    #[inline(always)]
173    fn default() -> Self {
174        Self(PhantomData)
175    }
176}
177
178impl<N: Node> Observer for VoidObserver<N> {
179    type Node = N;
180
181    #[inline(always)]
182    fn read_token(&mut self, _token: <Self::Node as Node>::Token, _token_ref: TokenRef) {}
183
184    #[inline(always)]
185    fn enter_rule(&mut self, _rule: NodeRule, _node_ref: NodeRef) {}
186
187    #[inline(always)]
188    fn leave_rule(&mut self, _rule: NodeRule, _node_ref: NodeRef) {}
189
190    #[inline(always)]
191    fn lift_node(&mut self, _node_ref: NodeRef) {}
192
193    #[inline(always)]
194    fn syntax_error(&mut self, _error_ref: ErrorRef) {}
195}
196
197pub(super) struct ObservableSyntaxSession<
198    'code,
199    'observer,
200    N: Node,
201    C: TokenCursor<'code, Token = <N as Node>::Token>,
202    O: Observer<Node = N>,
203> {
204    pub(super) id: Id,
205    pub(super) context: Vec<EntryIndex>,
206    pub(super) nodes: Vec<Option<N>>,
207    pub(super) errors: Vec<SyntaxError>,
208    pub(super) failing: bool,
209    pub(super) token_cursor: C,
210    pub(super) observer: &'observer mut O,
211    pub(super) _phantom: PhantomData<&'code ()>,
212}
213
214impl<'code, 'observer, N, C, O> Identifiable for ObservableSyntaxSession<'code, 'observer, N, C, O>
215where
216    N: Node,
217    C: TokenCursor<'code, Token = <N as Node>::Token>,
218    O: Observer<Node = N>,
219{
220    #[inline(always)]
221    fn id(&self) -> Id {
222        self.id
223    }
224}
225
226impl<'code, 'observer, N, C, O> TokenCursor<'code>
227    for ObservableSyntaxSession<'code, 'observer, N, C, O>
228where
229    N: Node,
230    C: TokenCursor<'code, Token = <N as Node>::Token>,
231    O: Observer<Node = N>,
232{
233    type Token = <N as Node>::Token;
234
235    #[inline(always)]
236    fn advance(&mut self) -> bool {
237        let token = self.token(0);
238        let token_ref = self.token_ref(0);
239        self.observer.read_token(token, token_ref);
240
241        let advanced = self.token_cursor.advance();
242
243        self.failing = self.failing && !advanced;
244
245        advanced
246    }
247
248    #[inline(always)]
249    fn skip(&mut self, mut distance: TokenCount) {
250        let start = self.token_cursor.site(0);
251
252        while distance > 0 {
253            if !self.advance() {
254                break;
255            }
256
257            distance -= 1;
258        }
259
260        self.failing = self.failing && start == self.token_cursor.site(0);
261    }
262
263    #[inline(always)]
264    fn token(&mut self, distance: TokenCount) -> Self::Token {
265        self.token_cursor.token(distance)
266    }
267
268    #[inline(always)]
269    fn site(&mut self, distance: TokenCount) -> Option<Site> {
270        self.token_cursor.site(distance)
271    }
272
273    #[inline(always)]
274    fn length(&mut self, distance: TokenCount) -> Option<Length> {
275        self.token_cursor.length(distance)
276    }
277
278    #[inline(always)]
279    fn string(&mut self, distance: TokenCount) -> Option<&'code str> {
280        self.token_cursor.string(distance)
281    }
282
283    #[inline(always)]
284    fn token_ref(&mut self, distance: TokenCount) -> TokenRef {
285        self.token_cursor.token_ref(distance)
286    }
287
288    #[inline(always)]
289    fn site_ref(&mut self, distance: TokenCount) -> SiteRef {
290        self.token_cursor.site_ref(distance)
291    }
292
293    #[inline(always)]
294    fn end_site_ref(&mut self) -> SiteRef {
295        self.token_cursor.end_site_ref()
296    }
297}
298
299impl<'code, 'observer, N, C, O> SyntaxSession<'code>
300    for ObservableSyntaxSession<'code, 'observer, N, C, O>
301where
302    N: Node,
303    C: TokenCursor<'code, Token = <N as Node>::Token>,
304    O: Observer<Node = N>,
305{
306    type Node = N;
307
308    fn descend(&mut self, rule: NodeRule) -> NodeRef {
309        let _ = self.enter(rule);
310
311        let node = N::parse(self, rule);
312
313        self.leave(node)
314    }
315
316    #[inline]
317    fn enter(&mut self, rule: NodeRule) -> NodeRef {
318        let index = self.nodes.len();
319
320        self.nodes.push(None);
321
322        self.context.push(index);
323
324        let node_ref = NodeRef {
325            id: self.id,
326            entry: Entry { index, version: 0 },
327        };
328
329        self.observer.enter_rule(rule, node_ref);
330
331        node_ref
332    }
333
334    #[inline]
335    fn leave(&mut self, node: Self::Node) -> NodeRef {
336        let Some(index) = self.context.pop() else {
337            #[cfg(debug_assertions)]
338            {
339                panic!("Nesting imbalance.");
340            }
341
342            #[cfg(not(debug_assertions))]
343            {
344                return NodeRef::nil();
345            }
346        };
347
348        let Some(item) = self.nodes.get_mut(index) else {
349            unsafe { ld_unreachable!("Bad context index.") }
350        };
351
352        let rule = node.rule();
353
354        if replace(item, Some(node)).is_some() {
355            unsafe { ld_unreachable!("Bad context index.") }
356        }
357
358        let node_ref = NodeRef {
359            id: self.id,
360            entry: Entry { index, version: 0 },
361        };
362
363        self.observer.leave_rule(rule, node_ref);
364
365        node_ref
366    }
367
368    #[inline]
369    fn lift(&mut self, node_ref: &NodeRef) {
370        if self.id != node_ref.id {
371            #[cfg(debug_assertions)]
372            {
373                panic!("Cannot lift a node that does not belong to this compilation session.");
374            }
375
376            #[cfg(not(debug_assertions))]
377            {
378                return;
379            }
380        }
381
382        let parent_ref = self.node_ref();
383
384        let Some(Some(node)) = self.nodes.get_mut(node_ref.entry.index) else {
385            #[cfg(debug_assertions)]
386            {
387                panic!("Cannot lift a node that does not belong to this compilation session.");
388            }
389
390            #[cfg(not(debug_assertions))]
391            {
392                return;
393            }
394        };
395
396        node.set_parent_ref(parent_ref);
397
398        self.observer.lift_node(*node_ref);
399    }
400
401    #[inline(always)]
402    fn node_ref(&self) -> NodeRef {
403        let Some(index) = self.context.last() else {
404            #[cfg(debug_assertions)]
405            {
406                panic!("Nesting imbalance.");
407            }
408
409            #[cfg(not(debug_assertions))]
410            {
411                return NodeRef::nil();
412            }
413        };
414
415        NodeRef {
416            id: self.id,
417            entry: Entry {
418                index: *index,
419                version: 0,
420            },
421        }
422    }
423
424    #[inline(always)]
425    fn parent_ref(&self) -> NodeRef {
426        let Some(depth) = self.context.len().checked_sub(2) else {
427            return NodeRef::nil();
428        };
429
430        let index = *unsafe { self.context.get_unchecked(depth) };
431
432        NodeRef {
433            id: self.id,
434            entry: Entry { index, version: 0 },
435        }
436    }
437
438    #[inline(always)]
439    fn failure(&mut self, error: SyntaxError) -> ErrorRef {
440        if self.failing {
441            return ErrorRef::nil();
442        }
443
444        self.failing = true;
445
446        let index = self.errors.len();
447
448        self.errors.push(error.into());
449
450        let error_ref = ErrorRef {
451            id: self.id,
452            entry: Entry { index, version: 0 },
453        };
454
455        self.observer.syntax_error(error_ref);
456
457        error_ref
458    }
459}