lady_deirdre/units/
unit.rs

1////////////////////////////////////////////////////////////////////////////////
2// This file is part of "Lady Deirdre", a compiler front-end foundation       //
3// technology.                                                                //
4//                                                                            //
5// This work is proprietary software with source-available code.              //
6//                                                                            //
7// To copy, use, distribute, or contribute to this work, you must agree to    //
8// the terms of the General License Agreement:                                //
9//                                                                            //
10// https://github.com/Eliah-Lakhin/lady-deirdre/blob/master/EULA.md           //
11//                                                                            //
12// The agreement grants a Basic Commercial License, allowing you to use       //
13// this work in non-commercial and limited commercial products with a total   //
14// gross revenue cap. To remove this commercial limit for one of your         //
15// products, you must acquire a Full Commercial License.                      //
16//                                                                            //
17// If you contribute to the source code, documentation, or related materials, //
18// you must grant me an exclusive license to these contributions.             //
19// Contributions are governed by the "Contributions" section of the General   //
20// License Agreement.                                                         //
21//                                                                            //
22// Copying the work in parts is strictly forbidden, except as permitted       //
23// under the General License Agreement.                                       //
24//                                                                            //
25// If you do not or cannot agree to the terms of this Agreement,              //
26// do not use this work.                                                      //
27//                                                                            //
28// This work is provided "as is", without any warranties, express or implied, //
29// except where such disclaimers are legally invalid.                         //
30//                                                                            //
31// Copyright (c) 2024 Ilya Lakhin (Илья Александрович Лахин).                 //
32// All rights reserved.                                                       //
33////////////////////////////////////////////////////////////////////////////////
34
35use std::fmt::{Debug, Display, Formatter};
36
37use crate::{
38    arena::{Entry, Identifiable},
39    lexis::{
40        Chunk,
41        Length,
42        LineIndex,
43        Site,
44        SiteSpan,
45        SourceCode,
46        ToSpan,
47        Token,
48        TokenBuffer,
49        TokenCount,
50        TokenCursor,
51    },
52    syntax::{Capture, Node, NodeRef, PolyRef, PolyVariant, SyntaxError, SyntaxTree},
53    units::{Document, ImmutableUnit, MutableUnit},
54};
55
56/// An object that grants access to the lexical and syntax structure of
57/// an individual file within the compilation project.
58///
59/// [Document], [ImmutableUnit] and [MutableUnit] are compilation units
60/// because they offer access to both components of the language grammar,
61/// but, for instance, [TokenBuffer] is not because it only provides an access
62/// to the lexical structure only.
63///
64/// CompilationUnit trait provides conventional functions to convert this unit
65/// into other types of units and some syntax analysis functions
66/// that require access to the full grammar structure of the file.
67///
68/// If you intend to implement this trait on your object, take a look at the
69/// [Lexis] and the [Syntax] facade-interfaces; they will assist you in exposing
70/// particular components of the grammar.
71pub trait CompilationUnit:
72    SourceCode<Token = <<Self as SyntaxTree>::Node as Node>::Token> + SyntaxTree
73{
74    /// Returns `true` if the compilation unit allows document write operations
75    /// after creation.
76    fn is_mutable(&self) -> bool;
77
78    /// Returns `true` if the compilation unit does not have write capabilities
79    /// after creation.
80    #[inline(always)]
81    fn is_immutable(&self) -> bool {
82        !self.is_mutable()
83    }
84
85    /// Extracts lexical structure of the compilation unit.
86    fn into_token_buffer(self) -> TokenBuffer<<Self as SourceCode>::Token>;
87
88    /// Converts this compilation unit into [Document].
89    ///
90    /// The mutable capabilities of the returning document depend on the
91    /// [CompilationUnit::is_mutable] value.
92    ///
93    /// Depending on the implementation this function may require full
94    /// source code reparsing, but implementors typically make the best effort
95    /// to reduce overhead. In particular, [Document]'s into_document is noop.
96    #[inline(always)]
97    fn into_document(self) -> Document<<Self as SyntaxTree>::Node>
98    where
99        Self: Sized,
100    {
101        match self.is_mutable() {
102            true => Document::Mutable(self.into_mutable_unit()),
103            false => Document::Immutable(self.into_immutable_unit()),
104        }
105    }
106
107    /// Converts this compilation unit [MutableUnit].
108    ///
109    /// Depending on the implementation this function may require full
110    /// source code reparsing, but implementors typically make the best effort
111    /// to reduce overhead. In particular, [MutableUnit]'s into_mutable_unit
112    /// is noop.
113    #[inline(always)]
114    fn into_mutable_unit(self) -> MutableUnit<<Self as SyntaxTree>::Node>
115    where
116        Self: Sized,
117    {
118        self.into_token_buffer().into_mutable_unit()
119    }
120
121    /// Converts this compilation unit [ImmutableUnit].
122    ///
123    /// Depending on the implementation this function may require full
124    /// source code reparsing, but implementors typically make the best effort
125    /// to reduce overhead. In particular, [ImmutableUnit]'s into_immutable_unit
126    /// is noop.
127    #[inline(always)]
128    fn into_immutable_unit(self) -> ImmutableUnit<<Self as SyntaxTree>::Node>
129    where
130        Self: Sized,
131    {
132        self.into_token_buffer().into_immutable_unit()
133    }
134
135    /// Searches for the top-most node in the syntax tree that fully covers
136    /// specified source code [span](ToSpan).
137    ///
138    /// For example, in the case of JSON `{"foo": [123]}`, the coverage of the
139    /// "123" token could be the `[123]` array, and the coverage of the ":"
140    /// token could be the `"foo": [bar]` entry of the JSON object.
141    ///
142    /// The result depends on the particular programming language grammar.
143    ///
144    /// In the worst case scenario, if the algorithm fails to find the top-most
145    /// node, it returns the reference to the root node.
146    ///
147    /// **Panic**
148    ///
149    /// Panics if the specified span is not valid for this compilation unit.
150    #[inline(always)]
151    fn cover(&self, span: impl ToSpan) -> NodeRef
152    where
153        Self: Sized,
154    {
155        let span = match span.to_site_span(self) {
156            None => panic!("Specified span is invalid."),
157
158            Some(span) => span,
159        };
160
161        let root = self.root_node_ref();
162
163        match NodeCoverage::cover(self, &root, &span) {
164            NodeCoverage::Fit(result) => result,
165            _ => root,
166        }
167    }
168
169    /// Returns an object that prints the underlying grammar structure.
170    ///
171    /// The `poly_ref` parameter specifies a reference to a particular grammar
172    /// component to debug. It could be a [NodeRef],
173    /// [TokenRef](crate::lexis::TokenRef) or [PolyVariant].
174    ///
175    /// To print the entire syntax tree with all nodes and tokens metadata, you
176    /// can obtain a root NodeRef using [SyntaxTree::root_node_ref] function.
177    ///
178    /// The default implementation is infallible regardless of the `poly_ref`
179    /// validity.
180    #[inline(always)]
181    fn display(&self, poly_ref: &(impl PolyRef + ?Sized)) -> impl Debug + Display + '_
182    where
183        Self: Sized,
184    {
185        DisplayTree {
186            unit: self,
187            variant: poly_ref.as_variant(),
188        }
189    }
190}
191
192/// A facade of the lexical structure.
193///
194/// This trait auto-implements [SourceCode] on the target object by delegating
195/// all calls to the required function [Lexis::lexis].
196///
197/// ```rust
198/// use lady_deirdre::units::Lexis;
199/// use lady_deirdre::arena::{Id, Identifiable};
200/// use lady_deirdre::lexis::{Token, TokenBuffer};
201///
202/// struct MyDocument<T: Token> {
203///     buf: TokenBuffer<T>,
204/// }
205///
206/// impl<T: Token> Identifiable for MyDocument<T> {
207///     fn id(&self) -> Id {
208///         self.buf.id()
209///     }
210/// }
211///
212/// impl<T: Token> Lexis for MyDocument<T> {
213///     type Lexis = TokenBuffer<T>;
214///
215///     fn lexis(&self) -> &Self::Lexis {
216///         &self.buf
217///     }
218/// }
219/// ```
220pub trait Lexis: Identifiable {
221    /// The target [SourceCode] delegation type.
222    type Lexis: SourceCode;
223
224    /// This function fully exposes underlying [SourceCode] interface
225    /// of this object.
226    fn lexis(&self) -> &Self::Lexis;
227}
228
229impl<F: Lexis> SourceCode for F {
230    type Token = <F::Lexis as SourceCode>::Token;
231
232    type Cursor<'code> = <F::Lexis as SourceCode>::Cursor<'code>
233        where Self: 'code;
234
235    type CharIterator<'code> = <F::Lexis as SourceCode>::CharIterator<'code>
236    where
237        Self: 'code;
238
239    #[inline(always)]
240    fn chars(&self, span: impl ToSpan) -> Self::CharIterator<'_> {
241        self.lexis().chars(span)
242    }
243
244    #[inline(always)]
245    fn has_chunk(&self, entry: &Entry) -> bool {
246        self.lexis().has_chunk(entry)
247    }
248
249    #[inline(always)]
250    fn get_token(&self, entry: &Entry) -> Option<Self::Token> {
251        self.lexis().get_token(entry)
252    }
253
254    #[inline(always)]
255    fn get_site(&self, entry: &Entry) -> Option<Site> {
256        self.lexis().get_site(entry)
257    }
258
259    #[inline(always)]
260    fn get_string(&self, entry: &Entry) -> Option<&str> {
261        self.lexis().get_string(entry)
262    }
263
264    #[inline(always)]
265    fn get_length(&self, entry: &Entry) -> Option<Length> {
266        self.lexis().get_length(entry)
267    }
268
269    #[inline(always)]
270    fn cursor(&self, span: impl ToSpan) -> Self::Cursor<'_> {
271        self.lexis().cursor(span)
272    }
273
274    #[inline(always)]
275    fn length(&self) -> Length {
276        self.lexis().length()
277    }
278
279    #[inline(always)]
280    fn tokens(&self) -> TokenCount {
281        self.lexis().tokens()
282    }
283
284    #[inline(always)]
285    fn lines(&self) -> &LineIndex {
286        self.lexis().lines()
287    }
288}
289
290/// A facade of the syntax structure.
291///
292/// This trait auto-implements [SyntaxTree] on the target object by delegating
293/// all calls to the required functions [Syntax::syntax]
294/// and [Syntax::syntax_mut].
295///
296/// ```rust
297/// use lady_deirdre::units::Syntax;
298/// use lady_deirdre::arena::{Id, Identifiable};
299/// use lady_deirdre::syntax::{Node, ImmutableSyntaxTree};
300///
301/// struct MyDocument<N: Node> {
302///     tree: ImmutableSyntaxTree<N>,
303/// }
304///
305/// impl<N: Node> Identifiable for MyDocument<N> {
306///     fn id(&self) -> Id {
307///         self.tree.id()
308///     }
309/// }
310///
311/// impl<N: Node> Syntax for MyDocument<N> {
312///     type Syntax = ImmutableSyntaxTree<N>;
313///
314///     fn syntax(&self) -> &Self::Syntax {
315///         &self.tree
316///     }
317///
318///     fn syntax_mut(&mut self) -> &mut Self::Syntax {
319///         &mut self.tree
320///     }
321/// }
322/// ```
323pub trait Syntax: Identifiable {
324    /// The target [SyntaxTree] delegation type.
325    type Syntax: SyntaxTree;
326
327    /// This function fully exposes immutable access to the underlying
328    /// [SyntaxTree] interface of this object.
329    fn syntax(&self) -> &Self::Syntax;
330
331    /// This function fully exposes mutable access to the underlying
332    /// [SyntaxTree] interface of this object.
333    fn syntax_mut(&mut self) -> &mut Self::Syntax;
334}
335
336impl<F: Syntax> SyntaxTree for F {
337    type Node = <F::Syntax as SyntaxTree>::Node;
338
339    type NodeIterator<'tree> = <F::Syntax as SyntaxTree>::NodeIterator<'tree> where Self: 'tree;
340
341    type ErrorIterator<'tree> = <F::Syntax as SyntaxTree>::ErrorIterator<'tree> where Self: 'tree;
342
343    #[inline(always)]
344    fn root_node_ref(&self) -> NodeRef {
345        self.syntax().root_node_ref()
346    }
347
348    #[inline(always)]
349    fn node_refs(&self) -> Self::NodeIterator<'_> {
350        self.syntax().node_refs()
351    }
352
353    #[inline(always)]
354    fn error_refs(&self) -> Self::ErrorIterator<'_> {
355        self.syntax().error_refs()
356    }
357
358    #[inline(always)]
359    fn has_node(&self, entry: &Entry) -> bool {
360        self.syntax().has_node(entry)
361    }
362
363    #[inline(always)]
364    fn get_node(&self, entry: &Entry) -> Option<&Self::Node> {
365        self.syntax().get_node(entry)
366    }
367
368    #[inline(always)]
369    fn get_node_mut(&mut self, entry: &Entry) -> Option<&mut Self::Node> {
370        self.syntax_mut().get_node_mut(entry)
371    }
372
373    #[inline(always)]
374    fn has_error(&self, entry: &Entry) -> bool {
375        self.syntax().has_error(entry)
376    }
377
378    #[inline(always)]
379    fn get_error(&self, entry: &Entry) -> Option<&SyntaxError> {
380        self.syntax().get_error(entry)
381    }
382}
383
384struct DisplayTree<
385    'unit,
386    N: Node,
387    C: TokenCursor<'unit>,
388    U: CompilationUnit<Cursor<'unit> = C, Node = N>,
389> {
390    unit: &'unit U,
391    variant: PolyVariant,
392}
393
394impl<'unit, N, C, U> Debug for DisplayTree<'unit, N, C, U>
395where
396    N: Node,
397    C: TokenCursor<'unit>,
398    U: CompilationUnit<Cursor<'unit> = C, Node = N>,
399{
400    #[inline(always)]
401    fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
402        Display::fmt(self, formatter)
403    }
404}
405
406impl<'unit, N, C, U> Display for DisplayTree<'unit, N, C, U>
407where
408    N: Node,
409    C: TokenCursor<'unit>,
410    U: CompilationUnit<Cursor<'unit> = C, Node = N>,
411{
412    fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result {
413        match &self.variant {
414            PolyVariant::Token(variant) => {
415                let chunk: Chunk<U::Token> = match variant.chunk(self.unit) {
416                    None => return Debug::fmt(variant, formatter),
417                    Some(chunk) => chunk,
418                };
419
420                let name = chunk.token.name().unwrap_or("TokenRef");
421
422                let mut debug_struct =
423                    formatter.debug_struct(&format!("${name}(chunk_entry: {:?})", variant.entry));
424
425                debug_struct.field("string", &chunk.string);
426                debug_struct.field("length", &chunk.length);
427
428                if let Some(site_span) = chunk.to_site_span(self.unit) {
429                    debug_struct.field("site_span", &site_span);
430
431                    debug_struct.field(
432                        "position_span",
433                        &format_args!("{}", chunk.display(self.unit)),
434                    );
435                }
436
437                debug_struct.finish()
438            }
439
440            PolyVariant::Node(variant) => {
441                let node: &N = match variant.deref(self.unit) {
442                    None => return Debug::fmt(variant, formatter),
443                    Some(node) => node,
444                };
445
446                let name = node.name().unwrap_or("NodeRef");
447
448                let alternate = formatter.alternate();
449
450                let mut debug_struct =
451                    formatter.debug_struct(&format!("{name}(entry: {:?})", variant.entry));
452
453                for key in node.capture_keys() {
454                    let Some(capture) = node.capture(*key) else {
455                        continue;
456                    };
457
458                    let key = key.to_string();
459
460                    match capture {
461                        Capture::SingleNode(capture) => match alternate {
462                            true => debug_struct
463                                .field(&key, &format_args!("{:#}", self.unit.display(capture))),
464                            false => debug_struct
465                                .field(&key, &format_args!("{}", self.unit.display(capture))),
466                        },
467
468                        Capture::ManyNodes(capture) => {
469                            let poly_refs = capture
470                                .into_iter()
471                                .map(|poly_ref| self.unit.display(poly_ref))
472                                .collect::<Vec<_>>();
473
474                            debug_struct.field(&key, &poly_refs)
475                        }
476
477                        Capture::SingleToken(capture) => match alternate {
478                            true => debug_struct
479                                .field(&key, &format_args!("{:#}", self.unit.display(capture))),
480                            false => debug_struct
481                                .field(&key, &format_args!("{}", self.unit.display(capture))),
482                        },
483
484                        Capture::ManyTokens(capture) => {
485                            let poly_refs = capture
486                                .into_iter()
487                                .map(|poly_ref| self.unit.display(poly_ref))
488                                .collect::<Vec<_>>();
489
490                            debug_struct.field(&key, &poly_refs)
491                        }
492                    };
493                }
494
495                debug_struct.finish()
496            }
497        }
498    }
499}
500
501#[derive(Debug)]
502pub(super) enum NodeCoverage {
503    Nil,
504    Fit(NodeRef),
505    Misfit(Site),
506}
507
508impl NodeCoverage {
509    pub(super) fn cover<
510        'unit,
511        N: Node,
512        C: TokenCursor<'unit>,
513        U: CompilationUnit<Cursor<'unit> = C, Node = N>,
514    >(
515        unit: &'unit U,
516        node_ref: &NodeRef,
517        span: &SiteSpan,
518    ) -> Self {
519        let node: &N = match node_ref.deref(unit) {
520            None => return Self::Nil,
521            Some(node) => node,
522        };
523
524        let node_span = match node.span(unit) {
525            None => return Self::Nil,
526            Some(span) => span,
527        };
528
529        if node_span.start > span.start || node_span.end < span.end {
530            return Self::Misfit(node_span.start);
531        }
532
533        for child in node.children_iter() {
534            if !child.kind().is_node() {
535                continue;
536            }
537
538            match Self::cover(unit, child.as_node_ref(), span) {
539                Self::Nil => continue,
540                Self::Misfit(start) => {
541                    if start > span.start {
542                        break;
543                    }
544                }
545                other => return other,
546            }
547        }
548
549        Self::Fit(*node_ref)
550    }
551}