ra_ap_parser 0.0.331

The Rust parser for rust-analyzer.
Documentation
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
use std::{mem, num::NonZeroU32};

use crate::{
    SyntaxKind::{self, *},
    output::Output,
};

/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
///
/// Kept to 8 bytes: error messages live in a side table on the `Parser`
/// (the `errors` vec) and `Event::Error` only stores an index into it.
/// `forward_parent` uses `NonZeroU32` so `Option` is niche-optimised away
/// (the offset is always ≥ 1 because the forward parent sits later in the
/// event stream).
#[derive(Debug, PartialEq, Clone, Copy)]
pub(crate) enum Event {
    /// This event signifies the start of the node.
    /// It should be either abandoned (in which case the
    /// `kind` is `TOMBSTONE`, and the event is ignored),
    /// or completed via a `Finish` event.
    ///
    /// All tokens between a `Start` and a `Finish` would
    /// become the children of the respective node.
    ///
    /// For left-recursive syntactic constructs, the parser produces
    /// a child node before it sees a parent. `forward_parent`
    /// saves the position of current event's parent.
    ///
    /// Consider this path
    ///
    /// foo::bar
    ///
    /// The events for it would look like this:
    ///
    /// ```text
    /// START(PATH) IDENT('foo') FINISH START(PATH) T![::] IDENT('bar') FINISH
    ///       |                          /\
    ///       |                          |
    ///       +------forward-parent------+
    /// ```
    ///
    /// And the tree would look like this
    ///
    /// ```text
    ///    +--PATH---------+
    ///    |   |           |
    ///    |   |           |
    ///    |  '::'       'bar'
    ///    |
    ///   PATH
    ///    |
    ///   'foo'
    /// ```
    ///
    /// See also `CompletedMarker::precede`.
    Start { kind: SyntaxKind, forward_parent: Option<NonZeroU32> },

    /// Complete the previous `Start` event
    Finish,

    /// Produce a single leaf-element.
    /// `n_raw_tokens` is used to glue complex contextual tokens.
    /// For example, lexer tokenizes `>>` as `>`, `>`, and
    /// `n_raw_tokens = 2` is used to produced a single `>>`.
    Token { kind: SyntaxKind, n_raw_tokens: u8 },
    /// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal
    /// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
    /// This event instructs whatever consumes the events to split the float literal into
    /// the corresponding parts.
    FloatSplitHack { ends_in_dot: bool },
    /// Index into the parser's side `errors` vec.
    Error { err: u32 },
}

impl Event {
    pub(crate) fn tombstone() -> Self {
        Event::Start { kind: TOMBSTONE, forward_parent: None }
    }
}

/// Generate the syntax tree with the control of events. `errors` is the
/// side table of error messages built up alongside the `events` stream.
pub(super) fn process(mut events: Vec<Event>, mut errors: Vec<String>) -> Output {
    // Each event becomes roughly one u32 in Output, so preallocate to avoid
    // the amortized grow-one churn we used to see in Output::enter_node.
    let mut res = Output::with_event_capacity(events.len());
    let mut forward_parents = Vec::new();

    for i in 0..events.len() {
        match events[i] {
            Event::Start { kind, forward_parent } => {
                // For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
                // in the normal control flow, the parent-child relation: `A -> B -> C`,
                // while with the magic forward_parent, it writes: `C <- B <- A`.

                // append `A` into parents.
                forward_parents.push(kind);
                let mut idx = i;
                let mut fp = forward_parent;
                while let Some(fwd) = fp {
                    idx += fwd.get() as usize;
                    // append `A`'s forward_parent `B`
                    fp = match mem::replace(&mut events[idx], Event::tombstone()) {
                        Event::Start { kind, forward_parent } => {
                            forward_parents.push(kind);
                            forward_parent
                        }
                        _ => unreachable!(),
                    };
                    // append `B`'s forward_parent `C` in the next stage.
                }

                for kind in forward_parents.drain(..).rev() {
                    if kind != TOMBSTONE {
                        res.enter_node(kind);
                    }
                }
            }
            Event::Finish => res.leave_node(),
            Event::Token { kind, n_raw_tokens } => {
                res.token(kind, n_raw_tokens);
            }
            Event::FloatSplitHack { ends_in_dot } => {
                res.float_split_hack(ends_in_dot);
                let ev = mem::replace(&mut events[i + 1], Event::tombstone());
                assert!(matches!(ev, Event::Finish), "{ev:?}");
            }
            Event::Error { err } => {
                // Move the string out of the side table; each index is visited
                // exactly once, so swapping with an empty String is cheap and
                // avoids any clone.
                let msg = mem::take(&mut errors[err as usize]);
                res.error(msg);
            }
        }
    }

    res
}