1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
//! `parse_tree` is a library for representing so-called parse trees.
//! A parse tree is a non-abstract AST: a generic syntax tree
//! that remembers all whitespace, comments and other trivia.
#[macro_use]
extern crate lazy_static;

use std::{cmp, fmt, ops, ptr};
use std::sync::Mutex;
use std::collections::hash_map::{Entry, HashMap};

mod text;
mod top_down_builder;
mod bottom_up_builder;

pub use text::{TextRange, TextUnit};
pub use top_down_builder::TopDownBuilder;
pub use bottom_up_builder::BottomUpBuilder;

/// The kind of a syntactic construct.
///
/// Covers both leaf tokens (for example "a comma") and composite
/// nodes (for example "a function"). A symbol is a thin wrapper around
/// a numeric id; two symbols are equal exactly when their ids are equal,
/// and they are ordered by id.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol(#[doc(hidden)] pub u32);

/// Metadata stored in the global symbol table for a registered `Symbol`.
struct SymbolInfo {
    /// Name supplied when the symbol was registered.
    name: &'static str,
}

// Process-wide table of registered symbols, keyed by `Symbol`.
// Entries are added by `register_symbol` and looked up by `Symbol::name`;
// the `Mutex` serializes access to the underlying map.
lazy_static! {
    static ref SYMBOLS: Mutex<HashMap<Symbol, SymbolInfo>>
        = Mutex::new(HashMap::new());
}

/// Records `name` as the name of `symbol` in the global symbol table.
///
/// This is an implementation detail of the `symbols!` macro.
///
/// # Panics
///
/// Panics if `symbol` has already been registered, or if the lock
/// protecting the symbol table is poisoned.
#[doc(hidden)]
pub fn register_symbol(symbol: Symbol, name: &'static str) {
    let mut registry = SYMBOLS.lock().unwrap();
    // Happy path first: the id is still free, so claim it and leave.
    if let Entry::Vacant(slot) = registry.entry(symbol) {
        slot.insert(SymbolInfo { name });
        return;
    }
    // The id is already taken; registering it twice is a programming error.
    panic!("Duplicate symbol {} {}", symbol.0, name);
}

/// Declares a set of `Symbol` constants together with a function that
/// registers their names.
///
/// The first identifier is the name of the registration function to
/// generate. It is followed by any number of `NAME id` pairs; each pair
/// expands to `pub const NAME: Symbol = Symbol(id);`.
///
/// ```text
/// symbols! {
///     register
///     COMMA 0
///     FUNCTION 1
/// }
/// ```
///
/// The generated function calls `register_symbol` for every declared
/// constant, using the constant's identifier as its name. It may be called
/// any number of times; the registration itself runs only once.
#[macro_export]
macro_rules! symbols {
    ( $register:ident $($name:ident $id:expr)*) => {
        $(
            pub const $name: $crate::Symbol = $crate::Symbol($id);
        )*

        pub fn $register() {
            // `Once::new()` is used instead of the deprecated `ONCE_INIT`
            // constant so that crates invoking this macro do not get a
            // deprecation warning in their own code.
            static INIT: ::std::sync::Once = ::std::sync::Once::new();
            INIT.call_once(|| {
                $(
                    $crate::register_symbol($name, stringify!($name));
                )*
            })
        }
    };
}

impl Symbol {
    /// Returns the name under which this symbol was registered.
    ///
    /// # Panics
    ///
    /// Panics if the symbol is not present in the global symbol table,
    /// or if the lock protecting that table is poisoned.
    pub fn name(&self) -> &'static str {
        let registry = SYMBOLS.lock().unwrap();
        let info = &registry[self];
        info.name
    }
}

impl fmt::Debug for Symbol {
    /// Writes a backtick followed by the symbol's registered name.
    ///
    /// The name is looked up through `Symbol::name`, so this panics under
    /// the same conditions as that method.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = self.name();
        write!(f, "`{}", name)
    }
}

/// A single token of source code: what it is and how long it is.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token {
    /// The kind of this token.
    pub symbol: Symbol,
    /// The length of this token.
    pub len: TextUnit,
}

/// The parse tree for a single source file.
///
/// All nodes live in one flat vector and refer to each other by index;
/// use `ParseTree::root` to obtain a `Node` handle for traversal.
#[derive(Debug)]
pub struct ParseTree {
    // Index of the root node within `nodes`.
    root: NodeIdx,
    // Storage for every node of the tree, addressed by `NodeIdx`.
    nodes: Vec<NodeData>,
}

impl ParseTree {
    /// Returns a handle to the root node of this tree.
    ///
    /// # Panics
    ///
    /// Panics if the tree contains no nodes.
    pub fn root<'t>(&'t self) -> Node<'t> {
        assert!(!self.nodes.is_empty());
        let idx = self.root;
        Node { file: self, idx }
    }
}

/// A reference to a node in a parse tree.
///
/// A `Node` is a cheap, copyable handle: it borrows the tree and stores
/// the index of the node it points at.
#[derive(Clone, Copy)]
pub struct Node<'t> {
    // The tree this node belongs to.
    file: &'t ParseTree,
    // Position of this node's data inside `file.nodes`.
    idx: NodeIdx,
}

impl<'t> Node<'t> {
    /// Returns the symbol stored for this node.
    pub fn symbol(&self) -> Symbol {
        let data = self.data();
        data.symbol
    }

    /// Returns the text range stored for this node.
    pub fn range(&self) -> TextRange {
        let data = self.data();
        data.range
    }

    /// Returns the parent of this node, or `None` if no parent is recorded.
    pub fn parent(&self) -> Option<Node<'t>> {
        let parent_idx = self.data().parent;
        self.as_node(parent_idx)
    }

    /// Returns an iterator over the direct children of this node.
    pub fn children(&self) -> Children<'t> {
        let first = self.as_node(self.data().first_child);
        Children { next: first }
    }

    /// Looks up the stored data for this node in the owning tree.
    fn data(&self) -> &'t NodeData {
        let nodes = &self.file.nodes;
        &nodes[self.idx]
    }

    /// Turns an optional node index into an optional handle that points
    /// into the same tree as `self`.
    fn as_node(&self, idx: Option<NodeIdx>) -> Option<Node<'t>> {
        match idx {
            Some(found) => Some(Node {
                file: self.file,
                idx: found,
            }),
            None => None,
        }
    }
}

impl<'t> fmt::Debug for Node<'t> {
    /// Formats the node as `<symbol name>@<range>`, where the range is
    /// rendered with its own `Debug` implementation.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let name = self.symbol().name();
        let range = self.range();
        write!(fmt, "{}@{:?}", name, range)
    }
}

/// Debug representation of a subtree at `node`.
///
/// Produces one line per node, in depth-first order. Each line is indented
/// by two spaces per nesting level and shows the node's `Debug` output.
/// For nodes without children, the covered slice of `text` is appended
/// (debug-quoted) unless it consists solely of whitespace.
///
/// `text` must be the source text the tree was built from.
///
/// # Panics
///
/// Panics if a childless node's range is not a valid index into `text`.
pub fn debug_dump(node: Node, text: &str) -> String {
    /// Appends the dump of `node` and its descendants to `buff`.
    fn go(node: Node, buff: &mut String, level: usize, text: &str) {
        use std::fmt::Write;

        // Indent directly into the buffer instead of building a temporary
        // indentation string for every node.
        for _ in 0..level {
            buff.push_str("  ");
        }
        // Format straight into the buffer to avoid an intermediate `String`.
        write!(buff, "{:?}", node).expect("writing to a String cannot fail");

        if node.children().next().is_none() {
            let node_text = &text[node.range()];
            // Whitespace-only leaves carry no useful information in the dump.
            if !node_text.chars().all(char::is_whitespace) {
                write!(buff, " {:?}", node_text).expect("writing to a String cannot fail");
            }
        }
        buff.push('\n');
        for child in node.children() {
            go(child, buff, level + 1, text);
        }
    }

    let mut result = String::new();
    go(node, &mut result, 0, text);
    result
}

/// Two nodes are equal when they point at the same position of the very
/// same tree instance. Trees are compared by address, not by content.
impl<'f> cmp::PartialEq for Node<'f> {
    fn eq(&self, other: &Self) -> bool {
        let same_tree = ptr::eq(self.file, other.file);
        same_tree && self.idx == other.idx
    }
}

impl<'f> cmp::Eq for Node<'f> {}

/// Iterator over the direct children of a node, created by `Node::children`.
#[derive(Debug)]
pub struct Children<'f> {
    // The child that the next call to `Iterator::next` will yield, if any.
    next: Option<Node<'f>>,
}

impl<'f> Iterator for Children<'f> {
    type Item = Node<'f>;

    /// Yields the pending child and advances to its next sibling.
    fn next(&mut self) -> Option<Node<'f>> {
        let current = match self.next {
            Some(node) => node,
            None => return None,
        };
        // Follow the sibling link so the following call continues from there.
        self.next = current.as_node(current.data().next_sibling);
        Some(current)
    }
}

/// Position of a node inside the `nodes` vector of a `ParseTree`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct NodeIdx(u32);

/// Data stored for one node of a `ParseTree`.
///
/// Tree structure is encoded with index links rather than pointers.
#[derive(Debug)]
struct NodeData {
    // Kind of the syntactic construct at this node.
    symbol: Symbol,
    // Text range reported by `Node::range`.
    range: TextRange,
    // Link to the parent node, if one is recorded.
    parent: Option<NodeIdx>,
    // Link to the first child; `None` means the node has no children.
    first_child: Option<NodeIdx>,
    // Link to the next node in the parent's child list, if any.
    next_sibling: Option<NodeIdx>,
}

impl ops::Index<NodeIdx> for Vec<NodeData> {
    type Output = NodeData;

    fn index(&self, NodeIdx(idx): NodeIdx) -> &NodeData {
        &self[idx as usize]
    }
}

impl ops::IndexMut<NodeIdx> for Vec<NodeData> {
    fn index_mut(&mut self, NodeIdx(idx): NodeIdx) -> &mut NodeData {
        &mut self[idx as usize]
    }
}

/// Stores `value` in `slot`, which must currently be empty.
///
/// # Panics
///
/// Panics if `slot` already holds a value.
fn fill<T>(slot: &mut Option<T>, value: T) {
    // A second write would silently drop the earlier value, so it is
    // treated as a logic error in the caller.
    assert!(slot.is_none());
    let filled = Some(value);
    *slot = filled;
}