sizzle_parser/
token_tree.rs

1//! Token trees are a second stage before parsing to simplify parsing
2//! structures.
3
4use thiserror::Error;
5
6use crate::{
7    names::Identifier,
8    src_pos::SrcPos,
9    token::{SrcToken, TaggedToken},
10};
11
12/// Token tree with an empty tag value.
13pub type Toktr = TaggedToktr<()>;
14
15pub(crate) type SrcToktr = TaggedToktr<SrcPos>;
16
17/// Token tree with a tag.
18#[derive(Clone, Debug, Eq, PartialEq)]
19pub enum TaggedToktr<T> {
20    // Keywords and structural elements.
21    /// `import` keyword.
22    Import(T),
23    /// `as` keyword.
24    As(T),
25    /// `class` keyword.
26    Class(T),
27    /// `:` keyword.
28    Colon(T),
29    /// `=` keyword.
30    Eq(T),
31    /// `,` keyword.
32    Comma(T),
33    /// `.` keyword.
34    Dot(T),
35    /// `\n` newline.
36    Newline(T),
37    /// `null` keyword.
38    Null(T),
39
40    // Identifiers.
41    /// An identifier.
42    Identifier(T, Identifier),
43
44    // Expressions.
45    /// An integer literal.
46    IntegerLiteral(T, u64),
47    /// `<<` operator.
48    Shl(T),
49    /// `*` operator.
50    Mul(T),
51
52    // Token tree nodes with children.
53    /// A bracket block.
54    BracketBlock(T, Box<NodeData<T>>),
55    /// A parenthesis block.
56    ParenBlock(T, Box<NodeData<T>>),
57    /// An indent block.
58    IndentBlock(T, Box<NodeData<T>>),
59
60    // Misc
61    DocString(T, String),
62}
63
64impl<T> TaggedToktr<T> {
65    /// Gets the tag of the token.
66    pub fn tag(&self) -> &T {
67        match self {
68            Self::Import(t) => t,
69            Self::As(t) => t,
70            Self::Class(t) => t,
71            Self::Colon(t) => t,
72            Self::Eq(t) => t,
73            Self::Comma(t) => t,
74            Self::Dot(t) => t,
75            Self::Newline(t) => t,
76            Self::Null(t) => t,
77            Self::Identifier(t, _) => t,
78            Self::IntegerLiteral(t, _) => t,
79            Self::Shl(t) => t,
80            Self::Mul(t) => t,
81            Self::BracketBlock(t, _) => t,
82            Self::ParenBlock(t, _) => t,
83            Self::IndentBlock(t, _) => t,
84            Self::DocString(t, _) => t,
85        }
86    }
87
88    /// Gets if the token is a block.
89    pub fn is_block(&self) -> bool {
90        matches!(
91            self,
92            Self::BracketBlock(_, _) | Self::ParenBlock(_, _) | Self::IndentBlock(_, _)
93        )
94    }
95
96    /// Gets the node data of the token.
97    pub fn node_data(&self) -> Option<&NodeData<T>> {
98        match self {
99            Self::BracketBlock(_, data)
100            | Self::ParenBlock(_, data)
101            | Self::IndentBlock(_, data) => Some(data),
102            _ => None,
103        }
104    }
105}
106
107/// Node data containing the children of a node.
108#[derive(Clone, Debug, Eq, PartialEq)]
109pub struct NodeData<T> {
110    children: Vec<TaggedToktr<T>>,
111}
112
113impl<T> NodeData<T> {
114    pub fn new(children: Vec<TaggedToktr<T>>) -> Self {
115        Self { children }
116    }
117
118    pub fn children(&self) -> &[TaggedToktr<T>] {
119        &self.children
120    }
121}
122
123#[derive(Debug, Error)]
124pub enum ToktrError {
125    #[error("expected {0:?} terminal but found {1:?} terminal")]
126    UnfinishedBlock(BlockType, BlockType),
127
128    #[error("end of sequence with unclosed block of type {0:?}")]
129    UnclosedBlock(BlockType),
130
131    #[error("not yet implemented")]
132    Unimplemented,
133}
134
135/// Describes what syntactic structure the block is representing.
136#[derive(Copy, Clone, Debug, Eq, PartialEq)]
137pub enum BlockType {
138    Root,
139    Bracket,
140    Paren,
141    Indent,
142}
143
144struct ToktrBuilder {
145    block_ty_stack: Vec<BlockType>,
146    block_sp_stack: Vec<SrcPos>,
147    contexts: Vec<Vec<SrcToktr>>,
148}
149
150impl ToktrBuilder {
151    pub(crate) fn new() -> Self {
152        Self {
153            block_ty_stack: vec![BlockType::Root],
154            block_sp_stack: vec![],
155            contexts: vec![Vec::new()],
156        }
157    }
158
159    fn top_block_mut(&mut self) -> &mut Vec<SrcToktr> {
160        self.contexts.last_mut().expect("toktr: top_block_mut")
161    }
162
163    fn top_ty(&self) -> &BlockType {
164        self.block_ty_stack.last().expect("toktr: top_ty")
165    }
166
167    pub(crate) fn push_token(&mut self, tt: SrcToktr) {
168        self.top_block_mut().push(tt);
169    }
170
171    pub(crate) fn push_block(&mut self, ty: BlockType, sp: SrcPos) {
172        self.contexts.push(Vec::new());
173        self.block_ty_stack.push(ty);
174        self.block_sp_stack.push(sp);
175    }
176
177    pub(crate) fn try_pop_block(&mut self, ty: BlockType) -> Result<SrcToktr, ToktrError> {
178        // This shouldn't be allowed.
179        if ty == BlockType::Root {
180            panic!("toktr: tried to pop root");
181        }
182
183        if *self.top_ty() != ty {
184            return Err(ToktrError::UnfinishedBlock(*self.top_ty(), ty));
185        }
186
187        // Construct the new node.
188        self.block_ty_stack.pop();
189        let sp = self.block_sp_stack.pop().unwrap();
190        let body_toks = self.contexts.pop().expect("toktr: try_finish_block");
191        let data = NodeData::new(body_toks);
192        let tt = match ty {
193            BlockType::Bracket => SrcToktr::BracketBlock(sp, Box::new(data)),
194            BlockType::Paren => SrcToktr::ParenBlock(sp, Box::new(data)),
195            BlockType::Indent => SrcToktr::IndentBlock(sp, Box::new(data)),
196            _ => unreachable!(),
197        };
198
199        Ok(tt)
200    }
201
202    pub(crate) fn finish(mut self) -> Result<Vec<SrcToktr>, ToktrError> {
203        if self.contexts.len() != 1 {
204            return Err(ToktrError::UnclosedBlock(*self.top_ty()));
205        }
206
207        let toks = self.contexts.pop().unwrap();
208        Ok(toks)
209    }
210}
211
212pub(crate) fn parse_tokens_to_toktrs(tokens: &[SrcToken]) -> Result<Vec<SrcToktr>, ToktrError> {
213    let mut i = 0;
214
215    let mut builder = ToktrBuilder::new();
216
217    while i < tokens.len() {
218        let cur = &tokens[i];
219
220        let tt = match cur {
221            TaggedToken::Null(sp) => TaggedToktr::Null(*sp),
222            TaggedToken::Class(sp) => TaggedToktr::Class(*sp),
223            TaggedToken::Import(sp) => TaggedToktr::Import(*sp),
224            TaggedToken::As(sp) => TaggedToktr::As(*sp),
225            TaggedToken::Colon(sp) => TaggedToktr::Colon(*sp),
226            TaggedToken::Eq(sp) => TaggedToktr::Eq(*sp),
227            TaggedToken::Comma(sp) => TaggedToktr::Comma(*sp),
228            TaggedToken::Dot(sp) => TaggedToktr::Dot(*sp),
229            TaggedToken::Newline(sp) => TaggedToktr::Newline(*sp),
230            TaggedToken::Identifier(sp, ident) => TaggedToktr::Identifier(*sp, ident.clone()),
231            TaggedToken::IntegerLiteral(sp, v) => TaggedToktr::IntegerLiteral(*sp, *v),
232            TaggedToken::Shl(sp) => TaggedToktr::Shl(*sp),
233            TaggedToken::Mul(sp) => TaggedToktr::Mul(*sp),
234            TaggedToken::DocString(sp, doc) => TaggedToktr::DocString(*sp, doc.clone()),
235
236            TaggedToken::OpenBracket(sp) => {
237                builder.push_block(BlockType::Bracket, *sp);
238                i += 1;
239                continue;
240            }
241
242            TaggedToken::CloseBracket(_) => builder.try_pop_block(BlockType::Bracket)?,
243
244            TaggedToken::OpenParen(sp) => {
245                builder.push_block(BlockType::Paren, *sp);
246                i += 1;
247                continue;
248            }
249            TaggedToken::CloseParen(_) => builder.try_pop_block(BlockType::Paren)?,
250
251            TaggedToken::Indent(sp) => {
252                builder.push_block(BlockType::Indent, *sp);
253                i += 1;
254                continue;
255            }
256            TaggedToken::Deindent(_) => builder.try_pop_block(BlockType::Indent)?,
257        };
258
259        builder.push_token(tt);
260
261        i += 1;
262    }
263
264    builder.finish()
265}
266
267#[cfg(test)]
268mod tests {
269    use crate::{
270        src_pos::SrcPos,
271        token::{parse_char_array_to_tokens, SrcToken},
272        Identifier,
273    };
274
275    use super::parse_tokens_to_toktrs;
276
277    #[test]
278    fn test_parse_simple() {
279        let tokens = vec![
280            SrcToken::Class(SrcPos::dummy()),
281            SrcToken::Colon(SrcPos::dummy()),
282            SrcToken::Eq(SrcPos::dummy()),
283            SrcToken::Comma(SrcPos::dummy()),
284        ];
285
286        let tt = parse_tokens_to_toktrs(&tokens).expect("test: invoke parse_tokens_to_toktrs");
287
288        eprintln!("{tt:#?}");
289    }
290
291    #[test]
292    fn test_parse_tree() {
293        let foo_name = Identifier::try_from("Foo".to_owned()).expect("test: parse ident name");
294        let container_name =
295            Identifier::try_from("Container".to_owned()).expect("test: parse ident name");
296        let vector_name =
297            Identifier::try_from("Vector".to_owned()).expect("test: parse ident name");
298        let bar_name = Identifier::try_from("bar".to_owned()).expect("test: parse ident name");
299
300        let sp = SrcPos::dummy();
301
302        let tokens = vec![
303            SrcToken::Class(sp),
304            SrcToken::Identifier(sp, foo_name),
305            SrcToken::OpenParen(sp),
306            SrcToken::Identifier(sp, container_name),
307            SrcToken::CloseParen(sp),
308            SrcToken::Colon(sp),
309            SrcToken::Indent(sp),
310            SrcToken::Identifier(sp, bar_name),
311            SrcToken::Colon(sp),
312            SrcToken::Identifier(sp, vector_name),
313            SrcToken::Deindent(sp),
314        ];
315
316        let tt = parse_tokens_to_toktrs(&tokens).expect("test: invoke parse_tokens_to_toktrs");
317
318        eprintln!("{tt:#?}");
319    }
320
321    #[test]
322    fn test_parse_nested_tree() {
323        let foo_name = Identifier::try_from("Foo".to_owned()).expect("test: parse ident");
324        let stable_container_name =
325            Identifier::try_from("StableContainer".to_owned()).expect("test: parse ident");
326        let vector_name = Identifier::try_from("Vector".to_owned()).expect("test: parse ident");
327        let bar_name = Identifier::try_from("bar".to_owned()).expect("test: parse ident");
328
329        let sp = SrcPos::dummy();
330
331        let tokens = vec![
332            SrcToken::Class(sp),
333            SrcToken::Identifier(sp, foo_name),
334            SrcToken::OpenParen(sp),
335            SrcToken::Identifier(sp, stable_container_name.clone()),
336            SrcToken::OpenBracket(sp),
337            SrcToken::IntegerLiteral(sp, 32),
338            SrcToken::CloseBracket(sp),
339            SrcToken::CloseParen(sp),
340            SrcToken::Colon(sp),
341            SrcToken::Indent(sp),
342            SrcToken::Identifier(sp, bar_name),
343            SrcToken::Colon(sp),
344            SrcToken::Identifier(sp, vector_name),
345            SrcToken::Deindent(sp),
346        ];
347
348        let tt = parse_tokens_to_toktrs(&tokens).expect("test: invoke parse_tokens_to_toktrs");
349
350        eprintln!("{tt:#?}");
351    }
352
353    #[test]
354    fn test_full_parse_stable_container() {
355        let s = r"
356class Foo(StableContainer[16]):
357    x_coordinate: Optional[uint32]
358    y_coordinate: Optional[uint64]
359";
360
361        let arr = s.chars().collect::<Vec<_>>();
362
363        let toks = parse_char_array_to_tokens(&arr).expect("test: tokenize string");
364
365        eprintln!("tokens {toks:#?}");
366
367        let tt = parse_tokens_to_toktrs(&toks).expect("test: treeize tokens");
368
369        eprintln!("tree {tt:#?}");
370    }
371}