// file: litua/parser.rs

1//! Parser for litua text documents
2
3use std::collections::HashMap;
4use std::iter;
5use std::path;
6
7use crate::tree;
8use crate::lexer;
9use crate::errors;
10
/// `Parser` holds a reference to the text document source code.
/// To generate better error messages, we also store the filepath.
/// The parsing process fills a tree with data.
///
/// A typical parsing process is done with the following methods:
/// `consume_iter(iter)` takes a `LexingIterator` and consumes the
/// generated tokens. Then `finalize` declares the termination of
/// the token consumption. Finally one can fetch the resulting
/// abstract syntax tree by calling the method `tree()`.
pub struct Parser<'s> {
    /// path of the parsed file; kept for diagnostics and exposed as
    /// the "filepath" argument of the root "document" node
    pub filepath: path::PathBuf,
    /// the complete source text; lexer tokens carry byte ranges that
    /// are resolved against this string
    pub source_code: &'s str,
    /// root of the tree under construction — a synthetic "document" call
    pub root: tree::DocumentFunction,
}
25
26impl<'s> Parser<'s> {
27    pub fn new(filepath: &path::Path, source_code: &'s str) -> Parser<'s> {
28        let mut args = HashMap::new();
29        if let Some(fp) = filepath.to_str() {
30            args.insert("filepath".to_owned(), vec![tree::DocumentElement::Text(fp.to_owned())]);
31        }
32
33        let root = tree::DocumentFunction {
34            call: "document".to_owned(),
35            args,
36            content: vec!(),
37        };
38
39        Parser{
40            filepath: filepath.to_owned(),
41            source_code,
42            root,
43        }
44    }
45
    /// Builds an `UnexpectedToken` error for `tok`, where `expected`
    /// describes in prose which token kind the parser required instead.
    /// Generic over `T` so it can be returned from any parsing context.
    #[inline]
    fn unexpected_token<T>(tok: &lexer::Token, expected: &str) -> Result<T, errors::Error> {
        Err(errors::Error::UnexpectedToken(tok.clone(), expected.to_owned()))
    }
50
    /// Builds an `UnexpectedEOF` error for the case that the token
    /// iterator ends (or yields `EndOfFile`) before parsing finished.
    #[inline]
    fn unexpected_eof<T>() -> Result<T, errors::Error> {
        Err(errors::Error::UnexpectedEOF("unexpected end of lexer tokens iterator".to_owned()))
    }
55
56    fn parse_raw(&mut self, iter: &mut iter::Peekable<lexer::LexingIterator>) -> Result<tree::DocumentElement, errors::Error> {
57        let whitespace_before;
58        let whitespace_after;
59        let name;
60        let text;
61
62        // (1) consume BeginRaw
63        match iter.next() {
64            Some(tok_or_err) => {
65                let token = tok_or_err?;
66                match token {
67                    lexer::Token::BeginRaw(range) => {
68                        // NOTE: expected token, yay!
69                        name = &self.source_code[range];
70                    },
71                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
72                    _ => return Self::unexpected_token(&token, "start of raw string"),
73                }
74            },
75            None => return Self::unexpected_eof(),
76        }
77
78        // (2) consume Whitespace
79        match iter.next() {
80            Some(tok_or_err) => {
81                let token = tok_or_err?;
82                match token {
83                    lexer::Token::Whitespace(_, ws) => {
84                        whitespace_before = ws;
85                        // NOTE: expected token, yay!
86                    },
87                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
88                    _ => return Self::unexpected_token(&token, "whitespace before"),
89                }
90            },
91            None => return Self::unexpected_eof(),
92        }
93
94        // (3) consume Text
95        match iter.next() {
96            Some(tok_or_err) => {
97                let token = tok_or_err?;
98                match token {
99                    lexer::Token::Text(range) => {
100                        text = &self.source_code[range];
101                        // NOTE: expected token, yay!
102                    },
103                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
104                    _ => return Self::unexpected_token(&token, "text string"),
105                }
106            },
107            None => return Self::unexpected_eof(),
108        }
109
110
111        // (4) consume Whitespace
112        match iter.next() {
113            Some(tok_or_err) => {
114                let token = tok_or_err?;
115                match token {
116                    lexer::Token::Whitespace(_, ws) => {
117                        whitespace_after = ws;
118                        // NOTE: expected token, yay!
119                    },
120                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
121                    _ => return Self::unexpected_token(&token, "whitespace after raw string"),
122                }
123            },
124            None => return Self::unexpected_eof(),
125        }
126
127        // (5) consume EndRaw
128        match iter.next() {
129            Some(tok_or_err) => {
130                let token = tok_or_err?;
131                match token {
132                    lexer::Token::EndRaw(_) => {
133                        // NOTE: expected token, yay!
134                    },
135                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
136                    _ => return Self::unexpected_token(&token, "end of raw string"),
137                }
138            },
139            None => return Self::unexpected_eof(),
140        }
141
142        // Ok(tree::DocumentElement::Text(text.to_owned()))  // NOTE would not convey `whitespace`
143        let mut h = HashMap::new();
144        h.insert("=whitespace".to_owned(), vec![ tree::DocumentElement::Text(whitespace_before.to_string()) ]);
145        h.insert("=whitespace-after".to_owned(), vec![ tree::DocumentElement::Text(whitespace_after.to_string()) ]);
146        Ok(tree::DocumentElement::Function(tree::DocumentFunction {
147            call: name.to_string(),
148            args: h,
149            content: vec![tree::DocumentElement::Text(text.to_owned())],
150        }))
151    }
152
153    fn parse_content(&mut self, iter: &mut iter::Peekable<lexer::LexingIterator>) -> Result<tree::DocumentNode, errors::Error> {
154        let mut content = tree::DocumentNode::new();
155
156        // (1) consume BeginContent
157        match iter.next() {
158            Some(tok_or_err) => {
159                let token = tok_or_err?;
160                match token {
161                    lexer::Token::BeginContent(_) => {
162                        // NOTE: expected token, yay!
163                    },
164                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
165                    _ => return Self::unexpected_token(&token, "start of content"),
166                }
167            },
168            None => return Self::unexpected_eof(),
169        }
170
171        // (2) loop
172        loop {
173            // admissible tokens
174            enum NextToken {
175                BeginFunction,
176                BeginRaw,
177                Text,
178                EndContent,
179                Unexpected,
180            }
181
182            let mut next_token = NextToken::Unexpected;
183
184            if let Some(token_or_err) = iter.peek() {
185                next_token = match token_or_err {
186                    Ok(lexer::Token::BeginFunction(_)) => NextToken::BeginFunction,
187                    Ok(lexer::Token::BeginRaw(_)) => NextToken::BeginRaw,
188                    Ok(lexer::Token::Text(_)) => NextToken::Text,
189                    Ok(lexer::Token::EndContent(_)) => NextToken::EndContent,
190                    _ => NextToken::Unexpected,
191                };
192            }
193
194            match next_token {
195                NextToken::BeginFunction => {
196                    // (3)   if BeginFunction
197                    // (4)     parse_function
198                    let func = self.parse_function(iter)?;
199                    content.push(func);
200                },
201                NextToken::BeginRaw => {
202                    let text = self.parse_raw(iter)?;
203                    content.push(text);
204                },
205                NextToken::Text => {
206                    // (7)   if Text
207                    // (8)     add text
208                    if let Some(Ok(lexer::Token::Text(range))) = iter.next() {
209                        let text = &self.source_code[range];
210                        content.push(tree::DocumentElement::Text(text.to_owned()));
211                    }
212                },
213                NextToken::EndContent => break,
214                NextToken::Unexpected => {
215                    // protocol violation
216                    match iter.next() {
217                        Some(Ok(tok)) => return Self::unexpected_token(&tok, "start of function/raw string or some text or end of content"),
218                        Some(Err(err)) => Err(err)?,
219                        None => return Self::unexpected_eof(),
220                    }
221                },
222            }
223        }
224        // (8) consume EndContent
225        match iter.next() {
226            Some(tok_or_err) => {
227                let token = tok_or_err?;
228                match token {
229                    lexer::Token::EndContent(_) => {
230                        // NOTE: expected token, yay!
231                    },
232                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
233                    _ => return Self::unexpected_token(&token, "end of content"),
234                }
235            },
236            None => return Self::unexpected_eof(),
237        }
238
239        Ok(content)
240    }
241
242    fn parse_argument_value(&mut self, iter: &mut iter::Peekable<lexer::LexingIterator>) -> Result<tree::DocumentNode, errors::Error> {
243        let mut arg_value = tree::DocumentNode::new();
244
245        // (1) consume BeginArgValue
246        match iter.next() {
247            Some(tok_or_err) => {
248                let token = tok_or_err?;
249                match token {
250                    lexer::Token::BeginArgValue(_) => {
251                        // NOTE: expected token, yay!
252                    },
253                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
254                    _ => return Self::unexpected_token(&token, "start of argument value"),
255                }
256            },
257            None => return Self::unexpected_eof(),
258        }
259
260        // (2) loop
261        loop {
262            // admissible tokens
263            enum NextToken {
264                BeginFunction,
265                BeginRaw,
266                Text,
267                EndArgValue,
268                Unexpected,
269            }
270
271            let mut next_token = NextToken::Unexpected;
272
273            if let Some(token_or_err) = iter.peek() {
274                next_token = match token_or_err {
275                    Ok(lexer::Token::BeginFunction(_)) => NextToken::BeginFunction,
276                    Ok(lexer::Token::BeginRaw(_)) => NextToken::BeginRaw,
277                    Ok(lexer::Token::Text(_)) => NextToken::Text,
278                    Ok(lexer::Token::EndArgValue(_)) => NextToken::EndArgValue,
279                    _ => NextToken::Unexpected,
280                };
281            }
282
283            match next_token {
284                NextToken::BeginFunction => {
285                    // (3)   if BeginFunction
286                    // (4)     parse_function
287                    let func = self.parse_function(iter)?;
288                    arg_value.push(func);
289                },
290                NextToken::BeginRaw => {
291                    let text = self.parse_raw(iter)?;
292                    arg_value.push(text);
293                },
294                NextToken::Text => {
295                    // (7)   if Text
296                    // (8)     add text
297                    if let Some(Ok(lexer::Token::Text(range))) = iter.next() {
298                        let content = &self.source_code[range];
299                        arg_value.push(tree::DocumentElement::Text(content.to_owned()));
300                    }
301                },
302                NextToken::EndArgValue => break,
303                NextToken::Unexpected => {
304                    // protocol violation
305                    match iter.next() {
306                        Some(Ok(tok)) => return Self::unexpected_token(&tok, "start of function/raw string or some text or end of argument value"),
307                        Some(Err(err)) => Err(err)?,
308                        None => return Self::unexpected_eof(),
309                    }
310                },
311            }
312        }
313
314        // (8) consume EndArgValue
315        match iter.next() {
316            Some(tok_or_err) => {
317                let token = tok_or_err?;
318                match token {
319                    lexer::Token::EndArgValue(_) => {
320                        // NOTE: expected token, yay!
321                    },
322                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
323                    _ => return Self::unexpected_token(&token, "end of argument value"),
324                }
325            },
326            None => return Self::unexpected_eof(),
327        }
328
329        Ok(arg_value)
330    }
331
332    fn parse_function(&mut self, iter: &mut iter::Peekable<lexer::LexingIterator>) -> Result<tree::DocumentElement, errors::Error> {
333        let mut func = tree::DocumentFunction::new();
334
335        // (01) consume BeginFunction
336        match iter.next() {
337            Some(tok_or_err) => {
338                let token = tok_or_err?;
339                match token {
340                    lexer::Token::BeginFunction(_) => {
341                        // NOTE: expected token, yay!
342                    },
343                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
344                    _ => return Self::unexpected_token(&token, "start of function"),
345                }
346            },
347            None => return Self::unexpected_eof(),
348        }
349
350        // (02) consume Call
351        match iter.next() {
352            Some(tok_or_err) => {
353                let token = tok_or_err?;
354                match token {
355                    lexer::Token::Call(range) => {
356                        let name = &self.source_code[range];
357                        func.call = name.to_owned();
358                    },
359                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
360                    _ => return Self::unexpected_token(&token, "call name"),
361                }
362            },
363            None => return Self::unexpected_eof(),
364        }
365
366        // (03) optionally consume Whitespace
367        if let Some(Ok(lexer::Token::Whitespace(_, _))) = iter.peek() {
368            match iter.next() {
369                Some(tok_or_err) => {
370                    let token = tok_or_err?;
371                    match token {
372                        lexer::Token::Whitespace(_, whitespace) => {
373                            func.args.insert("=whitespace".to_owned(), vec![tree::DocumentElement::Text(format!("{whitespace}"))]);
374                        },
375                        lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
376                        _ => return Self::unexpected_token(&token, "whitespace"),
377                    }
378                },
379                None => return Self::unexpected_eof(),
380            }
381        }
382
383        // (04) if BeginArgs
384        if let Some(Ok(lexer::Token::BeginArgs(_))) = iter.peek() {
385            // (05)   consume BeginArgs
386            match iter.next() {
387                Some(tok_or_err) => {
388                    let token = tok_or_err?;
389                    match token {
390                        lexer::Token::BeginArgs(_) => {
391                            // NOTE: expected token, yay!
392                        },
393                        lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
394                        _ => return Self::unexpected_token(&token, "start of arguments"),
395                    }
396                },
397                None => return Self::unexpected_eof(),
398            }
399
400            // (06)   loop if ArgKey
401            while let Some(Ok(lexer::Token::ArgKey(_))) = iter.peek() {
402                // NOTE: ok, we consume an argument key-value pair
403
404                // (07)     consume ArgKey
405                let arg_name = match iter.next() {
406                    Some(token_or_err) => {
407                        let token = token_or_err?;
408                        match token {
409                            lexer::Token::EndArgs(_) => {
410                                // NOTE: end of arguments? Ok.
411                                break;
412                            },
413                            lexer::Token::ArgKey(range) => {
414                                &self.source_code[range]
415                            }
416                            lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
417                            _ => return Self::unexpected_token(&token, "end of arguments or the next argument key"),
418                        }
419                    },
420                    None => return Self::unexpected_eof(),
421                }.to_owned();
422
423                // (08)     parse_argument_value
424                let arg_value = self.parse_argument_value(iter)?;
425                func.args.insert(arg_name, arg_value);
426            }
427
428            // (09)   consume EndArgs
429            match iter.next() {
430                Some(tok_or_err) => {
431                    let token = tok_or_err?;
432                    match token {
433                        lexer::Token::EndArgs(_) => {
434                            // NOTE: expected token, yay!
435                        },
436                        lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
437                        _ => return Self::unexpected_token(&token, "end of arguments"),
438                    }
439                },
440                None => return Self::unexpected_eof(),
441            }
442
443            // (10)   optionally consume Whitespace
444            if let Some(Ok(lexer::Token::Whitespace(_, _))) = iter.peek() {
445                match iter.next() {
446                    Some(tok_or_err) => {
447                        let token = tok_or_err?;
448                        match token {
449                            lexer::Token::Whitespace(_, whitespace) => {
450                                func.args.insert("=whitespace".to_owned(), vec![tree::DocumentElement::Text(format!("{whitespace}"))]);
451                            },
452                            lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
453                            _ => return Self::unexpected_token(&token, "some whitespace"),
454                        }
455                    },
456                    None => return Self::unexpected_eof(),
457                }
458            }
459        }
460
461        // (11) if BeginContent
462        let mut found_content = false;
463        if let Some(Ok(lexer::Token::BeginContent(_))) = iter.peek() {
464            found_content = true;
465        }
466
467        if found_content {
468            // (12)   parse_content
469            func.content = self.parse_content(iter)?;
470        }
471
472        // (13) consume EndFunction
473        match iter.next() {
474            Some(tok_or_err) => {
475                let token = tok_or_err?;
476                match token {
477                    lexer::Token::EndFunction(_) => {
478                        // NOTE: expected token, yay!
479                    },
480                    lexer::Token::EndOfFile(_) => return Self::unexpected_eof(),
481                    _ => return Self::unexpected_token(&token, "end of function"),
482                }
483            },
484            None => return Self::unexpected_eof(),
485        }
486
487        Ok(tree::DocumentElement::Function(func))
488    }
489
490    /// Consumes the tokens provided by the `LexingIterator` argument
491    pub fn consume_iter(&mut self, iter: lexer::LexingIterator) -> Result<(), errors::Error> {
492        let mut peekable_iter = iter.peekable();
493
494        // admissible tokens
495        enum NextToken {
496            BeginFunction,
497            BeginContent,
498            BeginRaw,
499            Text,
500            EndOfFile,
501            Unexpected,
502        }
503
504        loop {
505            let mut next_token = NextToken::Unexpected;
506
507            if let Some(token_or_err) = peekable_iter.peek() {
508                next_token = match token_or_err {
509                    Ok(lexer::Token::BeginFunction(_)) => NextToken::BeginFunction,
510                    Ok(lexer::Token::BeginContent(_)) => NextToken::BeginContent,
511                    Ok(lexer::Token::BeginRaw(_)) => NextToken::BeginRaw,
512                    Ok(lexer::Token::Text(_)) => NextToken::Text,
513                    Ok(lexer::Token::EndOfFile(_)) => NextToken::EndOfFile,
514                    _ => NextToken::Unexpected,
515                }
516            }
517
518            match next_token {
519                NextToken::BeginFunction => {
520                    let func = self.parse_function(&mut peekable_iter)?;
521                    self.root.content.push(func);
522                },
523                NextToken::BeginContent => {
524                    let mut content = self.parse_content(&mut peekable_iter)?;
525                    self.root.content.append(&mut content);
526                },
527                NextToken::BeginRaw => {
528                    let text = self.parse_raw(&mut peekable_iter)?;
529                    self.root.content.push(text);
530                },
531                NextToken::Text => {
532                    if let Some(Ok(lexer::Token::Text(range))) = peekable_iter.next() {
533                        let text = &self.source_code[range];
534                        self.root.content.push(tree::DocumentElement::Text(text.to_owned()));
535                    }
536                },
537                NextToken::EndOfFile => {
538                    // Already done? How sad.
539                    break;
540                },
541                NextToken::Unexpected => {
542                    // protocol violation
543                    match peekable_iter.next() {
544                        Some(Ok(tok)) => return Self::unexpected_token(&tok, &format!("unexpected token {:?} while parsing document", peekable_iter.peek())),
545                        Some(Err(err)) => Err(err)?,
546                        None => return Self::unexpected_token(&lexer::Token::EndOfFile(0), "unexpected end of lexer tokens iterator"),
547                    }
548                },
549            }
550        }
551
552        Ok(())
553    }
554
    /// Declares the end of the text document.
    /// Currently a no-op kept for API symmetry with the lexing/parsing
    /// life cycle; always returns `Ok(())`.
    pub fn finalize(&mut self) -> Result<(), errors::Error> {
        Ok(())
    }
559
    /// Returns the Abstract Syntax Tree to be processed further.
    /// Consumes the parser and wraps the root "document" function
    /// in a `tree::DocumentTree`.
    pub fn tree(self) -> tree::DocumentTree {
        tree::DocumentTree(tree::DocumentElement::Function(self.root))
    }
564}
565
#[cfg(test)]
mod tests {
    use super::*;
    // `use path;` is not a valid import on the 2018+ editions; import the
    // std module explicitly
    use std::path;

    /// Parses a single function with one argument and a content block and
    /// checks the resulting tree: a root "document" call (carrying the
    /// filepath) wrapping the parsed function.
    #[test]
    fn parse_function_with_argument_and_content() -> Result<(), errors::Error> {
        let input = "{e_lement[a_ttr=v_alue] c_ontent}";
        let lex = lexer::Lexer::new(input);
        let mut par = Parser::new(path::Path::new("example"), input);
        par.consume_iter(lex.iter())?;
        let tree = par.tree();

        match tree.0 {
            tree::DocumentElement::Function(doc) => {
                assert_eq!(doc.call, "document");
                assert_eq!(doc.args["filepath"], vec![tree::DocumentElement::Text("example".to_string())]);
                match &doc.content[0] {
                    tree::DocumentElement::Function(elem) => {
                        assert_eq!(elem.call, "e_lement");
                        assert_eq!(elem.args["a_ttr"], vec![tree::DocumentElement::Text("v_alue".to_string())]);
                        assert_eq!(elem.content, vec![tree::DocumentElement::Text("c_ontent".to_string())]);
                    },
                    _ => panic!("expected a function element as first child"),
                }
            },
            tree::DocumentElement::Text(_) => panic!("expected the document function at the root"),
        }

        Ok(())
    }
}
597}