// cdoc_parser/raw/parser.rs

1use crate::raw::{RawDocument, Reference};
2use cowstr::CowStr;
3use pest::iterators::Pair;
4use pest::Parser;
5use pest_derive::Parser;
6
7#[derive(Parser)]
8#[grammar = "grammars/raw_doc.pest"]
9pub struct RawDocParser;
10
11use crate::code_ast::parse_code_string;
12use crate::common::Span;
13use crate::raw::{Element, ElementInfo, Parameter, Special, Value};
14use pest::iterators::Pairs;
15use thiserror::Error;
16
17#[derive(Error, Debug)]
18pub enum ParserError {
19    #[error("code cell parsing error")]
20    CodeError(#[from] Box<pest::error::Error<crate::code_ast::Rule>>),
21    #[error("document parsing error")]
22    DocError(#[from] Box<pest::error::Error<Rule>>),
23}
24
25impl RawDocument {
26    fn parse_doc(&mut self, mut pairs: Pairs<Rule>) -> Result<(), ParserError> {
27        let mut elems = pairs.next().expect("no root item").into_inner();
28
29        if let Some(p) = elems.next() {
30            match p.as_rule() {
31                Rule::meta => self.parse_meta(p),
32                _ => {
33                    let el = self.parse_element(p);
34                    self.src.push(el?)
35                }
36            }
37        }
38
39        let elems = self.parse_elements(elems)?;
40        self.src.extend(elems);
41
42        Ok(())
43    }
44
45    fn parse_elements(&mut self, pairs: Pairs<Rule>) -> Result<Vec<ElementInfo>, ParserError> {
46        pairs.map(|p| self.parse_element(p.clone())).collect()
47    }
48
49    fn parse_element(&mut self, pair: Pair<Rule>) -> Result<ElementInfo, ParserError> {
50        let span = Span::from(pair.as_span());
51
52        let element = match pair.as_rule() {
53            Rule::command => self.parse_command(pair)?,
54            Rule::math_block => self.parse_math_block(pair),
55            Rule::code_def => self.parse_code(pair)?,
56            Rule::verbatim => self.parse_verbatim(pair),
57            Rule::src | Rule::string | Rule::body => self.parse_src(pair),
58            _ => unreachable!(),
59        };
60
61        Ok(ElementInfo { element, span })
62    }
63
64    fn parse_src(&mut self, pair: Pair<Rule>) -> Element {
65        let value = pair.as_str();
66        Element::Markdown(value.into())
67    }
68
69    fn parse_command(&mut self, pair: Pair<Rule>) -> Result<Element, ParserError> {
70        let mut inner = pair.into_inner();
71        let name = inner
72            .next()
73            .expect("empty command")
74            .into_inner()
75            .next()
76            .unwrap()
77            .as_span();
78        let name = self.cowstr_from_span(name);
79
80        let mut parameters = vec![];
81        let mut body = None;
82        let mut label = None;
83
84        for elem in inner {
85            match elem.as_rule() {
86                Rule::parameters => parameters = self.parse_parameters(elem.into_inner())?,
87                Rule::body_def => body = Some(self.parse_elements(elem.into_inner())?),
88                Rule::label => {
89                    label = Some(self.cowstr_from_span(elem.into_inner().next().unwrap().as_span()))
90                }
91                _ => unreachable!(),
92            }
93        }
94
95        if let Some(label) = label.clone() {
96            self.references
97                .insert(label, Reference::Command(name.clone(), parameters.clone()));
98        }
99
100        Ok(Element::Special(
101            label,
102            Special::Command {
103                function: name,
104                parameters,
105                body,
106            },
107        ))
108    }
109
110    fn parse_parameters(&mut self, pairs: Pairs<Rule>) -> Result<Vec<Parameter>, ParserError> {
111        pairs
112            .into_iter()
113            .map(|elem| {
114                if let Rule::param = elem.as_rule() {
115                    self.parse_param(elem)
116                } else {
117                    unreachable!()
118                }
119            })
120            .collect()
121    }
122
123    fn parse_param(&mut self, pair: Pair<Rule>) -> Result<Parameter, ParserError> {
124        let span = Span::from(pair.as_span());
125        let mut pairs = pair.into_inner();
126        let first = pairs.next().expect("empty param");
127
128        Ok(if let Rule::key = first.as_rule() {
129            let value = pairs.next().expect("no value");
130            Parameter::with_key(first.as_str(), self.parse_value(value)?, span)
131        } else {
132            Parameter::with_value(self.parse_value(first)?, span)
133        })
134    }
135
136    fn parse_value(&mut self, pair: Pair<Rule>) -> Result<Value, ParserError> {
137        Ok(match pair.as_rule() {
138            Rule::basic_val | Rule::string => Value::String(pair.as_str().into()),
139            Rule::md_val => Value::Content(self.parse_elements(pair.into_inner())?),
140            Rule::flag => Value::Flag(pair.as_str().into()),
141            _ => unreachable!(),
142        })
143    }
144
145    fn parse_math_block(&mut self, pair: Pair<Rule>) -> Element {
146        let (lvl, src, label) = self.block_parser(pair);
147
148        let src = self.parse_math(src);
149
150        if let Some(label) = label.clone() {
151            self.references.insert(label, Reference::Math(src.clone()));
152        }
153
154        Element::Special(
155            label,
156            Special::Math {
157                inner: src,
158                is_block: lvl.len() != 1,
159            },
160        )
161    }
162
163    fn parse_code_attributes(&mut self, pairs: Pairs<Rule>) -> Vec<CowStr> {
164        pairs
165            .into_iter()
166            .map(|elem| {
167                if let Rule::code_param = elem.as_rule() {
168                    self.parse_code_attribute(elem)
169                } else {
170                    unreachable!()
171                }
172            })
173            .collect()
174    }
175
176    fn parse_code_attribute(&mut self, pair: Pair<Rule>) -> CowStr {
177        let mut pairs = pair.into_inner();
178        let first = pairs.next().expect("empty param");
179
180        if let Rule::key = first.as_rule() {
181            let value = pairs.next().expect("no value");
182            // CodeAttr {
183            //     key: Some(first.as_str().to_string()),
184            //     value: value.as_str().to_string(),
185            // }
186            self.cowstr_from_span(value.as_span())
187        } else {
188            // CodeAttr {
189            //     key: None,
190            //     value: first.as_str().to_string(),
191            // }
192            self.cowstr_from_span(first.as_span())
193        }
194    }
195
196    fn parse_code(&mut self, pair: Pair<Rule>) -> Result<Element, ParserError> {
197        let mut inner = pair.into_inner();
198        let lvl = inner.next().expect("missing code_lvl").as_str().to_string();
199
200        let maybe_param = inner.next().expect("missing code_src");
201        let (src_pair, params) = if let Rule::code_params = maybe_param.as_rule() {
202            let attributes = self.parse_code_attributes(maybe_param.into_inner());
203            (inner.next().expect("missing code_src"), Some(attributes))
204        } else {
205            (maybe_param, None)
206        };
207
208        let src_span = src_pair.as_span();
209        let src = self.cowstr_from_span(src_span);
210
211        let id = inner.next().map(|val| self.cowstr_from_span(val.as_span()));
212
213        if let Some(label) = id.clone() {
214            self.references.insert(label, Reference::Code(src.clone()));
215        }
216
217        Ok(Element::Special(
218            id,
219            if lvl.len() == 1 {
220                Special::CodeInline { inner: src }
221            } else {
222                let content = parse_code_string(src)?;
223
224                Special::CodeBlock {
225                    lvl: lvl.len(),
226                    inner: content,
227                    attributes: params.unwrap_or_default(),
228                }
229            },
230        ))
231    }
232
233    fn parse_verbatim(&mut self, pair: Pair<Rule>) -> Element {
234        let value = pair.as_str();
235        Element::Special(
236            None,
237            Special::Verbatim {
238                inner: value.into(),
239            },
240        )
241    }
242
243    fn parse_meta(&mut self, pair: Pair<Rule>) {
244        self.meta = Some(self.cowstr_from_span(pair.as_span()));
245    }
246
247    fn cowstr_from_span(&self, span: pest::Span) -> CowStr {
248        CowStr::from(&self.input[span.start()..span.end()])
249    }
250
251    fn parse_math(&self, pair: Pair<Rule>) -> CowStr {
252        match pair.as_rule() {
253            Rule::math_chars => self.cowstr_from_span(pair.as_span()),
254            Rule::math_block_curly => cowstr::format!(
255                "{{{}}}",
256                pair.into_inner()
257                    .map(|p| self.parse_math(p))
258                    .collect::<CowStr>()
259            ),
260            // | Rule::math_block_bracket
261            // | Rule::math_block_paren
262            Rule::math_body => pair
263                .into_inner()
264                .map(|p| self.parse_math(p))
265                .collect::<CowStr>(),
266            _ => unreachable!(),
267        }
268    }
269
270    fn block_parser<'a>(&'a self, pair: Pair<'a, Rule>) -> (CowStr, Pair<Rule>, Option<CowStr>) {
271        let mut inner = pair.into_inner();
272        let lvl = self.cowstr_from_span(inner.next().expect("missing code_lvl").as_span());
273        let src = inner.next().expect("missing code_src");
274        let id = inner.next().map(|val| self.cowstr_from_span(val.as_span()));
275        (lvl, src, id)
276    }
277}
278
279pub fn parse_to_doc(input: &str) -> Result<RawDocument, ParserError> {
280    let mut doc = RawDocument::new(input);
281    doc.parse_doc(RawDocParser::parse(Rule::top, input).map_err(Box::new)?)?;
282    Ok(doc)
283}
284
285#[cfg(test)]
286mod tests {
287    use crate::code_ast::types::{CodeContent, CodeElem};
288    use crate::common::Span;
289    use crate::raw::{
290        parse_to_doc, Element, ElementInfo, Parameter, RawDocument, Reference, Special, Value,
291    };
292    use cowstr::CowStr;
293    use std::collections::HashMap;
294
295    macro_rules! doc_tests {
296        ($prefix:ident $($name:ident: $value:expr,)*) => {
297        $(
298            paste::item!{
299            #[test]
300            fn [<$prefix _ $name>]() {
301                let (input, expected) = $value;
302                let doc = RawDocument { input: CowStr::from(input), src: expected, meta: None, references: Default::default() };
303                compare(doc, input);
304            }
305            }
306        )*
307        }
308    }
309
310    #[test]
311    fn test_code() {
312        let input = r#"```
313code
314```"#;
315        let expected = RawDocument {
316            src: vec![ElementInfo {
317                element: Element::Special(
318                    None,
319                    Special::CodeBlock {
320                        lvl: 3,
321                        inner: CodeContent {
322                            blocks: vec![CodeElem::Src("\ncode\n\n".into())],
323                            meta: Default::default(),
324                            hash: 3750657748055546767,
325                        },
326                        attributes: vec![],
327                    },
328                ),
329                span: Span::new(0, 12),
330            }],
331            input: CowStr::from(input),
332            meta: None,
333            references: Default::default(),
334        };
335
336        compare(expected, input);
337    }
338
339    #[test]
340    fn test_code_param() {
341        let input = r#"```lang, val
342code
343```"#;
344        let expected = RawDocument {
345            src: vec![ElementInfo {
346                element: Element::Special(
347                    None,
348                    Special::CodeBlock {
349                        lvl: 3,
350                        inner: CodeContent {
351                            blocks: vec![CodeElem::Src("code\n\n".into())],
352                            meta: Default::default(),
353                            hash: 15492099155864206242,
354                        },
355                        attributes: vec!["lang".into(), "val".into()],
356                    },
357                ),
358                span: Span::new(0, 21),
359            }],
360            input: CowStr::from(input),
361            meta: None,
362            references: Default::default(),
363        };
364
365        compare(expected, input);
366    }
367
368    #[test]
369    fn test_math() {
370        let input = "$inline$";
371        let expected = RawDocument {
372            src: vec![ElementInfo {
373                element: Element::Special(
374                    None,
375                    Special::Math {
376                        is_block: false,
377                        inner: "inline".into(),
378                    },
379                ),
380                span: Span::new(0, 8),
381            }],
382            input: CowStr::from(input),
383            meta: None,
384            references: Default::default(),
385        };
386
387        compare(expected, input);
388    }
389
390    #[test]
391    fn test_verbatim() {
392        let input = "\\{verbatim\\}";
393        let expected = RawDocument {
394            src: vec![ElementInfo {
395                element: Element::Special(
396                    None,
397                    Special::Verbatim {
398                        inner: "verbatim".into(),
399                    },
400                ),
401                span: Span::new(2, 10),
402            }],
403            input: CowStr::from(input),
404            meta: None,
405            references: Default::default(),
406        };
407
408        compare(expected, input);
409    }
410
411    #[test]
412    fn test_src() {
413        let input = "just some stuff {} xx--^*# fsdf";
414        let expected = RawDocument {
415            src: vec![ElementInfo {
416                element: Element::Markdown(input.into()),
417                span: Span::new(0, 31),
418            }],
419            input: CowStr::from(input),
420            meta: None,
421            references: Default::default(),
422        };
423
424        compare(expected, input);
425    }
426
427    #[test]
428    fn test_refs() {
429        let input = "#call|id";
430        let expected = RawDocument {
431            src: vec![ElementInfo {
432                element: Element::Special(
433                    Some("id".into()),
434                    Special::Command {
435                        function: "call".into(),
436                        parameters: vec![],
437                        body: None,
438                    },
439                ),
440                span: Span::new(0, 8),
441            }],
442            input: CowStr::from(input),
443            meta: None,
444            references: HashMap::from([("id".into(), Reference::Command("call".into(), vec![]))]),
445        };
446
447        compare(expected, input);
448    }
449
450    const CMD_WITH_PARAMS_NO_BODY: &str =
451        "#func(basic, \"quoted\", {content}, key=basic, key=\"quoted\", key={content}, :flag)";
452
453    doc_tests! {
454        command
455        no_params_no_body: ("#func",  vec![
456            ElementInfo {
457                element: Element::Special(None, Special::Command {
458                    function: "func".into(),
459                    parameters: vec![],
460                    body: None,
461                }),
462                span: Span::new(0, 5),
463            }
464        ]),
465        with_params_no_body: (CMD_WITH_PARAMS_NO_BODY,  vec![
466            ElementInfo {
467                element: Element::Special(None, Special::Command {
468                    function: "func".into(),
469                    parameters: vec![
470                        Parameter { key: None, value: Value::String("basic".into()), span: Span::new(6, 11) },
471                        Parameter { key: None, value: Value::String("quoted".into()), span: Span::new(13, 21) },
472                        Parameter { key: None, value: Value::Content(vec![
473                            ElementInfo {
474                                element: Element::Markdown("content".into()),
475                                span: Span::new(24, 31)
476                            }
477                        ]), span: Span::new(23, 32) },
478                        Parameter { key: Some("key".into()), value: Value::String("basic".into()), span: Span::new(34, 43) },
479                        Parameter { key: Some("key".into()), value: Value::String("quoted".into()), span: Span::new(45, 57) },
480                        Parameter { key: Some("key".into()), value: Value::Content(vec![
481                            ElementInfo {
482                                element: Element::Markdown("content".into()),
483                                span: Span::new(64, 71)
484                            }
485                        ]), span: Span::new( 59, 72) },
486                        Parameter { key: None, value: Value::Flag("flag".into()), span: Span::new(74, 79) }
487                    ],
488                    body: None,
489                }),
490                span: Span::new(0, 80),
491            }
492        ]),
493        with_params_with_body: ("#func(c){x}", vec![
494            ElementInfo {
495                element: Element::Special(None, Special::Command {
496                    function: "func".into(),
497                    parameters: vec![
498                        Parameter { key: None, value: Value::String("c".into()), span: Span::new(6, 7)}
499                    ],
500                    body: Some(vec![
501                        ElementInfo {
502                            element: Element::Markdown("x".into()),
503                            span: Span::new(9, 10)
504                        }
505                    ])
506                }),
507                span: Span::new(0, 11),
508            }
509        ]),
510        no_params_with_body: ("#func{x}", vec![
511            ElementInfo {
512                element: Element::Special(None, Special::Command {
513                    function: "func".into(),
514                    parameters: vec![],
515                    body: Some(vec![
516                        ElementInfo {
517                            element: Element::Markdown("x".into()),
518                            span: Span::new(6, 7)
519                        }
520                    ])
521                }),
522                span: Span::new(0, 8),
523            }
524        ]),
525        body_nested: ("#func1{#func2}", vec![
526            ElementInfo {
527                element: Element::Special(None, Special::Command {
528                    function: "func1".into(),
529                    parameters: vec![],
530                    body: Some(vec![ElementInfo {
531                            element: Element::Special(None, Special::Command{
532                                function: "func2".into(),
533                                parameters: vec![],
534                                body: None,
535                            }),
536                            span: Span::new(7, 13),
537                        }
538                    ])
539                }),
540                span: Span::new(0, 14),
541            }
542
543        ]),
544    }
545
546    fn compare(expected: RawDocument, input: &str) {
547        let doc = parse_to_doc(input).expect("Parse error");
548
549        assert_eq!(expected, doc);
550    }
551}