1use peg::{error::ParseError, str::LineCol};
4
5pub mod ast;
6
7trait IsStartOfLine {
8 fn is_start_of_line(&self, index: usize) -> peg::RuleResult<()>;
9}
10
11impl IsStartOfLine for str {
12 fn is_start_of_line(&self, index: usize) -> peg::RuleResult<()> {
13 if index == 0 {
14 return peg::RuleResult::Matched(index, ());
15 }
16 match self.as_bytes().get(index - 1) {
17 Some(b'\r') | Some(b'\n') => peg::RuleResult::Matched(index, ()),
18 _ => peg::RuleResult::Failed,
19 }
20 }
21}
22
23peg::parser! {
24 grammar parser() for str {
27 use crate::ast::*;
28
29 pub(crate) rule document() -> Document =
30 content:(token()*) { Document { content } }
31
32 rule token() -> Token =
33 t:special_macro() { Token::SpecialMacro(t) }
34 / t:macro_() { Token::Macro(t) }
35 / t:full_comment() { Token::FullComment(t) }
36 / t:group() { Token::Group(t) }
37 / t:dollar_inline_math() { Token::DollarInlineMath(t) }
38 / t:alignment_tab() { Token::AlignmentTab(t) }
39 / t:par_break() { Token::ParBreak(t) }
40 / t:macro_parameter() { Token::MacroParameter(t) }
41 / t:ignore() { Token::Ignore(t) }
42 / t:number() { Token::Number(t) }
43 / t:whitespace() { Token::Whitespace(t) }
44 / t:punctuation() { Token::Punctuation(t) }
45 / t:char_tokens() { Token::CharTokens(t) }
46 / t:begin_group() { Token::BeginGroup(t) }
47 / t:end_group() { Token::EndGroup(t) }
48 / t:math_shift() { Token::MathShift(t) }
49
50 rule par_break() -> ParBreak =
51 pos:pos() (space()* new_line())*<2,>
52 (space()* !comment_start())?
56 { ParBreak { pos } }
57
58 rule math_token() -> MathToken =
59 t:special_macro() { MathToken::SpecialMacro(t) }
60 / t:macro_() { MathToken::Macro(t) }
61 / t:full_comment() { MathToken::FullComment(t) }
62 / whitespace()* t:math_group() whitespace()* { MathToken::MathGroup(t) }
63 / whitespace()* t:alignment_tab() whitespace()* { MathToken::AlignmentTab(t) }
64 / whitespace()* t:macro_parameter() whitespace()* { MathToken::MacroParameter(t) }
65 / whitespace()* t:superscript() whitespace()* { MathToken::Superscript(t) }
66 / whitespace()* t:subscript() whitespace()* { MathToken::Subscript(t) }
67 / t:ignore() { MathToken::Ignore(t) }
68 / t:whitespace() { MathToken::Whitespace(t) }
69 / t:number() { MathToken::Number(t) }
70 / t:any_char() { MathToken::AnyChar(t) }
71
72 rule char_tokens() -> CharTokens =
73 pos:pos() content:$(char_token()+) {CharTokens { pos, content: content.into() }}
74
75 rule char_token() -> CharToken =
76 pos:pos()
77 !(
78 escape()
79 / comment_start()
80 / begin_group()
81 / end_group()
82 / math_shift()
83 / alignment_tab()
84 / new_line()
85 / macro_parameter()
86 / ignore()
87 / space()
88 / punctuation()
89 )
90 any_char() { CharToken { pos } }
91
92 rule whitespace() -> Whitespace =
93 pos:pos() (
94 new_line() space()*
95 / space()+ new_line() !comment_start() space()* !new_line()
96 / space()+
97 ) { Whitespace { pos } }
98
99 rule number() -> Number =
100 pos:pos() content:$(
101 digit()+ ("." digit()*)?
102 / "." digit()+
103 ) { Number { pos, content: content.into() } }
104
105 rule special_macro() -> SpecialMacro =
106 v:verb() { SpecialMacro::Verb(v) }
107 / v:verbatim_environment() { SpecialMacro::VerbatimEnvironment(v) }
108 / v:display_math() { SpecialMacro::DisplayMath(v) }
109 / v:parenthesized_inline_math() { SpecialMacro::ParenthesizedInlineMath(v) }
110 / v:math_environment() { SpecialMacro::MathEnvironment(v) }
111 / v:environment() { SpecialMacro::Environment(v) }
112
113 rule verb() -> Verb =
114 escape:escape()
115 env:$("verb*" / "verb")
116 delimiter:$([_])
117 content:$(
118 (
119 ch:$([_])
120 {?
121 if ch == delimiter {
122 Err("")
123 } else {
124 Ok(())
125 }
126 }
127 )*
128 )
129 [_] {
130 Verb {
131 escape,
132 env: env.into(),
133 delimiter: delimiter.chars().next().unwrap(),
134 content: content.into(),
135 }
136 }
137
138
139 rule verbatim_environment() -> VerbatimEnvironment =
140 begin:begin_environment()
141 begin_group()
142 name:verbatim_environment_name()
143 end_group()
144 body:$(
145 (
146 !(
147 end_environment()
148 begin_group()
149 end_name:verbatim_environment_name()
150 end_group()
151 {?
152 if name.kind == end_name.kind {
153 Ok(())
154 } else {
155 Err("")
156 }
157 }
158 )
159 [_]
160 )*
161 )
162 end:end_environment()
163 begin_group()
164 verbatim_environment_name()
165 end_group()
166 {
167 VerbatimEnvironment {
168 begin,
169 name,
170 body: body.into(),
171 end,
172 }
173 }
174
175 rule verbatim_environment_name() -> VerbatimEnvironmentName =
176 pos:pos() kind:(
178 "verbatim*" { VerbatimEnvironmentNameKind::VerbatimStar }
179 / "verbatim" { VerbatimEnvironmentNameKind::Verbatim }
180 / "filecontents*" { VerbatimEnvironmentNameKind::FileContentsStar }
181 / "filecontents" { VerbatimEnvironmentNameKind::FileContents }
182 / "comment" { VerbatimEnvironmentNameKind::Comment }
184 / "lstlisting" { VerbatimEnvironmentNameKind::ListListing }
186 ) { VerbatimEnvironmentName { pos, kind } }
187
188 rule display_math() -> DisplayMath =
189 pos:pos()
191 begin_display_math()
192 content:(!end_display_math() t:math_token() { t })*
193 end_display_math() { DisplayMath { pos, content } }
194 / pos:pos()
196 math_shift()
197 math_shift()
198 content:(!(math_shift() math_shift()) t:math_token() { t })*
199 math_shift()
200 math_shift() { DisplayMath { pos, content } }
201
202 rule parenthesized_inline_math() -> ParenthesizedInlineMath =
203 begin:begin_inline_math()
205 content:(!end_inline_math() t:math_token() { t })*
206 end:end_inline_math() { ParenthesizedInlineMath { begin, content, end } }
207
208 rule dollar_inline_math() -> DollarInlineMath =
209 begin:math_shift()
210 content:(!math_shift() t:math_token() { t })+
211 end:math_shift() { DollarInlineMath { begin, content, end } }
212
213 rule macro_() -> Macro =
214 escape:escape() name:macro_name() { Macro { escape, name } }
215
216 rule macro_name() -> MacroName =
217 pos:pos() content:$(letter()+ / [_]) { MacroName { pos, content: content.into() } }
218
219 rule group() -> Group =
220 begin:begin_group()
221 tokens:(!end_group() t:token() { t })*
222 end:end_group() { Group { begin, tokens, end } }
223
224 rule environment() -> Environment =
225 begin:begin_environment()
226 begin_group()
227 name:char_tokens()
228 end_group()
229 body:(
230 !(
231 end_environment()
232 begin_group()
233 end_name:char_tokens()
234 end_group()
235 {?
236 if name.content == end_name.content {
237 Ok(())
238 } else {
239 Err("")
240 }
241 }
242 )
243 t:token() { t }
244 )*
245 end:end_environment()
246 begin_group()
247 char_tokens()
248 end_group() {
249 Environment {
250 begin,
251 name,
252 body,
253 end,
254 }
255 }
256
257 rule math_environment() -> MathEnvironment =
258 begin:begin_environment()
259 begin_group()
260 name:math_environment_name()
261 end_group()
262 environment_comment:same_line_comment()?
263 body:(
264 !(
265 end_environment()
266 begin_group()
267 end_name:math_environment_name()
268 end_group()
269 {?
270 if name.kind == end_name.kind {
271 Ok(())
272 } else {
273 Err("")
274 }
275 }
276 )
277 t:math_token() { t }
278 )*
279 end:end_environment()
280 begin_group()
281 math_environment_name()
282 end_group() {
283 MathEnvironment {
284 begin,
285 name,
286 environment_comment,
287 body,
288 end,
289 }
290 }
291
292 rule math_group() -> MathGroup =
294 begin:begin_group()
295 tokens:(!end_group() t:math_token() { t })*
296 end:end_group() { MathGroup { begin, tokens, end } }
297
298 rule begin_display_math() -> BeginDisplayMath =
299 escape:escape() "[" { BeginDisplayMath { escape } }
300
301 rule end_display_math() -> EndDisplayMath =
302 escape:escape() "]" { EndDisplayMath { escape } }
303
304 rule begin_inline_math() -> BeginInlineMath =
305 escape:escape() "(" { BeginInlineMath { escape } }
306
307 rule end_inline_math() -> EndInlineMath =
308 escape:escape() ")" { EndInlineMath { escape } }
309
310 rule begin_environment() -> BeginEnvironment =
311 escape:escape() "begin" { BeginEnvironment { escape } }
312
313 rule end_environment() -> EndEnvironment =
314 escape:escape() "end" { EndEnvironment { escape } }
315
316 rule math_environment_name() -> MathEnvironmentName =
317 pos:pos() kind:(
318 "equation*" { MathEnvironmentNameKind::EquationStar }
319 / "equation" { MathEnvironmentNameKind::Equation }
320 / "align*" { MathEnvironmentNameKind::AlignStar }
321 / "align" { MathEnvironmentNameKind::Align }
322 / "alignat*" { MathEnvironmentNameKind::AlignAtStar }
323 / "alignat" { MathEnvironmentNameKind::AlignAt }
324 / "gather*" { MathEnvironmentNameKind::GatherStar }
325 / "gather" { MathEnvironmentNameKind::Gather }
326 / "multline*" { MathEnvironmentNameKind::MultiLineStar }
327 / "multline" { MathEnvironmentNameKind::MultiLine }
328 / "flalign*" { MathEnvironmentNameKind::FlAlignStar }
329 / "flalign" { MathEnvironmentNameKind::FlAlign }
330 / "split" { MathEnvironmentNameKind::Split }
331 / "math" { MathEnvironmentNameKind::Math }
332 / "displaymath" { MathEnvironmentNameKind::DisplayMath }
333 ) { MathEnvironmentName { pos, kind } }
334
335 rule escape() -> Escape =
337 pos:pos() "\\" { Escape { pos } }
338
339 rule begin_group() -> BeginGroup =
341 pos:pos() "{" { BeginGroup { pos } }
342
343 rule end_group() -> EndGroup =
345 pos:pos() "}" { EndGroup { pos } }
346
347 rule math_shift() -> MathShift =
349 pos:pos() "$" { MathShift { pos } }
350
351 rule alignment_tab() -> AlignmentTab =
353 pos:pos() "&" { AlignmentTab { pos } }
354
355 rule new_line() -> NewLine =
357 pos:pos() ("\r\n" / ['\r' | '\n']) { NewLine { pos } }
358
359 rule macro_parameter() -> MacroParameter =
361 pos:pos() "#" { MacroParameter { pos } }
362
363 rule superscript() -> Superscript =
365 pos:pos() "^" { Superscript { pos } }
366
367 rule subscript() -> Subscript =
369 pos:pos() "_" { Subscript { pos } }
370
371 rule ignore() -> Ignore =
373 pos:pos() "\0" { Ignore { pos } }
374
375 rule space() -> Space =
377 pos:pos() [' ' | '\t']+ { Space { pos } }
378
379 rule letter() -> AsciiAlphabetic =
381 pos:pos() ['a'..='z' | 'A'..='Z'] { AsciiAlphabetic { pos } }
382
383 rule digit() -> AsciiDigit =
385 pos:pos() ['0'..='9'] { AsciiDigit { pos } }
386
387 rule punctuation() -> Punctuation =
389 pos:pos() ch:$([
390 '.' | ',' | ';' | ':' | '-' | '*' | '/' | '(' | ')' | '!'
391 | '?' | '=' | '+' | '<' | '>' | '[' | ']'
392 ]) { Punctuation { pos, ch: ch.chars().next().unwrap() } }
393
394 rule comment_start() -> CommentStart =
396 pos:pos() "%" { CommentStart { pos } }
397
398 rule full_comment() -> FullComment =
403 c:own_line_comment() { FullComment::OwnLineComment(c) }
404 / c:same_line_comment() { FullComment::SameLineComment(c) }
405
406 rule own_line_comment() -> OwnLineComment =
408 pos:pos() (space()* new_line())? leading_space:leading_space() comment:comment() {
420 OwnLineComment {
421 pos,
422 leading_space,
423 comment,
424 }
425 }
426
427 rule same_line_comment() -> SameLineComment =
429 pos:pos() leading_spaces:space()* comment:comment() {
430 SameLineComment {
431 pos,
432 leading_spaces: !leading_spaces.is_empty(),
433 comment,
434 }
435 }
436
437 rule comment() -> Comment =
438 comment_start:comment_start()
443 content:$((!new_line() [_])*)
444 (
445 &par_break() / new_line() space()* !comment_start()
448 / new_line()
449 / ![_]
450 )
451 { Comment { comment_start, content: content.into() } }
452
453 rule leading_space() -> LeadingSpace =
455 pos:pos()
456 start_of_line()
457 content:$(space()*)
458 { LeadingSpace { pos, empty: content.is_empty() } }
459
460 rule start_of_line() =
461 ##is_start_of_line()
462
463 rule any_char() -> AnyChar =
464 pos:pos() ch:$([_]) { AnyChar { pos, ch: ch.chars().next().unwrap() } }
465
466 rule pos() -> Pos =
467 p:position!() { Pos::new(p) }
468 }
469}
470
/// Parses `input` with the grammar's `document` rule.
///
/// Returns the resulting [`ast::Document`] on success, or the
/// `ParseError<LineCol>` produced by the generated parser when `input` does
/// not match the grammar.
pub fn parse(input: &str) -> Result<ast::Document, ParseError<LineCol>> {
    parser::document(input)
}
474
#[cfg(test)]
mod test {
    use crate::parse;
    use serde::Serialize;
    use serde_json::{
        ser::{PrettyFormatter, Serializer},
        *,
    };
    use std::str;

    /// Parses `input` and checks that the serialized document equals `json`.
    ///
    /// The pretty-printed JSON of the parsed document is written to stdout
    /// first, so that it shows up in the output of a failing test.
    ///
    /// # Panics
    ///
    /// Panics if parsing or serialization fails, or if the JSON differs.
    fn test_parse(input: &str, json: serde_json::Value) {
        let document = parse(input).unwrap();
        let actual_json = serde_json::to_value(&document).unwrap();
        let mut text = Vec::new();
        document
            .serialize(&mut Serializer::with_formatter(
                &mut text,
                PrettyFormatter::with_indent(b" "),
            ))
            .unwrap();
        println!("{}", str::from_utf8(&text).unwrap());
        // `assert_eq!` prints both values on mismatch, unlike `assert!(a == b)`.
        assert_eq!(
            actual_json, json,
            "parsed document does not match the expected JSON for input {:?}",
            input
        );
    }

    /// A lone macro yields one `Macro` token with its escape and name positions.
    #[test]
    fn test_parse_macro() {
        test_parse(
            r#"\abc"#,
            json! {
                {
                    "content": [
                        {
                            "token_type": "Macro",
                            "escape": {
                                "pos": "@0"
                            },
                            "name": {
                                "pos": "@1",
                                "content": "abc"
                            }
                        }
                    ]
                }
            },
        );
    }

    /// Nested generic environments are parsed recursively into their bodies.
    #[test]
    fn test_parse_environment() {
        test_parse(
            r#"\begin{env}contents\begin{env2}contents2\end{env2}a\end{env}"#,
            json! {
                {
                    "content": [
                        {
                            "token_type": "SpecialMacro",
                            "special_macro_type": "Environment",
                            "begin": {
                                "escape": {
                                    "pos": "@0"
                                }
                            },
                            "name": {
                                "pos": "@7",
                                "content": "env"
                            },
                            "body": [
                                {
                                    "token_type": "CharTokens",
                                    "pos": "@11",
                                    "content": "contents"
                                },
                                {
                                    "token_type": "SpecialMacro",
                                    "special_macro_type": "Environment",
                                    "begin": {
                                        "escape": {
                                            "pos": "@19"
                                        }
                                    },
                                    "name": {
                                        "pos": "@26",
                                        "content": "env2"
                                    },
                                    "body": [
                                        {
                                            "token_type": "CharTokens",
                                            "pos": "@31",
                                            "content": "contents2"
                                        }
                                    ],
                                    "end": {
                                        "escape": {
                                            "pos": "@40"
                                        }
                                    }
                                },
                                {
                                    "token_type": "CharTokens",
                                    "pos": "@50",
                                    "content": "a"
                                }
                            ],
                            "end": {
                                "escape": {
                                    "pos": "@51"
                                }
                            }
                        }
                    ]
                }
            },
        );
    }
}