1pub mod error;
9mod lex;
10mod macros;
11mod primitives;
12mod state;
13pub mod storage;
14mod tables;
15
16use std::ops::Range;
17
18use macros::MacroContext;
19
20use crate::event::{Event, Grouping, ScriptPosition, ScriptType};
21
22use self::{state::ParserState, storage::Storage};
23
24pub(crate) use error::{ErrorKind, InnerResult, ParserError};
25
26#[derive(Debug)]
36pub struct Parser<'store> {
37 instruction_stack: Vec<Instruction<'store>>,
46
47 buffer: Vec<Instruction<'store>>,
52
53 macro_context: MacroContext<'store>,
55
56 storage: &'store bumpalo::Bump,
58
59 span_stack: SpanStack<'store>,
61}
62
63impl<'store> Parser<'store> {
64 pub fn new<'input>(input: &'input str, storage: &'store Storage) -> Self
66 where
67 'input: 'store,
68 {
69 let mut instruction_stack = Vec::with_capacity(32);
70 instruction_stack.push(Instruction::SubGroup {
71 content: input,
72 allowed_alignment_count: None,
73 });
74 let buffer = Vec::with_capacity(16);
75 Self {
76 instruction_stack,
77 buffer,
78 macro_context: MacroContext::new(),
79 storage: &storage.0,
80 span_stack: SpanStack::from_input(input),
81 }
82 }
83}
84
85impl<'store> Iterator for Parser<'store> {
86 type Item = Result<Event<'store>, ParserError>;
87
88 fn next(&mut self) -> Option<Self::Item> {
89 match self.instruction_stack.last_mut() {
90 Some(Instruction::Event(_)) => Some(Ok(self
91 .instruction_stack
92 .pop()
93 .and_then(|i| match i {
94 Instruction::Event(e) => Some(e),
95 _ => None,
96 })
97 .expect("there is something in the stack"))),
98 Some(Instruction::SubGroup { content, .. }) if content.trim_start().is_empty() => {
99 self.instruction_stack.pop();
100 self.next()
101 }
102 Some(Instruction::SubGroup {
103 content,
104 allowed_alignment_count,
105 ..
106 }) => {
107 let state = ParserState {
108 allowed_alignment_count: allowed_alignment_count.as_mut(),
109 ..Default::default()
110 };
111
112 let inner = InnerParser {
113 content,
114 buffer: &mut self.buffer,
115 state,
116 macro_context: &mut self.macro_context,
117 storage: self.storage,
118 span_stack: &mut self.span_stack,
119 };
120
121 let (desc, rest) = inner.parse_next();
122 *content = rest;
123
124 let script_event = match desc {
125 Err(e) => {
126 let content_str = *content;
127 return Some(Err(ParserError::new(
128 e,
129 content_str.as_ptr(),
130 &mut self.span_stack,
131 )));
132 }
133 Ok(Some((e, desc))) => {
134 if desc.subscript_start > desc.superscript_start {
135 let content = self.buffer.drain(desc.superscript_start..).rev();
136 let added_len = content.len();
137
138 self.instruction_stack.reserve(added_len);
139 let spare =
140 &mut self.instruction_stack.spare_capacity_mut()[..added_len];
141 let mut idx = desc.subscript_start - desc.superscript_start;
142
143 for e in content {
144 if idx == added_len {
145 idx = 0;
146 }
147 spare[idx].write(e);
148 idx += 1;
149 }
150
151 unsafe {
155 self.instruction_stack
156 .set_len(self.instruction_stack.len() + added_len)
157 };
158 } else {
159 self.instruction_stack
160 .extend(self.buffer.drain(desc.subscript_start..).rev());
161 }
162 Some(e)
163 }
164 Ok(None) => None,
165 };
166
167 self.instruction_stack.extend(self.buffer.drain(..).rev());
168 if let Some(e) = script_event {
169 self.instruction_stack.push(Instruction::Event(e));
170 }
171 self.next()
172 }
173 None => None,
174 }
175 }
176}
177
178#[derive(Debug)]
179struct InnerParser<'b, 'store> {
180 content: &'store str,
181 buffer: &'b mut Vec<Instruction<'store>>,
182 state: ParserState<'b>,
183 macro_context: &'b mut MacroContext<'store>,
184 storage: &'store bumpalo::Bump,
185 span_stack: &'b mut SpanStack<'store>,
186}
187
188impl<'b, 'store> InnerParser<'b, 'store> {
189 fn handle_argument(&mut self, argument: Argument<'store>) -> InnerResult<()> {
193 match argument {
194 Argument::Token(token) => {
195 self.state.handling_argument = true;
196 match token {
197 Token::ControlSequence(cs) => self.handle_primitive(cs)?,
198 Token::Character(c) => self.handle_char_token(c)?,
199 };
200 }
201 Argument::Group(group) => {
202 self.buffer.extend([
203 Instruction::Event(Event::Begin(Grouping::Normal)),
204 Instruction::SubGroup {
205 content: group,
206 allowed_alignment_count: None,
207 },
208 Instruction::Event(Event::End),
209 ]);
210 }
211 };
212 Ok(())
213 }
214
215 fn parse(&mut self) -> InnerResult<Option<(Event<'store>, ScriptDescriptor)>> {
224 let original_content = self.content.trim_start();
226 let token = match lex::token(&mut self.content) {
227 Ok(token) => token,
228 Err(ErrorKind::Token) => return Ok(None),
229 Err(e) => return Err(e),
230 };
231 match token {
232 Token::ControlSequence(cs) => {
233 if let Some(result) =
234 self.macro_context
235 .try_expand_in(cs, self.content, self.storage)
236 {
237 let (new_content, arguments_consumed_length) = result?;
238 let call_site_length = cs.len() + arguments_consumed_length + 1;
239 self.span_stack
240 .add(new_content, original_content, call_site_length);
241
242 self.content = new_content;
243 return self.parse();
244 }
245
246 self.handle_primitive(cs)?
247 }
248 Token::Character(c) => self.handle_char_token(c)?,
249 };
250
251 if self.state.skip_scripts {
253 return Ok(None);
254 }
255
256 if self.state.allow_script_modifiers {
257 if let Some(limits) = lex::limit_modifiers(&mut self.content) {
258 if limits {
259 self.state.script_position = ScriptPosition::AboveBelow;
260 } else {
261 self.state.script_position = ScriptPosition::Right;
262 }
263 }
264 }
265
266 self.content = self.content.trim_start();
267 let subscript_first = match self.content.chars().next() {
268 Some('^') => false,
269 Some('_') => true,
270 _ => return Ok(None),
271 };
272 self.content = &self.content[1..];
273
274 let first_script_start = self.buffer.len();
275 let arg = lex::argument(&mut self.content)?;
276 self.handle_argument(arg)?;
277 let second_script_start = self.buffer.len();
278 let next_char = self.content.chars().next();
279 if (next_char == Some('_') && !subscript_first)
280 || (next_char == Some('^') && subscript_first)
281 {
282 self.content = &self.content[1..];
283 let arg = lex::argument(&mut self.content)?;
284 self.handle_argument(arg)?;
285
286 match self.content.chars().next() {
287 Some('_') => return Err(ErrorKind::DoubleSubscript),
288 Some('^') => return Err(ErrorKind::DoubleSuperscript),
289 _ => {}
290 }
291 } else if next_char == Some('_') || next_char == Some('^') {
292 return Err(if subscript_first {
293 ErrorKind::DoubleSubscript
294 } else {
295 ErrorKind::DoubleSuperscript
296 });
297 }
298 let second_script_end = self.buffer.len();
299
300 Ok(Some(if second_script_start == second_script_end {
301 if subscript_first {
302 (
303 Event::Script {
304 ty: ScriptType::Subscript,
305 position: self.state.script_position,
306 },
307 ScriptDescriptor {
308 subscript_start: first_script_start,
309 superscript_start: second_script_start,
310 },
311 )
312 } else {
313 (
314 Event::Script {
315 ty: ScriptType::Superscript,
316 position: self.state.script_position,
317 },
318 ScriptDescriptor {
319 subscript_start: second_script_start,
320 superscript_start: first_script_start,
321 },
322 )
323 }
324 } else {
325 (
326 Event::Script {
327 ty: ScriptType::SubSuperscript,
328 position: self.state.script_position,
329 },
330 if subscript_first {
331 ScriptDescriptor {
332 subscript_start: first_script_start,
333 superscript_start: second_script_start,
334 }
335 } else {
336 ScriptDescriptor {
337 subscript_start: second_script_start,
338 superscript_start: first_script_start,
339 }
340 },
341 )
342 }))
343 }
344
345 fn parse_next(
346 mut self,
347 ) -> (
348 InnerResult<Option<(Event<'store>, ScriptDescriptor)>>,
349 &'store str,
350 ) {
351 (self.parse(), self.content)
352 }
353}
354
/// Positions, inside the instruction buffer, of the scripts attached to an element.
///
/// Each field is the buffer index at which the instructions of that script start. When
/// only one script is present, the other field holds the buffer length at the end of the
/// present one.
struct ScriptDescriptor {
    /// Index of the first instruction belonging to the subscript.
    subscript_start: usize,
    /// Index of the first instruction belonging to the superscript.
    superscript_start: usize,
}
359
360#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
361pub(crate) enum Token<'a> {
362 ControlSequence(&'a str),
363 Character(CharToken<'a>),
364}
365
/// A character token, stored as the string slice that starts at that character.
///
/// The slice is never empty when the token is built through [`CharToken::from_str`]
/// (checked in debug builds only).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct CharToken<'a> {
    char: &'a str,
}

impl<'a> CharToken<'a> {
    /// Wraps `s` as a character token.
    ///
    /// In debug builds this panics when `s` is empty.
    fn from_str(s: &'a str) -> Self {
        debug_assert!(
            !s.is_empty(),
            "CharToken must be constructed from a non-empty string"
        );
        Self { char: s }
    }

    /// Returns the underlying string slice.
    fn as_str(&self) -> &'a str {
        self.char
    }
}

impl From<CharToken<'_>> for char {
    /// Extracts the first character of the token.
    ///
    /// Panics if the token wraps an empty string.
    fn from(token: CharToken) -> char {
        let CharToken { char: text } = token;
        let mut characters = text.chars();
        characters.next().unwrap()
    }
}
391
392#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
393enum Argument<'a> {
394 Token(Token<'a>),
395 Group(&'a str),
396}
397
398#[derive(Debug, Clone)]
399enum Instruction<'a> {
400 Event(Event<'a>),
402 SubGroup {
404 content: &'a str,
405 allowed_alignment_count: Option<AlignmentCount>,
406 },
407}
408
/// Counter with an upper bound.
#[derive(Debug, Clone)]
struct AlignmentCount {
    /// Current value of the counter.
    count: u16,
    /// Bound checked by [`AlignmentCount::can_increment`].
    max: u16,
}

impl AlignmentCount {
    /// Creates a counter starting at zero with the given bound.
    fn new(max: u16) -> Self {
        AlignmentCount { max, count: 0 }
    }

    /// Sets the counter back to zero, leaving the bound untouched.
    fn reset(&mut self) {
        self.count = 0;
    }

    /// Adds one to the counter. The bound is not checked here; callers are expected to
    /// consult [`AlignmentCount::can_increment`] first.
    fn increment(&mut self) {
        self.count += 1;
    }

    /// Returns `true` while the counter is strictly below its bound.
    fn can_increment(&self) -> bool {
        self.max > self.count
    }
}
432
/// Record of the macro expansions that are currently being parsed, on top of the
/// original input.
#[derive(Debug, Clone)]
struct SpanStack<'store> {
    /// The original input string.
    input: &'store str,
    /// Expansions entered so far, innermost last.
    expansions: Vec<ExpansionSpan<'store>>,
}

impl<'store> SpanStack<'store> {
    /// Creates a stack with no expansions over `input`.
    fn from_input(input: &'store str) -> Self {
        Self {
            input,
            expansions: Vec::new(),
        }
    }

    /// Records a macro expansion.
    ///
    /// * `full_expansion` is the text parsing continues from after the expansion.
    /// * `call_site` is the text starting at the macro call, before the expansion.
    /// * `call_site_length` is the number of bytes of `call_site` taken up by the call.
    ///
    /// Expansions that do not contain `call_site` are discarded first (see
    /// [`SpanStack::reach_original_call_site`]).
    ///
    /// # Panics
    ///
    /// Panics if `call_site` points neither into a recorded expansion nor into the input.
    fn add(&mut self, full_expansion: &'store str, call_site: &str, call_site_length: usize) {
        let call_site_start = self.reach_original_call_site(call_site.as_ptr());
        // The expansion length is the call length corrected by how much the text grew or
        // shrank; signed arithmetic because the difference of the two lengths may be
        // negative.
        let expansion_length = (call_site_length as isize
            - (call_site.len() as isize - full_expansion.len() as isize))
            as usize;

        self.expansions.push(ExpansionSpan {
            full_expansion,
            expansion_length,
            call_site_in_origin: call_site_start..call_site_start + call_site_length,
        });
    }

    /// Pops expansions until one contains the address `substr_start`, and returns the byte
    /// offset of that address inside it. When no expansion contains it, the offset inside
    /// the original input is returned instead.
    ///
    /// Addresses are compared as `usize`: a signed comparison would misorder addresses in
    /// the upper half of the address space, and `start + len` could overflow `isize`.
    ///
    /// # Panics
    ///
    /// Panics if the address lies outside every recorded expansion and outside the input.
    fn reach_original_call_site(&mut self, substr_start: *const u8) -> usize {
        let addr = substr_start as usize;

        while let Some(expansion) = self.expansions.last() {
            if let Some(offset) = Self::offset_in(expansion.full_expansion, addr) {
                return offset;
            }
            self.expansions.pop();
        }

        let offset = Self::offset_in(self.input, addr);
        assert!(
            offset.is_some(),
            "the address is neither inside a recorded expansion nor inside the input"
        );
        offset.unwrap_or_default()
    }

    /// Returns the offset of `addr` within `text` when it lies inside `text`; the position
    /// one past the end counts as inside.
    fn offset_in(text: &str, addr: usize) -> Option<usize> {
        let start = text.as_ptr() as usize;
        // Subtracting only after `addr >= start` keeps the arithmetic free of overflow.
        if addr >= start && addr - start <= text.len() {
            Some(addr - start)
        } else {
            None
        }
    }
}

/// One macro expansion recorded on the [`SpanStack`].
#[derive(Debug, Clone)]
struct ExpansionSpan<'a> {
    /// The text parsing continued from after the expansion.
    full_expansion: &'a str,
    /// Length derived from the call-site length and the change in text length; see
    /// [`SpanStack::add`].
    expansion_length: usize,
    /// Byte range of the macro call within the text it was called from (the enclosing
    /// expansion, or the original input).
    call_site_in_origin: Range<usize>,
}
510
#[cfg(test)]
mod tests {
    use crate::event::{Content, DelimiterType, RelationContent, Visual};

    use super::*;

    /// Parses `input` completely, panicking on the first parser error.
    fn parse_ok<'store>(input: &'store str, store: &'store Storage) -> Vec<Event<'store>> {
        Parser::new(input, store)
            .collect::<Result<Vec<_>, ParserError>>()
            .unwrap()
    }

    /// A script event of the given type and position.
    fn script<'a>(ty: ScriptType, position: ScriptPosition) -> Event<'a> {
        Event::Script { ty, position }
    }

    /// A non-stretchy ordinary character.
    fn ordinary<'a>(content: char) -> Event<'a> {
        Event::Content(Content::Ordinary {
            content,
            stretchy: false,
        })
    }

    /// A number literal.
    fn number<'a>(digits: &'a str) -> Event<'a> {
        Event::Content(Content::Number(digits))
    }

    /// The start of a normal group.
    fn begin<'a>() -> Event<'a> {
        Event::Begin(Grouping::Normal)
    }

    #[test]
    fn substr_instructions() {
        let store = Storage::new();
        let events = parse_ok("\\bar{y}", &store);

        assert_eq!(
            events,
            vec![
                script(ScriptType::Superscript, ScriptPosition::AboveBelow),
                begin(),
                ordinary('y'),
                Event::End,
                ordinary('‾'),
            ]
        );
    }

    #[test]
    fn subsuperscript() {
        let store = Storage::new();
        let events = parse_ok(r"a^{1+3}_2", &store);

        // The subscript is emitted before the superscript even though it was written last.
        assert_eq!(
            events,
            vec![
                script(ScriptType::SubSuperscript, ScriptPosition::Right),
                ordinary('a'),
                number("2"),
                begin(),
                number("1"),
                Event::Content(Content::BinaryOp {
                    content: '+',
                    small: false
                }),
                number("3"),
                Event::End,
            ]
        );
    }

    #[test]
    fn subscript_torture() {
        let store = Storage::new();
        let events = parse_ok(r"a_{5_{5_{5_{5_{5_{5_{5_{5_{5_{5_{5_5}}}}}}}}}}}", &store);

        // `a_{` opens the outermost group, then ten nested `5_{` levels follow, closed
        // by the innermost `5_5` and eleven group ends.
        let mut expected = vec![
            script(ScriptType::Subscript, ScriptPosition::Right),
            ordinary('a'),
            begin(),
        ];
        for _ in 0..10 {
            expected.push(script(ScriptType::Subscript, ScriptPosition::Right));
            expected.push(number("5"));
            expected.push(begin());
        }
        expected.push(script(ScriptType::Subscript, ScriptPosition::Right));
        expected.push(number("5"));
        expected.push(number("5"));
        for _ in 0..11 {
            expected.push(Event::End);
        }

        assert_eq!(events, expected)
    }

    #[test]
    fn fraction() {
        let store = Storage::new();
        let events = parse_ok(r"\frac{1}{2}_2^4", &store);

        assert_eq!(
            events,
            vec![
                script(ScriptType::SubSuperscript, ScriptPosition::Right),
                Event::Visual(Visual::Fraction(None)),
                begin(),
                number("1"),
                Event::End,
                begin(),
                number("2"),
                Event::End,
                number("2"),
                number("4"),
            ]
        );
    }

    #[test]
    fn multidigit_number() {
        let store = Storage::new();
        let events = parse_ok("123", &store);

        assert_eq!(events, vec![number("123")]);
    }

    #[test]
    fn error() {
        let store = Storage::new();
        let parser = Parser::new(
            r"\def\blah#1#2{\fra#1#2} \def\abc#1{\blah{a}#1} \abc{b}",
            &store,
        );
        let events = parser.collect::<Vec<_>>();

        // The very first item is expected to be an error.
        assert!(events[0].is_err());
    }

    #[test]
    fn no_limits() {
        let store = Storage::new();
        let events = parse_ok(r#"\lim \nolimits _{x \to 0} f(x)"#, &store);

        assert_eq!(
            events,
            vec![
                script(ScriptType::Subscript, ScriptPosition::Right),
                Event::Content(Content::Function("lim")),
                begin(),
                ordinary('x'),
                Event::Content(Content::Relation {
                    content: RelationContent::single_char('→'),
                    small: false
                }),
                number("0"),
                Event::End,
                ordinary('f'),
                Event::Content(Content::Delimiter {
                    content: '(',
                    size: None,
                    ty: DelimiterType::Open
                }),
                ordinary('x'),
                Event::Content(Content::Delimiter {
                    content: ')',
                    size: None,
                    ty: DelimiterType::Close
                }),
            ]
        );
    }

    #[test]
    fn expansions_in_groups() {
        let store = Storage::new();
        let mut parser = Parser::new(
            r"\def\abc#1{#1} {\abc{a} + \abc{b}} = c \shoulderror",
            &store,
        );

        // Collecting stops at the first error; by then no expansion should be left on
        // the span stack.
        let outcome = parser.by_ref().collect::<Result<Vec<_>, _>>();
        assert!(outcome.is_err());
        assert!(parser.span_stack.expansions.is_empty());
    }
}
777
778