pub mod error;
mod lex;
mod macros;
mod primitives;
mod state;
pub mod storage;
mod tables;
use std::ops::Range;
use macros::MacroContext;
use crate::event::{Event, Grouping, ScriptPosition, ScriptType};
use self::{state::ParserState, storage::Storage};
pub(crate) use error::{ErrorKind, InnerResult, ParserError};
/// Parser that turns an input string into a stream of [`Event`]s.
///
/// Events are produced lazily through the `Iterator` implementation; each item is either an
/// event or the `ParserError` raised while parsing.
#[derive(Debug)]
pub struct Parser<'store> {
/// Stack of pending instructions; the iterator always works on the last element.
instruction_stack: Vec<Instruction<'store>>,
/// Scratch buffer filled by one parsing step, then drained onto `instruction_stack`.
buffer: Vec<Instruction<'store>>,
/// Macro state queried (`try_expand_in`) for every control sequence that is parsed.
macro_context: MacroContext<'store>,
/// Bump arena borrowed from the `Storage` handed to `Parser::new`.
storage: &'store bumpalo::Bump,
/// Record of the input and of the macro expansions, handed to `ParserError::new` on failure.
span_stack: SpanStack<'store>,
}
impl<'store> Parser<'store> {
    /// Creates a parser over `input` that allocates from `storage`.
    ///
    /// The input must outlive the storage borrow (`'input: 'store`) because string slices of
    /// the input are stored alongside slices allocated in the arena.
    pub fn new<'input>(input: &'input str, storage: &'store Storage) -> Self
    where
        'input: 'store,
    {
        // The whole input starts out as a single sub-group with no alignment allowance.
        let root = Instruction::SubGroup {
            content: input,
            allowed_alignment_count: None,
        };
        let mut instruction_stack = Vec::with_capacity(32);
        instruction_stack.push(root);

        Self {
            span_stack: SpanStack::from_input(input),
            macro_context: MacroContext::new(),
            storage: &storage.0,
            buffer: Vec::with_capacity(16),
            instruction_stack,
        }
    }
}
impl<'store> Iterator for Parser<'store> {
    type Item = Result<Event<'store>, ParserError>;

    /// Yields the next event, or the error raised while producing it.
    ///
    /// The instruction on top of `instruction_stack` decides what happens:
    /// * an `Event` is popped and returned;
    /// * a `SubGroup` whose content is only whitespace is discarded;
    /// * any other `SubGroup` is parsed one step further, and the instructions produced by
    ///   that step are moved from `buffer` onto the stack.
    ///
    /// Returns `None` once the stack is empty.
    fn next(&mut self) -> Option<Self::Item> {
        // Iterating here (instead of calling `self.next()` recursively) keeps the call stack
        // flat no matter how many consecutive steps produce no event.
        loop {
            match self.instruction_stack.last_mut() {
                Some(Instruction::Event(_)) => {
                    let event = self
                        .instruction_stack
                        .pop()
                        .and_then(|instruction| match instruction {
                            Instruction::Event(event) => Some(event),
                            _ => None,
                        })
                        .expect("there is something in the stack");
                    return Some(Ok(event));
                }
                Some(Instruction::SubGroup { content, .. })
                    if content.trim_start().is_empty() =>
                {
                    // Nothing left to parse in this group.
                    self.instruction_stack.pop();
                }
                Some(Instruction::SubGroup {
                    content,
                    allowed_alignment_count,
                    ..
                }) => {
                    let state = ParserState {
                        allowed_alignment_count: allowed_alignment_count.as_mut(),
                        ..Default::default()
                    };
                    let inner = InnerParser {
                        content,
                        buffer: &mut self.buffer,
                        state,
                        macro_context: &mut self.macro_context,
                        storage: self.storage,
                        span_stack: &mut self.span_stack,
                    };

                    let (parsed, rest) = inner.parse_next();
                    // Remember how far this group has been consumed, even on error.
                    *content = rest;

                    let script_event = match parsed {
                        Err(kind) => {
                            return Some(Err(ParserError::new(
                                kind,
                                rest.as_ptr(),
                                &mut self.span_stack,
                            )));
                        }
                        Ok(Some((event, desc))) => {
                            // The stack is popped from the end, so instructions are pushed
                            // reversed, and the subscript must end up above the superscript
                            // so that it is emitted first.
                            if desc.subscript_start > desc.superscript_start {
                                // The buffer holds `[superscript][subscript]`: move the
                                // superscript first. Draining it shifts the subscript down
                                // to `superscript_start`, where the second drain picks it up.
                                self.instruction_stack.extend(
                                    self.buffer
                                        .drain(desc.superscript_start..desc.subscript_start)
                                        .rev(),
                                );
                                self.instruction_stack
                                    .extend(self.buffer.drain(desc.superscript_start..).rev());
                            } else {
                                // The buffer holds `[subscript][superscript]`: a plain
                                // reversal already yields the wanted order.
                                self.instruction_stack
                                    .extend(self.buffer.drain(desc.subscript_start..).rev());
                            }
                            Some(event)
                        }
                        Ok(None) => None,
                    };

                    // Whatever is left in the buffer was produced before the scripts; it goes
                    // above them, and the script event (if any) goes on the very top.
                    self.instruction_stack.extend(self.buffer.drain(..).rev());
                    if let Some(event) = script_event {
                        self.instruction_stack.push(Instruction::Event(event));
                    }
                }
                None => return None,
            }
        }
    }
}
/// Short-lived parser for a single parsing step over one sub-group.
///
/// It is built by `Parser::next` from borrows of the outer parser's fields and consumed by
/// `parse_next`.
#[derive(Debug)]
struct InnerParser<'b, 'store> {
/// Text of the sub-group that is still to be parsed; advanced as tokens are consumed.
content: &'store str,
/// Output buffer that receives the instructions produced by this step.
buffer: &'b mut Vec<Instruction<'store>>,
/// Per-step state (script position, alignment allowance, ...).
state: ParserState<'b>,
/// Macro state used to expand control sequences.
macro_context: &'b mut MacroContext<'store>,
/// Bump arena passed along to macro expansion.
storage: &'store bumpalo::Bump,
/// Expansion record, updated whenever a macro is expanded.
span_stack: &'b mut SpanStack<'store>,
}
impl<'b, 'store> InnerParser<'b, 'store> {
    /// Emits the instructions for one argument into the buffer.
    ///
    /// A braced group becomes `Begin(Normal)`, a sub-group with the group's content, and `End`.
    /// A single token is dispatched to `handle_primitive` or `handle_char_token` (both defined
    /// outside this block) after flagging `state.handling_argument`.
    fn handle_argument(&mut self, argument: Argument<'store>) -> InnerResult<()> {
        match argument {
            Argument::Group(group) => {
                let begin = Instruction::Event(Event::Begin(Grouping::Normal));
                let body = Instruction::SubGroup {
                    content: group,
                    allowed_alignment_count: None,
                };
                let end = Instruction::Event(Event::End);
                self.buffer.extend([begin, body, end]);
            }
            Argument::Token(Token::ControlSequence(cs)) => {
                self.state.handling_argument = true;
                self.handle_primitive(cs)?;
            }
            Argument::Token(Token::Character(c)) => {
                self.state.handling_argument = true;
                self.handle_char_token(c)?;
            }
        }
        Ok(())
    }

    /// Parses one token together with the scripts that directly follow it.
    ///
    /// Returns `Ok(None)` when no token could be lexed or when the token carries no script.
    /// Otherwise returns the `Event::Script` to emit, plus the buffer offsets at which the
    /// subscript and superscript instructions start.
    ///
    /// # Errors
    ///
    /// Propagates lexing, expansion and handler errors, and reports
    /// `ErrorKind::DoubleSubscript` / `ErrorKind::DoubleSuperscript` when the same kind of
    /// script is given twice.
    fn parse(&mut self) -> InnerResult<Option<(Event<'store>, ScriptDescriptor)>> {
        // Kept so that a macro call site can be located relative to the pre-token content.
        let original_content = self.content.trim_start();
        let token = match lex::token(&mut self.content) {
            Err(ErrorKind::Token) => return Ok(None),
            Err(other) => return Err(other),
            Ok(token) => token,
        };

        match token {
            Token::Character(c) => {
                self.handle_char_token(c)?;
            }
            Token::ControlSequence(cs) => {
                let expansion = self
                    .macro_context
                    .try_expand_in(cs, self.content, self.storage);
                if let Some(result) = expansion {
                    let (new_content, arguments_consumed_length) = result?;
                    let call_site_length = cs.len() + arguments_consumed_length + 1;
                    self.span_stack
                        .add(new_content, original_content, call_site_length);
                    // Continue parsing from the expanded text.
                    self.content = new_content;
                    return self.parse();
                }
                self.handle_primitive(cs)?;
            }
        }

        if self.state.skip_scripts {
            return Ok(None);
        }

        if self.state.allow_script_modifiers {
            if let Some(limits) = lex::limit_modifiers(&mut self.content) {
                self.state.script_position = if limits {
                    ScriptPosition::AboveBelow
                } else {
                    ScriptPosition::Right
                };
            }
        }

        // First script: `_` or `^`, otherwise the token stands alone.
        self.content = self.content.trim_start();
        let (subscript_first, after_marker) =
            if let Some(rest) = self.content.strip_prefix('_') {
                (true, rest)
            } else if let Some(rest) = self.content.strip_prefix('^') {
                (false, rest)
            } else {
                return Ok(None);
            };
        self.content = after_marker;

        let first_script_start = self.buffer.len();
        let first_argument = lex::argument(&mut self.content)?;
        self.handle_argument(first_argument)?;
        let second_script_start = self.buffer.len();

        // Optional second script, which must be of the other kind.
        let (repeated, complementary) = if subscript_first {
            ('_', '^')
        } else {
            ('^', '_')
        };
        match self.content.chars().next() {
            Some(c) if c == complementary => {
                self.content = &self.content[1..];
                let second_argument = lex::argument(&mut self.content)?;
                self.handle_argument(second_argument)?;
                // A third script marker is always a duplicate of one of the two.
                match self.content.chars().next() {
                    Some('_') => return Err(ErrorKind::DoubleSubscript),
                    Some('^') => return Err(ErrorKind::DoubleSuperscript),
                    _ => {}
                }
            }
            Some(c) if c == repeated => {
                return Err(if subscript_first {
                    ErrorKind::DoubleSubscript
                } else {
                    ErrorKind::DoubleSuperscript
                });
            }
            _ => {}
        }
        let second_script_end = self.buffer.len();

        // If the second script added nothing to the buffer, only the first script counts.
        let single_script = second_script_start == second_script_end;
        let ty = match (single_script, subscript_first) {
            (false, _) => ScriptType::SubSuperscript,
            (true, true) => ScriptType::Subscript,
            (true, false) => ScriptType::Superscript,
        };
        let descriptor = if subscript_first {
            ScriptDescriptor {
                subscript_start: first_script_start,
                superscript_start: second_script_start,
            }
        } else {
            ScriptDescriptor {
                subscript_start: second_script_start,
                superscript_start: first_script_start,
            }
        };

        Ok(Some((
            Event::Script {
                ty,
                position: self.state.script_position,
            },
            descriptor,
        )))
    }

    /// Runs one `parse` step and returns its result along with the content left unparsed.
    fn parse_next(
        mut self,
    ) -> (
        InnerResult<Option<(Event<'store>, ScriptDescriptor)>>,
        &'store str,
    ) {
        let outcome = self.parse();
        (outcome, self.content)
    }
}
/// Offsets into the parser's instruction buffer describing where script instructions begin.
///
/// Both values are taken from `buffer.len()` in `InnerParser::parse`. When only one script is
/// present, the start of the missing one is the buffer length after the present script.
struct ScriptDescriptor {
/// Buffer index at which the subscript instructions start.
subscript_start: usize,
/// Buffer index at which the superscript instructions start.
superscript_start: usize,
}
/// A token handled by the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) enum Token<'a> {
/// A control sequence, tried for macro expansion and otherwise passed to `handle_primitive`.
// NOTE(review): the `+ 1` added to `cs.len()` in `InnerParser::parse` suggests the slice
// excludes the leading backslash; confirm in `lex`.
ControlSequence(&'a str),
/// A character token, passed to `handle_char_token`.
Character(CharToken<'a>),
}
/// A character token, stored as the string slice it was taken from.
///
/// The first character of the slice is the token's character; the slice is expected to be
/// non-empty (checked in debug builds by `from_str`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct CharToken<'a> {
    /// Slice whose first character is the token.
    char: &'a str,
}

impl<'a> CharToken<'a> {
    /// Wraps `s` as a character token.
    ///
    /// In debug builds, panics if `s` is empty.
    fn from_str(s: &'a str) -> Self {
        debug_assert!(
            !s.is_empty(),
            "CharToken must be constructed from a non-empty string"
        );
        CharToken { char: s }
    }

    /// Returns the underlying slice, with the lifetime of the original string.
    fn as_str(&self) -> &'a str {
        let CharToken { char: text } = *self;
        text
    }
}

impl From<CharToken<'_>> for char {
    /// Extracts the first character of the token's slice.
    ///
    /// Panics if the slice is empty, which `CharToken::from_str` rules out in debug builds.
    fn from(token: CharToken<'_>) -> Self {
        let mut characters = token.char.chars();
        characters.next().unwrap()
    }
}
/// An argument as consumed by `InnerParser::handle_argument`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Argument<'a> {
/// A single token, handled directly.
Token(Token<'a>),
/// The content of a group, emitted as a sub-group wrapped in `Begin`/`End` events.
Group(&'a str),
}
/// A unit of pending work on the parser's instruction stack.
#[derive(Debug, Clone)]
enum Instruction<'a> {
/// An event that is ready to be yielded as is.
Event(Event<'a>),
/// Text that still has to be parsed.
SubGroup {
/// The remaining unparsed text of the group.
content: &'a str,
/// Optional alignment counter handed to the parser state while this group is parsed.
allowed_alignment_count: Option<AlignmentCount>,
},
}
/// A counter with an upper bound.
// NOTE(review): presumably counts alignment markers inside a group; confirm against callers.
#[derive(Debug, Clone)]
struct AlignmentCount {
    /// Number of increments since creation or the last `reset`.
    count: u16,
    /// Bound that `count` must stay below for `can_increment` to hold.
    max: u16,
}

impl AlignmentCount {
    /// Creates a counter at zero, bounded by `max`.
    fn new(max: u16) -> Self {
        AlignmentCount { max, count: 0 }
    }

    /// Sets the counter back to zero, keeping the bound.
    fn reset(&mut self) {
        self.count = 0;
    }

    /// Adds one to the counter; the bound is not checked here.
    fn increment(&mut self) {
        self.count = self.count + 1;
    }

    /// Returns `true` while the counter is strictly below its bound.
    fn can_increment(&self) -> bool {
        self.max > self.count
    }
}
/// Record of the original input and of the macro expansions currently being parsed.
///
/// Positions are resolved by comparing raw string addresses, so every slice handed to this
/// type must point into the input or into one of the recorded expansions.
#[derive(Debug, Clone)]
struct SpanStack<'store> {
    /// The string the parser was created with.
    input: &'store str,
    /// Expansions that are still being parsed, innermost last.
    expansions: Vec<ExpansionSpan<'store>>,
}

impl<'store> SpanStack<'store> {
    /// Creates a stack for `input` with no expansion recorded.
    fn from_input(input: &'store str) -> Self {
        Self {
            expansions: Vec::new(),
            input,
        }
    }

    /// Records a macro expansion.
    ///
    /// `call_site` is the text starting at the macro call, `call_site_length` the number of
    /// bytes of the call itself, and `full_expansion` the text that replaces `call_site`.
    /// Expansions that do not contain the call site are dropped first.
    fn add(&mut self, full_expansion: &'store str, call_site: &str, call_site_length: usize) {
        let call_site_start = self.reach_original_call_site(call_site.as_ptr());
        let call_site_end = call_site_start + call_site_length;

        // How much shorter the replacement text is than the text it replaces (may be negative).
        let length_difference = call_site.len() as isize - full_expansion.len() as isize;
        let expansion_length = (call_site_length as isize - length_difference) as usize;

        self.expansions.push(ExpansionSpan {
            call_site_in_origin: call_site_start..call_site_end,
            expansion_length,
            full_expansion,
        });
    }

    /// Returns the byte offset of `substr_start` within the innermost text that contains it.
    ///
    /// Expansions that do not contain the address are popped on the way. If no expansion
    /// matches, the offset is relative to the original input.
    ///
    /// # Panics
    ///
    /// Panics if the address lies neither in a recorded expansion nor in the input.
    fn reach_original_call_site(&mut self, substr_start: *const u8) -> usize {
        /// Offset of `address` inside `text`; the one-past-the-end address counts as inside.
        fn offset_within(text: &str, address: isize) -> Option<usize> {
            let start = text.as_ptr() as isize;
            if address >= start && address <= start + text.len() as isize {
                Some((address - start) as usize)
            } else {
                None
            }
        }

        let ptr_val = substr_start as isize;
        while let Some(innermost) = self.expansions.last() {
            if let Some(offset) = offset_within(innermost.full_expansion, ptr_val) {
                return offset;
            }
            // This expansion has been left behind.
            self.expansions.pop();
        }

        let input_start = self.input.as_ptr() as isize;
        assert!(ptr_val >= input_start && ptr_val <= input_start + self.input.len() as isize);
        (ptr_val - input_start) as usize
    }
}

/// One macro expansion recorded by `SpanStack::add`.
#[derive(Debug, Clone)]
struct ExpansionSpan<'a> {
    /// The text that replaced the call site.
    full_expansion: &'a str,
    /// `call_site_length - (call_site.len() - full_expansion.len())`, as computed in `add`.
    // NOTE(review): presumably the length of the expanded macro body at the start of
    // `full_expansion`; confirm against the error-reporting code.
    expansion_length: usize,
    /// Byte range of the macro call within the text it was found in.
    call_site_in_origin: Range<usize>,
}
// Tests that run the parser end to end and compare the emitted event stream.
#[cfg(test)]
mod tests {
use crate::event::{Content, DelimiterType, RelationContent, Visual};
use super::*;
// `\bar{y}` is expected as a superscript placed above/below: the script event, the grouped
// `y`, then the bar character.
#[test]
fn substr_instructions() {
let store = Storage::new();
let parser = Parser::new("\\bar{y}", &store);
let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();
assert_eq!(
events,
vec![
Event::Script {
ty: ScriptType::Superscript,
position: ScriptPosition::AboveBelow
},
Event::Begin(Grouping::Normal),
Event::Content(Content::Ordinary {
content: 'y',
stretchy: false
}),
Event::End,
Event::Content(Content::Ordinary {
content: '‾',
stretchy: false,
}),
]
);
}
// The subscript is expected before the superscript in the event stream even though the
// source writes the superscript first.
#[test]
fn subsuperscript() {
let store = Storage::new();
let parser = Parser::new(r"a^{1+3}_2", &store);
let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();
assert_eq!(
events,
vec![
Event::Script {
ty: ScriptType::SubSuperscript,
position: ScriptPosition::Right
},
Event::Content(Content::Ordinary {
content: 'a',
stretchy: false,
}),
Event::Content(Content::Number("2")),
Event::Begin(Grouping::Normal),
Event::Content(Content::Number("1")),
Event::Content(Content::BinaryOp {
content: '+',
small: false
}),
Event::Content(Content::Number("3")),
Event::End,
]
);
}
// Deeply nested subscripts: every level is expected to open a group containing its own
// script event, and all groups are closed at the end.
#[test]
fn subscript_torture() {
let store = Storage::new();
let parser = Parser::new(r"a_{5_{5_{5_{5_{5_{5_{5_{5_{5_{5_{5_5}}}}}}}}}}}", &store);
let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();
assert_eq!(
events,
vec![
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Ordinary {
content: 'a',
stretchy: false,
}),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Begin(Grouping::Normal),
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Number("5")),
Event::Content(Content::Number("5")),
Event::End,
Event::End,
Event::End,
Event::End,
Event::End,
Event::End,
Event::End,
Event::End,
Event::End,
Event::End,
Event::End,
]
)
}
// Scripts after `\frac{1}{2}` are expected to attach to the whole fraction, with the
// subscript `2` emitted before the superscript `4`.
#[test]
fn fraction() {
let store = Storage::new();
let parser = Parser::new(r"\frac{1}{2}_2^4", &store);
let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();
assert_eq!(
events,
vec![
Event::Script {
ty: ScriptType::SubSuperscript,
position: ScriptPosition::Right
},
Event::Visual(Visual::Fraction(None)),
Event::Begin(Grouping::Normal),
Event::Content(Content::Number("1")),
Event::End,
Event::Begin(Grouping::Normal),
Event::Content(Content::Number("2")),
Event::End,
Event::Content(Content::Number("2")),
Event::Content(Content::Number("4")),
]
);
}
// Consecutive digits are expected as a single number event.
#[test]
fn multidigit_number() {
let store = Storage::new();
let parser = Parser::new("123", &store);
let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();
assert_eq!(events, vec![Event::Content(Content::Number("123"))]);
}
// The first item yielded is expected to be an error.
// NOTE(review): presumably because `\fra` inside the nested macro bodies is not a known
// command; confirm.
#[test]
fn error() {
let store = Storage::new();
let parser = Parser::new(
r"\def\blah#1#2{\fra#1#2} \def\abc#1{\blah{a}#1} \abc{b}",
&store,
);
let events = parser.collect::<Vec<_>>();
assert!(events[0].is_err());
}
// `\nolimits` after `\lim` is expected to place the subscript to the right.
#[test]
fn no_limits() {
let store = Storage::new();
let parser = Parser::new(r#"\lim \nolimits _{x \to 0} f(x)"#, &store);
let events = parser.collect::<Result<Vec<_>, ParserError>>().unwrap();
assert_eq!(
events,
vec![
Event::Script {
ty: ScriptType::Subscript,
position: ScriptPosition::Right
},
Event::Content(Content::Function("lim")),
Event::Begin(Grouping::Normal),
Event::Content(Content::Ordinary {
content: 'x',
stretchy: false
}),
Event::Content(Content::Relation {
content: RelationContent::single_char('→'),
small: false
}),
Event::Content(Content::Number("0")),
Event::End,
Event::Content(Content::Ordinary {
content: 'f',
stretchy: false
}),
Event::Content(Content::Delimiter {
content: '(',
size: None,
ty: DelimiterType::Open
}),
Event::Content(Content::Ordinary {
content: 'x',
stretchy: false
}),
Event::Content(Content::Delimiter {
content: ')',
size: None,
ty: DelimiterType::Close
}),
]
);
}
// Parsing is expected to fail, and by the time the error is reported no expansion recorded
// earlier may remain on the span stack.
#[test]
fn expansions_in_groups() {
let store = Storage::new();
let mut parser = Parser::new(
r"\def\abc#1{#1} {\abc{a} + \abc{b}} = c \shoulderror",
&store,
);
assert!(parser.by_ref().collect::<Result<Vec<_>, _>>().is_err());
assert!(parser.span_stack.expansions.is_empty());
}
}