use super::condition::ParseCondition;
use super::prelude::*;
use super::rule::Rule;
use super::RULE_PAGE;
use crate::data::PageInfo;
use crate::render::text::TextRender;
use crate::tokenizer::Tokenization;
use crate::tree::{
AcceptsPartial, Bibliography, BibliographyList, CodeBlock, HeadingLevel,
};
use std::borrow::Cow;
use std::cell::RefCell;
use std::rc::Rc;
use std::{mem, ptr};
/// Upper bound on the parser's `depth` counter.
///
/// `Parser::depth_increment` returns `ParseErrorKind::RecursionDepthExceeded`
/// once the counter goes above this value.
const MAX_RECURSION_DEPTH: usize = 100;
/// Cursor over a tokenized input plus the state accumulated while parsing it.
///
/// Cloning is how speculative parsing is done in this file (see
/// `evaluate_fn` / `save_evaluate_fn`): the cursor and flags are copied, while
/// every `Rc<RefCell<..>>` field keeps pointing at the same underlying
/// collection as the parser it was cloned from.
#[derive(Debug, Clone)]
pub struct Parser<'r, 't> {
    // Immutable inputs
    /// Information about the page being parsed.
    page_info: &'r PageInfo<'t>,

    /// Settings in effect for this parse.
    settings: &'r WikitextSettings,

    // Cursor
    /// The token the parser is currently positioned on.
    current: &'r ExtractedToken<'t>,

    /// All tokens after `current`.
    remaining: &'r [ExtractedToken<'t>],

    /// The full source text, as reported by the tokenization.
    full_text: FullText<'t>,

    // Rule state
    /// The rule currently being processed; attached to errors by `make_err`.
    rule: Rule,

    /// Recursion depth counter, bounded by `MAX_RECURSION_DEPTH`.
    depth: usize,

    // Collections shared between a parser and its clones
    /// Table of contents entries as `(level, rendered name)` pairs.
    table_of_contents: Rc<RefCell<Vec<(usize, String)>>>,

    /// HTML blocks collected so far.
    html_blocks: Rc<RefCell<Vec<Cow<'t, str>>>>,

    /// Code blocks collected so far.
    code_blocks: Rc<RefCell<Vec<CodeBlock<'t>>>>,

    /// Footnotes collected so far, each stored as its list of elements.
    footnotes: Rc<RefCell<Vec<Vec<Element<'t>>>>>,

    /// Bibliographies collected so far.
    bibliographies: Rc<RefCell<BibliographyList<'t>>>,

    // Flags
    /// Which partial elements are currently accepted.
    accepts_partial: AcceptsPartial,

    /// Footnote flag, as last set through `set_footnote_flag`.
    in_footnote: bool,

    /// Set once `set_footnote_block` has been called.
    has_footnote_block: bool,

    /// Whether the token stepped past most recently was `InputStart`,
    /// `LineBreak` or `ParagraphBreak`.
    start_of_line: bool,
}
impl<'r, 't> Parser<'r, 't> {
pub(crate) fn new(
tokenization: &'r Tokenization<'t>,
page_info: &'r PageInfo<'t>,
settings: &'r WikitextSettings,
) -> Self {
let full_text = tokenization.full_text();
let (current, remaining) = tokenization
.tokens()
.split_first()
.expect("Parsed tokens list was empty (expected at least one element)");
Parser {
page_info,
settings,
current,
remaining,
full_text,
rule: RULE_PAGE,
depth: 0,
table_of_contents: make_shared_vec(),
html_blocks: make_shared_vec(),
code_blocks: make_shared_vec(),
footnotes: make_shared_vec(),
bibliographies: Rc::new(RefCell::new(BibliographyList::new())),
accepts_partial: AcceptsPartial::None,
in_footnote: false,
has_footnote_block: false,
start_of_line: true,
}
}
/// Returns the page information this parser was constructed with.
#[inline]
pub fn page_info(&self) -> &PageInfo<'t> {
self.page_info
}
/// Returns the settings this parser was constructed with.
#[inline]
pub fn settings(&self) -> &WikitextSettings {
self.settings
}
/// Returns the full-text handle obtained from the tokenization.
#[inline]
pub fn full_text(&self) -> FullText<'t> {
self.full_text
}
/// Returns the rule currently associated with this parser.
#[inline]
pub fn rule(&self) -> Rule {
self.rule
}
/// Returns the current `AcceptsPartial` setting.
#[inline]
pub fn accepts_partial(&self) -> AcceptsPartial {
self.accepts_partial
}
/// Returns the footnote flag, as last set by `set_footnote_flag`.
#[inline]
pub fn in_footnote(&self) -> bool {
self.in_footnote
}
/// Returns whether `set_footnote_block` has been called on this parser
/// (or on a parser whose state was copied in via `update`).
#[inline]
pub fn has_footnote_block(&self) -> bool {
self.has_footnote_block
}
/// Returns the start-of-line flag.
///
/// The flag is `true` on a fresh parser and is recomputed on every call to
/// `step`, based on the token being stepped away from.
#[inline]
pub fn start_of_line(&self) -> bool {
self.start_of_line
}
/// Replaces the rule associated with this parser.
///
/// The rule is what `make_err` attaches to any error built afterwards.
#[inline]
pub fn set_rule(&mut self, rule: Rule) {
self.rule = rule;
}
pub fn clone_with_rule(&self, rule: Rule) -> Self {
let mut clone = self.clone();
clone.set_rule(rule);
clone
}
/// Raises the recursion depth by one.
///
/// # Errors
///
/// Returns `ParseErrorKind::RecursionDepthExceeded` if the new depth is
/// greater than `MAX_RECURSION_DEPTH`. The counter stays incremented even
/// when the error is returned.
pub fn depth_increment(&mut self) -> Result<(), ParseError> {
    self.depth += 1;
    debug!("Incrementing recursion depth to {}", self.depth);

    if self.depth <= MAX_RECURSION_DEPTH {
        Ok(())
    } else {
        Err(self.make_err(ParseErrorKind::RecursionDepthExceeded))
    }
}
/// Lowers the recursion depth by one and logs the new value.
///
/// This is a plain `usize` subtraction with no guard, so it is only valid
/// when the depth is non-zero, i.e. after a matching `depth_increment`.
#[inline]
pub fn depth_decrement(&mut self) {
self.depth -= 1;
debug!("Decrementing recursion depth to {}", self.depth);
}
/// Replaces the `AcceptsPartial` setting.
#[inline]
pub fn set_accepts_partial(&mut self, value: AcceptsPartial) {
self.accepts_partial = value;
}
/// Sets the footnote flag reported by `in_footnote`.
#[inline]
pub fn set_footnote_flag(&mut self, value: bool) {
self.in_footnote = value;
}
/// Records that a footnote block exists.
///
/// The flag can only be raised here; nothing in this impl lowers it again.
#[inline]
pub fn set_footnote_block(&mut self) {
self.has_footnote_block = true;
}
/// Verifies that page syntax is enabled in the current settings.
///
/// # Errors
///
/// Returns `ParseErrorKind::NotSupportedMode` when
/// `settings.enable_page_syntax` is `false`.
pub fn check_page_syntax(&self) -> Result<(), ParseError> {
    if !self.settings.enable_page_syntax {
        return Err(self.make_err(ParseErrorKind::NotSupportedMode));
    }

    Ok(())
}
/// Appends an entry to the shared table of contents.
///
/// The stored level is the heading's numeric value minus one, and the
/// stored name is `name_elements` rendered through `TextRender`.
pub fn push_table_of_contents_entry(
    &mut self,
    heading: HeadingLevel,
    name_elements: &[Element],
) {
    let entry = (
        usize::from(heading.value()) - 1,
        TextRender.render_partial(name_elements, self.page_info, self.settings, 0),
    );

    self.table_of_contents.borrow_mut().push(entry);
}
/// Takes every collected HTML block, leaving the shared list empty.
#[cold]
pub fn remove_html_blocks(&mut self) -> Vec<Cow<'t, str>> {
    let mut blocks = self.html_blocks.borrow_mut();
    mem::take(&mut *blocks)
}
/// Takes every collected code block, leaving the shared list empty.
#[cold]
pub fn remove_code_blocks(&mut self) -> Vec<CodeBlock<'t>> {
    let mut blocks = self.code_blocks.borrow_mut();
    mem::take(&mut *blocks)
}
/// Takes every table of contents entry, leaving the shared list empty.
#[cold]
pub fn remove_table_of_contents(&mut self) -> Vec<(usize, String)> {
    let mut entries = self.table_of_contents.borrow_mut();
    mem::take(&mut *entries)
}
/// Appends one footnote (given as its list of elements) to the shared
/// footnote list.
pub fn push_footnote(&mut self, contents: Vec<Element<'t>>) {
self.footnotes.borrow_mut().push(contents);
}
/// Takes every collected footnote, leaving the shared list empty.
#[cold]
pub fn remove_footnotes(&mut self) -> Vec<Vec<Element<'t>>> {
    let mut notes = self.footnotes.borrow_mut();
    mem::take(&mut *notes)
}
/// Appends one HTML block to the shared HTML block list.
pub fn push_html_block(&mut self, new_block: Cow<'t, str>) {
self.html_blocks.borrow_mut().push(new_block);
}
/// Appends a code block to the shared list, enforcing unique names.
///
/// Blocks without a name are always accepted. A named block is rejected
/// if any already-stored block carries an equal name.
///
/// # Errors
///
/// Returns `NonUniqueNameError` when the name is already taken; the list
/// is left unchanged in that case.
pub fn push_code_block(
    &mut self,
    new_block: CodeBlock<'t>,
) -> Result<(), NonUniqueNameError> {
    // The shared borrow must end before the mutable borrow below,
    // hence the inner scope.
    let name_taken = {
        let existing = self.code_blocks.borrow();

        match new_block.name {
            Some(ref wanted) => existing
                .iter()
                .any(|block| matches!(&block.name, Some(name) if name == wanted)),
            None => false,
        }
    };

    if name_taken {
        return Err(NonUniqueNameError);
    }

    self.code_blocks.borrow_mut().push(new_block);
    Ok(())
}
/// Appends a bibliography to the shared list and returns its index.
///
/// The returned value is whatever `next_index()` reports on the list
/// immediately before the push.
pub fn push_bibliography(&mut self, bibliography: Bibliography<'t>) -> usize {
let mut guard = self.bibliographies.borrow_mut();
let index = guard.next_index();
guard.push(bibliography);
index
}
/// Takes the collected bibliographies, leaving a default-constructed list
/// behind in the shared cell.
#[cold]
pub fn remove_bibliographies(&mut self) -> BibliographyList<'t> {
    let mut list = self.bibliographies.borrow_mut();
    mem::take(&mut *list)
}
/// Appends externally collected items onto this parser's shared
/// collections.
///
/// Each argument is appended to the field of the same name. For the `Vec`
/// arguments this moves every element out, leaving the passed-in vector
/// empty.
// NOTE(review): `BibliographyList::append` presumably drains its argument
// the same way `Vec::append` does — confirm against its definition.
pub fn append_shared_items(
&mut self,
html_blocks: &mut Vec<Cow<'t, str>>,
code_blocks: &mut Vec<CodeBlock<'t>>,
table_of_contents: &mut Vec<(usize, String)>,
footnotes: &mut Vec<Vec<Element<'t>>>,
bibliographies: &mut BibliographyList<'t>,
) {
self.html_blocks.borrow_mut().append(html_blocks);
self.code_blocks.borrow_mut().append(code_blocks);
self.table_of_contents
.borrow_mut()
.append(table_of_contents);
self.footnotes.borrow_mut().append(footnotes);
self.bibliographies.borrow_mut().append(bibliographies);
}
/// Tests a single `ParseCondition` against the current parser position.
///
/// * `CurrentToken(token)` holds when the current token equals `token`.
/// * `TokenPair(current, next)` holds when the current token equals
///   `current` and the token immediately after it equals `next`. If
///   there is no following token, the condition fails.
///
/// The parser position is not changed.
pub fn evaluate(&self, condition: ParseCondition) -> bool {
    info!(
        "Evaluating parser condition (token {}, slice '{}', span {}..{})",
        self.current.token.name(),
        self.current.slice,
        self.current.span.start,
        self.current.span.end,
    );

    match condition {
        ParseCondition::CurrentToken(token) => self.current.token == token,
        ParseCondition::TokenPair(current, next) => {
            // First half of the pair: the token we are sitting on.
            let actual_current = self.current().token;
            if actual_current != current {
                debug!(
                    "Current token in pair doesn't match, failing (expected '{}', actual '{}')",
                    current.name(),
                    actual_current.name(),
                );
                return false;
            }

            // Second half of the pair: the first of the remaining tokens.
            match self.look_ahead(0) {
                Some(actual) if actual.token == next => true,
                Some(actual) => {
                    debug!(
                        "Second token in pair doesn't match, failing (expected {}, actual {})",
                        next.name(),
                        actual.token.name(),
                    );
                    false
                }
                None => {
                    debug!(
                        "Second token in pair doesn't exist (token {})",
                        next.name(),
                    );
                    false
                }
            }
        }
    }
}
/// Returns `true` as soon as any condition in `conditions` holds.
///
/// Conditions are tried in order and evaluation stops at the first match.
/// An empty slice yields `false`.
#[inline]
pub fn evaluate_any(&self, conditions: &[ParseCondition]) -> bool {
    info!(
        "Evaluating to see if any parser condition is true (conditions length {})",
        conditions.len(),
    );

    for &condition in conditions {
        if self.evaluate(condition) {
            return true;
        }
    }

    false
}
/// Runs `f` against a throwaway clone of this parser and reports whether
/// it succeeded with `true`.
///
/// Both `Ok(false)` and any `Err` count as `false`. This parser's cursor
/// and flags are untouched, since `f` only sees the clone. The clone does
/// share the reference-counted collections, so anything `f` pushes into
/// those remains visible here.
#[inline]
pub fn evaluate_fn<F>(&self, f: F) -> bool
where
    F: FnOnce(&mut Parser<'r, 't>) -> Result<bool, ParseError>,
{
    info!("Evaluating closure for parser condition");

    let mut scratch = self.clone();
    matches!(f(&mut scratch), Ok(true))
}
/// Like `evaluate_fn`, but keeps the progress made by `f` when it
/// succeeds.
///
/// `f` runs on a clone. If it returns `Ok(true)`, the clone's state is
/// copied back with `update` and the token that was current *before* the
/// call is returned. Otherwise this parser is left as it was and `None`
/// is returned.
pub fn save_evaluate_fn<F>(&mut self, f: F) -> Option<&'r ExtractedToken<'t>>
where
    F: FnOnce(&mut Parser<'r, 't>) -> Result<bool, ParseError>,
{
    info!("Evaluating closure for parser condition, saving progress on success");

    let mut attempt = self.clone();
    if !matches!(f(&mut attempt), Ok(true)) {
        return None;
    }

    let previous = self.current;
    self.update(&attempt);
    Some(previous)
}
/// Returns the token the parser is currently positioned on.
#[inline]
pub fn current(&self) -> &'r ExtractedToken<'t> {
self.current
}
/// Returns the tokens that come after the current one.
#[inline]
pub fn remaining(&self) -> &'r [ExtractedToken<'t>] {
self.remaining
}
#[inline]
pub fn update(&mut self, parser: &Parser<'r, 't>) {
self.accepts_partial = parser.accepts_partial;
self.in_footnote = parser.in_footnote;
self.has_footnote_block = parser.has_footnote_block;
self.start_of_line = parser.start_of_line;
self.current = parser.current;
self.remaining = parser.remaining;
}
/// Reports whether `old_remaining` is the very same slice as this parser's
/// remaining tokens.
///
/// The check is by pointer identity (`ptr::eq`), not by comparing the
/// tokens themselves.
// NOTE(review): presumably used by callers to detect that no tokens were
// consumed between two points — confirm at the call sites.
#[inline]
pub fn same_pointer(&self, old_remaining: &'r [ExtractedToken<'t>]) -> bool {
ptr::eq(self.remaining, old_remaining)
}
/// Advances to the next token and returns it.
///
/// Before moving, the start-of-line flag is recomputed from the token
/// being left: it becomes `true` only if that token is `InputStart`,
/// `LineBreak` or `ParagraphBreak`. This happens even when the step
/// itself fails.
///
/// # Errors
///
/// Returns `ParseErrorKind::EndOfInput` if there are no remaining tokens;
/// the cursor does not move in that case.
#[inline]
pub fn step(&mut self) -> Result<&'r ExtractedToken<'t>, ParseError> {
    debug!("Stepping to the next token");

    self.start_of_line = matches!(
        self.current.token,
        Token::InputStart | Token::LineBreak | Token::ParagraphBreak,
    );

    if let Some((next, rest)) = self.remaining.split_first() {
        self.current = next;
        self.remaining = rest;
        return Ok(next);
    }

    warn!("Exhausted all tokens, yielding end of input error");
    Err(self.make_err(ParseErrorKind::EndOfInput))
}
/// Calls `step` `count` times, discarding the tokens returned.
///
/// # Errors
///
/// Stops at, and returns, the first error produced by `step`. Steps
/// already taken before the failure are not undone.
#[inline]
pub fn step_n(&mut self, count: usize) -> Result<(), ParseError> {
    trace!("Stepping {count} times");

    (0..count).try_for_each(|_| self.step().map(|_| ()))
}
/// Peeks at an upcoming token without moving the cursor.
///
/// `offset` indexes into the remaining tokens, so `0` is the token right
/// after the current one. Returns `None` if `offset` is past the end.
#[inline]
pub fn look_ahead(&self, offset: usize) -> Option<&'r ExtractedToken<'t>> {
debug!("Looking ahead to a token (offset {offset})");
self.remaining.get(offset)
}
/// Same as `look_ahead`, but turns a missing token into an error.
///
/// # Errors
///
/// Returns `ParseErrorKind::EndOfInput` when there is no token at
/// `offset`.
#[inline]
pub fn look_ahead_err(
    &self,
    offset: usize,
) -> Result<&'r ExtractedToken<'t>, ParseError> {
    match self.look_ahead(offset) {
        Some(token) => Ok(token),
        None => Err(self.make_err(ParseErrorKind::EndOfInput)),
    }
}
/// Returns the current token kind together with the kind of the token
/// after it, if there is one.
pub fn next_two_tokens(&self) -> (Token, Option<Token>) {
    (
        self.current.token,
        self.look_ahead(0).map(|upcoming| upcoming.token),
    )
}
/// Returns the current token kind together with the kinds of the next two
/// tokens, each `None` if the input ends before it.
pub fn next_three_tokens(&self) -> (Token, Option<Token>, Option<Token>) {
    let peek = |offset| self.look_ahead(offset).map(|upcoming| upcoming.token);

    (self.current.token, peek(0), peek(1))
}
/// Consumes the current token if it is `token`, returning its text.
///
/// # Errors
///
/// * Returns an error of the caller-supplied `kind` if the current token
///   is something else; nothing is consumed.
/// * Propagates the error from `step` if the token matched but there is
///   nothing after it to step onto.
pub fn get_token(
    &mut self,
    token: Token,
    kind: ParseErrorKind,
) -> Result<&'t str, ParseError> {
    debug!("Looking for token {} (error {})", token.name(), kind.name());

    let found = self.current();
    if found.token != token {
        return Err(self.make_err(kind));
    }

    self.step()?;
    Ok(found.slice)
}
/// Consumes the current token if it is `token`; otherwise does nothing.
///
/// # Errors
///
/// Only fails if the token matched and the subsequent `step` failed.
pub fn get_optional_token(&mut self, token: Token) -> Result<(), ParseError> {
    debug!("Looking for optional token {}", token.name());

    if self.current().token != token {
        return Ok(());
    }

    self.step().map(|_| ())
}
/// Consumes the current token if it is a `Token::LineBreak`.
///
/// Thin wrapper over `get_optional_token`; errors are the same as there.
pub fn get_optional_line_break(&mut self) -> Result<(), ParseError> {
info!("Looking for optional line break");
self.get_optional_token(Token::LineBreak)
}
/// Consumes the current token if it is a `Token::Whitespace`.
///
/// Thin wrapper over `get_optional_token`; errors are the same as there.
#[inline]
pub fn get_optional_space(&mut self) -> Result<(), ParseError> {
info!("Looking for optional space");
self.get_optional_token(Token::Whitespace)
}
/// Consumes a run of consecutive spacing tokens, stopping on the first
/// token that is not one.
///
/// The tokens skipped are `Whitespace`, `LineBreak`, `ParagraphBreak`
/// and, despite the method's name, `Equals`. If the current token is
/// none of these, nothing is consumed.
///
/// # Errors
///
/// Propagates the error from `step` if the input ends while still inside
/// such a run.
pub fn get_optional_spaces_any(&mut self) -> Result<(), ParseError> {
    info!("Looking for optional spaces (any)");

    // Pattern-matching on the token avoids building a lookup slice and
    // taking a reference to a temporary on every iteration.
    while matches!(
        self.current().token,
        Token::Whitespace | Token::LineBreak | Token::ParagraphBreak | Token::Equals,
    ) {
        self.step()?;
    }

    Ok(())
}
/// Builds a `ParseError` of the given kind, tagged with this parser's
/// current rule and current token.
#[cold]
#[inline]
pub fn make_err(&self, kind: ParseErrorKind) -> ParseError {
ParseError::new(kind, self.rule, self.current)
}
}
/// Error returned by `Parser::push_code_block` when the new block's name is
/// already used by a previously stored code block.
#[derive(Debug)]
pub struct NonUniqueNameError;
/// Creates a new, empty vector behind an `Rc<RefCell<..>>` so it can be
/// shared and mutated by several owners.
#[inline]
fn make_shared_vec<T>() -> Rc<RefCell<Vec<T>>> {
    Rc::default()
}
/// Checks the sequence of `start_of_line` values seen while stepping through
/// several small inputs.
#[test]
fn parser_newline_flag() {
    use crate::layout::Layout;
    use crate::settings::WikitextMode;

    let page_info = PageInfo::dummy();
    let settings = WikitextSettings::from_mode(WikitextMode::Page, Layout::Wikidot);

    macro_rules! check {
        ($input:expr, $expected_steps:expr $(,)?) => {{
            let tokens = crate::tokenize($input);
            let mut parser = Parser::new(&tokens, &page_info, &settings);
            let mut observed = Vec::new();

            // Record the flag after every successful step.
            while parser.step().is_ok() {
                observed.push(parser.start_of_line());
            }

            // The flag recorded for the final successful step is dropped.
            // NOTE(review): presumably that step lands on the end-of-input
            // token, which the expectations below do not cover — confirm.
            observed.pop();

            assert_eq!(
                &observed, &$expected_steps,
                "Series of start-of-line flags does not match expected",
            );
        }};
    }

    check!("A", [true]);
    check!("A\nB C", [true, false, true, false, false]);
    check!(
        "A\nB\n\nC D\nE",
        [true, false, true, false, true, false, false, false, true],
    );
    check!(
        "\nA\n\nB\n\n\nC D",
        [true, true, false, true, false, true, false, false],
    );
}