use crate::ast::*;
use crate::util::combine;
#![arguments(source_lines: &[SourceLine])]
// Entry point of the grammar: parses a whole document.
// The resulting content lists the paragraphs that precede the first heading,
// followed by all top-level headings. The input must be consumed up to EOF.
pub document -> Element
    = lpos:#position intro:paragraph* sections:heading* EOF rpos:#position
    {
        let mut elements = intro;
        elements.extend(sections);
        Element::Document(Document {
            position: Span::new(lpos, rpos, source_lines),
            content: elements,
        })
    }
// Inline content of a heading caption: formatted text whose plain-text
// pieces are built from `heading_char`.
head_fmt -> Element
= FormattedTextTemplate<Text<heading_char>>
// A heading: a run of '=' markers, a caption, optional trailing '=' markers
// and a line end, followed by the paragraphs belonging to the heading.
// The heading depth is the number of leading '=' characters.
heading -> Element
    = lpos:#position marker:$('='+) _ caption:head_fmt* _ '='* _ (nl / EOF)
      content:paragraph* rpos:#position
    {
        let depth = marker.len();
        Element::Heading(Heading {
            position: Span::new(lpos, rpos, source_lines),
            depth,
            caption,
            content,
        })
    }
// A paragraph is a block element: some or no text followed by a newline (or EOF).
// The fmt_rule parameter is only applied to plain top-level text. All nested formatting
// uses the standard formatted rule. This keeps formatted text or html tags from being
// ripped apart.
//
// Alternatives are tried in order: a list, table or gallery; then a template,
// internal reference or html comment standing alone before the line end; and
// finally a plain paragraph, which may only start where PAR_START_GUARD allows.
ParagraphTemplate<fmt_rule>
= list
/ table
/ gallery
/ (t:template _ (nl / EOF) {t})
/ (i:internal_ref _ (nl / EOF) {i})
/ (c:html_comment _ (nl / EOF) {c})
/ posl:#position PAR_START_GUARD text:fmt_rule* _ (nl / EOF) posr:#position
{
Element::Paragraph(Paragraph {
position: Span::new(posl, posr, source_lines),
content: text,
})
}
// The standard paragraph: ParagraphTemplate instantiated with the standard
// `formatted` rule for its top-level text.
paragraph -> Element
= ParagraphTemplate<formatted>
// === Template parsing ===
// Inline content inside a template: formatted text whose plain-text pieces
// are built from `template_char` (normal characters except '|').
template_fmt -> Element
= FormattedTextTemplate<Text<template_char>>
// Paragraph variant whose top-level text uses `template_fmt`.
template_par -> Element
= ParagraphTemplate<template_fmt>
// A mediawiki template: `{{ name | arg | arg ... }}`.
// The name is a sequence of template-formatted elements; each '|' introduces
// one template argument. Input starting with one of the MAGIC_WORDS is
// rejected so that e.g. table delimiters are not read as templates.
template -> Element
    = lpos:#position !(MAGIC_WORDS) "{{" ws name:(template_fmt)* ws
      args:('|' arg:template_arg {arg})* "}}" rpos:#position
    {
        Element::Template(Template {
            position: Span::new(lpos, rpos, source_lines),
            name,
            content: args,
        })
    }
// A single template argument with an optional `name =` prefix.
// The value is the concatenation of headings, template paragraphs and
// trailing inline template content, in that order. An argument without a
// name gets the default (empty) name.
template_arg -> Element
    = lpos:#position ws key:(k:template_arg_name ws '=' {k})? ws
      value:(hs:heading* ps:template_par* fs:template_fmt* {combine((hs, combine((ps, fs))))})
      rpos:#position
    {
        Element::TemplateArgument(TemplateArgument {
            position: Span::new(lpos, rpos, source_lines),
            name: key.unwrap_or_default(),
            value,
        })
    }
// === mediawiki lists ===
// A list is one or more list items separated by line ends, optionally
// followed by a final newline.
list -> Element
    = lpos:#position entries:(list_item ++ (nl / EOF)) nl? rpos:#position
    {
        Element::List(List {
            position: Span::new(lpos, rpos, source_lines),
            content: entries,
        })
    }
// A single list item: a run of list markers (`*`, `#`, `:`, `;`) followed by
// inline content. The number of markers is the nesting depth; the LAST
// marker decides the kind of the item.
list_item -> Element
    = posl:#position s:$([*#:;]+) _ content:formatted* _ posr:#position
    {
        // `s` matched `[*#:;]+`, so it is non-empty and its last character is
        // always one of the four markers handled below.
        let kind = match s.chars().last() {
            Some('*') => ListItemKind::Unordered,
            Some('#') => ListItemKind::Ordered,
            Some(':') => ListItemKind::Definition,
            Some(';') => ListItemKind::DefinitionTerm,
            other => unreachable!("undefined list start: {:?} \
                this is an implementation error!", other),
        };
        Element::ListItem(ListItem {
            position: Span::new(posl, posr, source_lines),
            depth: s.len(),
            kind,
            content,
        })
    }
// === mediawiki tables ===
// Table delimiters. Each one accepts either the plain wiki markup or a
// `{{...}}` spelling of the same delimiter.
table_start = "{|" / "{{(!}}"
table_end = "|}" / "{{!)}}"
table_caption_sep = "|+" / "{{!+}}"
table_row_sep = "|-" / "{{!-}}"
table_pipe = '|' / "{{!}}"
// A cell separator. The matched text is returned so that the caller can
// inspect it (table_cell marks a cell as header when it starts with '!').
// The two-character forms are listed before the one-character forms so the
// ordered choice prefers the longer match.
cell_sep -> &'input str
= $("||") / $("!!") / $('|') / $('!') / $("{{!}}") / $("{{!!}}")
// A complete table: start delimiter, optional attributes, optional caption,
// optional cells that appear before the first row separator, any number of
// rows, and the end delimiter.
//
// Cells appearing before the first row separator are wrapped in a
// synthesized first row without attributes. Its span covers exactly those
// cells, so the row's position points at real source text.
table -> Element
    = posl:#position table_start attr:table_attrs? ws caption:table_caption?
      first_row:(rl:#position c:table_cell* rr:#position {(rl, c, rr)})
      rows:table_row* table_end posr:#position
    {
        let (cap_attrs, cap_pars) = caption.unwrap_or_default();
        let (row_posl, first_cells, row_posr) = first_row;
        let mut rows = rows;
        if !first_cells.is_empty() {
            rows.insert(0, Element::TableRow(TableRow {
                position: Span::new(row_posl, row_posr, source_lines),
                cells: first_cells,
                attributes: vec![],
            }));
        }
        Element::Table(Table {
            position: Span::new(posl, posr, source_lines),
            rows,
            attributes: attr.unwrap_or_default(),
            caption: cap_pars,
            caption_attributes: cap_attrs,
        })
    }
// Html-style attributes of a table, row, caption or cell: zero or more
// `html_attr`s separated by whitespace, with optional surrounding blanks.
table_attrs -> Vec<TagAttribute>
= _ attr:(html_attr ** (whitespace+)) _ {attr}
// Inline content inside a table: must not start with a cell separator;
// plain text is built from `table_char`.
table_fmt -> Element
= !(cell_sep) FormattedTextTemplate<Text<table_char>>
// Paragraph variant whose top-level text uses `table_fmt`.
table_par -> Element
= ParagraphTemplate<table_fmt>
// A table caption: the caption separator, optional attributes terminated by a
// table pipe, then the caption content (table paragraphs followed by trailing
// inline table content). Returns (attributes, content); attributes default to
// empty when absent.
table_caption -> (Vec<TagAttribute>, Vec<Element>)
= table_caption_sep _ attr:(t:table_attrs table_pipe {t})? _
pars:(p:table_par* f:table_fmt* {combine((p, f))})
{
(attr.unwrap_or_default(), pars)
}
// A row separator line: the separator token, the row attributes (possibly
// none) and a mandatory newline. Returns the row attributes.
row_sep -> Vec<TagAttribute>
= table_row_sep attr:table_attrs nl {attr}
// A table row: a row separator (carrying the row attributes) followed by the
// cells of the row. Never matches at the table end delimiter.
table_row -> Element
    = lpos:#position !(table_end) attributes:row_sep cells:table_cell* rpos:#position
    {
        Element::TableRow(TableRow {
            position: Span::new(lpos, rpos, source_lines),
            cells,
            attributes,
        })
    }
// A table cell: a cell separator, optional attributes terminated by a single
// table pipe, and the cell content (table paragraphs followed by trailing
// inline table content). Never matches at a table end or a row separator.
// The cell is a header cell when its separator starts with '!'.
table_cell -> Element
    = lpos:#position !(table_end / row_sep) marker:cell_sep
      attributes:(a:table_attrs table_pipe !(table_pipe) {a})?
      _ content:(ps:table_par* fs:table_fmt* {combine((ps, fs))}) rpos:#position
    {
        let header = marker.starts_with('!');
        Element::TableCell(TableCell {
            position: Span::new(lpos, rpos, source_lines),
            content,
            attributes: attributes.unwrap_or_default(),
            header,
        })
    }
// === References ===
// internal references, may have pipe-separated options
// Inline content inside an internal reference: plain text is built from
// `template_char`, so '|' ends a text run.
iref_fmt -> Element
= FormattedTextTemplate<Text<template_char>>
// Paragraph variant whose top-level text uses `iref_fmt`.
iref_par -> Element
= ParagraphTemplate<iref_fmt>
// An internal reference: `[[target|option|...|caption]]`.
// Every pipe-separated part after the target is flattened into a list of
// elements; the last part becomes the caption, all earlier parts are options.
internal_ref -> Element
    = lpos:#position "[[" _ target:iref_fmt* _ "|"? _
      parts:(ps:iref_par* _ fs:iref_fmt* {(ps, fs)}) ++ (_ '|' _) "]]" rpos:#position
    {
        let mut options: Vec<Vec<Element>> = parts.into_iter().map(combine).collect();
        let caption = options.pop().unwrap_or_default();
        Element::InternalReference(InternalReference {
            position: Span::new(lpos, rpos, source_lines),
            target,
            caption,
            options,
        })
    }
// External references (hyperlinks): `[url caption]`, with only a url and an
// optional caption. Blanks between the url and the caption are separators:
// they belong to neither the target nor the caption.
external_ref -> Element
    = posl:#position '[' u:url _ cap:formatted* ']' posr:#position
    {
        Element::ExternalReference(ExternalReference {
            position: Span::new(posl, posr, source_lines),
            target: u,
            caption: cap
        })
    }
// === Galleries ===
// Separator between gallery entries: one or more newlines, each optionally
// surrounded by blanks.
gallery_sep = (_ nl _)+
// One entry of a gallery, represented as an internal reference.
// The entry consists of '|'-separated parts: the first part is the target,
// the last remaining part (if any) is the caption, everything in between
// are options.
gallery_file -> Element
    = lpos:#position parts:(f:iref_fmt+ {f}) ++ '|' rpos:#position
    {
        let mut parts = parts;
        // `++` guarantees at least one part, so taking the first never fails.
        let target = parts.remove(0);
        let caption = parts.pop().unwrap_or_default();
        Element::InternalReference(InternalReference {
            position: Span::new(lpos, rpos, source_lines),
            target,
            caption,
            options: parts,
        })
    }
// A gallery block: an opening gallery tag (its attributes are kept), a
// list of gallery entries separated by `gallery_sep`, and the closing tag.
gallery -> Element
    = lpos:#position open:TagOpen<"gallery"i>
      ws entries:(gallery_file ** gallery_sep) ws
      TagClose<"gallery"i> rpos:#position
    {
        // TagOpen yields (name, attributes); only the attributes are needed.
        let (_, attributes) = open;
        Element::Gallery(Gallery {
            position: Span::new(lpos, rpos, source_lines),
            attributes,
            content: entries,
        })
    }
// === Inline markup ===
// Content allowed inside quote formatting: either standard formatted text
// that does not start with a single quote, or the `included` rule supplied by
// the caller (which may start with a quote).
QuoteFormattedTemplate<included> = text:((!('\'') t:formatted {t}) / included) {text}
// Quote formatting cannot be nested into itself: strong content may include
// `emph`, and emph content may include `strong`.
strong_formatted -> Element
= QuoteFormattedTemplate<emph>
emph_formatted -> Element
= QuoteFormattedTemplate<strong>
// Paragraphs inside strong / emph markup. Lists, tables and galleries are
// rejected up front, so only the remaining paragraph alternatives can match.
strong_par -> Element
= !(list / table / gallery) e:ParagraphTemplate<strong_formatted> {e}
emph_par -> Element
= !(list / table / gallery) e:ParagraphTemplate<emph_formatted> {e}
// Bold text: one or more strong paragraphs or strong inline elements
// enclosed in `strong_lit` markers.
strong -> Element
    = lpos:#position strong_lit
      content:(strong_par / strong_formatted)+
      strong_lit rpos:#position
    {
        let markup = MarkupType::Bold;
        Element::Formatted(Formatted {
            position: Span::new(lpos, rpos, source_lines),
            content,
            markup,
        })
    }
// Italic text: one or more emph paragraphs or emph inline elements
// enclosed in `emph_lit` markers.
emph -> Element
    = lpos:#position emph_lit
      content:(emph_par / emph_formatted)+
      emph_lit rpos:#position
    {
        let markup = MarkupType::Italic;
        Element::Formatted(Formatted {
            position: Span::new(lpos, rpos, source_lines),
            content,
            markup,
        })
    }
// html markup
// Each rule below instantiates MarkupTag with a tag name literal (written with
// the `i` suffix) and the content rule allowed inside that tag.
// <math>: raw math text.
math -> Element
= inner:MarkupTag<"math"i, math_text*> {inner}
// <del> or <s>: paragraphs followed by inline formatted content.
strike_through -> Element
= inner:MarkupTag<"del"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
/ inner:MarkupTag<"s"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
// <ins> or <u>: paragraphs followed by inline formatted content.
underline -> Element
= inner:MarkupTag<"ins"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
/ inner:MarkupTag<"u"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
// <nowiki>: raw text up to the closing tag.
nowiki -> Element
= inner:MarkupTag<"nowiki"i, nowiki_text*> {inner}
// <code>: raw text up to the closing tag.
code -> Element
= inner:MarkupTag<"code"i, code_text*> {inner}
// <blockquote>: paragraphs followed by inline formatted content.
blockquote -> Element
= inner:MarkupTag<"blockquote"i, p:paragraph* f:formatted* {combine((p, f))}> {inner}
// <pre>: raw text up to the closing tag.
pre_formatted -> Element
= inner:MarkupTag<"pre"i, preformatted_text*> {inner}
// Template for formatted (inline) text with a specific rule for plain text.
// The alternatives are tried in the listed order: the caller's plain-text
// rule first, then quote formatting, templates, references, comments, the
// specific html markup tags, generic html tags and finally single whitespace
// characters. The specific markup tags are listed before `any_tag`, so they
// win over the generic tag rule.
FormattedTextTemplate<text_rule>
= fmt:(
text_rule
/ strong
/ emph
/ template
/ internal_ref
/ external_ref
/ html_comment
/ math
/ nowiki
/ strike_through
/ underline
/ code
/ blockquote
/ pre_formatted
/ any_tag
/ whitespace_elem
) {fmt}
// Standard inline element for most contexts: formatted text whose plain-text
// pieces are `normal_text`.
formatted -> Element
= f:FormattedTextTemplate<normal_text> {f}
// === embedded html ===
// A single `key=value` html attribute. The value is either quoted text or an
// unquoted tag-safe literal; blanks around '=' are allowed.
html_attr -> TagAttribute
    = lpos:#position name:tag_name _ '=' _ val:(quoted_text / tag_safe_literal) rpos:#position
    {
        let span = Span::new(lpos, rpos, source_lines);
        TagAttribute::new(span, name, val)
    }
// The inside of a tag: the tag name followed by any number of attributes.
// Yields the pair (name, attributes).
TagInner<name>
= n:name _ attrs:(a:html_attr _ {a})* {(n, attrs)}
// An opening tag `<name attrs>`; yields the (name, attributes) pair.
// Reported as "opening html tag" in parse errors.
TagOpen<name>
= #quiet<'<' _ inner:TagInner<name> _ '>' {inner}> / #expected("opening html tag")
// A closing tag `</name>`; the empty form `</>` is accepted as well.
// Reported as "closing html tag" in parse errors.
TagClose<name>
= #quiet<('<' _ '/' _ TagInner<name> _ '>') / '<' _ '/' _ '>'> / #expected("closing html tag")
// A generic html tag, either with inner elements (`<name ...> inner </name>`)
// or self-closing (`<name ... />`). Yields (name, attributes, inner content);
// a self-closing tag has empty content.
HtmlTag<name, inner>
= (tag:TagOpen<name> i:inner TagClose<name> {(tag.0, tag.1, i)})
/ ("<" _ tag:TagInner<name> _ "/" _ ">" {(tag.0, tag.1, vec![])})
// Any opening tag, with an optional tag name; the parsed value is discarded.
any_open
= TagOpen<tag_name?> {()}
// Any closing tag, with an optional tag name; the parsed value is discarded.
any_close
= TagClose<tag_name?> {()}
// Matches any valid html tag, except those listed in HTML_BLOCK_ELEMENTS
// (like "gallery"), and creates a HtmlTag element from it.
// The inner content is paragraphs, then inline formatted text, then headings.
any_tag -> Element
    = lpos:#position
      tag:HtmlTag<(!HTML_BLOCK_ELEMENTS n:tag_name {n}), p:paragraph* f:formatted* h:heading* {combine((p, combine((f, h))))}>
      rpos:#position
    {
        let (name, attributes, content) = tag;
        Element::HtmlTag(HtmlTag {
            position: Span::new(lpos, rpos, source_lines),
            name,
            attributes,
            content,
        })
    }
// Template for simple formatting markup tags. The markup type is looked up
// from the matched tag name (see ast.rs); the tag's attributes are not kept.
MarkupTag<name, inner>
    = lpos:#position tag:HtmlTag<$(name), inner> rpos:#position
    {
        let (matched_name, _, content) = tag;
        Element::Formatted(Formatted {
            position: Span::new(lpos, rpos, source_lines),
            content,
            markup: MarkupType::by_tag_name(matched_name),
        })
    }
// html comments may contain any text.
// Opening and closing delimiters of an html comment.
html_comment_start = "<!--"
html_comment_end = "-->"
// An html comment: everything between the comment delimiters is taken
// verbatim as the comment text. An empty comment yields an empty text.
html_comment -> Element
    = lpos:#position html_comment_start
      body:CharString<(!(html_comment_end) c:$. {c})>?
      html_comment_end rpos:#position
    {
        Element::Comment(Comment {
            position: Span::new(lpos, rpos, source_lines),
            text: body.unwrap_or_default(),
        })
    }
// === primitive terminals ===
// Marker for italic text (two single quotes).
emph_lit = "''"
// Marker for bold text (three single quotes).
strong_lit = "'''"
// A single line feed.
nl = '\n'
// End of input: succeeds only when no character is left.
// Reported as "EOF" in parse errors.
EOF = #quiet<!.> / #expected("EOF")
// === text primitives ===
// A Text element built from a non-empty run of characters matching `C`.
Text<C>
    = lpos:#position text:CharString<C> rpos:#position
    {
        Element::Text(Text {
            position: Span::new(lpos, rpos, source_lines),
            text,
        })
    }
// One or more matches of `C`, collected into a single value
// (a String at every use visible in this grammar).
CharString<C>
= chars:C+ { chars.iter().map(|s| s.to_owned()).collect() }
// Text enclosed between two occurrences of `ClosingChar`; yields the text
// without the delimiters. The enclosed text may be empty (e.g. `""`), in
// which case the default (empty) value is returned.
EnclosedLiteral<ClosingChar>
    = ClosingChar text:CharString<!(ClosingChar) $.>? ClosingChar { text.unwrap_or_default() }
// === various text types ===
// Each rule wraps its matcher in #quiet and supplies a readable #expected
// description for parse errors.
// Plain text in standard contexts.
normal_text -> Element
= #quiet<Text<normal_char>> / #expected("normal text")
// Raw content of a <math> tag.
math_text -> Element
= #quiet<Text<math_char>> / #expected("LaTeX source code")
// The name part of a named template argument.
template_arg_name -> String
= #quiet<CharString<template_arg_char>> / #expected("template attribute name")
// Raw text up to (not including) a closing nowiki tag.
nowiki_text -> Element
= #quiet<Text<!TagClose<"nowiki"i> $.>> / #expected("any text")
// Raw text up to (not including) a closing code tag.
code_text -> Element
= #quiet<Text<!TagClose<"code"i> $. >> / #expected("any text")
// Raw text up to (not including) a closing pre tag.
preformatted_text -> Element
    = #quiet<Text<!TagClose<"pre"i> $. >> / #expected("any text")
// Target of an external reference: a run of `url_char`s.
url -> String
= #quiet<CharString<url_char>> / #expected("a word of text (e.g. url)")
// Unquoted attribute value: a run of `tag_char`s.
tag_safe_literal -> String
= #quiet<CharString<tag_char>> / #expected("tag attribute value")
// Attribute value enclosed in double or single quotes (quotes not included).
quoted_text -> String
= #quiet<EnclosedLiteral<'\"'> / EnclosedLiteral<'\''>> / #expected("quoted text")
// Name of a tag or of an attribute: a run of `tag_char`s.
tag_name -> String
= #quiet<CharString<tag_char>> / #expected("tag / attribute name")
// Optional blanks (spaces and tabs, no newlines); returns the matched text.
_ -> &'input str = #quiet<w:$([ \t]*) {w}> / #expected("whitespace")
// Optional whitespace including newlines; returns the matched text.
ws -> &'input str = #quiet<w:$([\n\r \t]*) {w}> / #expected("whitespace (including newlines)")
// A non-empty run of spaces / tabs as a Text element.
whitespace_elem -> Element
= Text<whitespace>
// === character classes ===
// These characters are allowed within certain contexts,
// excluded characters have special meaning and break texts
// Any character that does not begin a closing math tag.
math_char -> &'input str = !TagClose<"math"i> $.
// Any character that is not whitespace, a brace or a bracket, and that does
// not begin emphasis markup, an html tag or an html comment.
normal_char -> &'input str
= !([\n\r \t{}\[\]] / emph_lit /
any_open / any_close / any_tag / html_comment_start) $.
// A normal character that does not begin the trailing '=' run of a heading.
heading_char -> &'input str
= !('='+ _ (nl / EOF)) c:normal_char {c}
// A normal character that does not begin a cell separator.
table_char -> &'input str
= !(cell_sep) c:normal_char {c}
// A normal character other than '|'.
template_char -> &'input str
= !'|' c:normal_char {c}
// A normal character other than the listed punctuation
// (| < > = ! * # : ; /).
template_arg_char -> &'input str
= ![|<>=!*#:;/] c:normal_char {c}
// A single space or tab.
whitespace -> &'input str
= $(' ') / $('\t')
// Any character except '<', '>', '/', space and '='.
tag_char -> &'input str
= $([^<>/ =])
// Any character except space and ']'.
url_char -> &'input str
= $([^ \]])
// a paragraph may not start with these symbols as they indicate other elements
// (it may not start at the end of input either).
PAR_START_GUARD = !([=!|;#:*] / EOF)
// tags which should be parsed as block elements, rather than html tags.
HTML_BLOCK_ELEMENTS = ("gallery"i)
// magic words which cannot be interpreted as templates
// (the table delimiters and cell separators, including their `{{...}}` forms).
MAGIC_WORDS = table_start / table_end / table_caption_sep /
table_row_sep / table_pipe / cell_sep