// Entry point: the whole input, anchored at both ends, is a run of sections.
document = {
    SOI ~
    section* ~
    EOI
}

// One block-level element plus any blank lines on either side of it (silent).
section = _{
    blank_line* ~
    section_contents ~
    blank_line*
}

// The block-level alternatives. They are tried in the order listed, so
// `paragraph` is only reached when nothing before it matches.
section_contents = _{
    verbatim
  | reference
  | header
  | bullet_list
  | ordered_list
  | code_block
  | thematic_break
  | paragraph
}
// Comment rule (pest gives the name COMMENT special treatment: it is skipped
// implicitly between the terms of non-atomic rules). Two forms are recognised:
//   1. the link-reference idiom `[//]: # (text)`, where `\)` escapes a closing
//      parenthesis inside the text;
//   2. an HTML comment `<!-- text -->`.
// The HTML body is repeated with `*` rather than `+` so that the empty comment
// `<!---->` is accepted as well.
COMMENT = _{
    ("[//]: # (" ~ ("\\)" | (!")" ~ ANY))* ~ ")")
  | "<!--" ~ (!"-->" ~ ANY)* ~ "-->"
}
// One horizontal whitespace character: a space or a tab (non-silent rule).
space = {
    " "
  | "\t"
}

// A single literal space; silent.
silent_space = _{
    " "
}

// A line holding nothing but optional spaces/tabs, terminated by a newline.
blank_line = _{
    space* ~
    NEWLINE
}

// One level of indentation: exactly four spaces, or one tab.
indent = _{
    " "{4}
  | "\t"
}

// One to three spaces, i.e. leading whitespace too short to count as `indent`.
non_indent_space = _{
    " "{1,3}
}

// Quote characters, referenced by `link_title`.
double_quote = _{
    "\""
}
single_quote = _{
    "'"
}
// Characters excluded from plain text (`normal_char`); one that is not consumed
// by another inline rule is matched on its own via `symbol`.
special_char = _{
    "~" | "*" | "_" | "`" | "&"
  | "[" | "]" | "(" | ")" | "<"
  | "!" | "#" | "\\" | "\""
}

// Any character that is not special, not a newline and not a space/tab.
normal_char = _{
    !special_char ~
    !NEWLINE ~
    !space ~
    ANY
}

// Any character other than a newline, space or tab.
non_space = {
    !NEWLINE ~
    !space ~
    ANY
}

// Characters that may follow a backslash in `escaped_special_char`.
control_character = {
    "-" | "\\" | "`" | "|" | "*"
  | "#" | "+" | "." | "!" | "_"
  | "{" | "}" | "[" | "]" | "("
  | ")" | "<" | ">" | "\"" | "'"
}

// A backslash followed by one `control_character`.
escaped_special_char = _{
    "\\" ~
    control_character
}

// Inline constructs that carry formatting, tried in this order.
markup = _{
    strong
  | emphasis
  | code
  | image
  | link
}

// A run of plain text. Interior spaces/tabs are included only when another
// normal character follows, so trailing whitespace is never part of `str`.
str = {
    normal_char+ ~
    (
        normal_char
      | space+ ~ &normal_char
    )*
}

// A single special character, emitted as its own token.
symbol = {
    special_char
}
// A soft line ending inside a block: at most one space/tab, then a newline,
// accepted only when the following line does NOT begin with any of:
//   - a blank line, an `indent`, a thematic break or a block-quote marker;
//   - (after up to three spaces) an ATX heading marker, the literal `1. ` or
//     `1) `, or a bullet -- except that a `*` which opens strong/emphasis text
//     does not count as a bullet here.
normal_endline = { space? ~ NEWLINE ~ !(blank_line | indent | thematic_break | block_quote_open | non_indent_space? ~ (atx_hash | "1. " | "1) " | !star_strong ~ !star_emphasis ~ bullet)) }
// Optional whitespace and a newline that is immediately followed by end of input.
trailing_endline = _{ space* ~ NEWLINE ~ EOI }
// A literal space followed by a `normal_endline`. Because `normal_endline`
// itself accepts one optional leading space/tab, this matches a line ending
// preceded by two whitespace characters (the first being a space).
linebreak = { " " ~ normal_endline }
// Any line ending. Order matters: `linebreak` is only tried after the two
// plainer forms have failed.
endline = _{ normal_endline | trailing_endline | linebreak }
// A single inline item. Alternatives are tried in the order listed, so plain
// text wins over everything else and `symbol` is the fallback.
inline = _{ str | endline | space | escaped_special_char | markup | symbol }
// One or more inline items. A line ending in the middle is consumed only if
// another inline item follows it; one final line ending is optional.
inlines = _{ ((!endline ~ inline) | (endline ~ &inline))+ ~ endline? }
// Text between `[` and `]`: any inline items, stopping at `]` or at a newline.
// May be empty.
label = {
    (
        !"]" ~
        !NEWLINE ~
        inline
    )*
}

// A link destination: one or more non-space characters, none of which is
// `)`, `>` or `]`.
source = {
    (
        !")" ~
        !">" ~
        !"]" ~
        non_space
    )+
}
// Thematic break lines: up to three leading spaces, then at least three of the
// same marker character (`*`, `-` or `_`), each optionally followed by
// spaces/tabs. The line may be terminated by a newline OR by end of input, so a
// break on the very last line of a file without a trailing newline is still
// recognised.
star_line = _{ non_indent_space? ~ ("*" ~ space*){3,} ~ (NEWLINE | EOI) }
dash_line = _{ non_indent_space? ~ ("-" ~ space*){3,} ~ (NEWLINE | EOI) }
underscore_line = _{ non_indent_space? ~ ("_" ~ space*){3,} ~ (NEWLINE | EOI) }
// Any of the three thematic break forms.
thematic_break = { star_line | dash_line | underscore_line }
// A paragraph: up to three leading spaces, a run of inline content, then either
// one or more blank lines or end of input.
paragraph = { non_indent_space? ~ inlines ~ (blank_line+ | EOI) }
// The marker that opens a block-quote line: up to three spaces, then `>`.
block_quote_open = _{ non_indent_space? ~ ">" }
// Despite its name, this rule matches `>`-prefixed (block quote) lines. Each
// repetition is: the `>` marker, optional spaces, any inline items, and a
// newline (as `linebreak_literal`) that is consumed only when the next line
// also starts with the `>` marker. One final newline is optional.
verbatim = { (block_quote_open ~ silent_space* ~ inline* ~ (linebreak_literal ~ &block_quote_open)?)+ ~ NEWLINE? }
// The opening marker of an ATX heading: one to six `#`, not followed by a
// further `#`.
atx_hash = {
    "#"{1,6} ~ !"#"
}

// An ATX heading line.
header = {
    // up to three leading spaces, the `#` run, then any spaces after it
    non_indent_space? ~
    atx_hash ~
    silent_space* ~
    // heading text: inline items, stopping before a run of spaces/tabs/`#`
    // that reaches the end of the line
    (!((space | "#")* ~ NEWLINE) ~ inline)+ ~
    // optional closing run of spaces and `#`
    (silent_space | "#")* ~
    // line ending and any blank lines after the heading
    endline? ~
    blank_line*
}
// Something that terminates a list: optional whitespace followed by a comment,
// or (as a lookahead only) a thematic break.
end_list = {
    space* ~ COMMENT
  | &thematic_break
}

// Bullet markers: the marker character followed by at least one space.
star_bullet = {
    "*" ~ silent_space+
}
dash_bullet = {
    "-" ~ silent_space+
}
plus_bullet = {
    "+" ~ silent_space+
}

// Any of the three bullet markers.
bullet = {
    star_bullet
  | dash_bullet
  | plus_bullet
}

// The number of an ordered-list item: one to three ASCII digits.
// NOTE(review): CommonMark permits up to nine digits -- confirm whether the
// limit of three is intentional.
list_index = {
    ASCII_DIGIT{1,3}
}

// An ordered-list marker: number, then `)` or `.`, then at least one space.
enumerator = _{
    list_index ~
    (")" | ".") ~
    silent_space+
}
// The first (inline) part of a list item. Repeats, while not at a blank line,
// either a plain inline item, or a newline followed by the current indentation
// stack (PEEK_ALL), at most one extra space, and another inline item. That
// continuation line is accepted only if it opens strong/emphasis text or does
// not start with a bullet or enumerator. One final line ending is optional.
inline_list_block = _{
(!blank_line ~ (
inline |
(NEWLINE ~ PEEK_ALL ~ silent_space{0,1} ~ (&strong | &emphasis | !(bullet | enumerator)) ~ inline)
))*
~ endline? }
// A further block belonging to the same list item: any single section
// (`section_contents`) that sits at the list's indentation and does not itself
// start a sibling item.
continued_list_block = _{
blank_line* ~ // Allow blank lines before the continuation block
PEEK_ALL ~ !end_list ~ // Match the indent level AND confirm there's no hard break for the list (only needed for a root, non-indented list)
(&silent_space{2,} | &strong | &emphasis | !(bullet | enumerator)) ~ // We've either indented more OR the row doesn't start with a bullet. Strong and emphasis are checked first since they can start with a bullet character but should be allowed
(section_contents) // Match a single section
}
// Items of a tight list: marker, tight contents, then an optional newline.
star_bullet_item_tight = {
    star_bullet ~
    list_item_contents_tight ~
    NEWLINE?
}
dash_bullet_item_tight = {
    dash_bullet ~
    list_item_contents_tight ~
    NEWLINE?
}
plus_bullet_item_tight = {
    plus_bullet ~
    list_item_contents_tight ~
    NEWLINE?
}
period_ordered_list_item_tight = {
    list_index ~ "." ~ silent_space+ ~
    list_item_contents_tight ~
    NEWLINE?
}
parenthesis_ordered_list_item_tight = {
    list_index ~ ")" ~ silent_space+ ~
    list_item_contents_tight ~
    NEWLINE?
}

// Items of a loose list: marker followed by (loose) contents.
star_bullet_item = {
    star_bullet ~
    list_item_contents
}
dash_bullet_item = {
    dash_bullet ~
    list_item_contents
}
plus_bullet_item = {
    plus_bullet ~
    list_item_contents
}
period_ordered_list_item = {
    list_index ~ "." ~ silent_space+ ~
    list_item_contents
}
parenthesis_ordered_list_item = {
    list_index ~ ")" ~ silent_space+ ~
    list_item_contents
}
// Tight bullet lists, one rule per marker character. Shape of each rule:
//   1. PUSH the list's leading spaces onto the stack;
//   2. match the first item;
//   3. match further items that follow with no blank line in between and start
//      with the pushed indentation (PEEK_ALL);
//   4. fail if blank lines followed by the same indentation and the same marker
//      come next (the tight form is rejected in that case);
//   5. DROP the pushed indentation.
star_bullet_list_tight = _{
    PUSH(silent_space*) ~
    star_bullet_item_tight ~
    (!blank_line ~ PEEK_ALL ~ star_bullet_item_tight)* ~
    !(blank_line+ ~ PEEK_ALL ~ "* ") ~
    DROP
}

dash_bullet_list_tight = _{
    PUSH(silent_space*) ~
    dash_bullet_item_tight ~
    (!blank_line ~ PEEK_ALL ~ dash_bullet_item_tight)* ~
    !(blank_line+ ~ PEEK_ALL ~ "- ") ~
    DROP
}

plus_bullet_list_tight = _{
    PUSH(silent_space*) ~
    plus_bullet_item_tight ~
    (!blank_line ~ PEEK_ALL ~ plus_bullet_item_tight)* ~
    !(blank_line+ ~ PEEK_ALL ~ "+ ") ~
    DROP
}

// Loose bullet lists: same push/peek/drop framing, but any number of blank
// lines may separate consecutive items.
star_bullet_list = _{
    PUSH(silent_space*) ~
    star_bullet_item ~
    (blank_line* ~ PEEK_ALL ~ star_bullet_item)* ~
    DROP
}

dash_bullet_list = _{
    PUSH(silent_space*) ~
    dash_bullet_item ~
    (blank_line* ~ PEEK_ALL ~ dash_bullet_item)* ~
    DROP
}

plus_bullet_list = _{
    PUSH(silent_space*) ~
    plus_bullet_item ~
    (blank_line* ~ PEEK_ALL ~ plus_bullet_item)* ~
    DROP
}
// Tight ordered lists, one rule per delimiter (`)` or `.`). The leading spaces
// are pushed, items must follow one another without blank lines at the pushed
// indentation, and the rule fails if blank lines followed by another marker of
// the same kind come next. The pushed indentation is dropped at the end.
// NOTE(review): the guard uses `space+` (space or tab) after the delimiter,
// whereas the item rules use `silent_space+` (space only) -- confirm this
// difference is intended.
parenthesis_ordered_list_tight = _{
    PUSH(silent_space*) ~
    parenthesis_ordered_list_item_tight ~
    (!blank_line ~ PEEK_ALL ~ parenthesis_ordered_list_item_tight)* ~
    !(blank_line+ ~ PEEK_ALL ~ list_index ~ ")" ~ space+) ~
    DROP
}

period_ordered_list_tight = _{
    PUSH(silent_space*) ~
    period_ordered_list_item_tight ~
    (!blank_line ~ PEEK_ALL ~ period_ordered_list_item_tight)* ~
    !(blank_line+ ~ PEEK_ALL ~ list_index ~ "." ~ space+) ~
    DROP
}

// Loose ordered lists: consecutive items may be separated by blank lines.
parenthesis_ordered_list = _{
    PUSH(silent_space*) ~
    parenthesis_ordered_list_item ~
    (blank_line* ~ PEEK_ALL ~ parenthesis_ordered_list_item)* ~
    DROP
}

period_ordered_list = _{
    PUSH(silent_space*) ~
    period_ordered_list_item ~
    (blank_line* ~ PEEK_ALL ~ period_ordered_list_item)* ~
    DROP
}
// Contents of a loose list item (compound-atomic): the inline part, then any
// number of continuation blocks, each optionally preceded by blank lines.
list_item_contents = ${
    inline_list_block ~ (blank_line* ~ continued_list_block)*
}

// Contents of a tight list item (compound-atomic): the inline part followed
// directly by any number of continuation blocks.
list_item_contents_tight = ${
    inline_list_block ~ continued_list_block*
}
// Any tight list, tried in this order.
list_tight = {
    dash_bullet_list_tight
  | plus_bullet_list_tight
  | star_bullet_list_tight
  | period_ordered_list_tight
  | parenthesis_ordered_list_tight
}

// Any loose list, tried in this order.
list_loose = {
    dash_bullet_list
  | plus_bullet_list
  | star_bullet_list
  | period_ordered_list
  | parenthesis_ordered_list
}

// A list whose first line (after optional spaces) starts with a bullet. The
// tight form is tried before the loose form.
bullet_list = {
    &(silent_space* ~ bullet) ~
    (list_tight | list_loose)
}

// A list whose first line (after optional spaces) starts with an enumerator.
// The tight form is tried before the loose form.
ordered_list = {
    &(silent_space* ~ enumerator) ~
    (list_tight | list_loose)
}
// Strong text: a doubled delimiter, not followed by whitespace, then one or
// more inline items up to the matching doubled delimiter.
underline_strong = _{
    "__" ~ !space ~
    (!"__" ~ inline)+ ~
    "__"
}
star_strong = _{
    "**" ~ !space ~
    (!"**" ~ inline)+ ~
    "**"
}
strong = {
    star_strong
  | underline_strong
}

// Emphasised text: a single delimiter, not followed by whitespace, then one or
// more items -- each either an inline item that does not start with the
// delimiter, or a `strong` span -- up to the closing delimiter.
star_emphasis = _{
    "*" ~ !space ~
    ((!"*" ~ inline) | strong)+ ~
    "*"
}
underline_emphasis = _{
    "_" ~ !space ~
    ((!"_" ~ inline) | strong)+ ~
    "_"
}
emphasis = {
    star_emphasis
  | underline_emphasis
}
// Backtick runs of an exact length. Spelling each length out as its own rule is
// verbose, but it lets every rule assert "exactly n backticks" by refusing a
// further backtick right after the run.
single_tick = _{
    "`" ~ !"`"
}
two_ticks = _{
    "`"{2} ~ !"`"
}
three_ticks = _{
    "`"{3} ~ !"`"
}
four_ticks = _{
    "`"{4} ~ !"`"
}
five_ticks = _{
    "`"{5} ~ !"`"
}
// First word of a fence's info string: one or more non-space characters, none
// of them a backtick.
info_string_language = {
    (!"`" ~ non_space)+
}

// The full info string: optional spaces, the language word, then any further
// space-separated words (silent; only the language word is emitted).
info_string = _{
    silent_space* ~
    info_string_language ~
    (
        silent_space* ~
        (!"`" ~ non_space)+ ~
        silent_space*
    )*
}

// A newline that is emitted as a token.
linebreak_literal = {
    NEWLINE
}
// An inline code span, delimited by exactly one or exactly two backticks.
// After the opening delimiter and one optional space, the content is one or
// more of:
//   - a run of non-space characters that are not backticks;
//   - (two-tick form only) a single backtick not followed by another;
//   - a space/tab, or a newline not followed by a blank line -- but never a
//     space/tab that is directly followed by the closing delimiter.
// One optional space may precede the closing delimiter. End of input is
// accepted in place of the closing delimiter.
code = {
single_tick ~ silent_space? ~ ((!"`" ~ non_space)+ | !(space ~ single_tick) ~ (space | linebreak_literal ~ !blank_line))+ ~ silent_space? ~ (single_tick | EOI)
| two_ticks ~ silent_space? ~ ((!"`" ~ non_space)+ | "`" ~ !"`" | !(space ~ two_ticks) ~ (space | linebreak_literal ~ !blank_line))+ ~ silent_space? ~ (two_ticks | EOI)
}
// A code block fenced by exactly three, four or five backticks (one
// alternative per opening length). Each alternative is:
//   - the opening fence, an optional info string and an optional newline;
//   - the body: runs of non-space, non-backtick characters; backtick runs
//     shorter than the fence; and spaces/tabs or newlines (emitted as
//     `linebreak_literal`) that do not lead into a closing fence;
//   - a newline, up to three spaces, and then either a closing fence of at
//     least the opening length or end of input (an unterminated block).
// The closing fence may be followed by a newline OR by end of input, so a block
// whose closing fence is the last text in the file (no trailing newline) is
// still recognised.
backtick_fenced_codeblock = _{
    three_ticks ~ info_string? ~ NEWLINE? ~
    ((!"`" ~ non_space)+ | "`"{1,2} ~ !"`" | !(space+ ~ "`"{3,}) ~ (space | linebreak_literal ~ !"`"{3,}))* ~
    NEWLINE ~ non_indent_space? ~
    ("`"{3,} ~ silent_space* ~ (NEWLINE | EOI) | EOI)
  | four_ticks ~ info_string? ~ NEWLINE? ~
    ((!"`" ~ non_space)+ | "`"{1,3} ~ !"`" | !(space+ ~ "`"{4,}) ~ (space | linebreak_literal ~ !"`"{4,}))* ~
    NEWLINE ~ non_indent_space? ~
    ("`"{4,} ~ silent_space* ~ (NEWLINE | EOI) | EOI)
  | five_ticks ~ info_string? ~ NEWLINE? ~
    ((!"`" ~ non_space)+ | "`"{1,4} ~ !"`" | !(space+ ~ "`"{5,}) ~ (space | linebreak_literal ~ !"`"{5,}))* ~
    NEWLINE ~ non_indent_space? ~
    ("`"{5,} ~ silent_space* ~ (NEWLINE | EOI) | EOI)
}
// Placeholder: this rule currently matches only the literal text of the TODO
// string below, so `~~~` fences are not recognised.
// NOTE(review): as a side effect, a section that begins with that exact text
// would be parsed as a fenced code block -- consider implementing the rule or
// removing it from `fenced_codeblock` until it is implemented.
tilde_fenced_codeblock = { "TODO: Implement tilde_fenced_codeblock rule" }
// A fenced code block; the backtick form is tried first.
fenced_codeblock = { backtick_fenced_codeblock | tilde_fenced_codeblock }
// The text of one indented code line: one or more groups of optional
// spaces/tabs followed by a non-space character. It therefore always ends on a
// non-space character and cannot be empty.
indented_codeblock_line = _{ !NEWLINE ~ (space* ~ non_space)+ }
// An indented code block.
//   - PUSH(indent+) records the first line's indentation on the stack.
//   - The first code line follows, with an optional newline.
//   - Each repetition is either a run of whitespace-only line endings that is
//     followed (lookahead only) by another line at the recorded indentation,
//     or the recorded indentation (PEEK_ALL) plus another code line.
//   - One final newline is optional; DROP then removes the recorded indentation.
indented_codeblock = {
PUSH(indent+) ~ indented_codeblock_line ~ linebreak_literal? ~
((space* ~ linebreak_literal)+ ~ &(PEEK_ALL ~ indented_codeblock_line) | PEEK_ALL ~ indented_codeblock_line)* ~
NEWLINE? ~
DROP
}
// Any code block; the fenced form is tried before the indented form.
code_block = _{ fenced_codeblock | indented_codeblock }
// Destination of a link reference definition: one or more non-space characters.
reference_source = { non_space+ }
// A link title in one of three delimiters: '...', "..." or (...). The body may
// contain backslash escapes, spaces/tabs and any non-space characters, but it
// stops at the closing delimiter and may not run into a blank line.
// The parenthesised form no longer requires a further `)` right after its own
// closing parenthesis. That lookahead could only succeed inside
// `directed_link` (which already demands the `)` itself), and it prevented
// `reference` from ever accepting a `(title)`.
link_title = {
    single_quote ~ (!single_quote ~ !blank_line ~ (escaped_special_char | space | non_space))* ~ single_quote
  | double_quote ~ (!double_quote ~ !blank_line ~ (escaped_special_char | space | non_space))* ~ double_quote
  | "(" ~ (!")" ~ !blank_line ~ (escaped_special_char | space | non_space))* ~ ")"
}
// A link reference definition: `[label]: destination`, with an optional title.
reference = {
    // up to three leading spaces
    non_indent_space? ~
    // a bracketed label (the input may not start with `[]`) followed by `]:`
    !"[]" ~ "[" ~ label ~ "]:" ~
    // spaces, or a newline plus optional spaces, before the destination
    (silent_space+ | NEWLINE ~ silent_space*) ~
    reference_source ~
    // optional title, on the same line or on the next one
    (
        (silent_space* ~ NEWLINE ~ silent_space* | silent_space+) ~
        link_title
    )?
}

// `[label]`, optionally followed by an empty `[]`.
shortcut_reference_link = {
    "[" ~ label ~ "]" ~
    "[]"?
}

// `[label][label]`; the second bracket pair may not be the empty `[]`.
full_reference_link = {
    "[" ~ label ~ "]" ~
    !"[]" ~
    "[" ~ label ~ "]"
}

// Either reference-link form; the full form is tried first.
reference_link = _{
    full_reference_link
  | shortcut_reference_link
}
// An inline link: `[label](source)` with an optional title before the `)`.
directed_link = {
    "[" ~ label ~ "](" ~
    source ~
    // optional title, separated by spaces or by a line break
    (
        (silent_space* ~ NEWLINE ~ silent_space* | silent_space+) ~
        link_title
    )? ~
    ")"
}

// `<source>`
autolink = {
    "<" ~ source ~ ">"
}

// Any link form, tried in this order.
link = {
    directed_link
  | reference_link
  | autolink
}

// An image: `!` followed by an inline link or a reference link.
image = {
    !"\\" ~ "!" ~
    (directed_link | reference_link)
}