//
// Markdown parser grammar
// Author: Zudilova Oryna
//
// This grammar defines rules for parsing CommonMark Markdown syntax.
// Each rule corresponds to a specific Markdown element and is documented below.
//
// Single character and whitespace handling
/// Any single character except a newline.
/// Non-silent, so every match emits its own `character` pair.
character = { !NEWLINE ~ ANY }
/// A single space or tab.
/// Silent (`_`), so it emits no pair of its own. The name is lowercase, so this
/// is an ordinary rule that must be referenced explicitly; it is not pest's
/// implicit `WHITESPACE` rule.
whitespace = _{ " " | "\t" }
// Links and images - core inline elements
// Example: [Click here](https://example.com)
// Example: ![Alt text](image.png)
/// Inline link: `[text](url)`.
/// The `"]("` literal requires the closing bracket to be immediately followed
/// by the opening parenthesis.
link = { "[" ~ link_content ~ "](" ~ link_url ~ ")" }
/// Inline image: `![alt text](url)`.
/// Shaped like `link`, but introduced by `![` and built from `image_alt` and
/// `image_url`. The rule has to consume input: a body that matches the empty
/// string would make `inline_content+` a non-progressing repetition.
image = { "![" ~ image_alt ~ "](" ~ image_url ~ ")" }
// Link and image content parsing
/// Visible text of a link: one or more `link_char`.
link_content = { link_char+ }
/// Alternative text of an image: one or more `image_char`.
image_alt = { image_char+ }
/// Link destination: one or more `url_char`.
link_url = { url_char+ }
/// Image source: one or more `url_char` (same character set as `link_url`).
image_url = { url_char+ }
// Characters allowed in different contexts.
// Sequence (`~`) binds tighter than choice (`|`), so each rule reads as
// "(a character that is neither the terminator nor a backslash) or
// (an escape sequence)". The parentheses only spell that grouping out.
/// One character of link text: anything except `]` and backslash, or an escape.
link_char = { (!("]" | "\\") ~ ANY) | escape_sequence }
/// One character of image alt text: same set as `link_char`.
image_char = { (!("]" | "\\") ~ ANY) | escape_sequence }
/// One character of a URL: anything except `)` and backslash, or an escape.
url_char = { (!(")" | "\\") ~ ANY) | escape_sequence }
// Text formatting - bold, italic, etc.
// Example: **bold text**, *italic text*, ~~strikethrough~~
/// Bold span delimited by `**` on both sides.
bold_formatting = { "**" ~ bold_content ~ "**" }
/// Italic span delimited by a matching pair of `*` or a matching pair of `_`.
/// `italic_content` may be empty, so two adjacent delimiters (`**` or `__`)
/// also satisfy this rule when it is tried at that position.
italic_formatting = { ("*" ~ italic_content ~ "*") | ("_" ~ italic_content ~ "_") }
/// Strikethrough span delimited by `~~` on both sides.
strikethrough_formatting = { "~~" ~ strikethrough_content ~ "~~" }
/// Underline span delimited by `__` on both sides.
underline_formatting = { "__" ~ underline_content ~ "__" }
// Content rules for each formatting type (consume until closing delimiter).
// Each rule may match zero characters, and `ANY` is unrestricted, so the
// content may contain newlines.
/// Everything up to (not including) the next `**`.
bold_content = { (!( "**" ) ~ ANY)* }
/// Everything up to (not including) the next `*` or `_`, whichever comes first.
italic_content = { (!( "*" | "_" ) ~ ANY)* }
/// Everything up to (not including) the next `~~`.
strikethrough_content = { (!( "~~" ) ~ ANY)* }
/// Everything up to (not including) the next `__`.
underline_content = { (!( "__" ) ~ ANY)* }
// Escape sequences for special characters
// Example: \* for literal asterisk
/// A backslash followed by exactly one character that is not a space, tab or
/// newline. The escaped character is emitted as a nested `character` pair.
escape_sequence = { "\\" ~ (!whitespace ~ character) }
// Inline code with backticks
/// A backtick, zero or more non-backtick characters, and a closing backtick.
/// The content may be empty and, because `ANY` is unrestricted, may span lines.
inline_code = { "`" ~ (!"`" ~ ANY)* ~ "`" }
// Lists
// Unordered lists: - item or * item
// Ordered lists: 1. item, 2. item, etc.
// In both rules the marker must be followed by a space or tab, and the item
// text is taken as raw characters up to the end of the line (inline formatting
// inside an item is not parsed). The trailing newline is optional so the last
// line of the input can be an item.
/// Unordered item: `-` or `*`, a space/tab, the rest of the line.
unordered_list_item = { ("-" | "*") ~ whitespace ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
/// Ordered item: one or more ASCII digits, `.`, a space/tab, the rest of the line.
ordered_list_item = { ASCII_DIGIT+ ~ "." ~ whitespace ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
// Document lists
// A list is a run of one or more items. Each item consumes at most its own
// trailing newline, so the items of one list must sit on consecutive lines.
/// One or more consecutive `unordered_list_item`s.
document_unordered_list = { unordered_list_item+ }
/// One or more consecutive `ordered_list_item`s.
document_ordered_list = { ordered_list_item+ }
// Thematic break (horizontal rule)
// Example: ---, *****, ________
/// Three or more identical `-`, `*` or `_` characters, optional trailing
/// spaces/tabs, then a newline or the end of input.
/// CommonMark allows any run of three or more matching markers; the `{3,}`
/// repetition accepts longer runs while still matching exactly three.
thematic_break = { ("-"{3,} | "*"{3,} | "_"{3,}) ~ whitespace* ~ (NEWLINE | EOI) }
// Plain text characters (no formatting markers)
/// Any character that is not one of `*`, `_`, `~`, `[`, `!`, `\`, `#`, a
/// backtick, `-`, an ASCII digit, or a newline.
/// NOTE(review): `#`, `-` and digits are presumably excluded so that text stops
/// before heading and list markers — confirm. As written, these characters end
/// a plain-text run wherever they occur in a line, not only at its start.
plain_char = { !("*" | "_" | "~" | "[" | "!" | "\\" | "#" | "`" | "-" | ASCII_DIGIT | NEWLINE) ~ ANY }
// Headings with different levels
// Example: # Heading 1, ## Heading 2, ### Heading 3
/// Silent dispatcher over the heading levels: it emits no pair of its own,
/// only the pair of whichever `hN_heading` rule matched.
document_heading = _{ h1_heading | h2_heading | h3_heading }
// Each level is its run of `#`, one required space or tab, the rest of the
// line as raw characters, and an optional newline. Because the hashes must be
// followed by whitespace, a shorter prefix does not match a deeper heading:
// `h1_heading` fails on "## ..." since the character after the first `#` is
// another `#`.
/// Level-1 heading: `# text`.
h1_heading = { "#" ~ whitespace ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
/// Level-2 heading: `## text`.
h2_heading = { "##" ~ whitespace ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
/// Level-3 heading: `### text`.
h3_heading = { "###" ~ whitespace ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
// Blockquotes with optional line prefixes
// Example:
// > This is a quote
// > Second line
/// A blockquote: one or more consecutive `quote_line`s.
document_quote = { quote_line+ }
/// One quoted line: `>`, at most one space or tab, then either a line of
/// paragraph text or a bare newline (an empty quoted line).
quote_line = { ">" ~ whitespace? ~ (paragraph_text | blank_line) }
// Code blocks with language support
// Example:
// ```rust
// println!("Hello");
// ```
/// Fenced code block: an opening fence, an optional language line, a body of
/// at least one character, and a closing fence. One newline is allowed
/// directly before the closing fence and one directly after it.
/// The language group matches only when `language_spec` is followed by
/// optional spaces/tabs and a newline; otherwise the group is skipped and
/// everything after the opening fence is treated as body.
code_fence = { "```" ~ (language_spec ~ whitespace* ~ NEWLINE)? ~ code_body ~ NEWLINE? ~ "```" ~ NEWLINE? }
/// Language tag: optional leading spaces/tabs, then one or more ASCII letters.
/// Tags that contain digits or punctuation are not matched by this rule.
language_spec = { whitespace* ~ ('a'..'z' | 'A'..'Z')+ }
/// Body text: every character up to, but not including, the closing fence or
/// the newline that directly precedes it. At least one character is required,
/// so two fences with nothing between them do not match `code_fence`.
code_body = { (!(NEWLINE? ~ "```") ~ ANY)+ }
// Horizontal rules (thematic breaks) are handled by the `thematic_break` rule
// defined earlier in this file.
// Example: ---, ***, ___
// Paragraphs - basic text blocks
/// A paragraph: one or more consecutive lines of inline content.
document_paragraph = { paragraph_text+ }
/// One paragraph line: at least one inline element, then an optional newline.
paragraph_text = { inline_content+ ~ line_break? }
/// Newline that ends a paragraph line. Silent, so it emits no pair.
line_break = _{ NEWLINE }
// Blank lines for separation
/// A single newline, emitted as its own `blank_line` pair.
blank_line = { NEWLINE }
// All possible inline elements in text
/// One inline element. Silent: only the pair of the matched alternative is
/// emitted. Alternatives are tried top to bottom and the first match wins.
inline_content = _{
    // `!`- and `[`-introduced constructs first
    image
  | link
    // delimiter-based spans
  | text_formatting
  | inline_code
    // backslash escapes
  | escape_sequence
    // anything left that contains no marker characters
  | plain_text
}
/// One inline formatting span. Silent: only the pair of the matched
/// alternative is emitted.
/// Choice is ordered, so both two-character delimiters built from italic
/// markers (`**` and `__`) must be tried before `italic_formatting`. Italic
/// content may be empty, so if italic ran first, `__text__` would be consumed
/// as an empty italic on the leading `__` and `underline_formatting` could
/// never match.
text_formatting = _{
    bold_formatting
  | underline_formatting
  | italic_formatting
  | strikethrough_formatting
  | image
  | link
}
// Plain text spans
/// A run of one or more `plain_char`. Atomic (`@`), so the run is emitted as a
/// single `plain_text` pair with no per-character `plain_char` pairs inside.
plain_text = @{ plain_char+ }
// Main document structure
/// Entry rule: start of input, then any number of blocks, each followed by
/// zero or more newlines.
/// `EOI` is optional, so this rule also succeeds when parsing stops before the
/// end of the input; callers that need the whole input consumed have to check
/// how far the match extends.
document_structure = { SOI ~ (document_block ~ NEWLINE*)* ~ EOI? }
// All possible document blocks
/// One block-level element. Alternatives are tried top to bottom and the first
/// match wins.
document_block = {
    // constructs identified by a leading marker
    document_heading
  | document_quote
  | code_fence
    // list items need a space or tab after the marker, so `---` and `***`
    // are not matched here and reach `thematic_break`
  | document_unordered_list
  | document_ordered_list
  | thematic_break
    // tried last, after every marker-based block
  | document_paragraph
}