// SpeechMarkdown PEG Grammar
// Based on the JavaScript implementation from speechmarkdown-js
// Horizontal whitespace: a single tab or space. Silent rule (no token emitted).
WHITESPACE = _{ "\t" | " " }
// A single line break in any of the three common encodings. Silent rule.
// "\r\n" is listed before "\r" so a Windows line ending is taken as one unit.
CRLF = _{ "\n" | "\r\n" | "\r" }
// Core text elements
// A single character of ordinary text: any character that is neither a line
// break nor one of the characters used as markup delimiters in this grammar.
//
// Line breaks are excluded so that a run of plain text stops at the end of the
// line and leaves the terminator for `line_end`; if plain text were allowed to
// swallow the newline, a line could never be followed by `line_end`.
// "{" and "}" are excluded so that `short_sub` ("{text}alias") is not consumed
// as plain text before it gets a chance to match.
plain_text_char = @{
    !( "\r" | "\n"
     | "[" | "]" | "(" | ")" | "{" | "}"
     | "~" | "+" | "#" | "!" | "*" | "@" | "_" | "/" | "-"
     | ";" | ":" | "\"" | "'" )
    ~ ANY
}
// One or more consecutive plain-text characters.
plain_text = @{ plain_text_char+ }
// Numbers and identifiers
// Decimal number: one or more digits, optionally followed by "." and zero or
// more further digits (so "1", "1.5" and "1." all match).
number = @{
    ASCII_DIGIT+ ~ ( "." ~ ASCII_DIGIT* )?
}
// One or more ASCII digits.
integer = @{
    ASCII_DIGIT+
}
// One or more ASCII letters or digits.
identifier = @{
    ASCII_ALPHANUMERIC+
}
// Time and break syntax
// Unit suffix accepted after a number in a duration.
time_unit = @{ "ms" | "s" }
// A duration: a number immediately followed by its unit, e.g. "500ms" or "1.5s".
time = @{
    number ~ time_unit
}
// Short break: a duration wrapped in square brackets, e.g. "[500ms]".
short_break = @{
    "[" ~ time ~ "]"
}
// Named break strengths. No keyword is a prefix of another, so the order of
// the alternatives does not affect which one matches.
break_strength = @{
    "x-weak" | "x-strong" | "none" | "weak" | "medium" | "strong"
}
// Break with a named strength, e.g. "[break:strong]".
break_tag = @{
    "[break:" ~ break_strength ~ "]"
}
// Named marker, e.g. "[mark:name]"; the name is a run of plain text.
mark_tag = @{
    "[mark:" ~ plain_text ~ "]"
}
// Emphasis syntax (with boundary checking to avoid false positives)
// Zero-width check that the next character is not an ASCII letter or digit
// (it also succeeds at end of input). Silent rule; consumes nothing.
// NOTE(review): this is a lookahead only, so it cannot inspect the character
// *before* a delimiter; placed directly in front of a literal delimiter it
// always succeeds.
boundary_char = _{ !( ASCII_ALPHA | ASCII_DIGIT ) }
// Strong emphasis: "++text++", not immediately followed by a letter or digit.
emphasis_strong = @{
    boundary_char ~ "++" ~ plain_text ~ "++" ~ boundary_char
}
// Moderate emphasis: "+text+", not immediately followed by a letter or digit.
emphasis_moderate = @{
    boundary_char ~ "+" ~ plain_text ~ "+" ~ boundary_char
}
// No emphasis: "~text~", not immediately followed by a letter or digit.
// The closing delimiter is "~", mirroring the opening one (it was previously
// "+", which made the rule match "~text+" instead of "~text~").
emphasis_none = @{ boundary_char ~ "~" ~ plain_text ~ "~" ~ boundary_char }
// Reduced emphasis: "-text-", not immediately followed by a letter or digit.
emphasis_reduced = @{
    boundary_char ~ "-" ~ plain_text ~ "-" ~ boundary_char
}
// Modifier syntax: (text)[key:value] or (text)[key1;key2:value2]
// Text inside the parentheses of a text modifier: one or more characters, none
// of which is ")", "[" or "]".
// The negative lookahead is grouped with ANY so it is re-checked before every
// character; written as `!(..) ~ ANY+` it would be checked once and ANY+ would
// then consume the rest of the input. ")" is excluded so the content stops at
// the closing parenthesis that `text_modifier` expects next.
modifier_text_content = @{ ( !( ")" | "[" | "]" ) ~ ANY )+ }
// Key of a modifier: one of the known keywords, or any other identifier.
//
// PEG choice is ordered and commits to the first alternative that matches, so
// a keyword must only be accepted when it is the *whole* key. The trailing
// `!ASCII_ALPHANUMERIC` enforces that: without it "vol" would win over
// "volume" and "date" would win over a custom key such as "datetime", leaving
// the remaining letters unparsed. Anything that is not exactly a keyword falls
// through to `identifier`.
modifier_key = @{
    ( ( "emphasis" | "address" | "number" | "cardinal" |
        "characters" | "chars" | "digits" | "drc" |
        "expletive" | "bleep" | "fraction" | "interjection" |
        "ordinal" | "telephone" | "phone" | "unit" | "time" |
        "date" | "whisper" | "ipa" | "sub" | "volume" | "vol" |
        "rate" | "pitch" | "timbre" | "lang" | "voice" |
        "excited" | "disappointed" | "dj" | "newscaster" )
      ~ !ASCII_ALPHANUMERIC )
    | identifier
}
// Unquoted modifier value: one or more characters, none of which is ";", "]",
// a double quote or a single quote.
// The lookahead is grouped with ANY so it applies to every character; written
// as `!(..) ~ ANY+` it would be checked once and ANY+ would then run to the end
// of the input, past the closing "]" or quote.
modifier_value_single = @{ ( !( ";" | "]" | "\"" | "'" ) ~ ANY )+ }
// Modifier value wrapped in a matching pair of single or double quotes.
modifier_value_quoted = @{
    ( "'" ~ modifier_value_single ~ "'" )
  | ( "\"" ~ modifier_value_single ~ "\"" )
}
// A modifier value: the quoted form is tried first, then the bare form.
modifier_value = @{
    modifier_value_quoted
  | modifier_value_single
}
// One modifier: a key, optionally followed by ":" and a value.
modifier = @{
    modifier_key ~ ( ":" ~ modifier_value )?
}
// One or more modifiers separated by ";".
modifier_list = @{
    modifier ~ ( ";" ~ modifier )*
}
// Text with modifiers attached: "(" text ")" immediately followed by
// "[" modifier-list "]".
text_modifier = @{ "(" ~ modifier_text_content ~ ")" ~ "[" ~ modifier_list ~ "]" }
// Section syntax: #[key:value]
// Section marker: "#[" followed by a modifier list and a closing "]".
section = @{
    "#[" ~ modifier_list ~ "]"
}
// Audio syntax: ![caption]["url"] or ![caption]['url']
// Caption of an audio element: zero or more characters, none of which is "]",
// a double quote or a single quote.
// The lookahead is grouped with ANY so it is re-checked before every character;
// written as `!(..) ~ ANY*` it would be checked once and ANY* would then
// consume the rest of the input, including the closing "]".
audio_caption = @{ ( !( "]" | "\"" | "'" ) ~ ANY )* }
// URL of an audio element: one or more characters, none of which is ")", a
// double quote or a single quote.
// Two fixes over the previous definition:
//  * ")" is written as a plain literal; "\)" is not an escape sequence pest
//    accepts inside a string, so the grammar could not be compiled;
//  * the lookahead is grouped with ANY so it applies to every character rather
//    than only the first (ANY+ alone would run to the end of the input).
audio_url = @{ ( !( ")" | "\"" | "'" ) ~ ANY )+ }
// Audio element: "![" caption "]" followed by "[" quote url quote "]".
// The opening and closing quote are matched independently, so this rule does
// not require them to be the same character.
audio = @{ "![" ~ audio_caption ~ "]" ~ "[" ~ quote ~ audio_url ~ quote ~ "]" }
// A single quote character of either kind.
quote = @{
    "'" | "\""
}
// IPA pronunciation: (/text/phoneme/) or (/text/)
// Display text of an IPA element: one or more characters up to the next "/".
// The lookahead is grouped with ANY so it is re-checked before every character
// (as `!("/") ~ ANY+` it was checked once and ANY+ then ran to end of input).
ipa_text = @{ ( !"/" ~ ANY )+ }
// Phoneme string: one or more characters up to the next "/".
ipa_phoneme = @{ ( !"/" ~ ANY )+ }
// Parenthesised IPA element: "(/text/phoneme/)" or, without a phoneme,
// "(/text/)".
// The phoneme and its trailing "/" form one optional group so that the short
// form needs only a single "/" before ")". The `!")"` guard keeps the phoneme
// from starting at the closing parenthesis of the short form.
ipa = @{ "(/" ~ ipa_text ~ "/" ~ ( !")" ~ ipa_phoneme ~ "/" )? ~ ")" }
// Bare phoneme between slashes: "/phoneme/".
bare_ipa = @{ "/" ~ ipa_phoneme ~ "/" }
// Substitution: {text}alias
// Text inside the braces of a substitution: one or more characters that are
// not braces. The lookahead is grouped with ANY so it applies to every
// character (as `!(..) ~ ANY+` it was checked once and ANY+ then consumed the
// rest of the input, including the closing "}").
sub_text = @{ ( !( "}" | "{" ) ~ ANY )+ }
// Alias following the closing brace: one or more characters that are neither
// braces nor line breaks. The alias has no closing delimiter of its own, so it
// is stopped at the end of the line to leave the line terminator unconsumed.
// NOTE(review): the alias still extends across spaces to the next brace or
// line end — confirm whether it should stop at whitespace instead.
sub_alias = @{ ( !( "}" | "{" | CRLF ) ~ ANY )+ }
// Substitution: "{text}alias".
short_sub = @{ "{" ~ sub_text ~ "}" ~ sub_alias }
// Line structures
// A line made only of basic inline elements (no text modifiers, audio or
// marks): one or more of the alternatives below, tried in the order listed.
simple_line = @{
    ( plain_text
    | short_break
    | emphasis_strong
    | emphasis_moderate
    | emphasis_none
    | emphasis_reduced
    | ipa
    | bare_ipa
    | short_sub
    )+
}
// A line that may contain any inline element: one or more of the alternatives
// below, tried in the order listed.
// `break_tag` ("[break:strength]") is included here; it was defined but not
// listed in any line rule, so a named break could never appear in a document.
// It cannot be confused with `short_break` or `mark_tag`: after "[" the three
// continue with a digit, "break:" and "mark:" respectively.
line_with_modifiers = @{
    ( text_modifier
    | plain_text
    | short_break
    | break_tag
    | emphasis_strong
    | emphasis_moderate
    | emphasis_none
    | emphasis_reduced
    | ipa
    | bare_ipa
    | short_sub
    | audio
    | mark_tag
    )+
}
// Terminator of a line: exactly one line break.
line_end = @{
    CRLF
}
// One or more consecutive line breaks that are not part of a paragraph.
// `paragraph` consumes the line break that ends its own line, so a single
// blank line after a paragraph (or the line break after a section marker)
// shows up here as just one CRLF. The previous definition required either
// start-of-input or at least two line breaks, and therefore rejected inputs
// such as "a\n\nb\n". Everything it accepted is still accepted.
empty_line = @{ CRLF+ }
// Paragraph: one or more lines
// One line of content followed by its terminator.
// The terminator is either a line break or the end of the input; the latter
// is checked with a lookahead (`&EOI`) so the final line of a document does
// not need a trailing newline and EOI is left for `document` to consume.
// The line itself always consumes at least one character, so repeating this
// rule cannot loop without progress.
paragraph = @{
    ( line_with_modifiers ~ ( line_end | &EOI ) ) |
    ( simple_line ~ ( line_end | &EOI ) )
}
// Document structure
// Top-level rule: the whole input, from start to end, as a sequence of
// section markers, paragraphs and empty lines in any order.
// NOTE(review): this rule is atomic (`@`); per the pest documentation, rules
// invoked from an atomic rule do not produce inner pairs — confirm callers do
// not expect a nested parse tree from it.
document = @{ SOI ~ ( section | paragraph | empty_line )* ~ EOI }