speechmarkdown-rust 0.2.9

High-performance SpeechMarkdown parser with multi-language bindings
Documentation
// SpeechMarkdown PEG Grammar
// Based on the JavaScript implementation from speechmarkdown-js

// Horizontal whitespace (tab or space). `WHITESPACE` is the name pest uses for
// implicit skipping between the elements of non-atomic rules; it is silent (`_`),
// so it never appears in the parse result.
WHITESPACE = _{ "\t" | " " }
// Any single line terminator: LF, CRLF or a lone CR. Silent as well.
CRLF = _{ NEWLINE }

// Core text elements
//
// `plain_text_char` is any single character that cannot start or delimit one of
// the inline constructs of this grammar. On top of the markup punctuation it
// excludes:
//   * "\r" and "\n" - otherwise `plain_text` swallows the line terminator and
//     the `line_end` that must follow every line can never match;
//   * "{" and "}"   - otherwise `plain_text`, which is tried before `short_sub`
//     in the line rules, consumes the braces and `short_sub` is unreachable.
plain_text_char = @{
    !("[" | "]" | "(" | ")" | "{" | "}" | "~" | "+" | "#" | "!" | "*" | "@" |
      "_" | "/" | "-" | ";" | ":" | "\"" | "'" | "\r" | "\n") ~ ANY
}
// A maximal run of plain characters (at least one).
plain_text = @{ plain_text_char+ }

// Numbers and identifiers
//
// `integer`: one or more ASCII digits.
integer = @{ ASCII_DIGIT+ }
// `number`: an integer, optionally followed by "." and zero or more digits
// (so "1", "1.5" and "1." are all accepted).
number = @{ integer ~ ("." ~ ASCII_DIGIT*)? }
// `identifier`: one or more ASCII letters or digits.
identifier = @{ (ASCII_ALPHA | ASCII_DIGIT)+ }

// Time and break syntax
//
// A duration is a number immediately followed by its unit, e.g. "500ms" or "1.5s".
// The two units share no common prefix, so their order is irrelevant.
time_unit = @{ "ms" | "s" }
time = @{
    number ~ time_unit
}
// Short break: a duration in square brackets, e.g. [500ms].
short_break = @{
    "[" ~ time ~ "]"
}
// Named break strengths. No literal is a prefix of another, so the ordered
// choice matches the same inputs in any order.
break_strength = @{
    "x-strong" | "strong" | "medium" | "weak" | "x-weak" | "none"
}
// Explicit break tag, e.g. [break:strong].
break_tag = @{
    "[break:" ~ break_strength ~ "]"
}
// Named marker, e.g. [mark:name]; the name is a run of plain text.
mark_tag = @{
    "[mark:" ~ plain_text ~ "]"
}

// Emphasis syntax (with boundary checking to avoid false positives)
//
// `boundary_char` is a zero-width check that the NEXT character is not an ASCII
// letter or digit (it also succeeds at end of input). After the closing marker
// this rejects things like "+a+b".
// NOTE(review): the leading `boundary_char` in each rule is evaluated right
// before the opening marker, which is never alphanumeric, so it always
// succeeds; PEG has no look-behind, so the character preceding the marker is
// not actually inspected.
boundary_char = _{ !ASCII_ALPHANUMERIC }

// ++text++
emphasis_strong = @{ boundary_char ~ "++" ~ plain_text ~ "++" ~ boundary_char }
// +text+
emphasis_moderate = @{ boundary_char ~ "+" ~ plain_text ~ "+" ~ boundary_char }
// ~text~  (the closing delimiter was "+", so `~text~` could never match and
// the mismatched `~text+` was accepted instead)
emphasis_none = @{ boundary_char ~ "~" ~ plain_text ~ "~" ~ boundary_char }
// -text-
emphasis_reduced = @{ boundary_char ~ "-" ~ plain_text ~ "-" ~ boundary_char }

// Modifier syntax: (text)[key:value] or (text)[key1;key2:value2]
//
// The text inside the parentheses: one or more characters, none of which is a
// square bracket or the closing ")". The negative lookahead has to be repeated
// together with `ANY`; written as `!(...) ~ ANY+` it is checked only once and
// `ANY+` then consumes the rest of the input, so the ")" that follows in
// `text_modifier` can never match.
modifier_text_content = @{ (!("[" | "]" | ")") ~ ANY)+ }
// Modifier key: one of the built-in keywords, or any other alphanumeric
// identifier.
//
// PEG choice is ordered and does not revisit an alternative once it has
// matched, so:
//   * "volume" is listed before its prefix "vol";
//   * a keyword only counts when it is not followed by another letter or digit,
//     which lets keys such as "subject" or "voices" fall through to
//     `identifier` instead of being cut short at "sub" / "voice".
modifier_key = @{
    ((
        "emphasis" | "address" | "number" | "cardinal" |
        "characters" | "chars" | "digits" | "drc" |
        "expletive" | "bleep" | "fraction" | "interjection" |
        "ordinal" | "telephone" | "phone" | "unit" | "time" |
        "date" | "whisper" | "ipa" | "sub" | "volume" | "vol" |
        "rate" | "pitch" | "timbre" | "lang" | "voice" |
        "excited" | "disappointed" | "dj" | "newscaster"
    ) ~ !ASCII_ALPHANUMERIC) |
    identifier
}

// Unquoted modifier value: one or more characters up to (not including) the
// next ";", "]" or quote character. The lookahead is applied to every
// character; `!(...) ~ ANY+` would test only the first one and then consume the
// remainder of the input.
modifier_value_single = @{ (!(";" | "]" | "\"" | "'") ~ ANY)+ }
// Quoted modifier value: the same content wrapped in a matching pair of double
// or single quotes.
modifier_value_quoted = @{
    ("\"" ~ modifier_value_single ~ "\"") |
    ("'" ~ modifier_value_single ~ "'")
}
// The quoted form is tried first; an unquoted value cannot start with a quote.
modifier_value = @{ modifier_value_quoted | modifier_value_single }

// A single modifier: a key with an optional ":value" part.
modifier = @{
    modifier_key ~ (":" ~ modifier_value)?
}
// One or more modifiers separated by ";".
modifier_list = @{
    modifier ~ (";" ~ modifier)*
}

// Inline modifier: parenthesised text followed by a bracketed modifier list.
text_modifier = @{ "(" ~ modifier_text_content ~ ")" ~ "[" ~ modifier_list ~ "]" }

// Section syntax: #[key:value]
section = @{
    "#[" ~ modifier_list ~ "]"
}

// Audio syntax: ![caption]("url") or ![caption]('url')
// The bracketed form ![caption]["url"] is accepted as well.
//
// Caption: zero or more characters, none of which is "]" or a quote.
// The lookahead is repeated per character so the caption stops at the "]".
audio_caption = @{ (!("]" | "\"" | "'") ~ ANY)* }
// URL: one or more characters, none of which is ")" or a quote.
// (The literal was written "\)", which is not an escape sequence pest
// recognises; a plain ")" is what is meant.)
audio_url = @{ (!(")" | "\"" | "'") ~ ANY)+ }
// The quoted URL may be wrapped in parentheses, as the syntax comment above
// describes, or in square brackets, which is the only form this rule used to
// spell out.
// NOTE(review): the opening and closing quote are matched independently, so a
// mixed pair such as "url' is accepted.
audio = @{
    "![" ~ audio_caption ~ "]" ~
    (("(" ~ quote ~ audio_url ~ quote ~ ")") |
     ("[" ~ quote ~ audio_url ~ quote ~ "]"))
}

// Either quote character.
quote = @{ "\"" | "'" }

// IPA pronunciation: (/text/phoneme/) or (/text/)
//
// Both parts are runs of one or more non-"/" characters. The lookahead must be
// repeated with every character; `!("/") ~ ANY+` consumes everything up to the
// end of the input and the closing "/" can never match.
ipa_text = @{ (!"/" ~ ANY)+ }
ipa_phoneme = @{ (!"/" ~ ANY)+ }
// The phoneme and its trailing "/" are optional as a unit, so both documented
// forms match: "(/text/phoneme/)" and "(/text/)". Requiring "/)" after an
// optional phoneme would instead demand "(/text//)" for the short form.
ipa = @{ "(/" ~ ipa_text ~ "/" ~ (ipa_phoneme ~ "/")? ~ ")" }
// Phoneme on its own, without surrounding parentheses: /phoneme/
bare_ipa = @{ "/" ~ ipa_phoneme ~ "/" }

// Substitution: {text}alias
//
// `sub_text` is one or more characters other than braces. The lookahead is
// repeated per character; `!(...) ~ ANY+` would run to the end of the input and
// the closing "}" could never match.
sub_text = @{ (!("}" | "{") ~ ANY)+ }
// `sub_alias` additionally stops at a line terminator so that it cannot consume
// the newline that ends the line.
// NOTE(review): the alias therefore extends to the next brace or end of line,
// spaces included - confirm whether it should stop at the first whitespace.
sub_alias = @{ (!("}" | "{" | CRLF) ~ ANY)+ }
short_sub = @{ "{" ~ sub_text ~ "}" ~ sub_alias }

// Line structures
//
// A simple line is a non-empty sequence of plain text and the inline
// constructs that take no modifier list. Alternatives are tried in the order
// written.
simple_line = @{
    (plain_text | short_break | emphasis_strong | emphasis_moderate |
     emphasis_none | emphasis_reduced | ipa | bare_ipa | short_sub)+
}

// A line that may additionally contain text modifiers, audio and the bracketed
// tags. `text_modifier` is tried first; when it fails (e.g. on "(/...") the
// later alternatives such as `ipa` still get their turn.
// `break_tag` is included here: it was defined but referenced by no rule, so
// "[break:...]" could never be parsed.
line_with_modifiers = @{
    (text_modifier | plain_text | short_break | emphasis_strong |
     emphasis_moderate | emphasis_none | emphasis_reduced | ipa |
     bare_ipa | short_sub | audio | mark_tag | break_tag)+
}

// Terminator of a content line: exactly one line break.
line_end = @{ CRLF }
// One or more consecutive line breaks that do not belong to a content line.
// A paragraph consumes its own terminator, so a single blank line after it
// leaves just one line break; requiring `SOI` or at least two breaks rejected
// that case, as well as the line break following a section. `CRLF+` still
// covers everything the previous definition matched.
empty_line = @{ CRLF+ }

// Paragraph: exactly one content line followed by its terminator. Consecutive
// lines are matched as consecutive paragraphs by the repetition in `document`.
//
// The terminator is either a line break or the end of the input (`&EOI` is a
// zero-width check), so a final line without a trailing newline is accepted.
// NOTE(review): every alternative of `simple_line` also appears in
// `line_with_modifiers`, so the second branch looks redundant; it is kept to
// leave the accepted language untouched.
paragraph = @{
    (line_with_modifiers ~ (line_end | &EOI)) |
    (simple_line ~ (line_end | &EOI))
}

// Document structure
//
// Entry rule: the whole input, from start (`SOI`) to end (`EOI`), must be a
// sequence of sections, paragraphs and empty lines, tried in that order at each
// position.
// NOTE(review): this rule is atomic (`@`), like every other rule in the file;
// per the pest documentation, rules called from an atomic rule produce no token
// pairs - confirm that callers only need the single `document` pair.
document = @{ SOI ~ (section | paragraph | empty_line)* ~ EOI }