ftml 1.41.0

Foundation Text Markup Language - a library to render Wikidot text as HTML
Documentation
//
// parse/lexer.pest
//
// ftml - Library to parse Wikidot text
// Copyright (C) 2019-2026 Wikijump Team
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

// Although pest is a parser generator, we use it here purely as a lexer:
// this grammar only splits the input into tokens.
//
// The original pest grammar tried to describe the language exhaustively, but
// because a formal grammar rejects any input it cannot match, it would raise
// an error instead of performing fallback logic.

// Meta {{{

// Complete input from start to finish: zero or more tokens anchored between
// start-of-input (SOI) and end-of-input (EOI).
// The rule is silent ("_"), so it produces no pair of its own.
document = _{ SOI ~ token* ~ EOI }

// An individual token
//
// pest's choice operator is ordered: alternatives are tried top to bottom
// and the first one that matches wins.
// Order determines priority (earlier = higher), so any token whose text
// begins with another token's text must be listed before it.
token = _{
    // Raw should have the highest priority
    raw |
    left_raw |
    right_raw |

    // Comments
    left_comment |
    right_comment |

    // Text-like
    // "url" comes before "email" and "identifier", either of which could
    // otherwise match the beginning of a URL.
    url |
    email |
    identifier |
    variable |
    double_quote |
    escaped_double_quote |
    escaped_backslash |

    // Special case to handle those pesky "[[[[" and "]]]]"s
    // These are "[[[" / "]]]" triple links wrapped in one extra literal
    // "[" / "]" bracket.
    (left_bracket ~ left_link) |
    (right_link ~ right_bracket) |

    // Brackets
    // Longer literals first: e.g. "[[[*" before "[[[" before "[[" before "[".
    left_link_star |
    left_link |
    left_math |
    left_block_anchor |
    left_block_star |
    left_block_end |
    left_block |
    left_bracket_anchor |
    left_bracket_star |
    left_bracket |
    left_parens |
    right_link |
    right_math |
    right_block |
    right_bracket |
    right_parens |

    // Formatting
    // These two-character markers must precede the single-character
    // symbols below ("__" before "_", "**" before "*", "##" before "#").
    bold |
    italics |
    underline |
    superscript |
    subscript |
    color |
    left_monospace |
    right_monospace |

    // Tables
    // Three-character variants before plain "||", and all of them
    // before the single "|" pipe.
    table_column_title |
    table_column_right |
    table_column_center |
    table_column |

    // Singular symbols
    // The suffixed clear-float forms and triple_dash precede their
    // shorter counterparts.
    clear_float_left |
    clear_float_right |
    clear_float |
    triple_dash |
    double_dash |
    double_tilde |
    left_double_angle |
    pipe |
    equals |
    colon |
    underscore |
    quote |
    heading |
    bullet_item |
    numbered_item |

    // Whitespace
    // paragraph_break (2+ newlines) must be tried before line_break.
    paragraph_break |
    line_break |
    space |

    // Generic fallback after all other rules have been tried
    other
}

// }}}

// Text {{{

// A run of one or more ASCII letters and digits.
//
// ASCII_ALPHANUMERIC already covers 0-9, so a separate ASCII_DIGIT
// alternative would never be reached and is not listed.
identifier = @{ ASCII_ALPHANUMERIC+ }

// An email-like span of the shape <local>@<domain>.<rest>, with no
// whitespace anywhere inside it.
email = @{
    // Local part: anything up to the "@" (or whitespace, which aborts)
    (!"@" ~ !" " ~ !"\t" ~ !NEWLINE ~ ANY)+ ~
    "@" ~
    // First domain label: anything up to the first "."
    (!"." ~ !" " ~ !"\t" ~ !NEWLINE ~ ANY)+ ~
    "." ~
    // Remainder: runs until the next whitespace character
    (!" " ~ !"\t" ~ !NEWLINE ~ ANY)+
}

// A URL: an "http", "https" or "ftp" scheme, then "://", then everything up
// to the first character that ends the link -- a newline, space, tab,
// double quote, pipe or square bracket.
//
// Tab is excluded together with space so that a URL stops at any horizontal
// whitespace, matching how the "email" and "space" rules treat tabs.
url = @{
    (("http" ~ "s"?) | "ftp") ~ "://" ~
    (!(NEWLINE | " " | "\t" | "\"" | "|" | "[" | "]") ~ ANY)+
}

// }}}

// Symbols {{{

// Raw markers: the symmetric "@@" plus the directional "@<" / ">@" pair.
// All are fixed literals; what they mean is decided by the consumer of
// the token stream, not by this grammar.
raw = @{ "@@" }
left_raw = @{ "@<" }
right_raw = @{ ">@" }

// Comment delimiters: "[!--" opens and "--]" closes.
left_comment = @{ "[!--" }
right_comment = @{ "--]" }

// Bracket tokens, all fixed literals.
//
// Many of these are prefixes of one another ("[" < "[[" < "[[[" < "[[[*",
// "]" < "]]" < "]]]"). The order of the definitions here does not matter;
// the "token" rule lists the longer forms first so that they win.
left_bracket = @{ "[" }
left_bracket_anchor = @{ "[#" }
left_bracket_star = @{ "[*" }
left_block = @{ "[[" }
left_block_end = @{ "[[/" }
left_block_anchor = @{ "[[#" }
left_block_star = @{ "[[*" }
left_math = @{ "[[$" }
left_link = @{ "[[[" }
left_link_star = @{ "[[[*" }
right_bracket = @{ "]" }
right_block = @{ "]]" }
right_math = @{ "$]]" }
right_link = @{ "]]]" }

// Doubled parentheses.
left_parens = @{ "((" }
right_parens = @{ "))" }

// Clear-float markers. Wikidot writes these with four or more tildes
// ("~~~~", "~~~~<", "~~~~>"), so a run of only three tildes must not be
// lexed as a clear-float; it is left for double_tilde / other instead.
// All three rules share the same minimum so they stay consistent.
clear_float = @{ "~"{4,} }
clear_float_left = @{ "~"{4,} ~ "<" }
clear_float_right = @{ "~"{4,} ~ ">" }
// Dashes and tildes: a run of three or more dashes is a single
// triple_dash; a pair of dashes or tildes forms the "double" tokens.
triple_dash = @{ "-"{3,} }
double_dash = @{ "--" }
double_tilde = @{ "~~" }

// Remaining fixed symbols.
left_double_angle = @{ "<<" }
pipe = @{ "|" }
equals = @{ "=" }
colon = @{ ":" }
underscore = @{ "_" }

// One or more ">" characters, taken as a single token.
quote = @{ ">"+ }

// One to six "+" characters, optionally followed by a single "*"
// (a "*" that is itself followed by another "*" is not consumed).
heading = @{ "+"{1,6} ~ ("*" ~ !"*")? }

// }}}

// Formatting {{{

// Inline formatting markers, each a fixed two-character literal.
// Monospace is the only one with distinct opening and closing forms;
// the others use the same marker on both sides.
bold = @{ "**" }
italics = @{ "//" }
underline = @{ "__" }
superscript = @{ "^^" }
subscript = @{ ",," }
color = @{ "##" }
left_monospace = @{ "{{" }
right_monospace = @{ "}}" }

// }}}

// Lists {{{

// A single "*" or "#" that is not immediately followed by the same
// character. The negative lookahead consumes nothing; it only keeps these
// rules from matching the first half of "**" (bold) or "##" (color).
bullet_item = @{ "*" ~ !"*" }
numbered_item = @{ "#" ~ !"#" }

// }}}

// Tables {{{

// Table cell delimiters. The three-character variants all begin with "||",
// so the "token" rule tries them before the plain table_column.
//
// NOTE: "table_column_left" ("||<") is not a real token,
//       it's not supported in Wikidot and adding it can
//       cause parsing issues. See WJ-1095.
table_column = @{ "||" }
table_column_right = @{ "||>" }
table_column_center = @{ "||=" }
table_column_title = @{ "||~" }

// }}}

// Variable {{{

// A variable reference of the form "{$name}": the literal "{$", an
// identifier, then a closing "}". No whitespace is permitted inside.
variable = @{ "{$" ~ identifier ~ "}" }

// }}}

// String {{{

// A bare double-quote character.
double_quote = @{ "\"" }
// A backslash followed by a double quote (the two characters \").
// The rule is atomic, so the nested double_quote emits no pair of its own.
escaped_double_quote = @{ "\\" ~ double_quote }
// Two consecutive backslashes (the two characters \\).
escaped_backslash = @{ "\\\\" }

// }}}

// Misc {{{

// A single newline.
line_break = @{ NEWLINE }
// Two or more consecutive newlines, taken as one token.
paragraph_break = @{ NEWLINE{2,} }
// A run of one or more spaces and/or tabs.
space = @{ (" " | "\t")+ }

// Fallback: exactly one character of any kind.
//
// To be consolidated in code
//
// Matching a single ANY rather than a greedy run gives every other rule
// another chance at the very next position. A greedy version would let
// the first unusual character turn the rest of the input into one big
// "other".
other = @{ ANY }

// }}}

// vim: set fdm=marker foldlevel=0: