pasta_dsl 0.2.1

Pasta DSL - Independent DSL parser and AST definitions
Documentation
// ################################################# WHITE SPACE
// Horizontal white space only: "\r" and "\n" are not part of `space_chars`,
// so none of the rules below ever consume a line terminator.
space_chars = _{
    " "            // U+0020 space
  | "\t"           // U+0009 tab
  | "\u{3000}"     // ideographic space
  | "\u{00A0}"     // no-break space
  | "\u{1680}"     // ogham space mark
  | "\u{2000}"     // en quad
  | "\u{2001}"     // em quad
  | "\u{2002}"     // en space
  | "\u{2003}"     // em space
  | "\u{2004}"     // three-per-em space
  | "\u{2005}"     // four-per-em space
  | "\u{2006}"     // six-per-em space
  | "\u{2007}"     // figure space
  | "\u{2008}"     // punctuation space
  | "\u{2009}"     // thin space
  | "\u{200A}"     // hair space
  | "\u{202F}"     // narrow no-break space
  | "\u{205F}"     // medium mathematical space
}

// One or more spaces, captured as a single atomic token.
ws = @{ space_chars+ }

// Optional spacing (zero or more); silent.
s = _{ space_chars* }

// Mandatory spacing (one or more); silent. Used for line indentation.
pad = _{ space_chars+ }

// Exactly one character that is neither a space nor "\r" / "\n".
no_ws = @{ !(space_chars | "\r" | "\n") ~ ANY }

// ################################################# id
// Identifier rules. `id` is the only rule here that produces a token;
// the others are silent building blocks.

// First identifier character (XID_START); does not include `_`.
xid1 = @{ XID_START }

// Following identifier character (XID_CONTINUE); includes `_`.
xidn = @{ XID_CONTINUE }

// An identifier may start with XID_START or with an underscore.
id1         = _{ xid1 | "_" }
idn         = _{ xidn }
// A continuation character other than `_`.
idn2        = _{ !"_" ~ xidn }
identifier  = _{ id1 ~ idn* }
dunder      = _{ "__" }
// `__name__` shape: double underscore, non-underscore characters, double underscore.
reserved_id = _{ dunder ~ idn2* ~ dunder }
// The negative lookahead rejects any input that begins with a `reserved_id`,
// so a reserved name followed by more characters is rejected as well.
id          = @{ !(reserved_id) ~ identifier }

// ################################################# marker and element
// Symbol rules. Each accepts the ASCII character and its full-width form
// (Unicode "Halfwidth and Fullwidth Forms" block, U+FF01..U+FF5E).
//
// The full-width forms are written as \u{...} escapes, the same way
// `cue_cmd_marker` and `cue_ident_part` write them. Before this change
// each rule listed the ASCII literal twice, so the second alternative
// could never match anything the first did not.
// NOTE(review): the full-width intent is inferred from those cue rules;
// confirm against the upstream grammar.
hash   = _{ "#" | "\u{FF03}" }
at     = _{ "@" | "\u{FF20}" }
amp    = _{ "&" | "\u{FF06}" }
ast    = _{ "*" | "\u{FF0A}" }
add    = _{ "+" | "\u{FF0B}" }
sub    = _{ "-" | "\u{FF0D}" }
mul    = _{ "*" | "\u{FF0A}" | "×" }
div    = _{ "/" | "\u{FF0F}" | "÷" }
modulo = _{ "%" | "\u{FF05}" }
equals = _{ "=" | "\u{FF1D}" }
dollar = _{ "$" | "\u{FF04}" }
lparen = _{ "(" | "\u{FF08}" }
rparen = _{ ")" | "\u{FF09}" }
gt     = _{ ">" | "\u{FF1E}" }
lt     = _{ "<" | "\u{FF1C}" }
pipe   = _{ "|" | "\u{FF5C}" }
// Ideographic comma, ASCII comma, full-width comma.
comma  = _{ "、" | "," | "\u{FF0C}" }
dot    = _{ "." | "\u{FF0E}" }
colon  = _{ ":" | "\u{FF1A}" }
semi   = _{ ";" | "\u{FF1B}" }

// Role-named aliases for the symbol rules. All are silent.
comment_marker = _{ hash }
attr_marker    = _{ amp }
global_marker  = _{ ast }
local_marker   = _{ "・" | "-" }
kv_marker      = _{ colon }
set_marker     = _{ equals }
call_marker    = _{ gt }
var_marker     = _{ dollar }
// `word_marker` and `fn_marker` are the same symbol; rules that use them
// are told apart by what follows the marker.
word_marker    = _{ at }
fn_marker      = _{ at }
// A comma with optional spacing on both sides.
comma_sep      = _{ s ~ comma ~ s }
actor_marker   = _{ modulo }

// ################################################# expr
// An expression is a flat sequence: one term followed by any number of
// (operator, term) pairs. The grammar itself encodes no operator
// precedence or associativity.
expr   = _{ term ~ s ~ bin* }
bin    = _{ bin_op ~ s ~ term ~ s }
bin_op = _{ add_op | sub_op | mul_op | div_op | modulo_op }
// Atomic wrappers so each operator appears as its own token.
add_op = @{ add }
sub_op = @{ sub }
mul_op = @{ mul }
div_op = @{ div }
modulo_op = @{ modulo }

// Alternatives are tried in this order (PEG ordered choice).
term = _{
    paren_expr
  | fn_call
  | var_ref
  | number_literal
  | string_literal
}

// Parenthesised sub-expression.
paren_expr = { lparen ~ s ~ expr ~ s ~ rparen }

// ################################################# var_ref
// Variable reference. The property form is tried first, then global, then local.
var_ref        =_{ var_ref_property | var_ref_global | var_ref_local }
// A local reference may name an identifier or a run of digits.
var_id         = { id | digit_id }
// Both forms also consume any spacing after the name.
var_ref_local  = { var_marker                 ~ var_id ~ s }
var_ref_global = { var_marker ~ global_marker ~ id     ~ s }

// ################################################# var_ref/set property
// Property access is introduced by `dollar` immediately followed by `modulo`.
property_marker  = _{ dollar ~ modulo }
// Starts with an ASCII letter; may continue with ASCII letters, digits, `_`, `.`, `(` and `)`.
property_id      = @{ ASCII_ALPHA ~ ( "_" | "." | "(" | ")" | ASCII_DIGIT | ASCII_ALPHA )* }
var_ref_property = { property_marker ~ property_id ~ s }
var_set_property = { property_marker ~ property_id ~ s ~ set }

// ################################################# var_set
// Assignment. Alternatives are tried in this order.
var_set        =_{ var_set_property | var_set_global | var_set_local | var_set_none }
var_set_local  = { var_marker ~                 id ~ s ~ set }
var_set_global = { var_marker ~ global_marker ~ id ~ s ~ set }
// Assignment with no target name: the marker is followed directly by `set`.
var_set_none   = { var_marker ~                            set }
// Right-hand side: `expr` is tried before `word_ref`.
set            =_{ set_marker ~ s ~ ( expr | word_ref ) }

// ################################################# fn_call
// Function call: marker, optional global marker, name, then a mandatory
// argument list. The global form is tried first.
fn_call        =_{ fn_call_global | fn_call_local }
fn_call_local  = { fn_marker ~ id ~ args }
fn_call_global = { fn_marker ~ global_marker ~ id ~ args }

// ################################################# args
// Parenthesised, comma-separated argument list; may be empty.
args           = { lparen ~ s ~ (arg ~ (comma_sep ~ arg)*)? ~ s ~ rparen }
// A keyed argument (`id : expr`) is tried before a positional one.
arg            =_{ key_arg | positional_arg }
key_arg        = { key_expr }
positional_arg = { expr }

// ################################################# string_literal
// Fenced strings use the pest stack: the opening fence pushes the text
// that must close the string, the contents run until that text is seen,
// and the close rule pops it.
// A fenced string is tried first; an empty literal is the fallback.
string_literal  = _{ string_fenced | string_blank }
// Run of non-space characters that does not start a fence.
string_nofenced = @{ (!strfence ~ no_ws)+ }
// Empty string written as `""` or `「」`.
string_blank    = @{ "\"\"" | "「」" }
string_fenced   = _{ strfence ~ string_contents ~ strclose }

// At least one character, up to (not including) the text on top of the stack.
// ANY also matches line terminators, so contents are not limited to one line.
string_contents = @{ (!PEEK ~ ANY)+ }
// Matches the pushed closing fence and removes it from the stack.
strclose         = _{ POP }

// Longer Japanese fences are tried before shorter ones.
strfence = _{
    slfence_ja4
  | slfence_ja3
  | slfence_ja2
  | slfence_ja1
  | slfence_en
}

// N opening brackets push N closing brackets as the expected close.
slfence_ja1 = _{ "「"{1} ~ PUSH_LITERAL("」") }
slfence_ja2 = _{ "「"{2} ~ PUSH_LITERAL("」」") }
slfence_ja3 = _{ "「"{3} ~ PUSH_LITERAL("」」」") }
slfence_ja4 = _{ "「"{4} ~ PUSH_LITERAL("」」」」") }
// One or more double quotes; the same run of quotes closes the string.
slfence_en = _{ PUSH("\""+) }

// ################################################# number_literal
// Optional minus sign, integer digits, optional fractional part.
number_literal = @{ sub? ~ digit+ ~ (dot ~ digit+)? }
// ASCII digit or full-width digit (U+FF10..U+FF19).
// The second alternative used to be '0'..'9', which ASCII_DIGIT already
// covers, so it could never match anything new.
// NOTE(review): the full-width range is inferred from how other rules
// (e.g. `cue_cmd_marker`) pair ASCII with full-width forms; confirm
// against the upstream grammar.
digit          =  { ASCII_DIGIT | '\u{FF10}'..'\u{FF19}' }
// A run of digits used as a name (see `var_id`, `actors_item`).
digit_id       = @{ digit+ }

// ################################################# key_value
// `key : value` forms. All share the shape `id`, optional spacing,
// `kv_marker`, optional spacing, value.
key_literal = { id ~ s ~ kv_marker ~ s ~ ( number_literal | string_literal ) }
key_expr    = { id ~ s ~ kv_marker ~ s ~ expr }
// One or more comma-separated identifiers.
key_list    = { id ~ ( comma_sep ~ id )* }
key_words   = { key_list ~ s ~ kv_marker ~ s ~ words }

// ################################################# word_dic
// Comma-separated list of words; a trailing comma is accepted.
words         =  { word ~ ( comma_sep ~ word )* ~ comma_sep? }
word          = _{ string_literal | sakura_script | word_nofenced }
// Unfenced word: everything up to the next `comma_sep` or line terminator.
// Nothing else stops it, so spacing and `comment_marker` characters before
// the end of the line become part of the word.
word_nofenced = @{ (!(comma_sep | "\r" | "\n") ~ ANY)+ }

// ################################################# actors
// Comma-separated actor entries; a trailing comma is accepted.
actors       = _{ actors_item ~ ( comma_sep ~ actors_item )* ~ comma_sep? }
// Actor name with an optional `= <digits>` suffix.
actors_item  =  { id ~ ( s ~ set_marker ~ s ~ digit_id )? }

// ################################################# attr
// One attribute: marker, `key : value`, then optional trailing spacing.
attr  =  { attr_marker ~ key_attr ~ s }
attrs = _{ attr+ }
key_attr = { id ~ s ~ kv_marker ~ s ~ attr_value }
// A number is tried first, then a string literal, then a bare string.
attr_value = _{ number_literal | string_literal | attr_string }
// Bare value: non-space characters, stopping at `kv_marker`,
// `attr_marker` or `comment_marker`.
attr_string = @ { ( !( kv_marker | attr_marker | comment_marker ) ~ no_ws )+ }

// ################################################# call
call_target_expr = { expr }
// Scene call: marker, then a plain identifier (tried first) or an
// expression, then an optional argument list.
call_scene = { call_marker ~ (id | call_target_expr) ~ s ~ args? }

// ################################################# scene
// Scene header body: a name followed by optional attributes.
scene = _{ id ~ s ~ attrs? }

// ################################################# action
// One element of a spoken line. Alternatives are tried in this order:
// the doubled-marker escapes come before the constructs that start with
// the same marker, and plain `talk` text is the last resort.
action  =_{ at_escape | dollar_escape | sakura_escape | fn_call | word_ref | var_ref | sakura_script | talk }
actions = { action+ }

// Word reference: marker, name, then optional trailing spacing.
word_ref        = { word_marker ~ id ~ s}

// A marker written twice in a row.
at_escape     = @{ at{2} }
dollar_escape = @{ dollar{2} }
sakura_escape = @{ sakura_marker{2} }
// Any single character that is not a marker (`at`, `dollar`, backslash) or a line end.
talk_word = _{ !(at | dollar | sakura_marker | eol) ~ ANY }
talk      = @{ talk_word+ }

// ################################################# sakura_script
// Backslash, a tag name, and an optional bracketed argument.
sakura_script = @{ sakura_marker ~ sakura_id ~ sakura_args? }
// Tag name: ASCII letters, ASCII digits and `_ ! - + * ? &`.
sakura_id     = @{ (('a'..'z') | ('A'..'Z') | ('0'..'9') | "_" | "!" | "-" | "+" | "*" | "?" | "&" )+ }
sakura_args   =  { sakura_open ~ sakura_body ~ sakura_close }
// Matches `[` and pushes `]` as the expected closing text.
sakura_open   = _{ "[" ~ PUSH_LITERAL("]") }
// Quoted strings are tried first, so text inside a `sakura_str` does not
// end the body; otherwise any character up to the pushed `]`.
sakura_body   = @{ ( sakura_str | (!PEEK ~ ANY) )* }
sakura_close  = _{ POP }

// Double-quoted string inside a bracketed argument.
sakura_str       =  { sakura_str_open ~ sakura_str_body ~ sakura_str_close }
sakura_str_open  = _{ PUSH("\"") }
// A doubled quote (`PEEK{2}`) is consumed as content; a single quote ends the body.
sakura_str_body  = @{ ( PEEK{2} | (!PEEK ~ ANY) )* }
sakura_str_close = _{ POP }

// A single backslash.
sakura_marker     = _{ "\\" }

// ################################################# eol
// A line terminator. End of input is not accepted here, so every rule
// built on `eol` needs an actual newline.
eol             = _{ NEWLINE }
// Optional spacing, an optional comment running to the end of the line,
// then the line terminator.
or_comment_eol  = _{ s ~ ( comment_marker ~ (!NEWLINE ~ ANY)* )? ~ eol }

// ################################################# cue command
// ASCII `!` or full-width exclamation mark (U+FF01).
cue_cmd_marker   = _{ "!" | "\u{FF01}" }
// Indented line: marker, command name, optional scope, optional
// arguments, then an optional comment and the line end.
cue_cmd_line     =  { pad ~ cue_cmd_marker ~ cue_cmd_name ~ cue_cmd_scope? ~ cue_cmd_args? ~ or_comment_eol }
cue_cmd_name     = @{ id }
// Scope suffix introduced by `at`.
cue_cmd_scope    =  { at ~ cue_scoped_ident }
// One part, or two parts joined by `colon`.
cue_scoped_ident = @{ cue_ident_part ~ ( colon ~ cue_ident_part )? }
// Any run of characters excluding spacing, parentheses, commas, colons,
// at-signs (ASCII, full-width and ideographic-comma forms) and line terminators.
cue_ident_part   = @{ (!(space_chars | "(" | ")" | "\u{FF08}" | "\u{FF09}" | "," | "\u{3001}" | "\u{FF0C}" | ":" | "\u{FF1A}" | "\r" | "\n" | "@" | "\u{FF20}") ~ ANY)+ }
// Parenthesised argument list; may be empty.
cue_cmd_args     =  { lparen ~ s ~ cue_arg_list? ~ s ~ rparen }
cue_arg_list     = _{ cue_arg ~ ( comma_sep ~ cue_arg )* }
// Tried in this order; the bare-identifier form is the fallback.
cue_arg          = _{ cue_arg_at_ref | number_literal | string_literal | cue_arg_id }
cue_arg_at_ref   =  { at ~ id }
// Like `cue_ident_part`, but colons and at-signs are allowed.
cue_arg_id       = @{ (!(space_chars | "(" | ")" | "\u{FF08}" | "\u{FF09}" | "," | "\u{3001}" | "\u{FF0C}" | "\r" | "\n") ~ ANY)+ }

// ################################################# choice
// ASCII `?` or full-width question mark (U+FF1F).
// The second alternative used to repeat the ASCII literal and so could
// never match anything new; it is now written as an escape, the same way
// `cue_cmd_marker` writes its full-width form.
// NOTE(review): the full-width intent is inferred; confirm against the
// upstream grammar.
question_marker = _{ "?" | "\u{FF1F}" }
// Optional label in single Japanese brackets; uses the string-fence stack rules.
choice_label    =  { slfence_ja1 ~ string_contents ~ strclose }
// Indented line: word marker, question marker, target name, optional
// label, then an optional comment and the line end.
choice_line     =  { pad ~ word_marker ~ question_marker ~ id ~ choice_label? ~ or_comment_eol }

// ################################################# code_block
// Fenced code block. The opening run of backticks is pushed on the pest
// stack and the same run must close the block.
code_block    =  { code_open ~ code_contents ~ code_close }
// Three or more backticks, an optional identifier, then the line end.
code_open     = _{ PUSH("`"{3,}) ~ id? ~ eol }
// At least one character, up to (not including) the pushed fence.
code_contents = @{ (!PEEK ~ ANY)+ }
// The closing fence, then an optional comment and the line end.
code_close    = _{ POP ~ or_comment_eol }

// ################################################# line
// Line-level rules. Rules that begin with `pad` require leading spacing;
// the others start at the first column.
blank_line      =_{ or_comment_eol }
file_attr_line  = { attrs ~ or_comment_eol }
file_word_line  = { word_marker ~ key_words ~ or_comment_eol }

// Actor header: actor marker followed by a name.
actor_line = { actor_marker ~ id ~ or_comment_eol }

// Global scene header, with a name or with the marker alone.
global_scene_line          = { global_marker ~ scene ~ or_comment_eol }
global_scene_continue_line = { global_marker ~ or_comment_eol }

// Indented attribute and word-definition lines.
global_scene_attr_line = { pad ~  attrs ~ or_comment_eol }
global_scene_word_line = { pad ~ word_marker ~ key_words ~ or_comment_eol }

// Indented local scene header.
local_scene_line = { pad ~ local_marker ~ scene ~ or_comment_eol }

// Spoken lines end with a bare `eol`, not `or_comment_eol`: `talk` does
// not stop at `comment_marker`, so no trailing comment is recognised here.
action_line          = { pad ~ id ~ s ~ kv_marker ~ s ~ actions ~ eol }
// Same as `action_line` but without a speaker name before the marker.
continue_action_line = { pad ~ kv_marker ~ s ~ actions ~ eol }
var_set_line         =_{ pad ~ var_set ~ or_comment_eol }
call_scene_line      =_{ pad ~ call_scene ~ or_comment_eol }
scene_actors_line    = { pad ~ actor_marker ~ actors ~ or_comment_eol }

// ################################################# scope
// File-level section: attribute lines, word-definition lines and blank lines.
file_scope      = { file_scppe_item+ }
// NOTE(review): the name looks like a typo of `file_scope_item`. It is left
// as is because the rule name becomes a variant of the generated `Rule` enum.
file_scppe_item =_{ file_attr_line | file_word_line | blank_line }

// Actor section: header line followed by any number of items.
actor_scope      =  { actor_line ~ actor_scope_item* }
actor_scope_item = _{ global_scene_attr_line | global_scene_word_line | var_set_line | code_scope | blank_line }

// Global scene: header, initial lines, code blocks, the unnamed leading
// local scope, then any number of named local scopes.
global_scene_scope = { global_scene_start ~ global_scene_init* ~ code_scope* ~ local_start_scene_scope ~ local_scene_scope* }
global_scene_start = { global_scene_line | global_scene_continue_line }
global_scene_init  =_{ global_scene_attr_line | global_scene_word_line | scene_actors_line | blank_line }

// Local scope with no header line; every part is optional, so it can match empty input.
local_start_scene_scope = {                     local_scene_item* ~ code_scope* }
// Local scope introduced by a `local_scene_line`.
local_scene_scope       = { local_scene_start ~ local_scene_item* ~ code_scope* }

local_scene_start =_{ local_scene_line }
// Alternatives are tried in this order.
local_scene_item  =_{ var_set_line | call_scene_line | cue_cmd_line | choice_line | action_line | continue_action_line | blank_line }

// A code block followed by any number of blank lines.
code_scope = _{ code_block ~ blank_line* }

// ################################################# file
// Entry rule: the whole input is a sequence of file, global-scene and
// actor sections, followed by optional trailing spacing.
file = _{ SOI ~ ( file_scope | global_scene_scope | actor_scope )* ~ s ~ EOI }