dbgen 0.8.0

Generate random test cases for databases
Documentation
// Characters skipped implicitly between the tokens of non-atomic rules:
// space, tab, carriage return, line feed, vertical tab and form feed.
WHITESPACE = _{
    " "    |
    "\t"   |
    "\r"   |
    "\n"   |
    "\x0b" |
    "\x0c"
}
// Comments, skipped implicitly between the tokens of non-atomic rules.
//
// Three forms are recognised:
//   1. `-- ...` line comments. They run to the end of the line; the last line
//      of the input may have no trailing newline, so end-of-input (`!ANY`)
//      terminates the comment as well. A predicate is used instead of `EOI`
//      so that no additional token is emitted from inside the comment.
//   2. `/* ... */` block comments that do not open with `/*{{`.
//   3. Blocks opening with `/*{{` whose first `*/` is NOT immediately preceded
//      by `}}`. The `/*{{ ... }}*/` form is deliberately not a comment; it is
//      matched by the `content` / `dependency_directive_content` rules.
COMMENT = _{
    "--" ~ (!NEWLINE ~ ANY)* ~ (NEWLINE | !ANY) |
    "/*" ~ !"{{" ~ (!"*/" ~ ANY)* ~ "*/" |
    "/*{{" ~ (!"*/" ~ !"}}*/" ~ ANY)* ~ "*/"
}

// Quoted tokens. Each rule is atomic, and the delimiter character is written
// inside the token by doubling it (e.g. `''` inside a single-quoted token).
back_quoted = @{
    "`" ~ ( "``" | !"`" ~ ANY )* ~ "`"
}
single_quoted = @{
    "'" ~ ( "''" | !"'" ~ ANY )* ~ "'"
}
double_quoted = @{
    "\"" ~ ( "\"\"" | !"\"" ~ ANY )* ~ "\""
}

// An identifier, in one of four spellings:
//   - a back-quoted token,
//   - a double-quoted token,
//   - a square-bracketed token (no escape for `]` inside),
//   - a bare word: a letter or underscore followed by letters, digits or
//     underscores.
ident = @{
    back_quoted
  | double_quoted
  | "[" ~ ( !"]" ~ ANY )* ~ "]"
  | ( "_" | ASCII_ALPHA ) ~ ( "_" | ASCII_ALPHANUMERIC )*
}
// A numeric literal: either a hexadecimal integer introduced by a
// case-insensitive `0x`, or a decimal number (`12`, `12.`, `12.5`, `.5`)
// with an optional case-insensitive exponent such as `e-3`.
number = @{
    ^"0x" ~ ASCII_HEX_DIGIT+
  | (
        ASCII_DIGIT+ ~ ( "." ~ ASCII_DIGIT* )?
      | "." ~ ASCII_DIGIT+
    ) ~ ( ^"e" ~ ( "-" | "+" )? ~ ASCII_DIGIT+ )?
}

// Word boundary: succeeds without consuming input when the next character
// cannot continue a bare identifier, so a keyword never matches the prefix of
// a longer word.
b = _{ !( ASCII_ALPHANUMERIC | "_" ) }

// Keywords. `^"..."` matches the literal case-insensitively, and every keyword
// must be followed by the word boundary `b`.

// -- schema
kw_create            = @{ ^"create"            ~ b }
kw_table             = @{ ^"table"             ~ b }

// -- logical operators
kw_or                = @{ ^"or"                ~ b }
kw_and               = @{ ^"and"               ~ b }
kw_not               = @{ ^"not"               ~ b }
kw_is                = @{ ^"is"                ~ b }

// -- symbols and constants
kw_rownum            = @{ ^"rownum"            ~ b }
kw_subrownum         = @{ ^"subrownum"         ~ b }
kw_null              = @{ ^"null"              ~ b }
kw_true              = @{ ^"true"              ~ b }
kw_false             = @{ ^"false"             ~ b }

// -- CASE expression
kw_case              = @{ ^"case"              ~ b }
kw_when              = @{ ^"when"              ~ b }
kw_then              = @{ ^"then"              ~ b }
kw_else              = @{ ^"else"              ~ b }
kw_end               = @{ ^"end"               ~ b }

// -- timestamps and intervals
kw_timestamp         = @{ ^"timestamp"         ~ b }
kw_interval          = @{ ^"interval"          ~ b }
kw_week              = @{ ^"week"              ~ b }
kw_day               = @{ ^"day"               ~ b }
kw_hour              = @{ ^"hour"              ~ b }
kw_minute            = @{ ^"minute"            ~ b }
kw_second            = @{ ^"second"            ~ b }
kw_millisecond       = @{ ^"millisecond"       ~ b }
kw_microsecond       = @{ ^"microsecond"       ~ b }
kw_with              = @{ ^"with"              ~ b }
kw_time              = @{ ^"time"              ~ b }
kw_zone              = @{ ^"zone"              ~ b }

// -- SUBSTRING / OVERLAY
kw_substring         = @{ ^"substring"         ~ b }
kw_from              = @{ ^"from"              ~ b }
kw_for               = @{ ^"for"               ~ b }
kw_using             = @{ ^"using"             ~ b }
kw_characters        = @{ ^"characters"        ~ b }
kw_octets            = @{ ^"octets"            ~ b }
kw_overlay           = @{ ^"overlay"           ~ b }
kw_placing           = @{ ^"placing"           ~ b }

// -- miscellaneous
kw_current_timestamp = @{ ^"current_timestamp" ~ b }
kw_array             = @{ ^"array"             ~ b }
kw_each              = @{ ^"each"              ~ b }
// Accepts both `row` and `rows`.
kw_rows              = @{ ^"row" ~ ^"s"?       ~ b }
kw_of                = @{ ^"of"                ~ b }
kw_generate          = @{ ^"generate"          ~ b }

// Parentheses as named tokens, so that they appear in the parse tree.
open_paren = @{
    "("
}
close_paren = @{
    ")"
}

// Operator tokens. Each is an atomic rule wrapping one literal.

// -- comparison
op_le        = @{ "<=" }
op_lt        = @{ "<"  }
op_ge        = @{ ">=" }
op_gt        = @{ ">"  }
op_eq        = @{ "="  }
op_ne        = @{ "<>" }

// -- additive and concatenation
op_add       = @{ "+"  }
op_sub       = @{ "-"  }
op_concat    = @{ "||" }

// -- multiplicative
op_mul       = @{ "*"  }
op_float_div = @{ "/"  }

// -- statement separator
op_semicolon = @{ ";"  }

// -- bitwise
op_bit_and   = @{ "&"  }
op_bit_or    = @{ "|"  }
op_bit_xor   = @{ "^"  }
op_bit_not   = @{ "~"  }

// -- list separator
op_comma     = @{ ","  }

// One unit of free-form text in which brackets and quotes stay balanced:
// a (possibly nested) `(...)`, `[...]` or `{...}` group, a complete
// back-/single-/double-quoted token, or any single character that is neither
// a bracket nor a quote mark. The rule is silent, so it adds no node of its
// own to the parse tree.
balanced = _{
    "(" ~ balanced* ~ ")" |
    "[" ~ balanced* ~ "]" |
    "{" ~ balanced* ~ "}" |
    back_quoted |
    single_quoted |
    double_quoted |
    // Fallback: any other character. Brackets and quote marks are excluded
    // here so they can only be consumed by the alternatives above.
    !("("|"["|"{"|"`"|"'"|"\""|")"|"]"|"}") ~ ANY
}

// A qualified name: one identifier followed by up to two further
// dot-separated identifiers (one to three parts in total).
qname = { ident ~ ( "." ~ ident ){0,2} }

// A non-empty run of `balanced` units that stops before a `{{` or `/*{{`
// opener, leaving those for the `content` rule.
any_text = { ( !"{{" ~ !"/*{{" ~ balanced )+ }
// A statement block, delimited either by `{{ ... }}` or by the
// comment-disguised form `/*{{ ... }}*/`. Silent: only the inner `stmt`
// appears in the parse tree.
content = _{
    "{{"   ~ stmt ~ "}}"
  | "/*{{" ~ stmt ~ "}}*/"
}

// A dependency directive wrapped in the same two delimiter styles as
// `content`: `{{ ... }}` or `/*{{ ... }}*/`. Silent wrapper.
dependency_directive_content = _{
    "{{"   ~ dependency_directive ~ "}}"
  | "/*{{" ~ dependency_directive ~ "}}*/"
}
// Directive of the form
//     FOR EACH ROW[S] OF <qname> GENERATE <expr> ROW[S] OF <qname>
dependency_directive = {
    kw_for ~ kw_each ~ kw_rows ~ kw_of ~ qname
    ~ kw_generate ~ expr ~ kw_rows ~ kw_of ~ qname
}
// One `CREATE TABLE <qname> ( ... )` definition. The parenthesised body is a
// mix of identifiers, commas, free text and statement blocks; optional free
// text may follow the closing parenthesis.
single_table = {
    kw_create ~ kw_table ~ qname
    ~ open_paren
    ~ ( ident | op_comma | any_text | content )*
    ~ close_paren
    ~ any_text?
}
// Whole-input rule: optional leading statement blocks, then one table
// definition, then any number of (dependency directive, table definition)
// pairs, up to the end of input.
create_table = _{
    SOI
    ~ content*
    ~ single_table
    ~ ( dependency_directive_content ~ single_table )*
    ~ EOI
}

// A statement: one or more expressions separated by semicolons.
stmt = { expr ~ ( op_semicolon ~ expr )* }

// An expression, optionally prefixed by any number of `@name :=` assignment
// targets.
expr = { ( "@" ~ ident ~ ":=" )* ~ expr_or }
// Operator-precedence chain. Each rule below is built from the rule after it,
// so the operators bind progressively tighter from `expr_or` (loosest) down
// to `expr_unary` (tightest).

// Logical OR.
expr_or = {
    expr_and ~ (kw_or ~ expr_and)*
}
// Logical AND.
expr_and = {
    expr_not ~ (kw_and ~ expr_not)*
}
// Zero or more prefix NOT keywords applied to a comparison.
expr_not = {
    kw_not* ~ expr_cmp
}
// `IS NOT`, grouped so it appears as a single node.
is_not = { kw_is ~ kw_not }
// At most one comparison (`?`, not `*`), so comparisons do not chain.
// Longer spellings (`IS NOT`, `<=`, `>=`, `<>`) are listed before the shorter
// ones they start with (`IS`, `<`, `>`) because the choice is ordered.
expr_cmp = {
    expr_bit_or ~ ((is_not | kw_is | op_le | op_ge | op_ne | op_lt | op_gt | op_eq) ~ expr_bit_or)?
}
// Bitwise OR and XOR share one precedence level.
expr_bit_or = {
    expr_bit_and ~ ((op_bit_or | op_bit_xor) ~ expr_bit_and)*
}
// Bitwise AND.
expr_bit_and = {
    expr_add ~ (op_bit_and ~ expr_add)*
}
// Addition, subtraction and `||` concatenation share one precedence level.
expr_add = {
    expr_mul ~ ((op_add | op_sub | op_concat) ~ expr_mul)*
}
// Multiplication and division.
expr_mul = {
    expr_unary ~ ((op_mul | op_float_div) ~ expr_unary)*
}
// Any number of prefix `+`, `-`, `~` operators, a primary expression, then
// any number of bracketed `[expr]` suffixes.
expr_unary = {
    (op_add | op_sub | op_bit_not)* ~ expr_primary ~ ("[" ~ expr ~ "]")*
}

// The atoms of the expression grammar. Alternatives are tried in the order
// written: keyword constants first, then grouping and literals, then the
// keyword-introduced special forms, and finally the generic function call,
// which is last so the special forms get the first chance to match.
expr_primary = {
    kw_rownum | kw_subrownum | kw_null | kw_true | kw_false | kw_current_timestamp |
    expr_group |
    single_quoted |     // string
    number |
    expr_case_value_when |
    expr_timestamp |
    expr_interval |
    expr_get_variable |
    expr_array |
    expr_substring_function |
    expr_overlay_function |
    expr_function
}

// A parenthesised sub-expression.
expr_group = { "(" ~ expr ~ ")" }

// CASE expression:
//     CASE [value] WHEN pattern THEN result [WHEN ...]... [ELSE result] END
//
// The optional value is guarded by `!kw_when`. Without the guard, input such
// as `CASE WHEN (a) THEN ...` fails to parse: the value rule greedily reads
// `when (a)` as a function call, and a PEG never backtracks into an optional
// that has already succeeded, so the mandatory WHEN clause can no longer
// match. The predicate consumes nothing and emits no token.
expr_case_value_when = {
    kw_case ~ (!kw_when ~ case_value_when_value)? ~
    (kw_when ~ case_value_when_pattern ~ kw_then ~ case_value_when_result)+ ~
    (kw_else ~ case_value_when_else)? ~
    kw_end
}
// Named wrappers so each part of the CASE expression gets its own node.
// The value and patterns are single expressions; the results may be
// semicolon-separated statements.
case_value_when_value = { expr }
case_value_when_pattern = { expr }
case_value_when_result = { stmt }
case_value_when_else = { stmt }

// `TIMESTAMP [WITH TIME ZONE] <primary>`.
expr_timestamp = {
    kw_timestamp
    ~ ( kw_with ~ kw_time ~ kw_zone )?
    ~ expr_primary
}
// `INTERVAL <expr> <unit>`, where the unit is one of WEEK, DAY, HOUR, MINUTE,
// SECOND, MILLISECOND or MICROSECOND.
expr_interval = {
    kw_interval ~ expr ~ (
        kw_week
      | kw_day
      | kw_hour
      | kw_minute
      | kw_second
      | kw_millisecond
      | kw_microsecond
    )
}

// Reads a variable: `@name` that is not followed by `:=` (which would make it
// an assignment target instead).
expr_get_variable = { "@" ~ ident ~ !":=" }

// Array constructor: `ARRAY[ e1, e2, ... ]`; the element list may be empty.
expr_array = {
    kw_array ~ "["
    ~ ( expr ~ ( "," ~ expr )* )?
    ~ "]"
}

// Keyword-style substring call:
//     SUBSTRING(input [FROM start] [FOR length] [USING OCTETS|CHARACTERS])
expr_substring_function = {
    kw_substring ~ "("
    ~ substring_input
    ~ ( kw_from ~ substring_from )?
    ~ ( kw_for ~ substring_for )?
    ~ ( kw_using ~ ( kw_octets | kw_characters ) )?
    ~ ")"
}
// Named wrappers so each argument gets its own node in the parse tree.
substring_input = { expr }
substring_from  = { expr }
substring_for   = { expr }

// Keyword-style overlay call:
//     OVERLAY(input PLACING replacement FROM start [FOR length]
//             [USING OCTETS|CHARACTERS])
// It reuses the `substring_*` argument wrappers for input, start and length.
expr_overlay_function = {
    kw_overlay ~ "("
    ~ substring_input
    ~ kw_placing ~ overlay_placing
    ~ kw_from ~ substring_from
    ~ ( kw_for ~ substring_for )?
    ~ ( kw_using ~ ( kw_octets | kw_characters ) )?
    ~ ")"
}
// Named wrapper for the expression following PLACING.
overlay_placing = { expr }

// Generic function call: a qualified name followed by a parenthesised,
// comma-separated (possibly empty) argument list.
expr_function = {
    qname ~ "("
    ~ ( expr ~ ( "," ~ expr )* )?
    ~ ")"
}