// dockerfile-parser 0.9.0
//
// a Rust library for parsing, validating, and modifying Dockerfiles
// Documentation
// (C) Copyright 2019 Hewlett Packard Enterprise Development LP
// partially derived from the TOML example:
// https://github.com/pest-parser/pest/blob/master/grammars/src/grammars/toml.pest

// Entry rule: the complete input, anchored at both ends (SOI / EOI), made of
// one or more meta steps separated by line endings.
dockerfile = {
  SOI ~
  meta_step ~ (NEWLINE ~ meta_step)* ~
  EOI
}

// One physical "slot" between line endings: optionally a step or a comment,
// with optional insignificant whitespace on either side. Every part is
// optional, so a blank line also matches. Silent rule.
meta_step = _{
  ws* ~
  (step | comment)? ~
  ws*
}

// Dispatches to exactly one instruction rule. The rule is silent, so only the
// matched instruction's own rule shows up in the parse result. Alternatives
// are tried top to bottom.
//
// todos:
//   add | workdir | user
//
// things that we probably won't bother supporting:
//   expose | volume | onbuild | stopsignal | healthcheck | shell
//
// deprecated: maintainer
step = _{
  from |
  run |
  arg |
  label |
  copy |
  entrypoint |
  cmd |
  env |
  // catchall for unsupported directives; misc_instruction accepts any run of
  // ASCII letters, so this alternative has to stay last
  misc
}

// a single insignificant whitespace character (tab or space); the rule itself
// does not repeat, callers add `*` / `+` as needed
ws = _{ "\t" | " " }

// a "#" comment: everything from the "#" up to, but not including, the line
// ending
comment = @{
  "#" ~ (!NEWLINE ~ ANY)*
}

// a line that holds nothing but an (optionally indented) comment, together
// with its line ending when there is one
comment_line = _{
  ws* ~ comment ~ NEWLINE?
}

// a line that holds nothing but optional whitespace and a line ending
empty_line = _{
  ws* ~ NEWLINE
}

// Quoted string literals (single- or double-quoted), with backslash escapes.

// a double-quoted string, including both surrounding quote characters
double_quoted_string  = @{ "\"" ~ inner ~ "\"" }
// body of a double-quoted string: a run of characters other than `"`, `\`,
// U+0000 and U+001F, optionally followed by an escape and then more body
// (the rule recurses after every escape)
// NOTE(review): only the two code points U+0000 and U+001F are excluded, not
// the range between them - confirm that this is intended
inner   = @{ (!("\"" | "\\" | "\u{0000}" | "\u{001F}") ~ ANY)* ~ (escape ~ inner)? }
// a backslash, optionally followed by a recognised escape (a single escape
// letter, a quote, a backslash, a unicode escape, or a line ending); because
// the tail is optional, a backslash followed by anything else still matches
// as a lone backslash
escape  = @{ "\\" ~ ("b" | "t" | "n" | "f" | "r" | "\"" | "\\" | "'" | unicode | NEWLINE)? }
// tail of a unicode escape: `u` plus 4 hex digits, or `U` plus 8 hex digits
unicode = @{ "u" ~ ASCII_HEX_DIGIT{4} | "U" ~ ASCII_HEX_DIGIT{8} }

// a single-quoted string, including both surrounding quote characters
single_quoted_string = @{ "'" ~ single_quoted_inner ~ "'" }
// body of a single-quoted string; same shape as `inner`, but terminated by
// `'` instead of `"` (it shares the `escape` rule with the double-quoted form)
single_quoted_inner  = @{ (!("'" | "\\" | "\u{0000}" | "\u{001F}") ~ ANY)* ~ (escape ~ single_quoted_inner)? }

// either kind of quoted string; the single-quoted form is tried first
string = ${ single_quoted_string | double_quoted_string }

// a backslash at the end of a line (trailing spaces/tabs are tolerated),
// letting an instruction carry on onto the next line
line_continuation = _{ "\\" ~ ws* ~ NEWLINE }

// mandatory separator between instruction arguments; one or more of:
//   - a plain space or tab
//   - a line continuation, followed by any number of comment-only or blank
//     lines
// this is what lets an instruction extend past an escaped newline
arg_ws = _{
  (
    ws |
    line_continuation ~ (comment_line | empty_line)*
  )+
}

// same separator, but optional: matches arg_ws when present, otherwise
// matches nothing
arg_ws_maybe = _{ arg_ws? }

// one or more characters on a single physical line, stopping before a line
// ending or before a line continuation (an escaping "\" at end of line)
any_content = @{
  (!(NEWLINE | line_continuation) ~ ANY)+
}

// content that keeps going past a newline when that newline is preceded by an
// escape (\)
// the individual pieces need to be preserved in the final tree so they can be
// handled appropriately; pest's ignore rules aren't sufficient for our needs
any_breakable = ${
  // either a comment-only line (no line continuation required), optionally
  // followed by more breakable content ...
  comment_line ~ any_breakable?
  |
  // ... or a piece of content, which needs a continuation to carry on, EXCEPT
  // on the final line
  any_content ~ (line_continuation ~ any_breakable)?
}

// everything up to, but not including, the end of the current line (may match
// nothing)
any_eol = _{
  (!NEWLINE ~ ANY)*
}

// one or more characters, stopping at the next argument separator (arg_ws),
// line ending, or end of input
any_whitespace = _{
  (!(NEWLINE | EOI | arg_ws) ~ ANY)+
}

// one or more identifier characters: ASCII letters, digits, "_" or "-"
// (none of these is a space or tab, so the run ends at whitespace on its own)
identifier_whitespace = _{
  (ASCII_ALPHANUMERIC | "_" | "-")+
}

// one or more characters, stopping at "=", a space/tab, or a line ending
// (used for the key of a key=value pair)
any_equals = _{
  (!("=" | ws | NEWLINE) ~ ANY)+
}

// a bracketed, comma-separated list of quoted strings, e.g.
// ["foo", "bar", "baz"]; the list may be empty, separators may be surrounded
// by argument whitespace (including escaped newlines), and a comma directly
// after the last string is tolerated
string_array = _{
  "[" ~ arg_ws_maybe ~
  (
    // first element, then any number of ", element" repetitions
    string ~
    (arg_ws_maybe ~ "," ~ arg_ws_maybe ~ string)* ~
    ","? ~
    arg_ws_maybe
  )? ~
  "]"
}

// FROM instruction: FROM [--<flag>=<value> ...] <image> [AS <alias>]

// flag name: ASCII letters only
from_flag_name = @{ ASCII_ALPHA+ }
// flag value: runs up to the next argument separator or end of line
from_flag_value = @{ any_whitespace }
from_flag = {
  "--" ~ from_flag_name ~ "=" ~ from_flag_value
}

// image reference: one or more characters from a fixed set (alphanumerics
// plus the punctuation listed below)
from_image = @{
  (
    ASCII_ALPHANUMERIC |
    "_" | "-" | "." | ":" | "/" | "$" | "{" | "}" | "@"
  )+
}

// stage alias; the surrounding `AS` keyword (matched case-insensitively) and
// its whitespace live in a silent wrapper, so only from_alias is reported
from_alias = { identifier_whitespace }
from_alias_outer = _{
  arg_ws ~ ^"as" ~ arg_ws ~ from_alias
}

// the keyword is matched case-insensitively; flags, if any, come before the
// image
from = {
  ^"from" ~
  (arg_ws ~ from_flag)* ~
  arg_ws ~ from_image ~
  from_alias_outer?
}

// ARG instruction: ARG <name>[=<default>]

// argument name: an ASCII letter followed by letters, digits or "_"
arg_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
// unquoted default value, running up to the next argument separator or end of
// line; it may be empty so that `ARG NAME=` (an explicit empty default) is
// accepted - if a value were mandatory, the "=" would be left unconsumed
// after the instruction and the whole input would fail to parse
arg_value = ${ any_whitespace? }
// quoted default value
arg_quoted_value = ${ string }
// the keyword is matched case-insensitively; the quoted form is tried first,
// and arg_value, which can match the empty string, has to stay the last
// alternative
arg = { ^"arg" ~ arg_ws ~ arg_name ~ ("=" ~ (arg_quoted_value | arg_value))? }

// LABEL instruction, in one of two shapes:
//   LABEL <name> <value>            (label_single)
//   LABEL <name>=<value> ...        (one or more label_pair)
// Names and values may each be quoted strings or unquoted runs.

// unquoted pair name: runs up to "=", whitespace or end of line
label_name = ${ any_equals }
label_quoted_name = ${ string }
// unquoted value: runs up to the next argument separator or end of line
// NOTE(review): any_whitespace needs at least one character, so a pair with
// nothing after the "=" does not match label_pair - confirm whether empty
// label values should be accepted
label_value = ${ any_whitespace }
label_quoted_value = ${ string }
// one name=value pair; on both sides the quoted form is tried first
label_pair = {
  (label_quoted_name | label_name) ~ "=" ~ (label_quoted_value | label_value)
}
// name of the whitespace-separated form; these are separate rules from
// label_name / label_quoted_name, so the two forms are reported under
// different rule names
label_single_name = { any_equals }
label_single_quoted_name = { string }
// whitespace-separated form: name, argument whitespace, value
label_single = { arg_ws ~ (label_single_quoted_name | label_single_name) ~ arg_ws ~ (label_quoted_value | label_value) }
// the keyword is matched case-insensitively; label_single is tried first and
// fails when the name is directly followed by "=", in which case the pair
// form is used; `label_pair?` lets argument whitespace appear without a pair
// after it
label = { ^"label" ~ (label_single | (arg_ws ~ label_pair?)+) }

// RUN instruction: either exec form (a bracketed string array) or shell form
// (free text that may span continued lines)
run_shell = @{ any_breakable }
run_exec = { string_array }
// the keyword is matched case-insensitively; exec form is tried first, and
// anything that is not a well-formed string array is taken as shell form
run = {
  ^"run" ~ arg_ws ~
  (run_exec | run_shell)
}

// ENTRYPOINT instruction: either exec form (a bracketed string array) or
// shell form (free text that may span continued lines)
entrypoint_shell = @{ any_breakable }
entrypoint_exec = { string_array }
// the keyword is matched case-insensitively; exec form is tried first, and
// anything that is not a well-formed string array is taken as shell form
entrypoint = {
  ^"entrypoint" ~ arg_ws ~
  (entrypoint_exec | entrypoint_shell)
}

// CMD instruction: either exec form (a bracketed string array) or shell form
// (free text that may span continued lines)
cmd_shell = @{ any_breakable }
cmd_exec = { string_array }
// the keyword is matched case-insensitively; exec form is tried first, and
// anything that is not a well-formed string array is taken as shell form
cmd = {
  ^"cmd" ~ arg_ws ~
  (cmd_exec | cmd_shell)
}

// COPY instruction: COPY [--<flag>=<value> ...] <path> <path> [<path> ...]

// flag name: ASCII letters only
copy_flag_name = @{ ASCII_ALPHA+ }
// flag value: runs up to the next argument separator or end of line
copy_flag_value = @{ any_whitespace }
copy_flag = {
  "--" ~ copy_flag_name ~ "=" ~ copy_flag_value
}

// one path argument: runs up to the next argument separator or end of line
copy_pathspec = @{ any_whitespace }

// the keyword is matched case-insensitively; flags, if any, come first, and
// at least two path arguments are required (presumably source(s) followed by
// a destination - the grammar itself does not distinguish them)
copy = {
  ^"copy" ~
  (arg_ws ~ copy_flag)* ~
  (arg_ws ~ copy_pathspec){2,}
}

// ENV instruction, in one of two shapes:
//   ENV <name> <value>          (env_single: the value is the rest of the
//                                instruction, possibly over continued lines)
//   ENV <name>=<value> ...      (env_pairs: one or more pairs)

// variable name: ASCII letters, digits and "_"
env_name = ${ (ASCII_ALPHANUMERIC | "_")+ }

// unquoted pair value, running up to the next argument separator or end of
// line; it may be empty so that `ENV NAME=` (an explicitly empty value) is
// accepted - if a value were mandatory, the pair would not match, the text
// after `ENV ` would be left unconsumed, and the whole input would fail to
// parse
env_pair_value = ${ any_whitespace? }
// quoted pair value
env_pair_quoted_value = ${ string }
// one name=value pair; the quoted form is tried first, and env_pair_value,
// which can match the empty string, has to stay the last alternative
env_pair = @{ env_name ~ "=" ~ (env_pair_quoted_value | env_pair_value) }
// every pair is preceded by argument whitespace; `env_pair?` lets argument
// whitespace (e.g. trailing spaces) appear without a pair after it
env_pairs = { (arg_ws ~ env_pair?)+ }

// value of the whitespace-separated form: quoted, or free text that may span
// continued lines
env_single_quoted_value = ${ string }
env_single_value = @{ any_breakable }
env_single = { arg_ws ~ env_name ~ arg_ws ~ (env_single_quoted_value | env_single_value) }

// the keyword is matched case-insensitively; env_single is tried first and
// fails when the name is directly followed by "=", in which case the pair
// form is used
env = { ^"env" ~ (env_single | env_pairs) }

// catch-all for instructions without a dedicated rule: a keyword made of
// ASCII letters, followed directly by its arguments as free text (which may
// span continued lines); the whitespace after the keyword is not matched
// separately, so it ends up inside misc_arguments
misc_instruction = @{ ASCII_ALPHA+ }
misc_arguments = @{ any_breakable }
misc = {
  misc_instruction ~
  misc_arguments
}