use monch::*;
use crate::ast::*;
/// Result of one parsing step: on success, the unconsumed remainder of the
/// input paired with the parsed value.
type PResult<'a, T> = Result<(&'a str, T), ParseErrorFailureError>;
/// Parses `text` into a [`Dockerfile`].
///
/// The escape character is detected from the leading parser directives and is
/// then used for every instruction. The returned value keeps a copy of the
/// original text alongside the parsed instructions.
///
/// # Errors
///
/// Propagates any error returned by the instruction parser.
pub fn parse(text: &str) -> Result<Dockerfile, ParseErrorFailureError> {
    let escape = detect_escape(text);
    let instructions = Parser { base: text, escape }.parse_dockerfile(text)?;
    let content = text.to_string();
    Ok(Dockerfile {
        content,
        instructions,
        escape,
    })
}
/// Determines the escape character from the parser directives at the top of
/// `text`.
///
/// Directives are `# name=value` comment lines at the very start of the file.
/// Scanning stops at the first line that is not a recognised directive, so an
/// `escape` directive placed after an ordinary comment or an instruction is
/// ignored. The `syntax` and `check` directives are skipped over so that an
/// `escape` directive following them is still honoured.
///
/// Returns a backtick only when the directive value is exactly a backtick;
/// every other case (including no directive at all) yields a backslash.
fn detect_escape(text: &str) -> char {
    for line in text.lines() {
        let Some(directive) = line.trim().strip_prefix('#') else {
            break;
        };
        let Some((name, value)) = directive.split_once('=') else {
            break;
        };
        let name = name.trim();
        if name.eq_ignore_ascii_case("escape") {
            return if value.trim() == "`" { '`' } else { '\\' };
        }
        // Other known directives may precede `escape`; anything else is an
        // ordinary comment and ends the directive section.
        let known = name.eq_ignore_ascii_case("syntax") || name.eq_ignore_ascii_case("check");
        if !known {
            break;
        }
    }
    '\\'
}
/// Lower-cased instruction keywords that `parse_instruction` dispatches to a
/// dedicated parser. Any other keyword is handled by `parse_misc`.
const KEYWORDS: [&str; 11] = [
"from",
"run",
"arg",
"label",
"copy",
"entrypoint",
"cmd",
"env",
"shell",
"onbuild",
"healthcheck",
];
/// Parsing state shared by all instruction parsers.
struct Parser<'a> {
/// The complete source text; `off` measures byte offsets relative to it.
base: &'a str,
/// The escape character used for line continuations and escaped characters.
escape: char,
}
impl<'a> Parser<'a> {
/// Parses every instruction in `input`.
///
/// Blank lines and comment lines between instructions are skipped. A line
/// that cannot be parsed as an instruction (or that has trailing garbage) is
/// recorded as `Instruction::Unknown` instead of aborting the parse.
fn parse_dockerfile(&self, mut input: &'a str) -> Result<Vec<Instruction>, ParseErrorFailureError> {
    let mut instructions = Vec::new();
    loop {
        let line = skip_ws(input);
        if line.is_empty() {
            break;
        }
        if let Some(rest) = strip_newline(line) {
            input = rest;
            continue;
        }
        if line.starts_with('#') {
            input = skip_to_next_line(line);
            continue;
        }
        // Instruction, then an optional heredoc body, then the end of the line.
        let attempt = self
            .parse_instruction(line)
            .map(|(rest, instruction)| self.maybe_consume_heredocs(instruction, rest))
            .and_then(|(rest, instruction)| self.finish_line(rest).map(|next| (next, instruction)));
        let (next, instruction) = attempt.unwrap_or_else(|_| {
            let (rest, text) = self.unknown_line(line);
            (rest, Instruction::Unknown(text))
        });
        instructions.push(instruction);
        input = next;
    }
    Ok(instructions)
}
/// Parses a single instruction starting at `input`.
///
/// The leading alphabetic keyword is matched case-insensitively against
/// `KEYWORDS`. A known keyword followed by argument whitespace is dispatched
/// to its dedicated parser; everything else is parsed as a misc instruction.
fn parse_instruction(&self, input: &'a str) -> PResult<'a, Instruction> {
    let (after_kw, keyword) = alpha0(input);
    let lower = keyword.to_ascii_lowercase();
    if !KEYWORDS.contains(&lower.as_str()) {
        return self.parse_misc(input);
    }
    let Some(args) = self.arg_ws(after_kw) else {
        return self.parse_misc(input);
    };
    let start = self.off(input);
    // Shell-form parsers receive the position that still includes comment
    // lines following a continuation.
    let shell_args = self.arg_ws_keep_comments(after_kw).unwrap_or(args);
    match lower.as_str() {
        "from" => self.parse_from(args, start),
        "arg" => self.parse_arg(args, start),
        // These parsers consume the whitespace after the keyword themselves.
        "label" => self.parse_label(after_kw, start),
        "copy" => self.parse_copy(after_kw, start),
        "env" => self.parse_env(after_kw, start),
        "run" => self.parse_shell_or_exec(shell_args, start, ExprKind::Run),
        "cmd" => self.parse_shell_or_exec(shell_args, start, ExprKind::Cmd),
        "entrypoint" => self.parse_shell_or_exec(shell_args, start, ExprKind::Entrypoint),
        "shell" => self.parse_shell_or_exec(shell_args, start, ExprKind::Shell),
        "onbuild" => self
            .parse_onbuild(args, start)
            .or_else(|_| self.parse_misc(input)),
        "healthcheck" => self
            .parse_healthcheck(args, start)
            .or_else(|_| self.parse_misc(input)),
        _ => unreachable!(),
    }
}
/// Parses the arguments of a `FROM` instruction: optional `--name=value`
/// flags, the image reference and an optional `AS alias`.
///
/// `start` is the offset of the keyword and becomes the span start.
fn parse_from(&self, input: &'a str, start: usize) -> PResult<'a, Instruction> {
    let mut cursor = input;
    let mut flags = Vec::new();
    while let Some((after_flag, name, value, span)) = self.parse_flag(cursor) {
        flags.push(FromFlag { span, name, value });
        let separator = self.arg_ws(after_flag);
        cursor = separator.unwrap_or(after_flag);
        if separator.is_none() {
            break;
        }
    }
    let (after_image, image) = self.parse_image(cursor)?;
    let (end, alias) = match self.parse_alias(after_image) {
        Some((rest, alias)) => (rest, Some(alias)),
        None => (after_image, None),
    };
    let span = Span::new(start, self.off(end));
    Ok((end, Instruction::From(FromInstruction { span, flags, image, alias })))
}
/// Parses the image reference of a `FROM` instruction.
///
/// # Errors
///
/// Fails with "missing from image" when no image character is present.
fn parse_image(&self, input: &'a str) -> PResult<'a, SpannedString> {
    let is_image_char = |c: char| {
        c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | ':' | '/' | '$' | '{' | '}' | '@')
    };
    let Ok((rest, text)) = if_not_empty(take_while(is_image_char))(input) else {
        return Err(fail("missing from image"));
    };
    Ok((rest, self.spanned(input, rest, text.to_string())))
}
/// Parses an optional ` AS <alias>` suffix following the image of a `FROM`.
///
/// Returns `None` when any part (whitespace, the `as` keyword, whitespace,
/// a non-empty alias) is missing.
fn parse_alias(&self, after_image: &'a str) -> Option<(&'a str, SpannedString)> {
    let is_alias_char = |c: char| c.is_ascii_alphanumeric() || c == '_' || c == '-';
    let name_start = self
        .arg_ws(after_image)
        .and_then(|s| strip_prefix_ci(s, "as"))
        .and_then(|s| self.arg_ws(s))?;
    let (rest, text) = if_not_empty(take_while(is_alias_char))(name_start).ok()?;
    Some((rest, self.spanned(name_start, rest, text.to_string())))
}
/// Parses the argument of `RUN`/`CMD`/`ENTRYPOINT`/`SHELL`.
///
/// An argument starting with `[` is tried as a string array (exec form); if
/// that fails, or for any other input, it is parsed in shell form.
fn parse_shell_or_exec(&self, input: &'a str, start: usize, kind: ExprKind) -> PResult<'a, Instruction> {
    let exec = if input.starts_with('[') {
        self.string_array(input).ok()
    } else {
        None
    };
    let (rest, span, expr) = match exec {
        Some((rest, array)) => {
            let span = Span::new(start, array.span.end);
            (rest, span, ShellOrExecExpr::Exec(array))
        }
        None => self.shell_expr(input, start)?,
    };
    Ok((rest, kind.build(span, expr)))
}
/// Parses a shell-form argument and returns the remainder, the instruction
/// span (from `start` to the end of the argument) and the expression.
fn shell_expr(&self, input: &'a str, start: usize) -> Result<(&'a str, Span, ShellOrExecExpr), ParseErrorFailureError> {
    self.any_breakable(input).map(|(rest, breakable)| {
        let span = Span::new(start, breakable.span.end);
        (rest, span, ShellOrExecExpr::Shell(breakable))
    })
}
/// Parses the arguments of an `ARG` instruction: a name optionally followed
/// by `=` and a quoted or unquoted value.
fn parse_arg(&self, input: &'a str, start: usize) -> PResult<'a, Instruction> {
    let (after_name, name) = self.arg_name(input)?;
    let (end, value) = match after_name.strip_prefix('=') {
        Some(after_eq) => {
            let (rest, parsed) = self.value_quoted_or(after_eq, |p, s| p.any_whitespace(s))?;
            (rest, Some(parsed))
        }
        None => (after_name, None),
    };
    let span = Span::new(start, self.off(end));
    Ok((end, Instruction::Arg(ArgInstruction { span, name, value })))
}
/// Parses an `ARG` name: an ASCII letter followed by ASCII alphanumerics or
/// underscores.
///
/// # Errors
///
/// Fails with "arg name is required" when the input does not start with one.
fn arg_name(&self, input: &'a str) -> PResult<'a, SpannedString> {
    let name = substring(pair(
        if_true(next_char, |c| c.is_ascii_alphabetic()),
        skip_while(|c| c.is_ascii_alphanumeric() || c == '_'),
    ));
    let Ok((rest, text)) = name(input) else {
        return Err(fail("arg name is required"));
    };
    Ok((rest, self.spanned(input, rest, text.to_string())))
}
/// Parses the arguments of a `COPY` instruction.
///
/// `input` is the text right after the keyword and `start` is the offset used
/// as the span start. Leading `--name=value` flags are collected first; the
/// remaining arguments are either one string array or whitespace-separated
/// paths, the last of which is the destination.
///
/// # Errors
///
/// Fails when no string array was parsed and fewer than two paths were found.
fn parse_copy(&self, input: &'a str, start: usize) -> PResult<'a, Instruction> {
let mut flags = Vec::new();
let mut paths: Vec<SpannedString> = Vec::new();
let mut input = input;
// Flags: `input` only advances past a flag, so it always points just before
// the whitespace that precedes the next argument.
while let Some(after_ws) = self.arg_ws(input) {
match self.parse_flag(after_ws) {
Some((rest, name, value, span)) => {
flags.push(CopyFlag { span, name, value });
input = rest;
}
None => break,
}
}
// Array form: taken only when the whole array parses; otherwise fall through
// and treat the arguments as plain paths.
if let Some(after_ws) = self.arg_ws(input)
&& after_ws.starts_with('[')
&& let Ok((rest, array)) = self.string_array(after_ws)
{
let span = Span::new(start, array.span.end);
return Ok((
rest,
Instruction::Copy(CopyInstruction {
span,
flags,
args: CopyArgs::Exec(array),
}),
));
}
// Path form: collect every whitespace-separated argument.
while let Some(after_ws) = self.arg_ws(input) {
match self.any_whitespace(after_ws) {
Ok((rest, text)) => {
paths.push(self.spanned(after_ws, rest, text.to_string()));
input = rest;
}
Err(_) => break,
}
}
if paths.len() < 2 {
return Err(fail("copy requires at least one source and a destination"));
}
// The length check above guarantees `pop` yields the destination.
let destination = paths.pop().unwrap();
let span = Span::new(start, destination.span.end);
Ok((
input,
Instruction::Copy(CopyInstruction {
span,
flags,
args: CopyArgs::Paths { sources: paths, destination },
}),
))
}
/// Parses the arguments of a `LABEL` instruction.
///
/// The `LABEL name value` form is tried first; otherwise one or more
/// `name=value` pairs separated by argument whitespace are collected.
///
/// # Errors
///
/// Fails when not a single label could be parsed.
fn parse_label(&self, after_kw: &'a str, start: usize) -> PResult<'a, Instruction> {
    let (rest, labels) = match self.label_single(after_kw) {
        Some((rest, label)) => (rest, vec![label]),
        None => {
            let mut labels = Vec::new();
            let mut cursor = after_kw;
            while let Some(after_ws) = self.arg_ws(cursor) {
                let Some((rest, label)) = self.label_pair(after_ws) else {
                    // Leave the cursor on the text that is not a pair.
                    cursor = after_ws;
                    break;
                };
                labels.push(label);
                cursor = rest;
            }
            (cursor, labels)
        }
    };
    let Some(last) = labels.last() else {
        return Err(fail("label requires at least one key/value pair"));
    };
    let span = Span::new(start, last.span.end);
    Ok((rest, Instruction::Label(LabelInstruction { span, labels })))
}
/// Parses the `LABEL name value` form (name and value separated by
/// whitespace rather than `=`).
///
/// NOTE(review): the label span starts at `after_kw`, i.e. directly after the
/// keyword and before the separating whitespace, whereas `label_pair` starts
/// its span at the name. Confirm whether this difference is intentional.
fn label_single(&self, after_kw: &'a str) -> Option<(&'a str, Label)> {
    let name_start = self.arg_ws(after_kw)?;
    let (after_name, name) = self.label_name(name_start)?;
    let value_start = self.arg_ws(after_name)?;
    let (rest, value) = self.label_value(value_start)?;
    let label = Label {
        span: Span::new(self.off(after_kw), value.span.end),
        name,
        value,
    };
    Some((rest, label))
}
/// Parses one `name=value` label pair; the span runs from the start of the
/// name to the end of the value.
fn label_pair(&self, input: &'a str) -> Option<(&'a str, Label)> {
    let (after_name, name) = self.label_name(input)?;
    let value_start = after_name.strip_prefix('=')?;
    let (rest, value) = self.label_value(value_start)?;
    let label = Label {
        span: Span::new(name.span.start, value.span.end),
        name,
        value,
    };
    Some((rest, label))
}
/// Parses a label name: either a quoted string or a non-empty run of
/// characters up to whitespace, a newline or `=`.
fn label_name(&self, input: &'a str) -> Option<(&'a str, SpannedString)> {
    if starts_with_quote(input) {
        return self.parse_quoted_string(input).ok();
    }
    let is_name_char = |c: char| !(is_ws(c) || is_newline_char(c) || c == '=');
    if_not_empty(take_while(is_name_char))(input)
        .ok()
        .map(|(rest, text)| (rest, self.spanned(input, rest, text.to_string())))
}
/// Parses a label value: either a quoted string or a non-empty run of
/// non-whitespace characters.
fn label_value(&self, input: &'a str) -> Option<(&'a str, SpannedString)> {
    if starts_with_quote(input) {
        self.parse_quoted_string(input).ok()
    } else {
        self.any_whitespace(input)
            .ok()
            .map(|(rest, text)| (rest, self.spanned(input, rest, text.to_string())))
    }
}
/// Parses the arguments of an `ENV` instruction.
///
/// The `ENV key value` form is tried first; otherwise one or more
/// `key=value` pairs separated by argument whitespace are collected.
///
/// # Errors
///
/// Fails when not a single variable could be parsed.
fn parse_env(&self, after_kw: &'a str, start: usize) -> PResult<'a, Instruction> {
    let (rest, vars) = match self.env_single(after_kw) {
        Some((rest, var)) => (rest, vec![var]),
        None => {
            let mut vars = Vec::new();
            let mut cursor = after_kw;
            while let Some(after_ws) = self.arg_ws(cursor) {
                let Some((rest, var)) = self.env_pair(after_ws) else {
                    // Leave the cursor on the text that is not a pair.
                    cursor = after_ws;
                    break;
                };
                vars.push(var);
                cursor = rest;
            }
            (cursor, vars)
        }
    };
    let Some(last) = vars.last() else {
        return Err(fail("env requires a key/value pair"));
    };
    let span = Span::new(start, last.span.end);
    Ok((rest, Instruction::Env(EnvInstruction { span, vars })))
}
/// Parses the `ENV key value` form, where the value is either a quoted
/// string or the rest of the (possibly continued) line.
fn env_single(&self, after_kw: &'a str) -> Option<(&'a str, EnvVar)> {
    let key_start = self.arg_ws(after_kw)?;
    let (after_key, key) = self.env_name(key_start)?;
    let value_start = self.arg_ws(after_key)?;
    let (rest, value) = if starts_with_quote(value_start) {
        self.parse_quoted_string(value_start)
            .ok()
            .map(|(rest, quoted)| (rest, breakable_from_string(quoted)))?
    } else {
        self.any_breakable(value_start).ok()?
    };
    let var = EnvVar {
        span: Span::new(key.span.start, value.span.end),
        key,
        value,
    };
    Some((rest, var))
}
/// Parses one `key=value` pair of an `ENV` instruction; the value is either
/// a quoted string or an unquoted word.
fn env_pair(&self, input: &'a str) -> Option<(&'a str, EnvVar)> {
    let (after_key, key) = self.env_name(input)?;
    let value_start = after_key.strip_prefix('=')?;
    let (rest, raw) = if starts_with_quote(value_start) {
        self.parse_quoted_string(value_start).ok()?
    } else {
        self.env_value(value_start)?
    };
    let value = breakable_from_string(raw);
    let var = EnvVar {
        span: Span::new(key.span.start, value.span.end),
        key,
        value,
    };
    Some((rest, var))
}
/// Reads the unquoted value of a `key=value` pair starting at `input`.
///
/// The value ends at the first unescaped whitespace or newline character, or
/// at an escape character that begins a line continuation. Returns `None`
/// when the value would be empty. The returned content has escape sequences
/// resolved by `unescape`; the span covers the raw text.
fn env_value(&self, input: &'a str) -> Option<(&'a str, SpannedString)> {
let mut end = input.len();
let mut chars = input.char_indices();
while let Some((i, c)) = chars.next() {
if c == self.escape {
// An escape that starts a line continuation terminates the value.
if line_continuation(&input[i..], self.escape).is_some() {
end = i;
break;
}
// Otherwise the escape protects the next character: skip it unexamined.
chars.next(); continue;
}
if is_ws(c) || is_newline_char(c) {
end = i;
break;
}
}
if end == 0 {
return None;
}
let rest = &input[end..];
let raw = &input[..end];
Some((
rest,
SpannedString {
span: self.span(input, rest),
content: unescape(raw, self.escape),
},
))
}
/// Parses an environment variable name: a non-empty run of ASCII
/// alphanumerics and underscores.
fn env_name(&self, input: &'a str) -> Option<(&'a str, SpannedString)> {
    let is_name_char = |c: char| c == '_' || c.is_ascii_alphanumeric();
    if_not_empty(take_while(is_name_char))(input)
        .ok()
        .map(|(rest, text)| (rest, self.spanned(input, rest, text.to_string())))
}
/// Parses the instruction wrapped by `ONBUILD`; the resulting span runs from
/// `start` to the end of the inner instruction.
fn parse_onbuild(&self, input: &'a str, start: usize) -> PResult<'a, Instruction> {
    self.parse_instruction(input).map(|(rest, inner)| {
        let span = Span::new(start, inner.span().end);
        let onbuild = OnbuildInstruction {
            span,
            instruction: Box::new(inner),
        };
        (rest, Instruction::Onbuild(onbuild))
    })
}
/// Parses the arguments of a `HEALTHCHECK` instruction: optional
/// `--name=value` flags followed by either `NONE` or a nested instruction.
///
/// `start` is the offset used as the span start.
fn parse_healthcheck(&self, input: &'a str, start: usize) -> PResult<'a, Instruction> {
let mut flags = Vec::new();
let mut input = input;
// Flag names here may contain digits and dashes, unlike `parse_flag`.
while let Some((rest, name, value, span)) = self.parse_flag_with(input, |c| c.is_ascii_alphanumeric() || c == '-') {
flags.push(HealthcheckFlag { span, name, value });
match self.arg_ws(rest) {
Some(next) => input = next,
None => {
input = rest;
break;
}
}
}
// `NONE` (any case) only counts when it is a whole word.
if let Some(after) = strip_prefix_ci(input, "none")
&& (after.is_empty() || after.starts_with(is_ws) || starts_with_newline(after))
{
let span = Span::new(start, self.off(after));
return Ok((after, Instruction::Healthcheck(HealthcheckInstruction { span, flags, cmd: None })));
}
// Anything else is parsed as a nested instruction and stored as the command.
let (rest, inner) = self.parse_instruction(input)?;
let span = Span::new(start, inner.span().end);
Ok((
rest,
Instruction::Healthcheck(HealthcheckInstruction {
span,
flags,
cmd: Some(Box::new(inner)),
}),
))
}
/// Parses an instruction without a dedicated parser: an alphabetic keyword
/// followed by free-form (possibly continued) arguments.
///
/// # Errors
///
/// Fails with "unexpected character" when `input` does not start with a
/// letter, or with the error of `any_breakable` when there are no arguments.
fn parse_misc(&self, input: &'a str) -> PResult<'a, Instruction> {
    let (after_kw, keyword) = alpha0(input);
    if keyword.is_empty() {
        return Err(fail("unexpected character"));
    }
    let start = self.off(input);
    let instruction = self.spanned(input, after_kw, keyword.to_string());
    self.any_breakable(after_kw).map(|(rest, arguments)| {
        let span = Span::new(start, arguments.span.end);
        (rest, Instruction::Misc(MiscInstruction { span, instruction, arguments }))
    })
}
/// Parses a `--name=value` flag whose name consists of ASCII letters only.
/// See `parse_flag_with` for the shape of the returned tuple.
fn parse_flag(&self, input: &'a str) -> Option<(&'a str, SpannedString, SpannedString, Span)> {
self.parse_flag_with(input, |c| c.is_ascii_alphabetic())
}
/// Parses a `--name=value` flag, accepting name characters per `name_char`.
///
/// Returns the remainder, the name, the value and the span of the whole flag
/// (including the leading dashes), or `None` when `input` is not a flag.
fn parse_flag_with(&self, input: &'a str, name_char: impl Fn(char) -> bool) -> Option<(&'a str, SpannedString, SpannedString, Span)> {
    let name_start = input.strip_prefix("--")?;
    let (after_name, name_text) = if_not_empty(take_while(name_char))(name_start).ok()?;
    let value_start = after_name.strip_prefix('=')?;
    let (rest, value_text) = self.any_whitespace(value_start).ok()?;
    Some((
        rest,
        self.spanned(name_start, after_name, name_text.to_string()),
        self.spanned(value_start, rest, value_text.to_string()),
        self.span(input, rest),
    ))
}
/// Parses a string delimited by `"`, `'` or a backtick.
///
/// The string ends at the first unescaped occurrence of its opening quote. A
/// backslash always protects the following character here, independent of the
/// configured escape character. The span covers the quotes; the content is
/// the result of `unquote`.
///
/// # Errors
///
/// Fails when `input` does not start with a quote or the quote is never closed.
fn parse_quoted_string(&self, input: &'a str) -> PResult<'a, SpannedString> {
let quote = match input.chars().next() {
Some(c @ ('"' | '\'' | '`')) => c,
_ => return Err(fail("expected quoted string")),
};
let mut chars = input.char_indices();
// Skip the opening quote.
chars.next(); let mut end = None;
while let Some((i, c)) = chars.next() {
if c == '\\' {
// Skip the escaped character so an escaped quote does not end the string.
chars.next(); continue;
}
if c == quote {
end = Some(i + c.len_utf8());
break;
}
}
let Some(end) = end else {
return Err(fail("unterminated quoted string"));
};
let rest = &input[end..];
let content = unquote(&input[..end]);
Ok((rest, self.spanned(input, rest, content)))
}
/// Parses a bracketed, comma-separated list of quoted strings such as
/// `["a", "b"]`.
///
/// Argument whitespace is allowed around brackets, elements and commas; an
/// empty array and a trailing comma before `]` are both accepted. The span
/// covers the text from `[` through `]`.
///
/// # Errors
///
/// Fails when a bracket is missing or an element is not a quoted string.
fn string_array(&self, input: &'a str) -> PResult<'a, StringArray> {
let start = input.strip_prefix('[').ok_or_else(|| fail("expected ["))?;
let mut s = self.arg_ws_maybe(start);
let mut elements = Vec::new();
// Empty array.
if let Some(rest) = s.strip_prefix(']') {
return Ok((
rest,
StringArray {
span: Span::new(self.off(input), self.off(rest)),
elements,
},
));
}
let (rest, first) = self.parse_quoted_string(s)?;
elements.push(first);
s = rest;
loop {
let after_ws = self.arg_ws_maybe(s);
let Some(after_comma) = after_ws.strip_prefix(',') else {
s = after_ws;
break;
};
let after_comma = self.arg_ws_maybe(after_comma);
// A comma directly followed by `]` is a trailing comma.
if after_comma.starts_with(']') {
s = after_comma;
break;
}
let (rest, element) = self.parse_quoted_string(after_comma)?;
elements.push(element);
s = rest;
}
let s = self.arg_ws_maybe(s);
let rest = s.strip_prefix(']').ok_or_else(|| fail("expected ]"))?;
Ok((
rest,
StringArray {
span: Span::new(self.off(input), self.off(rest)),
elements,
},
))
}
/// Parses free-form text that may span several lines via line continuations,
/// splitting it into string and comment components.
///
/// Each iteration handles one physical line: a line whose first
/// non-whitespace character is `#` becomes a comment component; otherwise the
/// text up to the newline or line continuation becomes a string component,
/// and parsing goes on only if a continuation follows. The span runs from
/// `input` to the end of the last component.
///
/// # Errors
///
/// Fails with "expected content" when no component was produced.
fn any_breakable(&self, input: &'a str) -> PResult<'a, BreakableString> {
let mut components: Vec<BreakableStringComponent> = Vec::new();
let mut s = input;
loop {
let after_ws = skip_ws(s);
if after_ws.starts_with('#') {
// Comment line: runs to the end of the line, newline excluded.
let line_end = match after_ws.find(['\n', '\r']) {
Some(i) => &after_ws[i..],
None => "",
};
let content = &after_ws[..after_ws.len() - line_end.len()];
let span = self.span(after_ws, line_end);
components.push(BreakableStringComponent::Comment(SpannedComment {
span,
content: content.to_string(),
}));
s = strip_newline(line_end).unwrap_or(line_end);
if s.is_empty() {
break;
}
continue;
}
// Content is taken from `s`, not `after_ws`, so leading whitespace is kept.
let (after_content, content) = take_any_content(s, self.escape);
if content.is_empty() {
break;
}
let span = self.span(s, after_content);
components.push(BreakableStringComponent::String(SpannedString {
span,
content: content.to_string(),
}));
s = after_content;
// Keep going only when the line ends with a continuation.
match line_continuation(s, self.escape) {
Some(rest) => {
s = rest;
if s.is_empty() {
break;
}
}
None => break,
}
}
if components.is_empty() {
return Err(fail("expected content"));
}
let end = component_end(components.last().unwrap());
let span = Span::new(self.off(input), end);
Ok((s, BreakableString { span, components }))
}
/// Skips argument whitespace, also consuming comment lines that follow a line
/// continuation. Returns `None` when nothing was consumed.
fn arg_ws(&self, input: &'a str) -> Option<&'a str> {
self.arg_ws_inner(input, true)
}
/// Skips argument whitespace but stops before comment lines that follow a
/// line continuation. Returns `None` when nothing was consumed.
fn arg_ws_keep_comments(&self, input: &'a str) -> Option<&'a str> {
self.arg_ws_inner(input, false)
}
/// Skips the whitespace that separates instruction arguments: spaces and
/// tabs, line continuations, and the empty lines following a continuation.
/// With `consume_comments`, comment lines following a continuation are
/// skipped as well.
///
/// Returns the remaining input, or `None` when nothing was consumed.
fn arg_ws_inner(&self, input: &'a str, consume_comments: bool) -> Option<&'a str> {
let mut s = input;
loop {
let after_ws = skip_ws(s);
// Some spaces/tabs were skipped: restart so a continuation can follow them.
if after_ws.len() != s.len() {
s = after_ws;
continue;
}
let Some(after_cont) = line_continuation(s, self.escape) else { break };
s = after_cont;
// After a continuation, drop any run of comment lines and empty lines.
loop {
if consume_comments && let Some(rest) = comment_line(s) {
s = rest;
continue;
}
if let Some(rest) = empty_line(s) {
s = rest;
} else {
break;
}
}
}
if s.len() == input.len() { None } else { Some(s) }
}
/// Like `arg_ws`, but returns `input` unchanged when there is no argument
/// whitespace to skip.
fn arg_ws_maybe(&self, input: &'a str) -> &'a str {
    match self.arg_ws(input) {
        Some(rest) => rest,
        None => input,
    }
}
/// Takes a non-empty word: everything up to the first whitespace or newline
/// character, or up to an escape character that begins a line continuation.
///
/// # Errors
///
/// Fails with "expected argument" when the word would be empty.
fn any_whitespace(&self, input: &'a str) -> PResult<'a, &'a str> {
    let stop = input
        .char_indices()
        .find(|&(i, c)| {
            is_ws(c)
                || is_newline_char(c)
                || (c == self.escape && line_continuation(&input[i..], self.escape).is_some())
        })
        .map_or(input.len(), |(i, _)| i);
    if stop == 0 {
        return Err(fail("expected argument"));
    }
    let (word, rest) = input.split_at(stop);
    Ok((rest, word))
}
/// Parses a value that is either a quoted string or, when `input` does not
/// start with a quote, whatever `fallback` accepts.
fn value_quoted_or(&self, input: &'a str, fallback: impl Fn(&Self, &'a str) -> PResult<'a, &'a str>) -> PResult<'a, SpannedString> {
    if !starts_with_quote(input) {
        return fallback(self, input)
            .map(|(rest, text)| (rest, self.spanned(input, rest, text.to_string())));
    }
    self.parse_quoted_string(input)
}
/// Consumes what may legally follow an instruction on its line: trailing
/// spaces/tabs and then end of input, a newline, a comment, or a dangling
/// line continuation. Returns the start of the next line.
///
/// # Errors
///
/// Fails with a snippet of the offending text when anything else follows.
fn finish_line(&self, input: &'a str) -> Result<&'a str, ParseErrorFailureError> {
    let rest = skip_ws(input);
    if rest.is_empty() {
        return Ok(rest);
    }
    if rest.starts_with('#') {
        return Ok(skip_to_next_line(rest));
    }
    strip_newline(rest)
        .or_else(|| line_continuation(rest, self.escape))
        .ok_or_else(|| ParseErrorFailureError::new(format!("unexpected character at: {}", snippet(rest))))
}
/// Captures the physical line starting at `input` verbatim, with trailing
/// whitespace trimmed, and returns the start of the following line.
fn unknown_line(&self, input: &'a str) -> (&'a str, SpannedString) {
    let cut = input.find(['\n', '\r']).unwrap_or(input.len());
    let (line, line_end) = input.split_at(cut);
    let content = line.trim_end();
    let start = self.off(input);
    let unknown = SpannedString {
        span: Span::new(start, start + content.len()),
        content: content.to_string(),
    };
    (strip_newline(line_end).unwrap_or(line_end), unknown)
}
/// Wraps `instruction` in a heredoc instruction when its text contains
/// heredoc markers and matching bodies follow on the next lines.
///
/// `rest` is the input right after the instruction. When there are no
/// markers, no newline follows, or a body is not terminated, the instruction
/// and `rest` are returned unchanged.
fn maybe_consume_heredocs(&self, instruction: Instruction, rest: &'a str) -> (&'a str, Instruction) {
    let start = instruction.span().start;
    let delimiters = find_heredoc_delimiters(&self.base[start..self.off(rest)]);
    let consumed = if delimiters.is_empty() {
        None
    } else {
        strip_newline(rest).and_then(|body_start| {
            self.consume_heredoc_bodies(body_start, &delimiters)
                .map(|(after, body_end)| (body_start, after, body_end))
        })
    };
    let Some((body_start, after, body_end)) = consumed else {
        return (rest, instruction);
    };
    let body = self.base[self.off(body_start)..body_end].to_string();
    let span = Span::new(start, body_end);
    let heredoc = HeredocInstruction {
        span,
        instruction: Box::new(instruction),
        body,
    };
    (after, Instruction::Heredoc(heredoc))
}
/// Scans the heredoc bodies that start at `body_start`, one per entry of
/// `delimiters`, in order.
///
/// Returns the input positioned at the end of the last closing-delimiter line
/// (before its newline) together with that position's offset, or `None` when
/// the input ends before every delimiter has been matched.
fn consume_heredoc_bodies(&self, body_start: &'a str, delimiters: &[Heredoc]) -> Option<(&'a str, usize)> {
let mut cur = body_start;
let mut final_rest = body_start;
let mut end_off = self.off(body_start);
for delim in delimiters {
loop {
if cur.is_empty() {
return None;
}
// Split off one physical line; `after` begins at its newline, if any.
let (line, after) = match cur.find(['\n', '\r']) {
Some(i) => (&cur[..i], &cur[i..]),
None => (cur, &cur[cur.len()..]),
};
let closed = delim.matches(line);
end_off = self.off(after);
final_rest = after;
cur = strip_newline(after).unwrap_or(after);
if closed {
break;
}
// Input ended on a line that did not close the body.
if after.is_empty() {
return None;
}
}
}
Some((final_rest, end_off))
}
/// Returns the byte offset of `s` within the source text, computed from the
/// slice start addresses. A slice that does not start inside the source text
/// maps to the end-of-input offset.
fn off(&self, s: &'a str) -> usize {
    let base_start = self.base.as_ptr() as usize;
    let base_end = base_start + self.base.len();
    let s_start = s.as_ptr() as usize;
    if (base_start..=base_end).contains(&s_start) {
        s_start - base_start
    } else {
        self.base.len()
    }
}
/// Builds the span that starts where `from` starts and ends where `to` starts.
fn span(&self, from: &'a str, to: &'a str) -> Span {
    let start = self.off(from);
    let end = self.off(to);
    Span::new(start, end)
}
/// Pairs `content` with the span between the starts of `from` and `to`.
fn spanned(&self, from: &'a str, to: &'a str, content: String) -> SpannedString {
    let span = self.span(from, to);
    SpannedString { span, content }
}
}
/// Selects which instruction `parse_shell_or_exec` builds from a parsed
/// shell-or-exec expression.
#[derive(Clone, Copy)]
enum ExprKind {
Run,
Cmd,
Entrypoint,
Shell,
}
impl ExprKind {
    /// Wraps `span` and `expr` in the instruction variant matching this kind.
    fn build(self, span: Span, expr: ShellOrExecExpr) -> Instruction {
        match self {
            Self::Run => Instruction::Run(RunInstruction { span, expr }),
            Self::Cmd => Instruction::Cmd(CmdInstruction { span, expr }),
            Self::Entrypoint => Instruction::Entrypoint(EntrypointInstruction { span, expr }),
            Self::Shell => Instruction::Shell(ShellInstruction { span, expr }),
        }
    }
}
/// A heredoc marker found on an instruction line.
struct Heredoc {
    /// The delimiter word that closes the body.
    word: String,
    /// Set for the `<<-` form: leading tabs on the closing line are ignored.
    strip_tabs: bool,
}

impl Heredoc {
    /// Returns whether `line` (without its newline) closes this heredoc.
    fn matches(&self, line: &str) -> bool {
        let candidate = if self.strip_tabs {
            line.trim_start_matches('\t')
        } else {
            line
        };
        candidate == self.word
    }
}
fn find_heredoc_delimiters(first_line: &str) -> Vec<Heredoc> {
first_line.split_whitespace().filter_map(parse_heredoc_token).collect()
}
/// Interprets one whitespace-delimited token as a heredoc marker.
///
/// Accepted shape: optional leading digits, `<<`, an optional `-`, then a
/// delimiter word (a letter or underscore followed by alphanumerics or
/// underscores) that may be wrapped in matching single or double quotes.
/// Nothing may follow the word (or its closing quote).
fn parse_heredoc_token(token: &str) -> Option<Heredoc> {
    let after_fd = token.trim_start_matches(|c: char| c.is_ascii_digit());
    let after_op = after_fd.strip_prefix("<<")?;
    let dash_stripped = after_op.strip_prefix('-');
    let strip_tabs = dash_stripped.is_some();
    let after_dash = dash_stripped.unwrap_or(after_op);
    let quote = after_dash.chars().next().filter(|c| matches!(c, '\'' | '"'));
    let rest = if quote.is_some() { &after_dash[1..] } else { after_dash };
    let first = rest.chars().next()?;
    if !(first.is_ascii_alphabetic() || first == '_') {
        return None;
    }
    // The first character is already a word character, so scanning from the
    // start finds the same end as scanning from the second character.
    let end = rest
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .unwrap_or(rest.len());
    let (word, trailing) = rest.split_at(end);
    let well_formed = match quote {
        Some(q) => trailing.len() == q.len_utf8() && trailing.starts_with(q),
        None => trailing.is_empty(),
    };
    if !well_formed {
        return None;
    }
    Some(Heredoc {
        word: word.to_string(),
        strip_tabs,
    })
}
/// Wraps a single string in a breakable string that shares its span.
fn breakable_from_string(s: SpannedString) -> BreakableString {
    let span = s.span;
    let components = vec![BreakableStringComponent::String(s)];
    BreakableString { span, components }
}
/// Returns the end offset of a breakable-string component.
fn component_end(component: &BreakableStringComponent) -> usize {
    let span = match component {
        BreakableStringComponent::String(string) => &string.span,
        BreakableStringComponent::Comment(comment) => &comment.span,
    };
    span.end
}
/// Returns whether `c` is a space or a tab.
fn is_ws(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
/// Returns whether `c` is a line feed or a carriage return.
fn is_newline_char(c: char) -> bool {
    matches!(c, '\n' | '\r')
}
/// Returns whether `s` begins with a line feed or a carriage return.
fn starts_with_newline(s: &str) -> bool {
    s.starts_with(['\n', '\r'])
}
/// Returns whether `s` begins with a double quote, single quote or backtick.
fn starts_with_quote(s: &str) -> bool {
    s.starts_with(['"', '\'', '`'])
}
/// Skips leading spaces and tabs, returning the remainder (or `s` itself if
/// the underlying parser reports an error).
fn skip_ws(s: &str) -> &str {
    match skip_while(is_ws)(s) {
        Ok((rest, _)) => rest,
        Err(_) => s,
    }
}
/// Splits off the leading run of ASCII letters, returning
/// `(remainder, letters)`; the run may be empty.
fn alpha0(input: &str) -> (&str, &str) {
    let letters = take_while(|c: char| c.is_ascii_alphabetic());
    match letters(input) {
        Ok(split) => split,
        Err(_) => (input, ""),
    }
}
/// Removes one leading line ending (`\r\n`, `\n` or `\r`), or returns `None`
/// when `s` does not start with one.
fn strip_newline(s: &str) -> Option<&str> {
    // CRLF must be tried first so it is removed as a single unit.
    if let Some(rest) = s.strip_prefix("\r\n") {
        return Some(rest);
    }
    s.strip_prefix(['\n', '\r'])
}
/// Returns the text following the first line ending in `s`, or an empty
/// string when `s` contains no line ending.
fn skip_to_next_line(s: &str) -> &str {
    let Some(i) = s.find(['\n', '\r']) else {
        return "";
    };
    let line_end = &s[i..];
    strip_newline(line_end).unwrap_or(line_end)
}
/// Strips `keyword` from the start of `s`, ignoring ASCII case. Returns
/// `None` when `s` is too short, the cut would split a character, or the
/// prefix does not match.
fn strip_prefix_ci<'a>(s: &'a str, keyword: &str) -> Option<&'a str> {
    let cut = keyword.len();
    // `is_char_boundary` is false both past the end and inside a character.
    if !s.is_char_boundary(cut) {
        return None;
    }
    let (head, tail) = s.split_at(cut);
    if head.eq_ignore_ascii_case(keyword) {
        Some(tail)
    } else {
        None
    }
}
/// Recognises a line continuation at the start of `s`: the escape character,
/// optional spaces/tabs, then a line ending or the end of input. Returns the
/// text after the continuation.
fn line_continuation(s: &str, escape: char) -> Option<&str> {
    let after_escape = skip_ws(s.strip_prefix(escape)?);
    if after_escape.is_empty() {
        return Some(after_escape);
    }
    strip_newline(after_escape)
}
/// If `s` starts (after optional spaces/tabs) with a `#` comment, returns the
/// text following that line.
fn comment_line(s: &str) -> Option<&str> {
    let rest = skip_ws(s);
    if rest.starts_with('#') {
        Some(skip_to_next_line(rest))
    } else {
        None
    }
}
/// If `s` starts with a line holding only spaces/tabs and ending in a line
/// ending, returns the text following that line.
fn empty_line(s: &str) -> Option<&str> {
    let rest = skip_ws(s);
    strip_newline(rest)
}
/// Splits `s` at the first newline character or line continuation, returning
/// `(remainder, content)`. When neither occurs, all of `s` is content and the
/// remainder is an empty string.
fn take_any_content(s: &str, escape: char) -> (&str, &str) {
    let stop = s.char_indices().find(|&(i, c)| {
        is_newline_char(c) || (c == escape && line_continuation(&s[i..], escape).is_some())
    });
    match stop {
        Some((i, _)) => (&s[i..], &s[..i]),
        None => ("", s),
    }
}
/// Removes `escape` characters from `s`, keeping the character each one
/// protects. A trailing escape with nothing after it is kept as-is.
fn unescape(s: &str, escape: char) -> String {
    let mut out = String::with_capacity(s.len());
    // True while the previous character was an unconsumed escape.
    let mut escaped = false;
    for c in s.chars() {
        if escaped {
            out.push(c);
            escaped = false;
        } else if c == escape {
            escaped = true;
        } else {
            out.push(c);
        }
    }
    if escaped {
        out.push(escape);
    }
    out
}
/// Strips the surrounding quote characters from `s` and resolves backslash
/// escapes in the remainder.
///
/// Recognised escapes: `\n`, `\t`, `\r`, `\b`, `\f`, `\\`, `\"`, `\'`,
/// `` \` ``, `\uXXXX`, `\UXXXXXXXX`, and a backslash directly followed by a
/// line ending (`\n`, `\r\n` or `\r`), which is dropped as a line
/// continuation. Any other escape is kept verbatim, backslash included.
///
/// Returns an empty string when `s` is too short to hold two quote
/// characters or when its first/last byte is not a one-byte character.
fn unquote(s: &str) -> String {
    // `get` rather than indexing so malformed input cannot panic.
    let Some(inner) = s.len().checked_sub(1).and_then(|last| s.get(1..last)) else {
        return String::new();
    };
    let mut result = String::with_capacity(inner.len());
    let mut iter = inner.chars();
    while let Some(c) = iter.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }
        match iter.next() {
            Some('n') => result.push('\n'),
            Some('t') => result.push('\t'),
            Some('r') => result.push('\r'),
            Some('b') => result.push('\u{0008}'),
            Some('f') => result.push('\u{000C}'),
            Some('\\') => result.push('\\'),
            Some('"') => result.push('"'),
            Some('\'') => result.push('\''),
            Some('`') => result.push('`'),
            // Backslash + line ending is a continuation and produces nothing.
            Some('\n') => {}
            Some('\r') => {
                // Treat CRLF as one line ending: also drop the LF if present.
                let mut lookahead = iter.clone();
                if lookahead.next() == Some('\n') {
                    iter = lookahead;
                }
            }
            Some('u') => push_unicode_escape(&mut result, &mut iter, 4),
            Some('U') => push_unicode_escape(&mut result, &mut iter, 8),
            Some(other) => {
                result.push('\\');
                result.push(other);
            }
            None => result.push('\\'),
        }
    }
    result
}

/// Decodes the hex digits of a `\u` (`digits == 4`) or `\U` escape from
/// `iter` and appends the resulting character to `result`.
///
/// When fewer than `digits` hex digits follow, or the value is not a valid
/// character, the escape is appended verbatim instead. The character that
/// ended an incomplete escape is left in `iter`, so the caller still sees it
/// (for example a backslash that begins the next escape).
fn push_unicode_escape(result: &mut String, iter: &mut std::str::Chars, digits: usize) {
    let marker = if digits == 4 { 'u' } else { 'U' };
    let mut hex = String::with_capacity(digits);
    for _ in 0..digits {
        // Peek on a clone and commit only when the character is a hex digit.
        let mut lookahead = iter.clone();
        match lookahead.next() {
            Some(c) if c.is_ascii_hexdigit() => {
                hex.push(c);
                *iter = lookahead;
            }
            _ => {
                result.push('\\');
                result.push(marker);
                result.push_str(&hex);
                return;
            }
        }
    }
    match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) {
        Some(c) => result.push(c),
        None => {
            result.push('\\');
            result.push(marker);
            result.push_str(&hex);
        }
    }
}
/// Builds a parse failure carrying the given static message.
fn fail(message: &'static str) -> ParseErrorFailureError {
ParseErrorFailureError::new(message)
}
/// Returns at most the first 40 characters of `s`, for use in error messages.
fn snippet(s: &str) -> String {
    let end = s.char_indices().nth(40).map_or(s.len(), |(i, _)| i);
    s[..end].to_string()
}
#[cfg(test)]
mod test {
use super::*;
// Inputs that end without a newline must never yield spans that run past
// the input or are inverted. Parse errors themselves are tolerated here.
#[test]
fn parses_without_trailing_newline() {
let cases = [
"FROM alpine",
"FROM alpine:3.10 AS build",
"FROM --platform=linux/amd64 node",
"RUN echo hi",
"RUN echo foo \\\n # comment",
"RUN \\\n #only a comment",
"CMD [\"a\", \"b\"]",
"CMD [\"a\"",
"EXPOSE 80",
"ARG VERSION",
"ARG VERSION=latest",
"ENV A=B",
"ENV A B",
"ENV A=b\\ c",
"LABEL a=b",
"COPY a b",
"SHELL [\"a\", \"b\"]",
"SHELL not-an-array",
"ONBUILD RUN echo hi",
"ONBUILD",
"HEALTHCHECK --interval=30s CMD curl",
"HEALTHCHECK NONE",
"HEALTHCHECK",
"RUN <<EOF\nbody\nEOF",
"RUN <<EOF\nbody",
"COPY <<EOF /dest\nbody\nEOF",
"",
" ",
"# just a comment",
];
for case in cases {
let result = Dockerfile::parse(case);
if let Ok(file) = result {
for instruction in &file.instructions {
let span = instruction.span();
assert!(span.end <= case.len(), "span out of range for {case:?}: {span:?}");
assert!(span.start <= span.end, "inverted span for {case:?}: {span:?}");
}
}
}
}
// Escape handling in `unquote`, including an incomplete `\u` escape that
// must be kept verbatim.
#[test]
fn unquotes_unicode_escapes() {
assert_eq!(unquote(r#""café""#), "café");
assert_eq!(unquote(r#""\U0001F600""#), "😀");
assert_eq!(unquote(r#""a\tb\nc""#), "a\tb\nc");
assert_eq!(unquote(r#""\u12""#), r"\u12");
assert_eq!(unquote(r#""plain""#), "plain");
}
// Heredoc markers are recognised in their plain, dash, quoted and
// fd-prefixed forms; shifts, here-strings and `>>` are not markers.
#[test]
fn detects_heredoc_delimiters() {
let one = |s| {
let d = find_heredoc_delimiters(s);
assert_eq!(d.len(), 1, "{s:?}");
(d[0].word.clone(), d[0].strip_tabs)
};
assert_eq!(one("RUN <<EOF"), ("EOF".to_string(), false));
assert_eq!(one("RUN <<-EOF"), ("EOF".to_string(), true));
assert_eq!(one("RUN <<'EOF'"), ("EOF".to_string(), false));
assert_eq!(one("RUN <<\"EOF\""), ("EOF".to_string(), false));
assert_eq!(one("RUN python3 <<END"), ("END".to_string(), false));
assert_eq!(one("RUN cat 2<<EOF"), ("EOF".to_string(), false));
assert_eq!(find_heredoc_delimiters("RUN <<A <<B").len(), 2);
assert!(find_heredoc_delimiters("RUN echo $((1<<2))").is_empty());
assert!(find_heredoc_delimiters("RUN cat <<<EOF").is_empty());
assert!(find_heredoc_delimiters("RUN echo a >> b").is_empty());
assert!(find_heredoc_delimiters("RUN echo hi").is_empty());
}
// The heredoc body keeps indentation and blank lines and includes the
// closing delimiter line, without its trailing newline.
#[test]
fn parses_heredoc_body_verbatim() {
let file = Dockerfile::parse("RUN <<EOF\n indented\n\nblank above\nEOF\n").unwrap();
match &file.instructions[0] {
Instruction::Heredoc(h) => {
assert_eq!(h.body, " indented\n\nblank above\nEOF");
assert!(matches!(&*h.instruction, Instruction::Run(_)));
}
other => panic!("expected heredoc, got {other:?}"),
}
}
// With `<<-`, a closing delimiter indented by tabs still closes the body.
#[test]
fn tab_indented_closing_delimiter_for_dash_form() {
let file = Dockerfile::parse("RUN <<-EOF\n\tline\n\tEOF\n").unwrap();
assert!(matches!(&file.instructions[0], Instruction::Heredoc(_)));
}
// A comment line right after `RUN \` is kept as the first component of the
// shell expression.
#[test]
fn keeps_comment_after_run_continuation() {
let file = Dockerfile::parse("RUN \\\n# note\necho hi\n").unwrap();
match &file.instructions[0] {
Instruction::Run(run) => match &run.expr {
ShellOrExecExpr::Shell(b) => {
assert!(matches!(b.components.first(), Some(BreakableStringComponent::Comment(c)) if c.content == "# note"));
}
other => panic!("expected shell, got {other:?}"),
},
other => panic!("expected run, got {other:?}"),
}
}
// Without a closing delimiter the instruction is not wrapped as a heredoc.
#[test]
fn unterminated_heredoc_is_left_unwrapped() {
let file = Dockerfile::parse("RUN <<EOF\nno closing delimiter\n").unwrap();
assert!(!matches!(&file.instructions[0], Instruction::Heredoc(_)));
}
}