use std::fmt;
/// A command written either as a single shell string or as a JSON-array argv.
///
/// The parser picks `Exec` when the instruction's argument text parses as a
/// JSON array of strings and `Shell` otherwise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ShellOrExec {
/// Shell form: the trimmed argument text.
Shell(String),
/// Exec form: the decoded elements of the JSON array.
Exec(Vec<String>),
}
/// The base named by a `FROM` instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BaseImage {
/// Produced when the image name is `scratch` (compared ASCII case-insensitively).
Scratch,
/// Any other image name, stored exactly as written.
Image(String),
/// NOTE(review): the parser in this file never constructs this variant;
/// presumably it denotes a reference to an earlier build stage and is filled
/// in by a later pass — confirm against the consumers of this type.
Stage(String),
}
/// Leading `--flag=value` options recognised on `COPY` and `ADD`.
///
/// Every field is `None` when the corresponding flag was not given.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CopyFlags {
/// Value of `--from=`.
pub from: Option<String>,
/// Value of `--chown=`.
pub chown: Option<String>,
/// Value of `--chmod=`.
pub chmod: Option<String>,
}
/// The `type=` of a `RUN --mount=` option.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MountKind {
/// `type=cache`
Cache,
/// `type=secret`
Secret,
/// `type=bind`
Bind,
/// `type=tmpfs`
Tmpfs,
/// `type=ssh`
Ssh,
/// Any other type value; the mount parser stores it lower-cased.
Other(String),
}
/// One parsed `--mount=` option of a `RUN` instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RunMount {
/// Mount type (`type=` key).
pub kind: MountKind,
/// Value of `target=` or one of its aliases `dst`, `destination`, `dest`.
pub target: Option<String>,
/// Value of `source=` or its alias `src`.
pub source: Option<String>,
/// Value of `id=`.
pub id: Option<String>,
/// Value of `from=`.
pub from: Option<String>,
/// Read-only flag, controlled by the `ro`/`readonly` and `rw`/`readwrite` keys.
pub readonly: bool,
/// Value of the `required` key.
pub required: bool,
}
/// A single instruction inside a build stage.
///
/// `FROM` is not represented here; it opens a new [`Stage`] instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Instruction {
/// `RUN`: the command plus any `--mount=` options that preceded it.
Run {
run: ShellOrExec,
mounts: Vec<RunMount>,
},
/// `COPY`: every path but the last is a source, the last one is the destination.
Copy {
sources: Vec<String>,
dest: String,
flags: CopyFlags,
},
/// `ADD`: parsed exactly like `COPY`.
Add {
sources: Vec<String>,
dest: String,
flags: CopyFlags,
},
/// `ENV`: `(key, value)` pairs in source order.
Env(Vec<(String, String)>),
/// `ARG` inside a stage: name and optional default value.
Arg {
name: String,
default: Option<String>,
},
/// `WORKDIR`: the trimmed argument text.
Workdir(String),
/// `USER`: the trimmed argument text.
User(String),
/// `EXPOSE`: the whitespace-separated arguments.
Expose(Vec<String>),
/// `LABEL`: `(key, value)` pairs in source order.
Label(Vec<(String, String)>),
/// `ENTRYPOINT` in shell or exec form.
Entrypoint(ShellOrExec),
/// `CMD` in shell or exec form.
Cmd(ShellOrExec),
/// `VOLUME`: JSON array elements, or whitespace-separated words.
Volume(Vec<String>),
/// `STOPSIGNAL`: the trimmed argument text.
StopSignal(String),
/// `SHELL`: elements of the mandatory JSON array.
Shell(Vec<String>),
}
impl Instruction {
pub fn run(cmd: ShellOrExec) -> Self {
Instruction::Run {
run: cmd,
mounts: Vec::new(),
}
}
}
/// One build stage: a `FROM` line and the instructions that follow it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Stage {
/// Base named by the `FROM` instruction.
pub base: BaseImage,
/// Stage name from `FROM <image> AS <name>`, if present.
pub name: Option<String>,
/// Value of `FROM --platform=`, if present.
pub platform: Option<String>,
/// Instructions of the stage, in source order.
pub instructions: Vec<Instruction>,
}
/// A parsed Dockerfile.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Dockerfile {
/// `ARG` instructions that appear before the first `FROM`, as
/// `(name, default)` pairs in source order.
pub global_args: Vec<(String, Option<String>)>,
/// Build stages in source order; `parse` never returns an empty list.
pub stages: Vec<Stage>,
}
/// Error produced when a Dockerfile cannot be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// 1-based line on which the offending instruction starts; `0` is used
    /// when the problem is not tied to a line (no `FROM` in the whole file).
    pub line: usize,
    /// Human-readable description of the problem.
    pub msg: String,
}

impl fmt::Display for ParseError {
    /// Renders the error as `Dockerfile parse error (line N): message`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, msg } = self;
        f.write_fmt(format_args!(
            "Dockerfile parse error (line {}): {}",
            line, msg
        ))
    }
}

impl std::error::Error for ParseError {}
/// Parses the text of a Dockerfile into stages and instructions.
///
/// `ARG` instructions that precede the first `FROM` are collected in
/// `global_args`; every other instruction is appended to the stage opened by
/// the most recent `FROM`.
///
/// # Errors
///
/// Returns a [`ParseError`] carrying the starting line of the instruction when
/// an instruction other than `ARG` appears before any `FROM`, when an
/// instruction is unsupported or malformed, and (with line `0`) when the input
/// contains no `FROM` at all.
pub fn parse(src: &str) -> Result<Dockerfile, ParseError> {
    let mut dockerfile = Dockerfile::default();
    let mut in_stage = false;
    let escape_char = detect_escape_directive(src);

    for (line, text) in collect_logical_lines(src, escape_char) {
        let (word, args) = if let Some(parts) = split_keyword(&text) {
            parts
        } else {
            continue;
        };
        // Attaches the instruction's starting line to a bare message.
        let fail = |msg: String| ParseError { line, msg };
        let upper = word.to_ascii_uppercase();

        match upper.as_str() {
            "FROM" => {
                let stage = parse_from(args).map_err(fail)?;
                dockerfile.stages.push(stage);
                in_stage = true;
            }
            "ARG" if !in_stage => dockerfile.global_args.push(parse_arg(args)),
            other => match dockerfile.stages.last_mut() {
                Some(current) => {
                    let instruction = parse_instruction(other, args).map_err(fail)?;
                    current.instructions.push(instruction);
                }
                None => {
                    return Err(fail(format!("instruction `{other}` before any FROM")));
                }
            },
        }
    }

    if dockerfile.stages.is_empty() {
        Err(ParseError {
            line: 0,
            msg: "no FROM instruction".to_owned(),
        })
    } else {
        Ok(dockerfile)
    }
}
/// Returns the line-continuation character selected by an `escape` parser
/// directive at the top of `src`, or `\` when there is none.
///
/// A parser directive is a comment of the form `# name = value` whose name is
/// a single ASCII identifier (a letter followed by letters/digits). Names are
/// matched case-insensitively and whitespace around the name, the `=` and the
/// value is ignored, so `#escape=\``, `# ESCAPE = \`` and a preceding
/// `# syntax = docker/dockerfile:1` line are all handled.
///
/// Scanning stops at the first line that is neither blank nor a directive;
/// an ordinary comment therefore ends the directive section.
fn detect_escape_directive(src: &str) -> char {
    for line in src.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        let directive = trimmed
            .strip_prefix('#')
            .and_then(|body| body.split_once('='));
        let (name, value) = match directive {
            Some((name, value)) => (name.trim(), value.trim()),
            // An instruction, or a comment without `=`: directives are over.
            None => break,
        };
        let starts_with_letter = name
            .chars()
            .next()
            .map_or(false, |c| c.is_ascii_alphabetic());
        if !starts_with_letter || !name.chars().all(|c| c.is_ascii_alphanumeric()) {
            // e.g. `# see a = b`: an ordinary comment that happens to contain `=`.
            break;
        }
        if name.eq_ignore_ascii_case("escape") {
            return value.chars().next().unwrap_or('\\');
        }
        // Some other directive (such as `syntax`): keep looking.
    }
    '\\'
}
/// A heredoc operator found on the opening line of a `RUN` instruction, split
/// into the pieces needed to re-assemble the instruction once its body lines
/// have been collected.
struct Heredoc {
    /// Instruction keyword as written in the source.
    keyword: String,
    /// Command text before the `<<` operator, trimmed.
    prefix: String,
    /// Delimiter word that terminates the body.
    delim: String,
    /// Text following the delimiter on the opening line (redirections, pipes,
    /// further arguments), including its leading whitespace.
    suffix: String,
    /// Whether the delimiter was introduced by a quote character.
    quoted: bool,
    /// Whether the operator was `<<-` (leading tabs are stripped from the body).
    strip_tabs: bool,
}

/// Looks for a heredoc operator (`<<WORD`, `<<-WORD`, `<<'WORD'`, `<<"WORD"`)
/// on a `RUN` line.
///
/// Only the first `<<` on the line is examined. The delimiter must start with
/// an ASCII letter or `_` and continue with ASCII alphanumerics or `_`; this
/// is what keeps arithmetic such as `$((1 << 4))` from being mistaken for a
/// heredoc. Returns `None` for every other line.
fn detect_heredoc(logical: &str) -> Option<Heredoc> {
    // A line without whitespace after its keyword has no arguments and so
    // cannot contain a heredoc operator.
    let (kw, rest) = logical.trim().split_once(char::is_whitespace)?;
    if !kw.eq_ignore_ascii_case("RUN") {
        return None;
    }
    let pos = rest.find("<<")?;
    let before = &rest[..pos];
    let after = &rest[pos + 2..];
    let (strip_tabs, after) = match after.strip_prefix('-') {
        Some(tail) => (true, tail),
        None => (false, after),
    };
    let after = after.trim_start();
    let (quote, after) = match after.chars().next() {
        Some(q @ ('"' | '\'')) => (Some(q), &after[1..]),
        _ => (None, after),
    };

    let end = after
        .char_indices()
        .find(|&(_, c)| !(c.is_ascii_alphanumeric() || c == '_'))
        .map_or(after.len(), |(idx, _)| idx);
    let delim = &after[..end];
    if !delim.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_') {
        return None;
    }

    // Whatever follows the delimiter (and its closing quote, if any) belongs
    // to the command line and must survive re-assembly.
    let tail = &after[end..];
    let suffix = match quote {
        Some(q) => tail.strip_prefix(q).unwrap_or(tail),
        None => tail,
    };

    Some(Heredoc {
        keyword: kw.to_owned(),
        prefix: before.trim().to_owned(),
        delim: delim.to_owned(),
        suffix: suffix.to_owned(),
        quoted: quote.is_some(),
        strip_tabs,
    })
}

/// Returns `line` as it counts inside a heredoc body: unchanged, or with its
/// leading tabs removed when the operator was `<<-`.
fn heredoc_line(line: &str, strip_tabs: bool) -> &str {
    if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    }
}

/// Splits `src` into logical instruction lines.
///
/// Each returned pair is the 1-based number of the physical line on which the
/// instruction starts and the instruction text. Blank lines and comment lines
/// are dropped, lines ending in `escape` are joined with the following line
/// (comment lines inside a continuation are skipped), and a terminated `RUN`
/// heredoc is folded into a single entry:
///
/// * `RUN <<EOF` followed by a body becomes `RUN <body>`;
/// * any other heredoc keeps its shape — `RUN cmd <<EOF rest`, the body lines
///   and the closing delimiter, joined with `\n` — so redirections and pipes
///   written after the delimiter are preserved.
///
/// An unterminated heredoc is left as an ordinary line.
fn collect_logical_lines(src: &str, escape: char) -> Vec<(usize, String)> {
    let lines: Vec<&str> = src.lines().collect();
    let n = lines.len();
    let mut out = Vec::new();
    let mut i = 0usize;

    while i < n {
        let first = lines[i].trim_start();
        if first.is_empty() || first.starts_with('#') {
            i += 1;
            continue;
        }

        let start_line = i + 1;
        let mut acc = String::new();
        let mut last = i;
        while last < n {
            let line = lines[last];
            // Comment lines in the middle of a continuation are not part of
            // the instruction.
            if last != i && line.trim_start().starts_with('#') {
                last += 1;
                continue;
            }
            match line.trim_end().strip_suffix(escape) {
                // A trailing escape on the very last line has nothing to join.
                Some(head) if last + 1 < n => {
                    acc.push_str(head);
                    last += 1;
                }
                _ => {
                    acc.push_str(line);
                    break;
                }
            }
        }

        if let Some(hd) = detect_heredoc(&acc) {
            let closing = lines
                .iter()
                .enumerate()
                .skip(last + 1)
                .find(|&(_, l)| heredoc_line(l, hd.strip_tabs).trim_end() == hd.delim)
                .map(|(idx, _)| idx);

            if let Some(end) = closing {
                let body = lines[last + 1..end]
                    .iter()
                    .map(|l| heredoc_line(l, hd.strip_tabs))
                    .collect::<Vec<_>>()
                    .join("\n");

                let rewritten = if hd.prefix.is_empty() && hd.suffix.is_empty() {
                    // The heredoc is the whole command: its body is the script.
                    format!("{} {}", hd.keyword, body)
                } else {
                    let q = if hd.quoted { "'" } else { "" };
                    let lead = if hd.prefix.is_empty() {
                        String::new()
                    } else {
                        format!("{} ", hd.prefix)
                    };
                    format!(
                        "{kw} {lead}<<{q}{d}{q}{suffix}\n{body}\n{d}",
                        kw = hd.keyword,
                        lead = lead,
                        q = q,
                        d = hd.delim,
                        suffix = hd.suffix,
                        body = body
                    )
                };
                out.push((start_line, rewritten));
                i = end + 1;
                continue;
            }
        }

        out.push((start_line, acc));
        i = last + 1;
    }
    out
}
/// Splits an instruction line into its keyword and the trimmed remainder.
///
/// Returns `None` for blank lines and for lines whose first non-blank
/// character is `#`. A line consisting of a keyword only yields an empty
/// remainder.
fn split_keyword(line: &str) -> Option<(&str, &str)> {
    let text = Some(line.trim()).filter(|t| !t.is_empty() && !t.starts_with('#'))?;
    let parts = text
        .split_once(char::is_whitespace)
        .map_or((text, ""), |(head, tail)| (head, tail.trim()));
    Some(parts)
}
/// Parses the arguments of a `FROM` instruction into an empty [`Stage`].
///
/// Leading `--…` words are treated as flags: `--platform=<value>` is recorded
/// (the last one wins) and every other flag is ignored. The first remaining
/// word is the image, and `AS <name>` directly after it names the stage.
///
/// # Errors
///
/// Fails when no image follows the flags.
fn parse_from(rest: &str) -> Result<Stage, String> {
    let words: Vec<&str> = rest.split_whitespace().collect();
    let flag_count = words
        .iter()
        .position(|w| !w.starts_with("--"))
        .unwrap_or(words.len());
    let (flag_words, positional) = words.split_at(flag_count);

    let platform = flag_words
        .iter()
        .filter_map(|flag| flag.strip_prefix("--platform="))
        .last()
        .map(str::to_owned);

    let image = positional.first().ok_or("FROM requires an image")?;
    let name = match positional {
        [_, keyword, alias, ..] if keyword.eq_ignore_ascii_case("AS") => {
            Some((*alias).to_owned())
        }
        _ => None,
    };
    let base = if image.eq_ignore_ascii_case("scratch") {
        BaseImage::Scratch
    } else {
        BaseImage::Image((*image).to_owned())
    };

    Ok(Stage {
        base,
        name,
        platform,
        instructions: Vec::new(),
    })
}
/// Parses the arguments of an `ARG` instruction into `(name, default)`.
///
/// Without `=` the whole trimmed text is the name and there is no default.
/// With `=` the text is split at the first `=`; both halves are trimmed and a
/// default wrapped in a matching pair of `"` or `'` has the quotes removed,
/// consistent with how `ENV`/`LABEL` values are unquoted in this file.
fn parse_arg(rest: &str) -> (String, Option<String>) {
    match rest.split_once('=') {
        None => (rest.trim().to_owned(), None),
        Some((name, value)) => {
            let value = value.trim();
            let unquoted = ['"', '\'']
                .iter()
                .find_map(|&quote| value.strip_prefix(quote)?.strip_suffix(quote))
                .unwrap_or(value);
            (name.trim().to_owned(), Some(unquoted.to_owned()))
        }
    }
}
/// Parses one in-stage instruction.
///
/// `kw` must already be upper-cased; `rest` is the argument text after the
/// keyword.
///
/// # Errors
///
/// Fails for keywords this parser does not model, for `SHELL` without a JSON
/// array, and for `COPY`/`ADD` with fewer than two paths.
fn parse_instruction(kw: &str, rest: &str) -> Result<Instruction, String> {
    let trimmed = || rest.trim().to_owned();
    let instruction = match kw {
        "RUN" => parse_run(rest),
        "CMD" => Instruction::Cmd(parse_shell_or_exec(rest)),
        "ENTRYPOINT" => Instruction::Entrypoint(parse_shell_or_exec(rest)),
        "ENV" => Instruction::Env(parse_kv_pairs(rest)),
        "LABEL" => Instruction::Label(parse_kv_pairs(rest)),
        "ARG" => {
            let (name, default) = parse_arg(rest);
            Instruction::Arg { name, default }
        }
        "WORKDIR" => Instruction::Workdir(trimmed()),
        "USER" => Instruction::User(trimmed()),
        "STOPSIGNAL" => Instruction::StopSignal(trimmed()),
        "EXPOSE" => {
            let ports = rest.split_whitespace().map(String::from).collect();
            Instruction::Expose(ports)
        }
        "VOLUME" => Instruction::Volume(parse_string_list(rest)),
        "SHELL" => {
            let argv = parse_json_array(rest).ok_or("SHELL requires a JSON array")?;
            Instruction::Shell(argv)
        }
        "COPY" => {
            let (sources, dest, flags) = parse_copy_add(rest)?;
            Instruction::Copy {
                sources,
                dest,
                flags,
            }
        }
        "ADD" => {
            let (sources, dest, flags) = parse_copy_add(rest)?;
            Instruction::Add {
                sources,
                dest,
                flags,
            }
        }
        other => return Err(format!("unsupported instruction `{other}`")),
    };
    Ok(instruction)
}
/// Parses the arguments of a `RUN` instruction.
///
/// Leading `--mount=…` options are parsed into [`RunMount`]s, leading
/// `--network=…` and `--security=…` options are skipped, and whatever remains
/// is the command in shell or exec form.
fn parse_run(rest: &str) -> Instruction {
    const SKIPPED_FLAGS: [&str; 2] = ["--network=", "--security="];

    let mut mounts = Vec::new();
    let mut cursor = rest.trim_start();
    while cursor.starts_with("--mount=") || SKIPPED_FLAGS.iter().any(|f| cursor.starts_with(f)) {
        // A flag runs up to the next whitespace (or the end of the text).
        let (flag, tail) = cursor
            .split_once(char::is_whitespace)
            .unwrap_or((cursor, ""));
        if let Some(spec) = flag.strip_prefix("--mount=") {
            mounts.push(parse_run_mount(spec));
        }
        cursor = tail.trim_start();
    }

    let run = parse_shell_or_exec(cursor);
    Instruction::Run { run, mounts }
}
/// Parses the comma-separated `key=value` list of one `--mount=` option.
///
/// Keys are matched case-insensitively and unknown keys are ignored. When no
/// `type` key is present the mount kind is `Bind`.
///
/// Boolean keys (`ro`/`readonly`, `rw`/`readwrite`, `required`) may be written
/// bare (meaning true) or with one of the spellings `1`, `t`, `T`, `true`,
/// `TRUE`, `True`, `0`, `f`, `F`, `false`, `FALSE`, `False`. `rw=false` makes
/// the mount read-only, mirroring `ro=true`. A value outside that list leaves
/// `ro`/`required` false and leaves `rw` meaning writable.
fn parse_run_mount(spec: &str) -> RunMount {
    /// Interprets the value of a boolean mount key; `None` means unrecognised.
    fn flag_value(value: &str) -> Option<bool> {
        match value {
            "" | "1" | "t" | "T" | "true" | "TRUE" | "True" => Some(true),
            "0" | "f" | "F" | "false" | "FALSE" | "False" => Some(false),
            _ => None,
        }
    }

    let mut mount = RunMount {
        kind: MountKind::Bind,
        target: None,
        source: None,
        id: None,
        from: None,
        readonly: false,
        required: false,
    };

    for field in spec.split(',').map(str::trim).filter(|f| !f.is_empty()) {
        let (key, value) = match field.split_once('=') {
            Some((k, v)) => (k.trim(), v.trim()),
            None => (field, ""),
        };
        match key.to_ascii_lowercase().as_str() {
            "type" => {
                mount.kind = match value.to_ascii_lowercase().as_str() {
                    "cache" => MountKind::Cache,
                    "secret" => MountKind::Secret,
                    "bind" => MountKind::Bind,
                    "tmpfs" => MountKind::Tmpfs,
                    "ssh" => MountKind::Ssh,
                    other => MountKind::Other(other.to_owned()),
                }
            }
            "target" | "dst" | "destination" | "dest" => mount.target = Some(value.to_owned()),
            "source" | "src" => mount.source = Some(value.to_owned()),
            "id" => mount.id = Some(value.to_owned()),
            "from" => mount.from = Some(value.to_owned()),
            "ro" | "readonly" => mount.readonly = flag_value(value).unwrap_or(false),
            // `rw` is the negation of `ro`, so `rw=false` must set the flag.
            "rw" | "readwrite" => mount.readonly = !flag_value(value).unwrap_or(true),
            "required" => mount.required = flag_value(value).unwrap_or(false),
            _ => {}
        }
    }
    mount
}
/// Classifies command text as exec form (it parses as a JSON array of strings)
/// or shell form (anything else, stored trimmed).
fn parse_shell_or_exec(rest: &str) -> ShellOrExec {
    match parse_json_array(rest) {
        Some(argv) => ShellOrExec::Exec(argv),
        None => ShellOrExec::Shell(rest.trim().to_owned()),
    }
}
/// Parses the arguments of `ENV` / `LABEL` into `(key, value)` pairs.
///
/// Two syntaxes are supported and the first word decides which one applies:
///
/// * legacy `KEY value…` — the first word contains no `=`; everything after it
///   is the value, verbatim, even when it contains `=` itself
///   (`ENV JAVA_OPTS -Dfoo=bar` sets `JAVA_OPTS`);
/// * `KEY=value KEY2="two words"` — quote-aware word splitting; words without
///   `=` are ignored.
///
/// A lone key without a value yields the key paired with an empty string.
fn parse_kv_pairs(rest: &str) -> Vec<(String, String)> {
    let rest = rest.trim();
    let first_word = rest.split_whitespace().next().unwrap_or("");

    if !first_word.contains('=') {
        let pair = match rest.split_once(char::is_whitespace) {
            Some((key, value)) => (key.to_owned(), value.trim().to_owned()),
            None => (rest.to_owned(), String::new()),
        };
        return vec![pair];
    }

    split_respecting_quotes(rest)
        .iter()
        .filter_map(|word| word.split_once('='))
        .map(|(key, value)| (key.to_owned(), unquote(value)))
        .collect()
}
fn parse_copy_add(rest: &str) -> Result<(Vec<String>, String, CopyFlags), String> {
let mut flags = CopyFlags::default();
let mut toks: Vec<String> = if let Some(arr) = parse_json_array(rest) {
arr
} else {
split_respecting_quotes(rest)
.into_iter()
.map(|s| s.to_owned())
.collect()
};
while let Some(first) = toks.first() {
if let Some(v) = first.strip_prefix("--from=") {
flags.from = Some(v.to_owned());
} else if let Some(v) = first.strip_prefix("--chown=") {
flags.chown = Some(v.to_owned());
} else if let Some(v) = first.strip_prefix("--chmod=") {
flags.chmod = Some(v.to_owned());
} else if first.starts_with("--") {
} else {
break;
}
toks.remove(0);
}
if toks.len() < 2 {
return Err("COPY/ADD requires at least one source and a dest".to_owned());
}
let dest = toks.pop().unwrap();
Ok((toks, dest, flags))
}
/// Parses `s` as a JSON array of strings, e.g. `["node", "server.js"]`.
///
/// Returns `None` when the text is not such an array, so callers can fall back
/// to shell form: missing brackets, non-string elements, an unterminated
/// string, elements not separated by exactly one comma, a trailing comma, or
/// a malformed `\u` escape.
///
/// The escapes `\n`, `\t`, `\r`, `\b`, `\f` and `\uXXXX` (including surrogate
/// pairs) are decoded; a backslash before any other character yields that
/// character unchanged, which covers `\"`, `\\` and `\/`.
fn parse_json_array(s: &str) -> Option<Vec<String>> {
    let inner = s.trim().strip_prefix('[')?.strip_suffix(']')?;
    let mut chars = inner.chars();
    let mut items = Vec::new();

    loop {
        // Expect an element (or, for the very first position, the end).
        match chars.find(|c| !c.is_whitespace()) {
            Some('"') => items.push(parse_json_string(&mut chars)?),
            None if items.is_empty() => return Some(items),
            // Anything else, or running out right after a comma, is invalid.
            _ => return None,
        }
        // Expect a separator or the end.
        match chars.find(|c| !c.is_whitespace()) {
            None => return Some(items),
            Some(',') => {}
            Some(_) => return None,
        }
    }
}

/// Reads the rest of a JSON string whose opening quote has been consumed.
/// Returns `None` if the input ends before the closing quote.
fn parse_json_string(chars: &mut std::str::Chars<'_>) -> Option<String> {
    let mut text = String::new();
    loop {
        match chars.next()? {
            '"' => return Some(text),
            '\\' => {
                let decoded = match chars.next()? {
                    'n' => '\n',
                    't' => '\t',
                    'r' => '\r',
                    'b' => '\u{0008}',
                    'f' => '\u{000C}',
                    'u' => parse_unicode_escape(chars)?,
                    other => other,
                };
                text.push(decoded);
            }
            other => text.push(other),
        }
    }
}

/// Decodes the hex digits of a `\u` escape (the `\u` itself already consumed),
/// combining a high surrogate with the `\uXXXX` low surrogate that must follow.
fn parse_unicode_escape(chars: &mut std::str::Chars<'_>) -> Option<char> {
    fn hex4(chars: &mut std::str::Chars<'_>) -> Option<u32> {
        let mut value = 0;
        for _ in 0..4 {
            value = value * 16 + chars.next()?.to_digit(16)?;
        }
        Some(value)
    }

    let first = hex4(chars)?;
    if !(0xD800..0xDC00).contains(&first) {
        // Scalar value, or a lone low surrogate (which `from_u32` rejects).
        return char::from_u32(first);
    }
    if chars.next()? != '\\' || chars.next()? != 'u' {
        return None;
    }
    let second = hex4(chars)?;
    if !(0xDC00..0xE000).contains(&second) {
        return None;
    }
    char::from_u32(0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00))
}
/// Reads a list written either as a JSON array of strings or as plain
/// whitespace-separated words.
fn parse_string_list(rest: &str) -> Vec<String> {
    parse_json_array(rest)
        .unwrap_or_else(|| rest.split_whitespace().map(String::from).collect())
}
/// Splits `s` on whitespace while keeping quoted sections together.
///
/// Single and double quotes both group text; the quote characters themselves
/// are dropped and no escape sequences are interpreted. A quoted empty string
/// still produces an (empty) word, and an unterminated quote runs to the end
/// of the input.
fn split_respecting_quotes(s: &str) -> Vec<String> {
    let mut words = Vec::new();
    let mut current = String::new();
    // The quote character we are inside of, if any.
    let mut open_quote: Option<char> = None;
    // Set once the current word has contained a quote, so `""` is not lost.
    let mut saw_quote = false;

    for ch in s.chars() {
        if let Some(closing) = open_quote {
            if ch == closing {
                open_quote = None;
            } else {
                current.push(ch);
            }
            continue;
        }
        match ch {
            '"' | '\'' => {
                open_quote = Some(ch);
                saw_quote = true;
            }
            _ if ch.is_whitespace() => {
                if saw_quote || !current.is_empty() {
                    words.push(std::mem::take(&mut current));
                    saw_quote = false;
                }
            }
            _ => current.push(ch),
        }
    }

    if saw_quote || !current.is_empty() {
        words.push(current);
    }
    words
}
/// Trims `s` and removes one pair of matching surrounding quotes (`"…"` or
/// `'…'`), if present. Text without such a pair is returned trimmed.
fn unquote(s: &str) -> String {
    let trimmed = s.trim();
    for quote in ['"', '\''] {
        let inner = trimmed
            .strip_prefix(quote)
            .and_then(|tail| tail.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner.to_owned();
        }
    }
    trimmed.to_owned()
}
// Unit tests driving the public `parse` entry point with small inline
// Dockerfiles and checking the resulting stages and instructions.
#[cfg(test)]
mod tests {
use super::*;
// A typical single-stage Node.js Dockerfile: one stage, seven instructions
// (the comment line is not counted).
#[test]
fn parses_the_essential_upsell_dockerfile() {
let src = "FROM node:20-alpine\n\
EXPOSE 3000\n\
WORKDIR /app\n\
COPY . .\n\
ENV NODE_ENV=production\n\
RUN npm install --omit=dev\n\
# a comment\n\
RUN npm remove @shopify/app @shopify/cli\n\
RUN npm run build\n";
let df = parse(src).expect("parse");
assert_eq!(df.stages.len(), 1);
let s = &df.stages[0];
assert_eq!(s.base, BaseImage::Image("node:20-alpine".into()));
assert_eq!(s.instructions.len(), 7);
}
// Plain text is shell form; a JSON array is exec form.
#[test]
fn run_shell_vs_exec_form() {
let df = parse("FROM x\nRUN echo hi\nCMD [\"node\", \"server.js\"]\n").unwrap();
let ins = &df.stages[0].instructions;
assert_eq!(
ins[0],
Instruction::run(ShellOrExec::Shell("echo hi".into()))
);
assert_eq!(
ins[1],
Instruction::Cmd(ShellOrExec::Exec(vec!["node".into(), "server.js".into()]))
);
}
// A trailing backslash joins the next physical line onto the instruction.
#[test]
fn line_continuation_joins() {
let df = parse("FROM x\nRUN apt-get update && \\\n apt-get install -y curl\n").unwrap();
assert_eq!(
df.stages[0].instructions[0],
Instruction::run(ShellOrExec::Shell(
"apt-get update && apt-get install -y curl".into()
))
);
}
// Two stages, a named first stage, and `COPY --from=` in the second.
#[test]
fn multi_stage_with_copy_from() {
let src = "FROM golang AS build\n\
RUN go build -o /app\n\
FROM scratch\n\
COPY --from=build /app /app\n";
let df = parse(src).unwrap();
assert_eq!(df.stages.len(), 2);
assert_eq!(df.stages[0].name, Some("build".into()));
assert_eq!(df.stages[1].base, BaseImage::Scratch);
match &df.stages[1].instructions[0] {
Instruction::Copy { flags, dest, .. } => {
assert_eq!(flags.from.as_deref(), Some("build"));
assert_eq!(dest, "/app");
}
other => panic!("expected COPY, got {other:?}"),
}
}
// `ENV K=V K2=V2` yields several pairs; legacy `ENV K v w` yields one pair
// whose value is the rest of the line.
#[test]
fn env_both_forms() {
let a = parse("FROM x\nENV FOO=bar BAZ=qux\n").unwrap();
assert_eq!(
a.stages[0].instructions[0],
Instruction::Env(vec![
("FOO".into(), "bar".into()),
("BAZ".into(), "qux".into())
])
);
let b = parse("FROM x\nENV FOO bar baz\n").unwrap();
assert_eq!(
b.stages[0].instructions[0],
Instruction::Env(vec![("FOO".into(), "bar baz".into())])
);
}
// An `ARG` before the first `FROM` lands in `global_args`, not in a stage.
#[test]
fn global_arg_before_from() {
let df = parse("ARG VERSION=20\nFROM node:${VERSION}\n").unwrap();
assert_eq!(df.global_args, vec![("VERSION".into(), Some("20".into()))]);
assert_eq!(df.stages.len(), 1);
}
// `# escape=` switches the continuation character to a backtick.
#[test]
fn escape_directive_and_comments() {
let df = parse("# escape=`\nFROM x\nRUN echo a `\n echo b\n").unwrap();
assert_eq!(
df.stages[0].instructions[0],
Instruction::run(ShellOrExec::Shell("echo a echo b".into()))
);
}
// Both an instruction before `FROM` and a file without `FROM` are errors.
#[test]
fn rejects_instruction_before_from() {
assert!(parse("RUN echo hi\n").is_err());
assert!(parse("# only a comment\n").is_err());
}
// A single `--mount=type=cache,…` option is split off from the command.
#[test]
fn run_mount_cache_and_command() {
let df = parse("FROM x\nRUN --mount=type=cache,target=/root/.cargo,id=cargo cargo build\n")
.unwrap();
match &df.stages[0].instructions[0] {
Instruction::Run { run, mounts } => {
assert_eq!(run, &ShellOrExec::Shell("cargo build".into()));
assert_eq!(mounts.len(), 1);
assert_eq!(mounts[0].kind, MountKind::Cache);
assert_eq!(mounts[0].target.as_deref(), Some("/root/.cargo"));
assert_eq!(mounts[0].id.as_deref(), Some("cargo"));
}
other => panic!("{other:?}"),
}
}
// Several `--mount=` options in a row, including a bare `required` key.
#[test]
fn run_multiple_mounts_and_secret() {
let df = parse(
"FROM x\nRUN --mount=type=cache,target=/c \
--mount=type=secret,id=tok,target=/run/secrets/tok,required \
make\n",
)
.unwrap();
match &df.stages[0].instructions[0] {
Instruction::Run { run, mounts } => {
assert_eq!(run, &ShellOrExec::Shell("make".into()));
assert_eq!(mounts.len(), 2);
assert_eq!(mounts[0].kind, MountKind::Cache);
assert_eq!(mounts[1].kind, MountKind::Secret);
assert_eq!(mounts[1].id.as_deref(), Some("tok"));
assert!(mounts[1].required);
}
other => panic!("{other:?}"),
}
}
// A `RUN` without options equals what `Instruction::run` builds.
#[test]
fn run_without_mount_has_empty_mounts() {
let df = parse("FROM x\nRUN echo hi\n").unwrap();
assert_eq!(
df.stages[0].instructions[0],
Instruction::run(ShellOrExec::Shell("echo hi".into()))
);
}
// `RUN <<EOF` with nothing before the operator: the body becomes the command
// and parsing resumes after the closing delimiter.
#[test]
fn run_heredoc_script_body() {
let df = parse(
"FROM x\n\
RUN <<EOF\n\
apt-get update\n\
apt-get install -y curl\n\
EOF\n\
RUN echo done\n",
)
.unwrap();
let ins = &df.stages[0].instructions;
assert_eq!(ins.len(), 2);
assert_eq!(
ins[0],
Instruction::run(ShellOrExec::Shell(
"apt-get update\napt-get install -y curl".into()
))
);
assert_eq!(
ins[1],
Instruction::run(ShellOrExec::Shell("echo done".into()))
);
}
// With a command before the operator the heredoc is kept in its textual shape.
#[test]
fn run_heredoc_with_interpreter() {
let df = parse(
"FROM x\n\
RUN python3 <<EOF\n\
print(\"hi\")\n\
EOF\n",
)
.unwrap();
assert_eq!(
df.stages[0].instructions[0],
Instruction::run(ShellOrExec::Shell(
"python3 <<EOF\nprint(\"hi\")\nEOF".into()
))
);
}
// `<<-` strips leading tabs from the body and from the closing delimiter.
#[test]
fn run_heredoc_dash_strips_tabs() {
let df = parse("FROM x\nRUN <<-EOF\n\t\techo hi\n\t\tEOF\n").unwrap();
assert_eq!(
df.stages[0].instructions[0],
Instruction::run(ShellOrExec::Shell("echo hi".into()))
);
}
// An arithmetic left shift must not swallow the following lines as a body.
#[test]
fn left_shift_is_not_a_heredoc() {
let df = parse("FROM x\nRUN echo $((1 << 4))\nRUN echo after\n").unwrap();
let ins = &df.stages[0].instructions;
assert_eq!(
ins[0],
Instruction::run(ShellOrExec::Shell("echo $((1 << 4))".into()))
);
assert_eq!(
ins[1],
Instruction::run(ShellOrExec::Shell("echo after".into()))
);
}
// Without a closing delimiter the line is kept as an ordinary command.
#[test]
fn unterminated_heredoc_falls_back() {
let df = parse("FROM x\nRUN cat <<EOF\n").unwrap();
assert_eq!(
df.stages[0].instructions[0],
Instruction::run(ShellOrExec::Shell("cat <<EOF".into()))
);
}
// `--chown=` is recorded; the last path is the destination, the rest sources.
#[test]
fn copy_with_chown() {
let df = parse("FROM x\nCOPY --chown=node:node a b /dest\n").unwrap();
match &df.stages[0].instructions[0] {
Instruction::Copy {
sources,
dest,
flags,
} => {
assert_eq!(sources, &vec!["a".to_string(), "b".to_string()]);
assert_eq!(dest, "/dest");
assert_eq!(flags.chown.as_deref(), Some("node:node"));
}
other => panic!("{other:?}"),
}
}
}