supermachine 0.7.70

//! Dockerfile frontend: parse a Dockerfile into a structured, multi-stage
//! instruction model. The parser is the first component of the in-VM
//! builder (docs/design/in-vm-builder-2026-06-05.md) — the build DAG and
//! the snapshot-per-layer executor consume this model.
//!
//! Scope of this module: lexing (line continuations, comments, the
//! `# escape=` / `# syntax=` directives), instruction parsing (shell vs
//! exec/JSON form), multi-stage `FROM ... AS`, and per-instruction flags
//! (`COPY --from`, `--chown`, …). Heredocs and `RUN --mount` are modeled
//! with extension points and parsed best-effort; full support lands with
//! the executor that needs them.

use std::fmt;

/// Shell form (`RUN echo hi`) vs exec/JSON form (`RUN ["echo", "hi"]`).
///
/// Used as the payload of `RUN`, `CMD` and `ENTRYPOINT`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ShellOrExec {
    /// Shell form: the trimmed command text, kept as one unparsed string.
    Shell(String),
    /// Exec form: the decoded string elements of the JSON array, in order.
    Exec(Vec<String>),
}

/// The base of a stage: `scratch`, a registry image, or a prior stage name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BaseImage {
    /// `FROM scratch` (matched case-insensitively by the parser).
    Scratch,
    /// Any other `FROM` reference, stored verbatim (variables are not expanded).
    Image(String),
    /// A reference to an earlier stage. The parser in this module never emits
    /// this variant: `parse_from` models every non-`scratch` base as `Image`
    /// and leaves reclassification to the build graph.
    Stage(String),
}

/// Flags shared by `COPY` / `ADD`.
///
/// Every field is `None` when the corresponding flag is absent; values are
/// stored as written, without validation.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CopyFlags {
    /// `--from=<stage name | stage index | external image>`.
    pub from: Option<String>,
    /// `--chown=user:group`.
    pub chown: Option<String>,
    /// `--chmod=0755`.
    pub chmod: Option<String>,
}

/// The kind of a `RUN --mount`. Unknown kinds parse to [`MountKind::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MountKind {
    /// `type=cache`.
    Cache,
    /// `type=secret`.
    Secret,
    /// `type=bind`; also what the parser assumes when `type=` is omitted.
    Bind,
    /// `type=tmpfs`.
    Tmpfs,
    /// `type=ssh`.
    Ssh,
    /// Any other `type=` value, stored lowercased.
    Other(String),
}

/// A `RUN --mount=...` directive (BuildKit). Only the fields the executor
/// uses are modeled; unknown keys are ignored.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RunMount {
    /// Mount type (`type=`); `Bind` when the spec has no `type=` key.
    pub kind: MountKind,
    /// Mount point in the guest (`target`/`dst`/`destination`/`dest`).
    pub target: Option<String>,
    /// Host source (`source`/`src`) for bind/secret mounts.
    pub source: Option<String>,
    /// Cache/secret id (`id`). The parser leaves this `None` when the key is
    /// absent; any defaulting (e.g. to the target for caches) is up to the
    /// consumer.
    pub id: Option<String>,
    /// `from=<stage|image>` for bind mounts.
    pub from: Option<String>,
    /// `ro`/`readonly`.
    pub readonly: bool,
    /// `required` (secret must exist).
    pub required: bool,
}

/// One Dockerfile instruction (within a stage).
///
/// `FROM` is not an instruction here: it opens a new [`Stage`] instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Instruction {
    /// `RUN`: the command plus any parsed `--mount=` flags (in source order).
    Run {
        run: ShellOrExec,
        mounts: Vec<RunMount>,
    },
    /// `COPY`: every argument but the last is a source; the last is `dest`.
    Copy {
        sources: Vec<String>,
        dest: String,
        flags: CopyFlags,
    },
    /// `ADD`: same shape as `COPY`.
    Add {
        sources: Vec<String>,
        dest: String,
        flags: CopyFlags,
    },
    /// `ENV`: `(key, value)` pairs in source order.
    Env(Vec<(String, String)>),
    /// `ARG` inside a stage (pre-`FROM` args live in `Dockerfile::global_args`).
    Arg {
        name: String,
        default: Option<String>,
    },
    /// `WORKDIR`: the trimmed argument text.
    Workdir(String),
    /// `USER`: the trimmed argument text.
    User(String),
    /// `EXPOSE`: the whitespace-separated arguments, unparsed (e.g. `80/tcp`).
    Expose(Vec<String>),
    /// `LABEL`: `(key, value)` pairs in source order.
    Label(Vec<(String, String)>),
    /// `ENTRYPOINT` in shell or exec form.
    Entrypoint(ShellOrExec),
    /// `CMD` in shell or exec form.
    Cmd(ShellOrExec),
    /// `VOLUME`: a JSON array's elements, or the whitespace-separated paths.
    Volume(Vec<String>),
    /// `STOPSIGNAL`: the trimmed argument text.
    StopSignal(String),
    /// `SHELL`: the elements of the (mandatory) JSON array.
    Shell(Vec<String>),
}

impl Instruction {
    /// A `RUN` with no `--mount` directives (the common case + test ergonomics).
    pub fn run(cmd: ShellOrExec) -> Self {
        Instruction::Run {
            run: cmd,
            mounts: Vec::new(),
        }
    }
}

/// A build stage: `FROM <base> [AS <name>]` plus its instructions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Stage {
    /// What the stage starts from.
    pub base: BaseImage,
    /// The `AS <name>` alias, if one was given.
    pub name: Option<String>,
    /// The value of `--platform=...` on the `FROM` line, if present.
    pub platform: Option<String>,
    /// Instructions that follow this `FROM`, in source order.
    pub instructions: Vec<Instruction>,
}

/// A parsed Dockerfile.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Dockerfile {
    /// `ARG`s declared before the first `FROM` (global build args).
    /// Each entry is `(name, default)`.
    pub global_args: Vec<(String, Option<String>)>,
    /// Build stages in source order; a successful `parse` yields at least one.
    pub stages: Vec<Stage>,
}

/// Error produced by [`parse`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// 1-based line where the offending logical line starts; `0` when the
    /// error concerns the file as a whole (no `FROM` at all).
    pub line: usize,
    /// Human-readable description of the problem.
    pub msg: String,
}

impl fmt::Display for ParseError {
    /// Renders as `Dockerfile parse error (line N): <msg>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { line, msg } = self;
        f.write_fmt(format_args!(
            "Dockerfile parse error (line {}): {}",
            line, msg
        ))
    }
}

impl std::error::Error for ParseError {}

/// Parse Dockerfile source text into the [`Dockerfile`] model.
///
/// Each `FROM` opens a new stage; `ARG`s that appear before the first `FROM`
/// are collected as global args; every other instruction is appended to the
/// most recent stage.
///
/// # Errors
///
/// Returns a [`ParseError`] carrying the starting line of the offending
/// logical line when a `FROM` is malformed, an instruction appears before any
/// `FROM`, or an instruction is unsupported/malformed. A source with no `FROM`
/// at all fails with line `0`.
pub fn parse(src: &str) -> Result<Dockerfile, ParseError> {
    let escape_char = detect_escape_directive(src);
    let mut dockerfile = Dockerfile::default();
    let mut in_stage = false;

    for (line, text) in collect_logical_lines(src, escape_char) {
        // Blank and comment-only logical lines carry no keyword.
        let (word, args) = match split_keyword(&text) {
            Some(parts) => parts,
            None => continue,
        };
        let upper = word.to_ascii_uppercase();
        let fail = |msg: String| ParseError { line, msg };

        if upper == "FROM" {
            dockerfile.stages.push(parse_from(args).map_err(fail)?);
            in_stage = true;
        } else if upper == "ARG" && !in_stage {
            dockerfile.global_args.push(parse_arg(args));
        } else {
            // The stage check comes first so a misplaced instruction is
            // reported as such even if it is also unsupported.
            let current = match dockerfile.stages.last_mut() {
                Some(stage) => stage,
                None => return Err(fail(format!("instruction `{upper}` before any FROM"))),
            };
            current
                .instructions
                .push(parse_instruction(&upper, args).map_err(fail)?);
        }
    }

    if dockerfile.stages.is_empty() {
        Err(ParseError {
            line: 0,
            msg: "no FROM instruction".to_owned(),
        })
    } else {
        Ok(dockerfile)
    }
}

/// The `# escape=` parser directive (default `\`). Only honored if it's a
/// directive line at the very top of the file.
///
/// Directive lines have the shape `# key = value`. The key is matched
/// case-insensitively and whitespace around `=` is allowed, so `#escape=X`,
/// `# Escape = X` and `# ESCAPE =X` are all recognized. Other single-token
/// `key=value` directives (e.g. `# syntax=...`) are skipped over; blank lines
/// are skipped too. The first line that is anything else — an ordinary
/// comment or an instruction — ends the directive block.
///
/// Returns the first character of the directive's value, or `\` when there is
/// no directive or its value is empty.
fn detect_escape_directive(src: &str) -> char {
    for line in src.lines() {
        let t = line.trim();
        if t.is_empty() {
            continue;
        }
        // A directive is a `#` line containing `=`; anything else ends the block.
        let directive = t.strip_prefix('#').and_then(|body| body.split_once('='));
        let (key, value) = match directive {
            Some((k, v)) => (k.trim(), v.trim()),
            None => break,
        };
        // `# some words = ...` is prose, not a directive.
        if key.is_empty() || key.contains(char::is_whitespace) {
            break;
        }
        if key.eq_ignore_ascii_case("escape") {
            return value.chars().next().unwrap_or('\\');
        }
        // Other directives (syntax=) — keep scanning, unless the value looks
        // like free text, in which case treat the line as a plain comment.
        if value.contains(char::is_whitespace) {
            break;
        }
    }
    '\\'
}

/// A `RUN` heredoc (`RUN <<EOF ... EOF`, `RUN cmd <<-'EOF' ... EOF`).
///
/// Describes only the opening line; the body is collected separately by the
/// lexer.
struct Heredoc {
    /// The instruction keyword as spelled in the source (always some casing
    /// of `RUN` for now).
    keyword: String,
    /// Text between the keyword and `<<` (e.g. `python3` in `RUN python3 <<EOF`),
    /// trimmed; empty for a bare `RUN <<EOF`.
    prefix: String,
    /// The end-delimiter word.
    delim: String,
    /// `<<'EOF'` / `<<"EOF"` — quoted delimiter (suppresses var expansion).
    quoted: bool,
    /// `<<-EOF` — strip leading tabs from body + delimiter lines.
    strip_tabs: bool,
}

/// Recognize a heredoc opener in a fully joined logical line.
///
/// Only `RUN` lines (any casing) are considered, and only the first `<<` on
/// the line is examined. After an optional `-`, optional whitespace and an
/// optional opening quote, the marker must be followed by a bareword
/// delimiter (`[A-Za-z_][A-Za-z0-9_]*`). Anything else — such as the shift in
/// `RUN echo $((1 << 4))` — yields `None`, leaving the line an ordinary shell
/// command.
fn detect_heredoc(logical: &str) -> Option<Heredoc> {
    let (keyword, args) = split_keyword(logical)?;
    if !keyword.eq_ignore_ascii_case("RUN") {
        return None;
    }
    let (prefix, marker_tail) = args.split_once("<<")?;

    // `<<-` requests tab stripping.
    let undashed = marker_tail.strip_prefix('-');
    let strip_tabs = undashed.is_some();
    // Whitespace between the marker and the delimiter is tolerated (`<< EOF`).
    let tail = undashed.unwrap_or(marker_tail).trim_start();

    // A single leading quote character marks a quoted delimiter.
    let dequoted = tail.strip_prefix(|c: char| c == '\'' || c == '"');
    let quoted = dequoted.is_some();
    let tail = dequoted.unwrap_or(tail);

    // The delimiter must start with a letter or underscore ...
    let first = tail.chars().next()?;
    if !(first.is_ascii_alphabetic() || first == '_') {
        return None;
    }
    // ... and runs until the first character outside [A-Za-z0-9_].
    let end = tail
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .unwrap_or(tail.len());

    Some(Heredoc {
        keyword: keyword.to_owned(),
        prefix: prefix.trim().to_owned(),
        delim: tail[..end].to_owned(),
        quoted,
        strip_tabs,
    })
}

/// Lex the source into logical lines: join escape-char continuations (stripping
/// full-line comments) and collapse `RUN` heredocs into a single shell command.
/// Returns (1-based start line, text). A heredoc with no closing delimiter
/// before EOF falls back to a plain line (so a stray `<<` never eats the file).
fn collect_logical_lines(src: &str, escape: char) -> Vec<(usize, String)> {
    let lines: Vec<&str> = src.lines().collect();
    let n = lines.len();
    let mut out = Vec::new();
    let mut i = 0usize;

    while i < n {
        let t = lines[i].trim_start();
        if t.is_empty() || t.starts_with('#') {
            i += 1;
            continue;
        }
        let start_line = i + 1;

        // Accumulate escape-char continuations into one logical line.
        let mut acc = String::new();
        let mut last = i;
        loop {
            if last >= n {
                break;
            }
            let line = lines[last];
            if last != i && line.trim_start().starts_with('#') {
                // Docker ignores comment-only lines inside a continuation.
                last += 1;
                continue;
            }
            let te = line.trim_end();
            if te.ends_with(escape) && last + 1 < n {
                acc.push_str(&te[..te.len() - escape.len_utf8()]);
                last += 1;
            } else {
                acc.push_str(line);
                break;
            }
        }

        // Heredoc? Scan ahead for the closing delimiter.
        if let Some(hd) = detect_heredoc(&acc) {
            let mut j = last + 1;
            let mut body: Vec<String> = Vec::new();
            let mut closed = false;
            while j < n {
                let bl = lines[j];
                let is_delim = if hd.strip_tabs {
                    bl.trim_start_matches('\t').trim_end() == hd.delim
                } else {
                    bl.trim_end() == hd.delim
                };
                if is_delim {
                    closed = true;
                    break;
                }
                body.push(if hd.strip_tabs {
                    bl.trim_start_matches('\t').to_owned()
                } else {
                    bl.to_owned()
                });
                j += 1;
            }
            if closed {
                let body_str = body.join("\n");
                let rewritten = if hd.prefix.is_empty() {
                    // Bare `RUN <<EOF` — the body IS the shell script.
                    format!("{} {}", hd.keyword, body_str)
                } else {
                    // `RUN cmd <<EOF` — feed the body to `cmd` as a heredoc on
                    // stdin; the guest shell runs the reconstructed redirect.
                    let q = if hd.quoted { "'" } else { "" };
                    format!(
                        "{} {} <<{q}{d}{q}\n{body}\n{d}",
                        hd.keyword,
                        hd.prefix,
                        q = q,
                        d = hd.delim,
                        body = body_str
                    )
                };
                out.push((start_line, rewritten));
                i = j + 1; // skip past the closing delimiter line
                continue;
            }
            // Unterminated: fall through and treat as a plain line.
        }

        out.push((start_line, acc));
        i = last + 1;
    }
    out
}

/// Break a logical line into its leading keyword and the trimmed remainder.
///
/// Yields `None` when the line is blank or a comment (`#...`) once surrounding
/// whitespace is removed. A line consisting of a single word yields that word
/// with an empty remainder.
fn split_keyword(line: &str) -> Option<(&str, &str)> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }
    let parts = match trimmed.find(char::is_whitespace) {
        Some(at) => (&trimmed[..at], trimmed[at..].trim()),
        None => (trimmed, ""),
    };
    Some(parts)
}

/// Parse the arguments of a `FROM` line into an (instruction-less) [`Stage`].
///
/// Leading `--flag` tokens are consumed first: `--platform=<p>` is recorded
/// (the last one wins) and any other `--` flag is ignored. The next token is
/// the base; if it is followed by `AS <name>` (case-insensitive), the name is
/// recorded. Further tokens are ignored.
///
/// # Errors
///
/// Fails with `FROM requires an image` when no base token remains.
fn parse_from(rest: &str) -> Result<Stage, String> {
    let mut words = rest.split_whitespace().peekable();

    let mut platform = None;
    while let Some(flag) = words.next_if(|w| w.starts_with("--")) {
        if let Some(value) = flag.strip_prefix("--platform=") {
            platform = Some(value.to_owned());
        }
    }

    let base_ref = words.next().ok_or("FROM requires an image")?;
    let name = match (words.next(), words.next()) {
        (Some(kw), Some(alias)) if kw.eq_ignore_ascii_case("AS") => Some(alias.to_owned()),
        _ => None,
    };

    // Whether the reference is a prior stage or a registry image can only be
    // decided once all stage names are known, so everything except `scratch`
    // is modeled as Image here; the graph reclassifies Stage refs later.
    let base = if base_ref.eq_ignore_ascii_case("scratch") {
        BaseImage::Scratch
    } else {
        BaseImage::Image(base_ref.to_owned())
    };

    Ok(Stage {
        base,
        name,
        platform,
        instructions: Vec::new(),
    })
}

/// Parse the arguments of an `ARG` line into `(name, default)`.
///
/// `ARG NAME` yields `(NAME, None)`; `ARG NAME=value` yields
/// `(NAME, Some(value))`. Both parts are trimmed. One layer of matching
/// surrounding quotes (`"..."` or `'...'`) is removed from the default, so
/// `ARG VERSION="20"` produces `20` rather than `"20"`.
fn parse_arg(rest: &str) -> (String, Option<String>) {
    // Remove a single pair of matching quotes; leave anything else untouched.
    fn strip_matching_quotes(value: &str) -> &str {
        ['"', '\'']
            .iter()
            .find_map(|&q| value.strip_prefix(q)?.strip_suffix(q))
            .unwrap_or(value)
    }

    match rest.split_once('=') {
        Some((name, default)) => (
            name.trim().to_owned(),
            Some(strip_matching_quotes(default.trim()).to_owned()),
        ),
        None => (rest.trim().to_owned(), None),
    }
}

/// Parse one in-stage instruction.
///
/// `kw` is the keyword exactly as it should be matched (the caller passes it
/// upper-cased); `rest` is the argument text that follows it.
///
/// # Errors
///
/// * `SHELL` whose argument is not a JSON array of strings.
/// * `COPY`/`ADD` without at least one source and a destination.
/// * Any keyword not handled here (MAINTAINER, HEALTHCHECK and ONBUILD are
///   modeled later).
fn parse_instruction(kw: &str, rest: &str) -> Result<Instruction, String> {
    // Several instructions take their whole argument text, trimmed.
    let text = || rest.trim().to_owned();

    let instruction = match kw {
        "RUN" => parse_run(rest),
        "CMD" => Instruction::Cmd(parse_shell_or_exec(rest)),
        "ENTRYPOINT" => Instruction::Entrypoint(parse_shell_or_exec(rest)),
        "ENV" => Instruction::Env(parse_kv_pairs(rest)),
        "LABEL" => Instruction::Label(parse_kv_pairs(rest)),
        "ARG" => {
            let (name, default) = parse_arg(rest);
            Instruction::Arg { name, default }
        }
        "WORKDIR" => Instruction::Workdir(text()),
        "USER" => Instruction::User(text()),
        "STOPSIGNAL" => Instruction::StopSignal(text()),
        "EXPOSE" => {
            let ports = rest.split_whitespace().map(str::to_owned).collect();
            Instruction::Expose(ports)
        }
        "VOLUME" => Instruction::Volume(parse_string_list(rest)),
        "SHELL" => match parse_json_array(rest) {
            Some(argv) => Instruction::Shell(argv),
            None => return Err("SHELL requires a JSON array".to_owned()),
        },
        "COPY" => {
            let (sources, dest, flags) = parse_copy_add(rest)?;
            Instruction::Copy {
                sources,
                dest,
                flags,
            }
        }
        "ADD" => {
            let (sources, dest, flags) = parse_copy_add(rest)?;
            Instruction::Add {
                sources,
                dest,
                flags,
            }
        }
        _ => return Err(format!("unsupported instruction `{kw}`")),
    };
    Ok(instruction)
}

/// Parse the arguments of a `RUN` line.
///
/// Leading BuildKit flags are peeled off one whitespace-delimited token at a
/// time: each `--mount=<spec>` is parsed into a [`RunMount`], while
/// `--network=` and `--security=` are recognized and discarded. The first
/// token that is none of these starts the command, which is then interpreted
/// as shell or exec form.
fn parse_run(rest: &str) -> Instruction {
    // Split off the first whitespace-delimited token; the tail has its
    // leading whitespace removed.
    fn pop_token(s: &str) -> (&str, &str) {
        match s.split_once(char::is_whitespace) {
            Some((head, tail)) => (head, tail.trim_start()),
            None => (s, ""),
        }
    }

    let mut mounts = Vec::new();
    let mut command = rest.trim_start();
    loop {
        let (flag, tail) = pop_token(command);
        if let Some(spec) = flag.strip_prefix("--mount=") {
            // A BuildKit mount spec contains no spaces, so it is exactly the
            // remainder of this token.
            mounts.push(parse_run_mount(spec));
        } else if !(flag.starts_with("--network=") || flag.starts_with("--security=")) {
            // Not a recognized flag: this is where the command begins.
            break;
        }
        command = tail;
    }

    Instruction::Run {
        run: parse_shell_or_exec(command),
        mounts,
    }
}

/// Parse one `--mount=` spec (`type=cache,target=/x,id=foo,ro`).
///
/// The spec is a comma-separated list of `key[=value]` fields. Keys and the
/// `type` value are matched case-insensitively; empty fields are skipped and
/// unrecognized keys are ignored. When a key repeats, the last occurrence
/// wins.
///
/// Boolean keys (`ro`/`readonly`, `rw`/`readwrite`, `required`) are true when
/// given without a value; with a value they accept `1`, `t`, `T`, `true`,
/// `True` and `TRUE` as true and treat anything else as false. `rw`/`readwrite`
/// is the inverse of `ro`, so `rw=false` makes the mount read-only.
fn parse_run_mount(spec: &str) -> RunMount {
    // Interpret the optional value of a boolean mount option.
    fn flag_value(value: &str) -> bool {
        value.is_empty() || matches!(value, "1" | "t" | "T" | "true" | "True" | "TRUE")
    }

    let mut m = RunMount {
        kind: MountKind::Bind, // BuildKit default when type= is omitted
        target: None,
        source: None,
        id: None,
        from: None,
        readonly: false,
        required: false,
    };
    for field in spec.split(',') {
        let field = field.trim();
        if field.is_empty() {
            continue;
        }
        let (key, value) = match field.split_once('=') {
            Some((k, v)) => (k.trim(), v.trim()),
            None => (field, ""),
        };
        match key.to_ascii_lowercase().as_str() {
            "type" => {
                m.kind = match value.to_ascii_lowercase().as_str() {
                    "cache" => MountKind::Cache,
                    "secret" => MountKind::Secret,
                    "bind" => MountKind::Bind,
                    "tmpfs" => MountKind::Tmpfs,
                    "ssh" => MountKind::Ssh,
                    other => MountKind::Other(other.to_owned()),
                }
            }
            "target" | "dst" | "destination" | "dest" => m.target = Some(value.to_owned()),
            "source" | "src" => m.source = Some(value.to_owned()),
            "id" => m.id = Some(value.to_owned()),
            "from" => m.from = Some(value.to_owned()),
            "ro" | "readonly" => m.readonly = flag_value(value),
            // `rw` is the negation of `ro`: `rw=false` means read-only.
            "rw" | "readwrite" => m.readonly = !flag_value(value),
            "required" => m.required = flag_value(value),
            _ => {} // sharing, mode, uid, gid, etc. — ignored in v1
        }
    }
    m
}

/// Classify the argument of RUN/CMD/ENTRYPOINT: text that parses as a JSON
/// array of strings is exec form; anything else is shell form, stored trimmed.
fn parse_shell_or_exec(rest: &str) -> ShellOrExec {
    match parse_json_array(rest) {
        Some(argv) => ShellOrExec::Exec(argv),
        None => ShellOrExec::Shell(rest.trim().to_owned()),
    }
}

/// `ENV k v` (legacy single) and `ENV k=v k2=v2` (modern). Likewise LABEL.
///
/// The form is chosen by the first token alone: if it contains `=`, the line
/// is the modern multi-pair form; otherwise it is the legacy form, where the
/// first token is the key and everything after it — including any `=` — is
/// the value. This keeps lines such as `ENV JAVA_OPTS -Dfoo=bar` intact
/// instead of mistaking `-Dfoo=bar` for a pair.
///
/// In the modern form, tokens without `=` are skipped and quotes around
/// values are removed. In the legacy form the value is only trimmed. An empty
/// argument yields a single pair with an empty key and value.
fn parse_kv_pairs(rest: &str) -> Vec<(String, String)> {
    let rest = rest.trim();
    let tokens = split_respecting_quotes(rest);
    let modern = tokens.first().map_or(false, |first| first.contains('='));
    if !modern {
        // Legacy `ENV key value rest...` — everything after the first token
        // is the value.
        let (key, value) = match rest.split_once(char::is_whitespace) {
            Some((k, v)) => (k, v.trim()),
            None => (rest, ""),
        };
        return vec![(key.to_owned(), value.to_owned())];
    }
    tokens
        .iter()
        .filter_map(|tok| tok.split_once('='))
        .map(|(k, v)| (k.to_owned(), unquote(v)))
        .collect()
}

/// Parse the arguments of `COPY` / `ADD` into `(sources, dest, flags)`.
///
/// Accepts both the whitespace form (`COPY [--flags] src... dest`, with
/// quoting) and the JSON form (`COPY [--flags] ["src", ..., "dest"]`). Flags
/// come first in either form; `--from=`, `--chown=` and `--chmod=` are
/// recorded and any other leading `--` token is ignored. The last remaining
/// token is the destination and the ones before it are the sources.
///
/// # Errors
///
/// Fails when fewer than two non-flag tokens remain.
fn parse_copy_add(rest: &str) -> Result<(Vec<String>, String, CopyFlags), String> {
    // Skip over leading `--flag` tokens so a JSON array that follows them can
    // be detected; without this, `--chown=u:g ["a", "b"]` would be split on
    // whitespace and the brackets would end up inside the paths.
    let trimmed = rest.trim_start();
    let mut tail = trimmed;
    while tail.starts_with("--") {
        tail = match tail.split_once(char::is_whitespace) {
            Some((_, after)) => after.trim_start(),
            None => "",
        };
    }

    let mut toks: Vec<String> = match parse_json_array(tail) {
        Some(paths) => {
            // `tail` is a suffix of `trimmed`; what precedes it is the flags.
            let flag_text = &trimmed[..trimmed.len() - tail.len()];
            let mut all = split_respecting_quotes(flag_text);
            all.extend(paths);
            all
        }
        None => split_respecting_quotes(rest),
    };

    // Leading flags.
    let mut flags = CopyFlags::default();
    let mut flag_count = 0;
    for tok in &toks {
        if let Some(v) = tok.strip_prefix("--from=") {
            flags.from = Some(v.to_owned());
        } else if let Some(v) = tok.strip_prefix("--chown=") {
            flags.chown = Some(v.to_owned());
        } else if let Some(v) = tok.strip_prefix("--chmod=") {
            flags.chmod = Some(v.to_owned());
        } else if !tok.starts_with("--") {
            break;
        }
        // Unknown `--` flags fall through to here and are dropped as well.
        flag_count += 1;
    }
    toks.drain(..flag_count);

    match toks.pop() {
        Some(dest) if !toks.is_empty() => Ok((toks, dest, flags)),
        _ => Err("COPY/ADD requires at least one source and a dest".to_owned()),
    }
}

/// Parse a JSON array of strings (`["a", "b"]`). Returns None if not a JSON
/// array. Minimal: handles quoted strings + escapes, not nested values.
///
/// Returns `None` when the trimmed input is not wrapped in `[` ... `]`, when
/// an element is not a double-quoted string, when a string is left
/// unterminated (e.g. `["echo", "hi]`), or when a `\u` escape is malformed.
/// Callers treat `None` as "not exec form" and fall back to shell form.
///
/// Escapes: `\n`, `\t`, `\r`, `\b`, `\f` and `\uXXXX` (including surrogate
/// pairs) are decoded; any other escaped character stands for itself, which
/// covers `\"`, `\\` and `\/`.
///
/// Separators are handled leniently: whitespace and commas between elements
/// are skipped without checking that exactly one comma separates them.
fn parse_json_array(s: &str) -> Option<Vec<String>> {
    // Read exactly four hex digits as a UTF-16 code unit.
    fn hex4(chars: &mut impl Iterator<Item = char>) -> Option<u32> {
        (0..4).try_fold(0u32, |acc, _| Some(acc * 16 + chars.next()?.to_digit(16)?))
    }

    let inner = s.trim().strip_prefix('[')?.strip_suffix(']')?;
    let mut out = Vec::new();
    let mut chars = inner.chars().peekable();
    loop {
        // skip whitespace + commas
        while matches!(chars.peek(), Some(c) if c.is_whitespace() || *c == ',') {
            chars.next();
        }
        match chars.next() {
            None => break,
            Some('"') => {}
            // Not a well-formed JSON array of strings.
            Some(_) => return None,
        }

        let mut item = String::new();
        let mut closed = false;
        while let Some(c) = chars.next() {
            match c {
                '"' => {
                    closed = true;
                    break;
                }
                '\\' => {
                    // A backslash at the very end leaves the string unterminated.
                    let decoded = match chars.next()? {
                        'n' => '\n',
                        't' => '\t',
                        'r' => '\r',
                        'b' => '\u{8}',
                        'f' => '\u{c}',
                        'u' => {
                            let unit = hex4(&mut chars)?;
                            let code = if (0xD800..0xDC00).contains(&unit) {
                                // High surrogate: must be followed by `\uDC00..DFFF`.
                                if chars.next()? != '\\' || chars.next()? != 'u' {
                                    return None;
                                }
                                let low = hex4(&mut chars)?;
                                if !(0xDC00..0xE000).contains(&low) {
                                    return None;
                                }
                                0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00)
                            } else {
                                unit
                            };
                            // Rejects a lone low surrogate.
                            char::from_u32(code)?
                        }
                        other => other,
                    };
                    item.push(decoded);
                }
                other => item.push(other),
            }
        }
        if !closed {
            return None;
        }
        out.push(item);
    }
    Some(out)
}

/// Parse a list argument (used for `VOLUME`): the elements of a JSON string
/// array when the text is one, otherwise its whitespace-separated words.
fn parse_string_list(rest: &str) -> Vec<String> {
    parse_json_array(rest)
        .unwrap_or_else(|| rest.split_whitespace().map(str::to_owned).collect())
}

/// Tokenize on whitespace while treating `'...'` and `"..."` as grouping.
///
/// Quote characters are dropped from the output; a quote of one kind is kept
/// literally inside a quoted run of the other kind. Quoted and unquoted runs
/// that touch form one token (`k="a b"` → `k=a b`). An empty quoted string
/// still yields an (empty) token, and a quote left open swallows the rest of
/// the input. There is no backslash-escape handling.
fn split_respecting_quotes(s: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut buf = String::new();
    let mut open_quote: Option<char> = None;
    // True once the current token has contained a quote, so that `""` is
    // emitted even though it contributes no characters.
    let mut saw_quote = false;

    for ch in s.chars() {
        if let Some(q) = open_quote {
            if ch == q {
                open_quote = None;
            } else {
                buf.push(ch);
            }
            continue;
        }
        match ch {
            '"' | '\'' => {
                open_quote = Some(ch);
                saw_quote = true;
            }
            c if c.is_whitespace() => {
                if saw_quote || !buf.is_empty() {
                    tokens.push(std::mem::take(&mut buf));
                    saw_quote = false;
                }
            }
            c => buf.push(c),
        }
    }

    if saw_quote || !buf.is_empty() {
        tokens.push(buf);
    }
    tokens
}

/// Trim `s` and remove one pair of matching surrounding quotes (`"..."` or
/// `'...'`) if present. A lone quote character or mismatched quotes are left
/// untouched.
fn unquote(s: &str) -> String {
    let trimmed = s.trim();
    let inner = trimmed
        .strip_prefix('"')
        .and_then(|body| body.strip_suffix('"'))
        .or_else(|| {
            trimmed
                .strip_prefix('\'')
                .and_then(|body| body.strip_suffix('\''))
        });
    inner.unwrap_or(trimmed).to_owned()
}

// Unit tests for the Dockerfile frontend. Each test feeds a small Dockerfile
// through `parse` and checks the resulting model.
#[cfg(test)]
mod tests {
    use super::*;

    /// A realistic single-stage Dockerfile parses into one stage with one
    /// instruction per non-comment line after `FROM`.
    #[test]
    fn parses_the_essential_upsell_dockerfile() {
        let src = "FROM node:20-alpine\n\
                   EXPOSE 3000\n\
                   WORKDIR /app\n\
                   COPY . .\n\
                   ENV NODE_ENV=production\n\
                   RUN npm install --omit=dev\n\
                   # a comment\n\
                   RUN npm remove @shopify/app @shopify/cli\n\
                   RUN npm run build\n";
        let df = parse(src).expect("parse");
        assert_eq!(df.stages.len(), 1);
        let s = &df.stages[0];
        assert_eq!(s.base, BaseImage::Image("node:20-alpine".into()));
        // EXPOSE WORKDIR COPY ENV RUN RUN RUN (the `# a comment` is skipped).
        assert_eq!(s.instructions.len(), 7);
    }

    /// Plain text is shell form; a JSON string array is exec form.
    #[test]
    fn run_shell_vs_exec_form() {
        let df = parse("FROM x\nRUN echo hi\nCMD [\"node\", \"server.js\"]\n").unwrap();
        let ins = &df.stages[0].instructions;
        assert_eq!(
            ins[0],
            Instruction::run(ShellOrExec::Shell("echo hi".into()))
        );
        assert_eq!(
            ins[1],
            Instruction::Cmd(ShellOrExec::Exec(vec!["node".into(), "server.js".into()]))
        );
    }

    /// A trailing `\` joins the next line; the next line's indentation is kept.
    #[test]
    fn line_continuation_joins() {
        let df = parse("FROM x\nRUN apt-get update && \\\n    apt-get install -y curl\n").unwrap();
        assert_eq!(
            df.stages[0].instructions[0],
            Instruction::run(ShellOrExec::Shell(
                "apt-get update &&     apt-get install -y curl".into()
            ))
        );
    }

    /// `FROM ... AS name` names a stage and `COPY --from=name` records it.
    #[test]
    fn multi_stage_with_copy_from() {
        let src = "FROM golang AS build\n\
                   RUN go build -o /app\n\
                   FROM scratch\n\
                   COPY --from=build /app /app\n";
        let df = parse(src).unwrap();
        assert_eq!(df.stages.len(), 2);
        assert_eq!(df.stages[0].name, Some("build".into()));
        assert_eq!(df.stages[1].base, BaseImage::Scratch);
        match &df.stages[1].instructions[0] {
            Instruction::Copy { flags, dest, .. } => {
                assert_eq!(flags.from.as_deref(), Some("build"));
                assert_eq!(dest, "/app");
            }
            other => panic!("expected COPY, got {other:?}"),
        }
    }

    /// Modern `ENV k=v k2=v2` and legacy `ENV k value...` both parse.
    #[test]
    fn env_both_forms() {
        let a = parse("FROM x\nENV FOO=bar BAZ=qux\n").unwrap();
        assert_eq!(
            a.stages[0].instructions[0],
            Instruction::Env(vec![
                ("FOO".into(), "bar".into()),
                ("BAZ".into(), "qux".into())
            ])
        );
        let b = parse("FROM x\nENV FOO bar baz\n").unwrap();
        assert_eq!(
            b.stages[0].instructions[0],
            Instruction::Env(vec![("FOO".into(), "bar baz".into())])
        );
    }

    /// An `ARG` before the first `FROM` lands in `global_args`, not in a stage.
    #[test]
    fn global_arg_before_from() {
        let df = parse("ARG VERSION=20\nFROM node:${VERSION}\n").unwrap();
        assert_eq!(df.global_args, vec![("VERSION".into(), Some("20".into()))]);
        assert_eq!(df.stages.len(), 1);
    }

    /// `# escape=` at the top of the file changes the continuation character.
    #[test]
    fn escape_directive_and_comments() {
        let df = parse("# escape=`\nFROM x\nRUN echo a `\n  echo b\n").unwrap();
        assert_eq!(
            df.stages[0].instructions[0],
            Instruction::run(ShellOrExec::Shell("echo a   echo b".into()))
        );
    }

    /// An instruction with no preceding `FROM`, or a file with no `FROM` at
    /// all, is an error.
    #[test]
    fn rejects_instruction_before_from() {
        assert!(parse("RUN echo hi\n").is_err());
        assert!(parse("# only a comment\n").is_err());
    }

    /// A single `--mount=` flag is parsed and removed from the command.
    #[test]
    fn run_mount_cache_and_command() {
        let df = parse("FROM x\nRUN --mount=type=cache,target=/root/.cargo,id=cargo cargo build\n")
            .unwrap();
        match &df.stages[0].instructions[0] {
            Instruction::Run { run, mounts } => {
                assert_eq!(run, &ShellOrExec::Shell("cargo build".into()));
                assert_eq!(mounts.len(), 1);
                assert_eq!(mounts[0].kind, MountKind::Cache);
                assert_eq!(mounts[0].target.as_deref(), Some("/root/.cargo"));
                assert_eq!(mounts[0].id.as_deref(), Some("cargo"));
            }
            other => panic!("{other:?}"),
        }
    }

    /// Several `--mount=` flags are collected in order; a bare `required`
    /// key sets the flag.
    #[test]
    fn run_multiple_mounts_and_secret() {
        let df = parse(
            "FROM x\nRUN --mount=type=cache,target=/c \
             --mount=type=secret,id=tok,target=/run/secrets/tok,required \
             make\n",
        )
        .unwrap();
        match &df.stages[0].instructions[0] {
            Instruction::Run { run, mounts } => {
                assert_eq!(run, &ShellOrExec::Shell("make".into()));
                assert_eq!(mounts.len(), 2);
                assert_eq!(mounts[0].kind, MountKind::Cache);
                assert_eq!(mounts[1].kind, MountKind::Secret);
                assert_eq!(mounts[1].id.as_deref(), Some("tok"));
                assert!(mounts[1].required);
            }
            other => panic!("{other:?}"),
        }
    }

    /// A `RUN` without flags equals what `Instruction::run` builds.
    #[test]
    fn run_without_mount_has_empty_mounts() {
        let df = parse("FROM x\nRUN echo hi\n").unwrap();
        assert_eq!(
            df.stages[0].instructions[0],
            Instruction::run(ShellOrExec::Shell("echo hi".into()))
        );
    }

    /// A bare `RUN <<EOF` turns the heredoc body into the shell script, and
    /// parsing resumes after the closing delimiter.
    #[test]
    fn run_heredoc_script_body() {
        let df = parse(
            "FROM x\n\
             RUN <<EOF\n\
             apt-get update\n\
             apt-get install -y curl\n\
             EOF\n\
             RUN echo done\n",
        )
        .unwrap();
        let ins = &df.stages[0].instructions;
        assert_eq!(ins.len(), 2);
        assert_eq!(
            ins[0],
            Instruction::run(ShellOrExec::Shell(
                "apt-get update\napt-get install -y curl".into()
            ))
        );
        assert_eq!(
            ins[1],
            Instruction::run(ShellOrExec::Shell("echo done".into()))
        );
    }

    /// `RUN cmd <<EOF` keeps the heredoc as a redirect fed to `cmd`.
    #[test]
    fn run_heredoc_with_interpreter() {
        let df = parse(
            "FROM x\n\
             RUN python3 <<EOF\n\
             print(\"hi\")\n\
             EOF\n",
        )
        .unwrap();
        assert_eq!(
            df.stages[0].instructions[0],
            Instruction::run(ShellOrExec::Shell(
                "python3 <<EOF\nprint(\"hi\")\nEOF".into()
            ))
        );
    }

    #[test]
    fn run_heredoc_dash_strips_tabs() {
        // `<<-` strips leading tabs from body and delimiter lines.
        let df = parse("FROM x\nRUN <<-EOF\n\t\techo hi\n\t\tEOF\n").unwrap();
        assert_eq!(
            df.stages[0].instructions[0],
            Instruction::run(ShellOrExec::Shell("echo hi".into()))
        );
    }

    #[test]
    fn left_shift_is_not_a_heredoc() {
        // `<<` in an arithmetic expression must stay a plain shell command.
        let df = parse("FROM x\nRUN echo $((1 << 4))\nRUN echo after\n").unwrap();
        let ins = &df.stages[0].instructions;
        assert_eq!(
            ins[0],
            Instruction::run(ShellOrExec::Shell("echo $((1 << 4))".into()))
        );
        assert_eq!(
            ins[1],
            Instruction::run(ShellOrExec::Shell("echo after".into()))
        );
    }

    #[test]
    fn unterminated_heredoc_falls_back() {
        // No closing delimiter before EOF → don't swallow the file or loop;
        // the line stays a plain shell command.
        let df = parse("FROM x\nRUN cat <<EOF\n").unwrap();
        assert_eq!(
            df.stages[0].instructions[0],
            Instruction::run(ShellOrExec::Shell("cat <<EOF".into()))
        );
    }

    /// `--chown=` is recorded as a flag; the last path is the destination.
    #[test]
    fn copy_with_chown() {
        let df = parse("FROM x\nCOPY --chown=node:node a b /dest\n").unwrap();
        match &df.stages[0].instructions[0] {
            Instruction::Copy {
                sources,
                dest,
                flags,
            } => {
                assert_eq!(sources, &vec!["a".to_string(), "b".to_string()]);
                assert_eq!(dest, "/dest");
                assert_eq!(flags.chown.as_deref(), Some("node:node"));
            }
            other => panic!("{other:?}"),
        }
    }
}