Skip to main content

zlayer_builder/dockerfile/
parser.rs

1//! Dockerfile parser
2//!
3//! This module provides functionality to parse Dockerfiles into a structured representation
4//! using the `dockerfile-parser` crate as the parsing backend.
5
6use std::collections::HashMap;
7use std::path::Path;
8use std::str::FromStr;
9
10use dockerfile_parser::{Dockerfile as RawDockerfile, Instruction as RawInstruction};
11use serde::{Deserialize, Serialize};
12use zlayer_types::ImageReference;
13
14use crate::error::{BuildError, Result};
15
16use super::instruction::{
17    AddInstruction, ArgInstruction, CopyInstruction, EnvInstruction, ExposeInstruction,
18    ExposeProtocol, HealthcheckInstruction, Instruction, RunInstruction, ShellOrExec,
19};
20
/// A Dockerfile `FROM` target.
///
/// `FROM` references can resolve to one of three things in a Dockerfile:
/// an OCI image (the common case), a previous stage in a multi-stage
/// build (e.g. `FROM builder AS final`), or the special `scratch`
/// pseudo-image. This enum captures all three. For non-Dockerfile call
/// sites (image registry lookups, toolchain detection, etc.) use
/// [`zlayer_types::ImageReference`] directly — the bare OCI ref type
/// without the Dockerfile-only variants.
///
/// The type is serializable via `serde` and comparable via `PartialEq`/`Eq`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DockerfileFromTarget {
    /// An OCI image reference (canonical OCI grammar).
    Image(ImageReference),
    /// A reference to another stage in this multi-stage build, stored as
    /// the stage name exactly as written after `FROM`.
    Stage(String),
    /// The special `scratch` pseudo-image.
    Scratch,
}
39
40impl DockerfileFromTarget {
41    /// Parse a raw `FROM` target string.
42    ///
43    /// Recognizes `scratch` (case-insensitive), then attempts an OCI
44    /// reference parse via [`ImageReference::from_str`]. If parsing
45    /// succeeds, the result is an [`Self::Image`]; otherwise the
46    /// input is treated as a [`Self::Stage`] reference.
47    ///
48    /// Note that the OCI grammar accepts bare names like `alpine` as
49    /// valid image references, so disambiguation between an image
50    /// and a multi-stage stage reference must happen post-hoc at the
51    /// call site by consulting the set of known stage names.
52    #[must_use]
53    pub fn parse(s: &str) -> Self {
54        let s = s.trim();
55
56        if s.eq_ignore_ascii_case("scratch") {
57            return Self::Scratch;
58        }
59
60        match ImageReference::from_str(s) {
61            Ok(r) => Self::Image(r),
62            Err(_) => Self::Stage(s.to_string()),
63        }
64    }
65
66    /// Returns true if this is a stage reference.
67    #[must_use]
68    pub fn is_stage(&self) -> bool {
69        matches!(self, Self::Stage(_))
70    }
71
72    /// Returns true if this is the `scratch` pseudo-image.
73    #[must_use]
74    pub fn is_scratch(&self) -> bool {
75        matches!(self, Self::Scratch)
76    }
77}
78
79impl std::fmt::Display for DockerfileFromTarget {
80    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81        match self {
82            Self::Image(r) => write!(f, "{r}"),
83            Self::Stage(name) => f.write_str(name),
84            Self::Scratch => f.write_str("scratch"),
85        }
86    }
87}
88
/// A single stage in a multi-stage Dockerfile.
///
/// A stage starts at a `FROM` instruction and owns every instruction
/// that follows it up to (but not including) the next `FROM`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Stage {
    /// Stage index (0-based), in order of appearance in the Dockerfile
    pub index: usize,

    /// Optional stage name (from `AS name`)
    pub name: Option<String>,

    /// The base image for this stage (an image, an earlier stage, or `scratch`)
    pub base_image: DockerfileFromTarget,

    /// Optional platform specification (e.g., "linux/amd64"), taken from
    /// the `--platform` flag on `FROM`
    pub platform: Option<String>,

    /// Instructions in this stage (excluding the FROM)
    pub instructions: Vec<Instruction>,
}
107
108impl Stage {
109    /// Returns the stage identifier (name if present, otherwise index as string)
110    #[must_use]
111    pub fn identifier(&self) -> String {
112        self.name.clone().unwrap_or_else(|| self.index.to_string())
113    }
114
115    /// Returns true if this stage matches the given name or index
116    #[must_use]
117    pub fn matches(&self, name_or_index: &str) -> bool {
118        if let Some(ref name) = self.name {
119            if name == name_or_index {
120                return true;
121            }
122        }
123
124        if let Ok(idx) = name_or_index.parse::<usize>() {
125            return idx == self.index;
126        }
127
128        false
129    }
130}
131
/// A parsed Dockerfile.
///
/// Holds the `ARG` instructions declared before the first `FROM` and the
/// ordered list of build stages that follow.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dockerfile {
    /// Global ARG instructions that appear before the first FROM
    pub global_args: Vec<ArgInstruction>,

    /// Build stages, in the order their `FROM` instructions appear
    pub stages: Vec<Stage>,
}
141
142impl Dockerfile {
143    /// Parse a Dockerfile from a string
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if the Dockerfile content is malformed or contains invalid instructions.
148    pub fn parse(content: &str) -> Result<Self> {
149        let raw = RawDockerfile::parse(content).map_err(|e| BuildError::DockerfileParse {
150            message: e.to_string(),
151            line: 1,
152        })?;
153
154        Self::from_raw(raw)
155    }
156
157    /// Parse a Dockerfile from a file
158    ///
159    /// # Errors
160    ///
161    /// Returns an error if the file cannot be read or the Dockerfile is malformed.
162    pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
163        let content =
164            std::fs::read_to_string(path.as_ref()).map_err(|e| BuildError::ContextRead {
165                path: path.as_ref().to_path_buf(),
166                source: e,
167            })?;
168
169        Self::parse(&content)
170    }
171
172    /// Convert from the raw dockerfile-parser types to our internal representation
173    fn from_raw(raw: RawDockerfile) -> Result<Self> {
174        let mut global_args = Vec::new();
175        let mut stages = Vec::new();
176        let mut current_stage: Option<Stage> = None;
177        let mut stage_index = 0;
178        // Track stage names declared so far so subsequent FROM lines can
179        // resolve `FROM <name>` to a stage reference even when the name
180        // is also a syntactically-valid OCI reference (e.g. `FROM builder`).
181        let mut known_stage_names: std::collections::HashSet<String> =
182            std::collections::HashSet::new();
183
184        for instruction in raw.instructions {
185            match &instruction {
186                RawInstruction::From(from) => {
187                    // Save previous stage if any
188                    if let Some(stage) = current_stage.take() {
189                        stages.push(stage);
190                    }
191
192                    // Parse base image
193                    let raw_from = from.image.content.trim().to_string();
194                    let mut base_image = DockerfileFromTarget::parse(&raw_from);
195
196                    // Post-hoc stage promotion: `DockerfileFromTarget::parse`
197                    // delegates to the OCI grammar, which accepts bare names
198                    // like `builder` as valid image refs. If the raw FROM
199                    // text matches a previously-declared stage name, swap
200                    // the parsed `Image` for a `Stage` reference.
201                    if matches!(base_image, DockerfileFromTarget::Image(_))
202                        && known_stage_names.contains(&raw_from)
203                    {
204                        base_image = DockerfileFromTarget::Stage(raw_from.clone());
205                    }
206
207                    // Get alias (stage name) - the field is `alias` not `image_alias`
208                    let name = from.alias.as_ref().map(|a| a.content.clone());
209
210                    if let Some(ref n) = name {
211                        known_stage_names.insert(n.clone());
212                    }
213
214                    // Get platform flag
215                    let platform = from
216                        .flags
217                        .iter()
218                        .find(|f| f.name.content.as_str() == "platform")
219                        .map(|f| f.value.to_string());
220
221                    current_stage = Some(Stage {
222                        index: stage_index,
223                        name,
224                        base_image,
225                        platform,
226                        instructions: Vec::new(),
227                    });
228
229                    stage_index += 1;
230                }
231
232                RawInstruction::Arg(arg) => {
233                    let arg_inst = ArgInstruction {
234                        name: arg.name.to_string(),
235                        default: arg.value.as_ref().map(std::string::ToString::to_string),
236                    };
237
238                    if current_stage.is_none() {
239                        global_args.push(arg_inst);
240                    } else if let Some(ref mut stage) = current_stage {
241                        stage.instructions.push(Instruction::Arg(arg_inst));
242                    }
243                }
244
245                _ => {
246                    if let Some(ref mut stage) = current_stage {
247                        if let Some(inst) = Self::convert_instruction(&instruction)? {
248                            stage.instructions.push(inst);
249                        }
250                    }
251                }
252            }
253        }
254
255        // Don't forget the last stage
256        if let Some(stage) = current_stage {
257            stages.push(stage);
258        }
259
260        // Resolve stage references in COPY --from
261        // (This is currently a no-op as stage references are already correct,
262        // but kept for future validation/resolution logic)
263        let _stage_names: HashMap<String, usize> = stages
264            .iter()
265            .filter_map(|s| s.name.as_ref().map(|n| (n.clone(), s.index)))
266            .collect();
267        let _num_stages = stages.len();
268
269        Ok(Self {
270            global_args,
271            stages,
272        })
273    }
274
275    /// Convert a raw instruction to our internal representation
276    #[allow(clippy::too_many_lines)]
277    fn convert_instruction(raw: &RawInstruction) -> Result<Option<Instruction>> {
278        let instruction = match raw {
279            RawInstruction::From(_) => {
280                return Ok(None);
281            }
282
283            RawInstruction::Run(run) => {
284                let command = match &run.expr {
285                    dockerfile_parser::ShellOrExecExpr::Shell(s) => {
286                        ShellOrExec::Shell(s.to_string())
287                    }
288                    dockerfile_parser::ShellOrExecExpr::Exec(args) => {
289                        ShellOrExec::Exec(args.elements.iter().map(|s| s.content.clone()).collect())
290                    }
291                };
292
293                Instruction::Run(RunInstruction {
294                    command,
295                    mounts: Vec::new(),
296                    network: None,
297                    security: None,
298                })
299            }
300
301            RawInstruction::Copy(copy) => {
302                let from = copy
303                    .flags
304                    .iter()
305                    .find(|f| f.name.content.as_str() == "from")
306                    .map(|f| f.value.to_string());
307
308                let chown = copy
309                    .flags
310                    .iter()
311                    .find(|f| f.name.content.as_str() == "chown")
312                    .map(|f| f.value.to_string());
313
314                let chmod = copy
315                    .flags
316                    .iter()
317                    .find(|f| f.name.content.as_str() == "chmod")
318                    .map(|f| f.value.to_string());
319
320                let link = copy.flags.iter().any(|f| f.name.content.as_str() == "link");
321
322                // The external parser separates sources and destination already.
323                let sources: Vec<String> = copy
324                    .sources
325                    .iter()
326                    .map(std::string::ToString::to_string)
327                    .collect();
328                let destination = copy.destination.to_string();
329
330                Instruction::Copy(CopyInstruction {
331                    sources,
332                    destination,
333                    from,
334                    chown,
335                    chmod,
336                    link,
337                    exclude: Vec::new(),
338                })
339            }
340
341            RawInstruction::Entrypoint(ep) => {
342                let command = match &ep.expr {
343                    dockerfile_parser::ShellOrExecExpr::Shell(s) => {
344                        ShellOrExec::Shell(s.to_string())
345                    }
346                    dockerfile_parser::ShellOrExecExpr::Exec(args) => {
347                        ShellOrExec::Exec(args.elements.iter().map(|s| s.content.clone()).collect())
348                    }
349                };
350                Instruction::Entrypoint(command)
351            }
352
353            RawInstruction::Cmd(cmd) => {
354                let command = match &cmd.expr {
355                    dockerfile_parser::ShellOrExecExpr::Shell(s) => {
356                        ShellOrExec::Shell(s.to_string())
357                    }
358                    dockerfile_parser::ShellOrExecExpr::Exec(args) => {
359                        ShellOrExec::Exec(args.elements.iter().map(|s| s.content.clone()).collect())
360                    }
361                };
362                Instruction::Cmd(command)
363            }
364
365            RawInstruction::Env(env) => {
366                let mut vars = HashMap::new();
367                for var in &env.vars {
368                    vars.insert(var.key.to_string(), var.value.to_string());
369                }
370                Instruction::Env(EnvInstruction { vars })
371            }
372
373            RawInstruction::Label(label) => {
374                let mut labels = HashMap::new();
375                for l in &label.labels {
376                    labels.insert(l.name.to_string(), l.value.to_string());
377                }
378                Instruction::Label(labels)
379            }
380
381            RawInstruction::Arg(arg) => Instruction::Arg(ArgInstruction {
382                name: arg.name.to_string(),
383                default: arg.value.as_ref().map(std::string::ToString::to_string),
384            }),
385
386            RawInstruction::Misc(misc) => {
387                let instruction_upper = misc.instruction.content.to_uppercase();
388                match instruction_upper.as_str() {
389                    "WORKDIR" => Instruction::Workdir(misc.arguments.to_string()),
390
391                    "USER" => Instruction::User(misc.arguments.to_string()),
392
393                    "VOLUME" => {
394                        let args = misc.arguments.to_string();
395                        let volumes = if args.trim().starts_with('[') {
396                            serde_json::from_str(&args).unwrap_or_else(|_| vec![args])
397                        } else {
398                            args.split_whitespace().map(String::from).collect()
399                        };
400                        Instruction::Volume(volumes)
401                    }
402
403                    "EXPOSE" => {
404                        let args = misc.arguments.to_string();
405                        let (port_str, protocol) = if let Some((p, proto)) = args.split_once('/') {
406                            let proto = match proto.to_lowercase().as_str() {
407                                "udp" => ExposeProtocol::Udp,
408                                _ => ExposeProtocol::Tcp,
409                            };
410                            (p, proto)
411                        } else {
412                            (args.as_str(), ExposeProtocol::Tcp)
413                        };
414
415                        let port: u16 = port_str.trim().parse().map_err(|_| {
416                            BuildError::InvalidInstruction {
417                                instruction: "EXPOSE".to_string(),
418                                reason: format!("Invalid port number: {port_str}"),
419                            }
420                        })?;
421
422                        Instruction::Expose(ExposeInstruction { port, protocol })
423                    }
424
425                    "SHELL" => {
426                        let args = misc.arguments.to_string();
427                        let shell: Vec<String> = serde_json::from_str(&args).map_err(|_| {
428                            BuildError::InvalidInstruction {
429                                instruction: "SHELL".to_string(),
430                                reason: "SHELL requires a JSON array".to_string(),
431                            }
432                        })?;
433                        Instruction::Shell(shell)
434                    }
435
436                    "STOPSIGNAL" => Instruction::Stopsignal(misc.arguments.to_string()),
437
438                    "HEALTHCHECK" => {
439                        let args = misc.arguments.to_string().trim().to_string();
440                        if args.eq_ignore_ascii_case("NONE") {
441                            Instruction::Healthcheck(HealthcheckInstruction::None)
442                        } else {
443                            let command = if let Some(stripped) = args.strip_prefix("CMD ") {
444                                ShellOrExec::Shell(stripped.to_string())
445                            } else {
446                                ShellOrExec::Shell(args)
447                            };
448                            Instruction::Healthcheck(HealthcheckInstruction::cmd(command))
449                        }
450                    }
451
452                    "ONBUILD" => {
453                        tracing::warn!("ONBUILD instruction parsing not fully implemented");
454                        return Ok(None);
455                    }
456
457                    "MAINTAINER" => {
458                        let mut labels = HashMap::new();
459                        labels.insert("maintainer".to_string(), misc.arguments.to_string());
460                        Instruction::Label(labels)
461                    }
462
463                    "ADD" => {
464                        let args = misc.arguments.to_string();
465                        let parts: Vec<String> =
466                            args.split_whitespace().map(String::from).collect();
467
468                        if parts.len() < 2 {
469                            return Err(BuildError::InvalidInstruction {
470                                instruction: "ADD".to_string(),
471                                reason: "ADD requires at least one source and a destination"
472                                    .to_string(),
473                            });
474                        }
475
476                        let (sources, dest) = parts.split_at(parts.len() - 1);
477                        let destination = dest.first().cloned().unwrap_or_default();
478
479                        Instruction::Add(AddInstruction {
480                            sources: sources.to_vec(),
481                            destination,
482                            chown: None,
483                            chmod: None,
484                            link: false,
485                            checksum: None,
486                            keep_git_dir: false,
487                        })
488                    }
489
490                    other => {
491                        tracing::warn!("Unknown Dockerfile instruction: {}", other);
492                        return Ok(None);
493                    }
494                }
495            }
496        };
497
498        Ok(Some(instruction))
499    }
500
501    /// Get a stage by name or index
502    #[must_use]
503    pub fn get_stage(&self, name_or_index: &str) -> Option<&Stage> {
504        self.stages.iter().find(|s| s.matches(name_or_index))
505    }
506
507    /// Get the final stage (last one in the Dockerfile)
508    #[must_use]
509    pub fn final_stage(&self) -> Option<&Stage> {
510        self.stages.last()
511    }
512
513    /// Get all stage names/identifiers
514    #[must_use]
515    pub fn stage_names(&self) -> Vec<String> {
516        self.stages.iter().map(Stage::identifier).collect()
517    }
518
519    /// Check if a stage exists
520    #[must_use]
521    pub fn has_stage(&self, name_or_index: &str) -> bool {
522        self.get_stage(name_or_index).is_some()
523    }
524
    /// Returns the number of stages in this Dockerfile (one per `FROM`
    /// instruction that was parsed).
    #[must_use]
    pub fn stage_count(&self) -> usize {
        self.stages.len()
    }
530}
531
// Unit tests for the Dockerfile parser. Each test feeds a small inline
// Dockerfile through `Dockerfile::parse` and inspects the resulting stages.
#[cfg(test)]
mod tests {
    use super::*;

    /// A single-stage Dockerfile yields one unnamed stage holding every
    /// instruction after the `FROM`.
    #[test]
    fn test_parse_simple_dockerfile() {
        let content = r#"
FROM alpine:3.18
RUN apk add --no-cache curl
COPY . /app
WORKDIR /app
CMD ["./app"]
"#;

        let dockerfile = Dockerfile::parse(content).unwrap();
        assert_eq!(dockerfile.stages.len(), 1);

        let stage = &dockerfile.stages[0];
        assert_eq!(stage.index, 0);
        assert!(stage.name.is_none());
        // RUN, COPY, WORKDIR and CMD; the FROM itself is not counted.
        assert_eq!(stage.instructions.len(), 4);
    }

    /// Two `FROM` lines produce two stages, and `COPY --from=<alias>`
    /// records the alias in `CopyInstruction.from`.
    #[test]
    fn test_parse_multistage_dockerfile() {
        let content = r#"
FROM golang:1.21 AS builder
WORKDIR /src
COPY . .
RUN go build -o /app

FROM alpine:3.18
COPY --from=builder /app /app
CMD ["/app"]
"#;

        let dockerfile = Dockerfile::parse(content).unwrap();
        assert_eq!(dockerfile.stages.len(), 2);

        let builder = &dockerfile.stages[0];
        assert_eq!(builder.name, Some("builder".to_string()));

        let runtime = &dockerfile.stages[1];
        assert!(runtime.name.is_none());

        let copy = runtime
            .instructions
            .iter()
            .find(|i| matches!(i, Instruction::Copy(_)));
        assert!(copy.is_some());
        if let Some(Instruction::Copy(c)) = copy {
            assert_eq!(c.from, Some("builder".to_string()));
        }
    }

    /// `COPY --from=` with a registry-qualified image keeps the reference
    /// verbatim and does not create an extra stage.
    #[test]
    fn test_parse_copy_from_external_image_reference() {
        // `COPY --from=<external-image>` must capture the full registry-
        // qualified reference in `CopyInstruction.from` so the buildah
        // backend can pull and forward it to `buildah copy --from=...`.
        let content = r"
FROM alpine:3.18
COPY --from=ghcr.io/astral-sh/uv:0.5.0 /uv /usr/local/bin/uv
RUN /usr/local/bin/uv --version
";

        let dockerfile = Dockerfile::parse(content).unwrap();
        assert_eq!(dockerfile.stages.len(), 1);

        let copy = dockerfile.stages[0]
            .instructions
            .iter()
            .find_map(|i| {
                if let Instruction::Copy(c) = i {
                    Some(c)
                } else {
                    None
                }
            })
            .expect("COPY instruction present");

        assert_eq!(
            copy.from,
            Some("ghcr.io/astral-sh/uv:0.5.0".to_string()),
            "external image ref must be preserved verbatim in CopyInstruction.from",
        );
        assert_eq!(copy.sources, vec!["/uv".to_string()]);
        assert_eq!(copy.destination, "/usr/local/bin/uv".to_string());

        // The parser must NOT treat the external ref as a stage; only the
        // top-level `FROM alpine:3.18` should appear in the stage list.
        assert!(dockerfile.get_stage("ghcr.io/astral-sh/uv:0.5.0").is_none());
    }

    /// An `ARG` before the first `FROM` is collected into `global_args`
    /// together with its default value.
    #[test]
    fn test_parse_global_args() {
        let content = r#"
ARG BASE_IMAGE=alpine:3.18
FROM ${BASE_IMAGE}
RUN echo "hello"
"#;

        let dockerfile = Dockerfile::parse(content).unwrap();
        assert_eq!(dockerfile.global_args.len(), 1);
        assert_eq!(dockerfile.global_args[0].name, "BASE_IMAGE");
        assert_eq!(
            dockerfile.global_args[0].default,
            Some("alpine:3.18".to_string())
        );
    }

    /// Stages can be looked up by alias or by their index written as text.
    #[test]
    fn test_get_stage_by_name() {
        let content = r#"
FROM alpine:3.18 AS base
RUN echo "base"

FROM base AS builder
RUN echo "builder"
"#;

        let dockerfile = Dockerfile::parse(content).unwrap();

        let base = dockerfile.get_stage("base");
        assert!(base.is_some());
        assert_eq!(base.unwrap().index, 0);

        let builder = dockerfile.get_stage("builder");
        assert!(builder.is_some());
        assert_eq!(builder.unwrap().index, 1);

        // Lookup by index text resolves to the same first stage.
        let stage_0 = dockerfile.get_stage("0");
        assert!(stage_0.is_some());
        assert_eq!(stage_0.unwrap().name, Some("base".to_string()));
    }

    /// `final_stage` returns the last stage, and `FROM scratch` is parsed
    /// as the `Scratch` target.
    #[test]
    fn test_final_stage() {
        let content = r#"
FROM alpine:3.18 AS builder
RUN echo "builder"

FROM scratch
COPY --from=builder /app /app
"#;

        let dockerfile = Dockerfile::parse(content).unwrap();
        let final_stage = dockerfile.final_stage().unwrap();

        assert_eq!(final_stage.index, 1);
        assert!(matches!(
            final_stage.base_image,
            DockerfileFromTarget::Scratch
        ));
    }

    /// A single `ENV` line with several `KEY=value` pairs yields one
    /// `Env` instruction containing every pair.
    #[test]
    fn test_parse_env_instruction() {
        let content = r"
FROM alpine
ENV FOO=bar BAZ=qux
";

        let dockerfile = Dockerfile::parse(content).unwrap();
        let stage = &dockerfile.stages[0];

        let env = stage
            .instructions
            .iter()
            .find(|i| matches!(i, Instruction::Env(_)));
        assert!(env.is_some());

        if let Some(Instruction::Env(e)) = env {
            assert_eq!(e.vars.get("FOO"), Some(&"bar".to_string()));
            assert_eq!(e.vars.get("BAZ"), Some(&"qux".to_string()));
        }
    }
}