zlayer_builder/dockerfile/
parser.rs1use std::collections::HashMap;
7use std::path::Path;
8use std::str::FromStr;
9
10use dockerfile_parser::{Dockerfile as RawDockerfile, Instruction as RawInstruction};
11use serde::{Deserialize, Serialize};
12use zlayer_types::ImageReference;
13
14use crate::error::{BuildError, Result};
15
16use super::instruction::{
17 AddInstruction, ArgInstruction, CopyInstruction, EnvInstruction, ExposeInstruction,
18 ExposeProtocol, HealthcheckInstruction, Instruction, RunInstruction, ShellOrExec,
19};
20
21#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
31pub enum DockerfileFromTarget {
32 Image(ImageReference),
34 Stage(String),
36 Scratch,
38}
39
40impl DockerfileFromTarget {
41 #[must_use]
53 pub fn parse(s: &str) -> Self {
54 let s = s.trim();
55
56 if s.eq_ignore_ascii_case("scratch") {
57 return Self::Scratch;
58 }
59
60 match ImageReference::from_str(s) {
61 Ok(r) => Self::Image(r),
62 Err(_) => Self::Stage(s.to_string()),
63 }
64 }
65
66 #[must_use]
68 pub fn is_stage(&self) -> bool {
69 matches!(self, Self::Stage(_))
70 }
71
72 #[must_use]
74 pub fn is_scratch(&self) -> bool {
75 matches!(self, Self::Scratch)
76 }
77}
78
79impl std::fmt::Display for DockerfileFromTarget {
80 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81 match self {
82 Self::Image(r) => write!(f, "{r}"),
83 Self::Stage(name) => f.write_str(name),
84 Self::Scratch => f.write_str("scratch"),
85 }
86 }
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct Stage {
92 pub index: usize,
94
95 pub name: Option<String>,
97
98 pub base_image: DockerfileFromTarget,
100
101 pub platform: Option<String>,
103
104 pub instructions: Vec<Instruction>,
106}
107
108impl Stage {
109 #[must_use]
111 pub fn identifier(&self) -> String {
112 self.name.clone().unwrap_or_else(|| self.index.to_string())
113 }
114
115 #[must_use]
117 pub fn matches(&self, name_or_index: &str) -> bool {
118 if let Some(ref name) = self.name {
119 if name == name_or_index {
120 return true;
121 }
122 }
123
124 if let Ok(idx) = name_or_index.parse::<usize>() {
125 return idx == self.index;
126 }
127
128 false
129 }
130}
131
132#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct Dockerfile {
135 pub global_args: Vec<ArgInstruction>,
137
138 pub stages: Vec<Stage>,
140}
141
142impl Dockerfile {
143 pub fn parse(content: &str) -> Result<Self> {
149 let raw = RawDockerfile::parse(content).map_err(|e| BuildError::DockerfileParse {
150 message: e.to_string(),
151 line: 1,
152 })?;
153
154 Self::from_raw(raw)
155 }
156
157 pub fn from_file(path: impl AsRef<Path>) -> Result<Self> {
163 let content =
164 std::fs::read_to_string(path.as_ref()).map_err(|e| BuildError::ContextRead {
165 path: path.as_ref().to_path_buf(),
166 source: e,
167 })?;
168
169 Self::parse(&content)
170 }
171
172 fn from_raw(raw: RawDockerfile) -> Result<Self> {
174 let mut global_args = Vec::new();
175 let mut stages = Vec::new();
176 let mut current_stage: Option<Stage> = None;
177 let mut stage_index = 0;
178 let mut known_stage_names: std::collections::HashSet<String> =
182 std::collections::HashSet::new();
183
184 for instruction in raw.instructions {
185 match &instruction {
186 RawInstruction::From(from) => {
187 if let Some(stage) = current_stage.take() {
189 stages.push(stage);
190 }
191
192 let raw_from = from.image.content.trim().to_string();
194 let mut base_image = DockerfileFromTarget::parse(&raw_from);
195
196 if matches!(base_image, DockerfileFromTarget::Image(_))
202 && known_stage_names.contains(&raw_from)
203 {
204 base_image = DockerfileFromTarget::Stage(raw_from.clone());
205 }
206
207 let name = from.alias.as_ref().map(|a| a.content.clone());
209
210 if let Some(ref n) = name {
211 known_stage_names.insert(n.clone());
212 }
213
214 let platform = from
216 .flags
217 .iter()
218 .find(|f| f.name.content.as_str() == "platform")
219 .map(|f| f.value.to_string());
220
221 current_stage = Some(Stage {
222 index: stage_index,
223 name,
224 base_image,
225 platform,
226 instructions: Vec::new(),
227 });
228
229 stage_index += 1;
230 }
231
232 RawInstruction::Arg(arg) => {
233 let arg_inst = ArgInstruction {
234 name: arg.name.to_string(),
235 default: arg.value.as_ref().map(std::string::ToString::to_string),
236 };
237
238 if current_stage.is_none() {
239 global_args.push(arg_inst);
240 } else if let Some(ref mut stage) = current_stage {
241 stage.instructions.push(Instruction::Arg(arg_inst));
242 }
243 }
244
245 _ => {
246 if let Some(ref mut stage) = current_stage {
247 if let Some(inst) = Self::convert_instruction(&instruction)? {
248 stage.instructions.push(inst);
249 }
250 }
251 }
252 }
253 }
254
255 if let Some(stage) = current_stage {
257 stages.push(stage);
258 }
259
260 let _stage_names: HashMap<String, usize> = stages
264 .iter()
265 .filter_map(|s| s.name.as_ref().map(|n| (n.clone(), s.index)))
266 .collect();
267 let _num_stages = stages.len();
268
269 Ok(Self {
270 global_args,
271 stages,
272 })
273 }
274
275 #[allow(clippy::too_many_lines)]
277 fn convert_instruction(raw: &RawInstruction) -> Result<Option<Instruction>> {
278 let instruction = match raw {
279 RawInstruction::From(_) => {
280 return Ok(None);
281 }
282
283 RawInstruction::Run(run) => {
284 let command = match &run.expr {
285 dockerfile_parser::ShellOrExecExpr::Shell(s) => {
286 ShellOrExec::Shell(s.to_string())
287 }
288 dockerfile_parser::ShellOrExecExpr::Exec(args) => {
289 ShellOrExec::Exec(args.elements.iter().map(|s| s.content.clone()).collect())
290 }
291 };
292
293 Instruction::Run(RunInstruction {
294 command,
295 mounts: Vec::new(),
296 network: None,
297 security: None,
298 })
299 }
300
301 RawInstruction::Copy(copy) => {
302 let from = copy
303 .flags
304 .iter()
305 .find(|f| f.name.content.as_str() == "from")
306 .map(|f| f.value.to_string());
307
308 let chown = copy
309 .flags
310 .iter()
311 .find(|f| f.name.content.as_str() == "chown")
312 .map(|f| f.value.to_string());
313
314 let chmod = copy
315 .flags
316 .iter()
317 .find(|f| f.name.content.as_str() == "chmod")
318 .map(|f| f.value.to_string());
319
320 let link = copy.flags.iter().any(|f| f.name.content.as_str() == "link");
321
322 let sources: Vec<String> = copy
324 .sources
325 .iter()
326 .map(std::string::ToString::to_string)
327 .collect();
328 let destination = copy.destination.to_string();
329
330 Instruction::Copy(CopyInstruction {
331 sources,
332 destination,
333 from,
334 chown,
335 chmod,
336 link,
337 exclude: Vec::new(),
338 })
339 }
340
341 RawInstruction::Entrypoint(ep) => {
342 let command = match &ep.expr {
343 dockerfile_parser::ShellOrExecExpr::Shell(s) => {
344 ShellOrExec::Shell(s.to_string())
345 }
346 dockerfile_parser::ShellOrExecExpr::Exec(args) => {
347 ShellOrExec::Exec(args.elements.iter().map(|s| s.content.clone()).collect())
348 }
349 };
350 Instruction::Entrypoint(command)
351 }
352
353 RawInstruction::Cmd(cmd) => {
354 let command = match &cmd.expr {
355 dockerfile_parser::ShellOrExecExpr::Shell(s) => {
356 ShellOrExec::Shell(s.to_string())
357 }
358 dockerfile_parser::ShellOrExecExpr::Exec(args) => {
359 ShellOrExec::Exec(args.elements.iter().map(|s| s.content.clone()).collect())
360 }
361 };
362 Instruction::Cmd(command)
363 }
364
365 RawInstruction::Env(env) => {
366 let mut vars = HashMap::new();
367 for var in &env.vars {
368 vars.insert(var.key.to_string(), var.value.to_string());
369 }
370 Instruction::Env(EnvInstruction { vars })
371 }
372
373 RawInstruction::Label(label) => {
374 let mut labels = HashMap::new();
375 for l in &label.labels {
376 labels.insert(l.name.to_string(), l.value.to_string());
377 }
378 Instruction::Label(labels)
379 }
380
381 RawInstruction::Arg(arg) => Instruction::Arg(ArgInstruction {
382 name: arg.name.to_string(),
383 default: arg.value.as_ref().map(std::string::ToString::to_string),
384 }),
385
386 RawInstruction::Misc(misc) => {
387 let instruction_upper = misc.instruction.content.to_uppercase();
388 match instruction_upper.as_str() {
389 "WORKDIR" => Instruction::Workdir(misc.arguments.to_string()),
390
391 "USER" => Instruction::User(misc.arguments.to_string()),
392
393 "VOLUME" => {
394 let args = misc.arguments.to_string();
395 let volumes = if args.trim().starts_with('[') {
396 serde_json::from_str(&args).unwrap_or_else(|_| vec![args])
397 } else {
398 args.split_whitespace().map(String::from).collect()
399 };
400 Instruction::Volume(volumes)
401 }
402
403 "EXPOSE" => {
404 let args = misc.arguments.to_string();
405 let (port_str, protocol) = if let Some((p, proto)) = args.split_once('/') {
406 let proto = match proto.to_lowercase().as_str() {
407 "udp" => ExposeProtocol::Udp,
408 _ => ExposeProtocol::Tcp,
409 };
410 (p, proto)
411 } else {
412 (args.as_str(), ExposeProtocol::Tcp)
413 };
414
415 let port: u16 = port_str.trim().parse().map_err(|_| {
416 BuildError::InvalidInstruction {
417 instruction: "EXPOSE".to_string(),
418 reason: format!("Invalid port number: {port_str}"),
419 }
420 })?;
421
422 Instruction::Expose(ExposeInstruction { port, protocol })
423 }
424
425 "SHELL" => {
426 let args = misc.arguments.to_string();
427 let shell: Vec<String> = serde_json::from_str(&args).map_err(|_| {
428 BuildError::InvalidInstruction {
429 instruction: "SHELL".to_string(),
430 reason: "SHELL requires a JSON array".to_string(),
431 }
432 })?;
433 Instruction::Shell(shell)
434 }
435
436 "STOPSIGNAL" => Instruction::Stopsignal(misc.arguments.to_string()),
437
438 "HEALTHCHECK" => {
439 let args = misc.arguments.to_string().trim().to_string();
440 if args.eq_ignore_ascii_case("NONE") {
441 Instruction::Healthcheck(HealthcheckInstruction::None)
442 } else {
443 let command = if let Some(stripped) = args.strip_prefix("CMD ") {
444 ShellOrExec::Shell(stripped.to_string())
445 } else {
446 ShellOrExec::Shell(args)
447 };
448 Instruction::Healthcheck(HealthcheckInstruction::cmd(command))
449 }
450 }
451
452 "ONBUILD" => {
453 tracing::warn!("ONBUILD instruction parsing not fully implemented");
454 return Ok(None);
455 }
456
457 "MAINTAINER" => {
458 let mut labels = HashMap::new();
459 labels.insert("maintainer".to_string(), misc.arguments.to_string());
460 Instruction::Label(labels)
461 }
462
463 "ADD" => {
464 let args = misc.arguments.to_string();
465 let parts: Vec<String> =
466 args.split_whitespace().map(String::from).collect();
467
468 if parts.len() < 2 {
469 return Err(BuildError::InvalidInstruction {
470 instruction: "ADD".to_string(),
471 reason: "ADD requires at least one source and a destination"
472 .to_string(),
473 });
474 }
475
476 let (sources, dest) = parts.split_at(parts.len() - 1);
477 let destination = dest.first().cloned().unwrap_or_default();
478
479 Instruction::Add(AddInstruction {
480 sources: sources.to_vec(),
481 destination,
482 chown: None,
483 chmod: None,
484 link: false,
485 checksum: None,
486 keep_git_dir: false,
487 })
488 }
489
490 other => {
491 tracing::warn!("Unknown Dockerfile instruction: {}", other);
492 return Ok(None);
493 }
494 }
495 }
496 };
497
498 Ok(Some(instruction))
499 }
500
501 #[must_use]
503 pub fn get_stage(&self, name_or_index: &str) -> Option<&Stage> {
504 self.stages.iter().find(|s| s.matches(name_or_index))
505 }
506
507 #[must_use]
509 pub fn final_stage(&self) -> Option<&Stage> {
510 self.stages.last()
511 }
512
513 #[must_use]
515 pub fn stage_names(&self) -> Vec<String> {
516 self.stages.iter().map(Stage::identifier).collect()
517 }
518
519 #[must_use]
521 pub fn has_stage(&self, name_or_index: &str) -> bool {
522 self.get_stage(name_or_index).is_some()
523 }
524
525 #[must_use]
527 pub fn stage_count(&self) -> usize {
528 self.stages.len()
529 }
530}
531
#[cfg(test)]
mod tests {
    use super::*;

    /// A single-stage file yields one unnamed stage at index 0 holding the
    /// four instructions that follow `FROM`.
    #[test]
    fn test_parse_simple_dockerfile() {
        let content = r#"
FROM alpine:3.18
RUN apk add --no-cache curl
COPY . /app
WORKDIR /app
CMD ["./app"]
"#;

        let parsed = Dockerfile::parse(content).unwrap();
        assert_eq!(parsed.stages.len(), 1);

        let only_stage = &parsed.stages[0];
        assert_eq!(only_stage.index, 0);
        assert!(only_stage.name.is_none());
        assert_eq!(only_stage.instructions.len(), 4);
    }

    /// Two `FROM`s give two stages; the alias is kept and `COPY --from`
    /// carries the stage name.
    #[test]
    fn test_parse_multistage_dockerfile() {
        let content = r#"
FROM golang:1.21 AS builder
WORKDIR /src
COPY . .
RUN go build -o /app

FROM alpine:3.18
COPY --from=builder /app /app
CMD ["/app"]
"#;

        let parsed = Dockerfile::parse(content).unwrap();
        assert_eq!(parsed.stages.len(), 2);

        assert_eq!(parsed.stages[0].name.as_deref(), Some("builder"));

        let runtime = &parsed.stages[1];
        assert!(runtime.name.is_none());

        let copy = runtime.instructions.iter().find_map(|inst| match inst {
            Instruction::Copy(c) => Some(c),
            _ => None,
        });
        assert!(copy.is_some());
        assert_eq!(copy.unwrap().from.as_deref(), Some("builder"));
    }

    /// `COPY --from=<image ref>` keeps the reference text and does not
    /// invent a stage for it.
    #[test]
    fn test_parse_copy_from_external_image_reference() {
        let content = r"
FROM alpine:3.18
COPY --from=ghcr.io/astral-sh/uv:0.5.0 /uv /usr/local/bin/uv
RUN /usr/local/bin/uv --version
";

        let parsed = Dockerfile::parse(content).unwrap();
        assert_eq!(parsed.stages.len(), 1);

        let copy = parsed.stages[0]
            .instructions
            .iter()
            .find_map(|inst| match inst {
                Instruction::Copy(c) => Some(c),
                _ => None,
            })
            .expect("COPY instruction present");

        assert_eq!(
            copy.from,
            Some("ghcr.io/astral-sh/uv:0.5.0".to_string()),
            "external image ref must be preserved verbatim in CopyInstruction.from",
        );
        assert_eq!(copy.sources, vec!["/uv".to_string()]);
        assert_eq!(copy.destination, "/usr/local/bin/uv".to_string());

        assert!(parsed.get_stage("ghcr.io/astral-sh/uv:0.5.0").is_none());
    }

    /// An `ARG` before the first `FROM` is collected as a global argument.
    #[test]
    fn test_parse_global_args() {
        let content = r#"
ARG BASE_IMAGE=alpine:3.18
FROM ${BASE_IMAGE}
RUN echo "hello"
"#;

        let parsed = Dockerfile::parse(content).unwrap();
        assert_eq!(parsed.global_args.len(), 1);

        let base_image_arg = &parsed.global_args[0];
        assert_eq!(base_image_arg.name, "BASE_IMAGE");
        assert_eq!(base_image_arg.default, Some("alpine:3.18".to_string()));
    }

    /// Stages can be looked up by alias and by numeric index.
    #[test]
    fn test_get_stage_by_name() {
        let content = r#"
FROM alpine:3.18 AS base
RUN echo "base"

FROM base AS builder
RUN echo "builder"
"#;

        let parsed = Dockerfile::parse(content).unwrap();

        assert_eq!(parsed.get_stage("base").map(|s| s.index), Some(0));
        assert_eq!(parsed.get_stage("builder").map(|s| s.index), Some(1));

        let stage_0 = parsed.get_stage("0");
        assert!(stage_0.is_some());
        assert_eq!(stage_0.unwrap().name, Some("base".to_string()));
    }

    /// The final stage is the last `FROM`, here based on `scratch`.
    #[test]
    fn test_final_stage() {
        let content = r#"
FROM alpine:3.18 AS builder
RUN echo "builder"

FROM scratch
COPY --from=builder /app /app
"#;

        let parsed = Dockerfile::parse(content).unwrap();
        let last = parsed.final_stage().unwrap();

        assert_eq!(last.index, 1);
        assert!(last.base_image.is_scratch());
    }

    /// Several `KEY=value` pairs on one `ENV` line all land in the map.
    #[test]
    fn test_parse_env_instruction() {
        let content = r"
FROM alpine
ENV FOO=bar BAZ=qux
";

        let parsed = Dockerfile::parse(content).unwrap();

        let env = parsed.stages[0]
            .instructions
            .iter()
            .find_map(|inst| match inst {
                Instruction::Env(e) => Some(e),
                _ => None,
            });
        assert!(env.is_some());

        let vars = &env.unwrap().vars;
        assert_eq!(vars.get("FOO").map(String::as_str), Some("bar"));
        assert_eq!(vars.get("BAZ").map(String::as_str), Some("qux"));
    }
}