parse_dockerfile/
lib.rs

1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*!
4Dockerfile parser, written in Rust.
5
6### Usage
7
8<!-- Note: Document from sync-markdown-to-rustdoc:start through sync-markdown-to-rustdoc:end
9     is synchronized from README.md. Any changes to that range are not preserved. -->
10<!-- tidy:sync-markdown-to-rustdoc:start -->
11
12To use this crate as a library, add this to your `Cargo.toml`:
13
14```toml
15[dependencies]
16parse-dockerfile = { version = "0.1", default-features = false }
17```
18
19<div class="rustdoc-alert rustdoc-alert-note">
20
21> **ⓘ Note**
22>
23> We recommend disabling default features because they enable CLI-related
24> dependencies which the library part does not use.
25
26</div>
27
28<!-- omit in toc -->
29### Examples
30
31```
32use parse_dockerfile::{parse, Instruction};
33
34let text = r#"
35ARG UBUNTU_VERSION=latest
36
37FROM ubuntu:${UBUNTU_VERSION}
38RUN echo
39"#;
40
41let dockerfile = parse(text).unwrap();
42
43// Iterate over all instructions.
44let mut instructions = dockerfile.instructions.iter();
45assert!(matches!(instructions.next(), Some(Instruction::Arg(..))));
46assert!(matches!(instructions.next(), Some(Instruction::From(..))));
47assert!(matches!(instructions.next(), Some(Instruction::Run(..))));
48assert!(matches!(instructions.next(), None));
49
50// Iterate over global args.
51let mut global_args = dockerfile.global_args();
52let global_arg1 = global_args.next().unwrap();
53assert_eq!(global_arg1.arguments.value, "UBUNTU_VERSION=latest");
54assert!(matches!(global_args.next(), None));
55
56// Iterate over stages.
57let mut stages = dockerfile.stages();
58let stage1 = stages.next().unwrap();
59assert_eq!(stage1.from.image.value, "ubuntu:${UBUNTU_VERSION}");
60let mut stage1_instructions = stage1.instructions.iter();
61assert!(matches!(stage1_instructions.next(), Some(Instruction::Run(..))));
62assert!(matches!(stage1_instructions.next(), None));
63assert!(matches!(stages.next(), None));
64```
65
66<!-- omit in toc -->
67### Optional features
68
69- **`serde`** — Implements [`serde::Serialize`] trait for parse-dockerfile types.
70
71[`serde::Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
72
73<!-- tidy:sync-markdown-to-rustdoc:end -->
74*/
75
76#![doc(test(
77    no_crate_inject,
78    attr(
79        deny(warnings, rust_2018_idioms, single_use_lifetimes),
80        allow(dead_code, unused_variables)
81    )
82))]
83#![forbid(unsafe_code)]
84#![warn(
85    // Lints that may help when writing public library.
86    missing_debug_implementations,
87    missing_docs,
88    clippy::alloc_instead_of_core,
89    clippy::exhaustive_enums,
90    clippy::exhaustive_structs,
91    clippy::impl_trait_in_params,
92    // clippy::missing_inline_in_public_items,
93    // clippy::std_instead_of_alloc,
94    clippy::std_instead_of_core,
95)]
96#![allow(clippy::inline_always)]
97
98#[cfg(test)]
99#[path = "gen/tests/assert_impl.rs"]
100mod assert_impl;
101#[cfg(test)]
102#[path = "gen/tests/track_size.rs"]
103mod track_size;
104
105mod error;
106
107use std::{borrow::Cow, collections::HashMap, mem, ops::Range, str};
108
109use smallvec::SmallVec;
110
111pub use self::error::Error;
112use self::error::{ErrorKind, Result};
113
114/// Parses dockerfile from the given `text`.
115#[allow(clippy::missing_panics_doc)]
116pub fn parse(text: &str) -> Result<Dockerfile<'_>> {
117    let mut p = ParseIter::new(text)?;
118    let mut s = p.s;
119
120    let mut instructions = Vec::with_capacity(p.text.len() / 60);
121    let mut stages = Vec::with_capacity(1);
122    let mut named_stages = 0;
123    let mut current_stage = None;
124    while let Some((&b, s_next)) = s.split_first() {
125        let instruction =
126            parse_instruction(&mut p, &mut s, b, s_next).map_err(|e| e.into_error(&p))?;
127        match instruction {
128            Instruction::From(from) => {
129                named_stages += from.as_.is_some() as usize;
130                let new_stage = instructions.len();
131                if let Some(prev_stage) = current_stage.replace(new_stage) {
132                    stages.push(prev_stage..new_stage);
133                }
134                instructions.push(Instruction::From(from));
135            }
136            arg @ Instruction::Arg(..) => instructions.push(arg),
137            instruction => {
138                if current_stage.is_none() {
139                    return Err(ErrorKind::Expected("FROM", instruction.instruction_span().start)
140                        .into_error(&p));
141                }
142                instructions.push(instruction);
143            }
144        }
145        skip_comments_and_whitespaces(&mut s, p.escape_byte);
146    }
147    if let Some(current_stage) = current_stage {
148        stages.push(current_stage..instructions.len());
149    }
150
151    if stages.is_empty() {
152        // https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L263
153        return Err(ErrorKind::NoStages.into_error(&p));
154    }
155    // TODO: https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L302
156    // > base name (%s) should not be blank
157
158    let mut stages_by_name = HashMap::with_capacity(named_stages);
159    for (i, stage) in stages.iter().enumerate() {
160        let Instruction::From(from) = &instructions[stage.start] else { unreachable!() };
161        if let Some((_as, name)) = &from.as_ {
162            if let Some(first_occurrence) = stages_by_name.insert(name.value.clone(), i) {
163                let Instruction::From(from) = &instructions[stages[first_occurrence].start] else {
164                    unreachable!()
165                };
166                let first = from.as_.as_ref().unwrap().1.span.clone();
167                let second = name.span.clone();
168                return Err(ErrorKind::DuplicateName { first, second }.into_error(&p));
169            }
170        }
171    }
172
173    Ok(Dockerfile { parser_directives: p.parser_directives, instructions, stages, stages_by_name })
174}
175
/// Returns an iterator over instructions in the given `text`.
///
/// Unlike [`parse`] function, the returned iterator doesn't error on
/// duplicate stage names.
///
/// # Errors
///
/// Returns an error if the parser directives at the start of `text` cannot
/// be parsed. Errors in individual instructions are reported by the returned
/// iterator as it is advanced.
pub fn parse_iter(text: &str) -> Result<ParseIter<'_>> {
    ParseIter::new(text)
}
183
/// A dockerfile.
///
/// This is the value produced by [`parse`]. It borrows from the parsed text
/// for the lifetime `'a`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct Dockerfile<'a> {
    /// Parser directives.
    pub parser_directives: ParserDirectives<'a>,
    /// Instructions.
    ///
    /// All instructions in the order they were parsed, including any `ARG`
    /// instructions that precede the first `FROM`.
    pub instructions: Vec<Instruction<'a>>,
    /// Index ranges into `instructions`, one per stage, as built by [`parse`]:
    /// each range starts at the stage's `FROM` instruction and ends where the
    /// next stage begins (or at the end of `instructions`). Not serialized.
    #[cfg_attr(feature = "serde", serde(skip))]
    stages: Vec<Range<usize>>,
    /// Maps a stage name (the `<name>` in `FROM ... AS <name>`) to the index
    /// of that stage in `stages`. Not serialized.
    #[cfg_attr(feature = "serde", serde(skip))]
    stages_by_name: HashMap<Cow<'a, str>, usize>,
}
198impl<'a> Dockerfile<'a> {
199    /// Returns an iterator over global args.
200    #[allow(clippy::missing_panics_doc)] // self.stages is not empty
201    #[must_use]
202    pub fn global_args<'b>(&'b self) -> impl ExactSizeIterator<Item = &'b ArgInstruction<'a>> {
203        self.instructions[..self.stages.first().unwrap().start].iter().map(|arg| {
204            let Instruction::Arg(arg) = arg else { unreachable!() };
205            arg
206        })
207    }
208    /// Gets a stage by name.
209    #[must_use]
210    pub fn stage<'b>(&'b self, name: &str) -> Option<Stage<'a, 'b>> {
211        let i = *self.stages_by_name.get(name)?;
212        let stage = &self.stages[i];
213        let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
214        Some(Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] })
215    }
216    /// Returns an iterator over stages.
217    #[must_use]
218    pub fn stages<'b>(&'b self) -> impl ExactSizeIterator<Item = Stage<'a, 'b>> {
219        self.stages.iter().map(move |stage| {
220            let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
221            Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] }
222        })
223    }
224}
/// A stage.
///
/// A borrowed view of one build stage of a [`Dockerfile`]: the `FROM`
/// instruction that opens the stage and the instructions that follow it, up
/// to (but not including) the next `FROM`.
#[derive(Debug)]
#[non_exhaustive]
pub struct Stage<'a, 'b> {
    /// The `FROM` instruction.
    pub from: &'b FromInstruction<'a>,
    /// The remaining instructions.
    pub instructions: &'b [Instruction<'a>],
}
234
/// Parser directives.
///
/// Every directive is optional; the parser starts with all fields set to
/// `None`.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#parser-directives)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ParserDirectives<'a> {
    /// `syntax` parser directive.
    ///
    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#syntax)
    pub syntax: Option<ParserDirective<&'a str>>,
    /// `escape` parser directive.
    ///
    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#escape)
    pub escape: Option<ParserDirective<char>>,
    /// `check` parser directive.
    ///
    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#check)
    pub check: Option<ParserDirective<&'a str>>,
}
/// A parser directive.
///
/// Stores the directive's value together with the offset at which the
/// directive begins; the range of the whole directive is available through
/// [`span`](Self::span).
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct ParserDirective<T> {
    /// Offset of the first character of the directive name.
    ///
    /// ```text
    /// syntax=value
    /// ^
    /// ```
    start: usize,
    /// The directive's value and the span it covers.
    ///
    /// ```text
    /// syntax=value
    ///        ^^^^^
    /// ```
    pub value: Spanned<T>,
}
272impl<T> ParserDirective<T> {
273    /// ```text
274    /// syntax=value
275    /// ^^^^^^^^^^^^
276    /// ```
277    #[must_use]
278    pub fn span(&self) -> Span {
279        self.start..self.value.span.end
280    }
281}
282
/// An instruction.
///
/// Each variant wraps the struct that describes the corresponding Dockerfile
/// instruction. With the `serde` feature enabled, the variant is identified
/// by a `kind` tag written in `SCREAMING_SNAKE_CASE`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
#[non_exhaustive]
pub enum Instruction<'a> {
    /// `ADD` instruction.
    Add(AddInstruction<'a>),
    /// `ARG` instruction.
    Arg(ArgInstruction<'a>),
    /// `CMD` instruction.
    Cmd(CmdInstruction<'a>),
    /// `COPY` instruction.
    Copy(CopyInstruction<'a>),
    /// `ENTRYPOINT` instruction.
    Entrypoint(EntrypointInstruction<'a>),
    /// `ENV` instruction.
    Env(EnvInstruction<'a>),
    /// `EXPOSE` instruction.
    Expose(ExposeInstruction<'a>),
    /// `FROM` instruction.
    From(FromInstruction<'a>),
    /// `HEALTHCHECK` instruction.
    Healthcheck(HealthcheckInstruction<'a>),
    /// `LABEL` instruction.
    Label(LabelInstruction<'a>),
    /// `MAINTAINER` instruction (deprecated).
    Maintainer(MaintainerInstruction<'a>),
    /// `ONBUILD` instruction.
    Onbuild(OnbuildInstruction<'a>),
    /// `RUN` instruction.
    Run(RunInstruction<'a>),
    /// `SHELL` instruction.
    Shell(ShellInstruction<'a>),
    /// `STOPSIGNAL` instruction.
    Stopsignal(StopsignalInstruction<'a>),
    /// `USER` instruction.
    User(UserInstruction<'a>),
    /// `VOLUME` instruction.
    Volume(VolumeInstruction<'a>),
    /// `WORKDIR` instruction.
    Workdir(WorkdirInstruction<'a>),
}
327impl Instruction<'_> {
328    fn instruction_span(&self) -> Span {
329        match self {
330            Instruction::Add(instruction) => instruction.add.span.clone(),
331            Instruction::Arg(instruction) => instruction.arg.span.clone(),
332            Instruction::Cmd(instruction) => instruction.cmd.span.clone(),
333            Instruction::Copy(instruction) => instruction.copy.span.clone(),
334            Instruction::Entrypoint(instruction) => instruction.entrypoint.span.clone(),
335            Instruction::Env(instruction) => instruction.env.span.clone(),
336            Instruction::Expose(instruction) => instruction.expose.span.clone(),
337            Instruction::From(instruction) => instruction.from.span.clone(),
338            Instruction::Healthcheck(instruction) => instruction.healthcheck.span.clone(),
339            Instruction::Label(instruction) => instruction.label.span.clone(),
340            Instruction::Maintainer(instruction) => instruction.maintainer.span.clone(),
341            Instruction::Onbuild(instruction) => instruction.onbuild.span.clone(),
342            Instruction::Run(instruction) => instruction.run.span.clone(),
343            Instruction::Shell(instruction) => instruction.shell.span.clone(),
344            Instruction::Stopsignal(instruction) => instruction.stopsignal.span.clone(),
345            Instruction::User(instruction) => instruction.user.span.clone(),
346            Instruction::Volume(instruction) => instruction.volume.span.clone(),
347            Instruction::Workdir(instruction) => instruction.workdir.span.clone(),
348        }
349    }
350}
/// An `ADD` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#add)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct AddInstruction<'a> {
    /// The `ADD` keyword.
    ///
    /// ```text
    /// ADD [options] <src> ... <dest>
    /// ^^^
    /// ```
    pub add: Keyword,
    /// Option flags written between the keyword and the first source.
    ///
    /// ```text
    /// ADD [options] <src> ... <dest>
    ///     ^^^^^^^^^
    /// ```
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// The sources; there is always at least one.
    ///
    /// ```text
    /// ADD [options] <src> ... <dest>
    ///               ^^^^^^^^^
    /// ```
    // At least 1
    pub src: SmallVec<[Source<'a>; 1]>,
    /// The destination.
    ///
    /// ```text
    /// ADD [options] <src> ... <dest>
    ///                         ^^^^^^
    /// ```
    pub dest: UnescapedString<'a>,
}
/// An `ARG` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#arg)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ArgInstruction<'a> {
    /// The `ARG` keyword.
    ///
    /// ```text
    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
    /// ^^^
    /// ```
    pub arg: Keyword,
    /// Everything after the keyword, kept as a single string; it is not yet
    /// split into individual `<name>[=<default value>]` items.
    ///
    /// ```text
    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // TODO: SmallVec<[NameOptValue<'a>; 1]>
    pub arguments: UnescapedString<'a>,
}
/// A `CMD` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#cmd)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct CmdInstruction<'a> {
    /// The `CMD` keyword.
    ///
    /// ```text
    /// CMD ["executable", "param"]
    /// ^^^
    /// ```
    pub cmd: Keyword,
    /// The command, in either exec form or shell form (see [`Command`]).
    ///
    /// ```text
    /// CMD ["executable", "param"]
    ///     ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    pub arguments: Command<'a>,
}
/// A `COPY` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#copy)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct CopyInstruction<'a> {
    /// The `COPY` keyword.
    ///
    /// ```text
    /// COPY [options] <src> ... <dest>
    /// ^^^^
    /// ```
    pub copy: Keyword,
    /// Option flags written between the keyword and the first source.
    ///
    /// ```text
    /// COPY [options] <src> ... <dest>
    ///      ^^^^^^^^^
    /// ```
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// The sources; there is always at least one.
    ///
    /// ```text
    /// COPY [options] <src> ... <dest>
    ///                ^^^^^^^^^
    /// ```
    // At least 1
    pub src: SmallVec<[Source<'a>; 1]>,
    /// The destination.
    ///
    /// ```text
    /// COPY [options] <src> ... <dest>
    ///                          ^^^^^^
    /// ```
    pub dest: UnescapedString<'a>,
}
/// An enum that represents source value of [`ADD` instruction](AddInstruction) and
/// [`COPY` instruction](CopyInstruction).
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum Source<'a> {
    /// Path or URL.
    Path(UnescapedString<'a>),
    /// Here-document.
    HereDoc(HereDoc<'a>),
}
/// An `ENTRYPOINT` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#entrypoint)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct EntrypointInstruction<'a> {
    /// The `ENTRYPOINT` keyword.
    ///
    /// ```text
    /// ENTRYPOINT ["executable", "param"]
    /// ^^^^^^^^^^
    /// ```
    pub entrypoint: Keyword,
    /// The command, in either exec form or shell form (see [`Command`]).
    ///
    /// ```text
    /// ENTRYPOINT ["executable", "param"]
    ///            ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    pub arguments: Command<'a>,
}
/// An `ENV` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#env)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct EnvInstruction<'a> {
    /// The `ENV` keyword.
    ///
    /// ```text
    /// ENV <key>=<value> [<key>=<value>...]
    /// ^^^
    /// ```
    pub env: Keyword,
    /// Everything after the keyword, kept as a single string; it is not yet
    /// split into individual `<key>=<value>` pairs.
    ///
    /// ```text
    /// ENV <key>=<value> [<key>=<value>...]
    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // TODO: SmallVec<[NameValue<'a>; 1]>
    pub arguments: UnescapedString<'a>,
}
/// An `EXPOSE` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#expose)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ExposeInstruction<'a> {
    /// The `EXPOSE` keyword.
    ///
    /// ```text
    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
    /// ^^^^^^
    /// ```
    pub expose: Keyword,
    /// The list of arguments that follow the keyword, one string per entry.
    ///
    /// ```text
    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
    ///        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    pub arguments: SmallVec<[UnescapedString<'a>; 1]>,
}
/// A `FROM` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#from)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct FromInstruction<'a> {
    /// The `FROM` keyword.
    ///
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    /// ^^^^
    /// ```
    pub from: Keyword,
    /// Option flags written between the keyword and the image.
    ///
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    ///      ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    pub options: Vec<Flag<'a>>,
    /// The image.
    ///
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    ///                              ^^^^^^^
    /// ```
    pub image: UnescapedString<'a>,
    /// The `AS` keyword and the stage name, or `None` if the stage is unnamed.
    ///
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    ///                                      ^^^^^^^^^^^
    /// ```
    pub as_: Option<(Keyword, UnescapedString<'a>)>,
}
/// A `HEALTHCHECK` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#healthcheck)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct HealthcheckInstruction<'a> {
    /// The `HEALTHCHECK` keyword.
    ///
    /// ```text
    /// HEALTHCHECK [options] CMD command
    /// ^^^^^^^^^^^
    /// ```
    pub healthcheck: Keyword,
    /// Option flags written between the keyword and `CMD`/`NONE`.
    ///
    /// ```text
    /// HEALTHCHECK [options] CMD command
    ///             ^^^^^^^^^
    /// ```
    pub options: Vec<Flag<'a>>,
    /// Either `CMD` with its command, or `NONE` (see [`HealthcheckArguments`]).
    ///
    /// ```text
    /// HEALTHCHECK [options] CMD command
    ///                       ^^^^^^^^^^^
    /// ```
    pub arguments: HealthcheckArguments<'a>,
}
/// Arguments of the [`HEALTHCHECK` instruction](HealthcheckInstruction).
///
/// With the `serde` feature enabled, the variant is identified by a `kind`
/// tag written in `SCREAMING_SNAKE_CASE`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
#[non_exhaustive]
pub enum HealthcheckArguments<'a> {
    /// `HEALTHCHECK [options] CMD ...`
    #[non_exhaustive]
    Cmd {
        /// The `CMD` keyword.
        ///
        /// ```text
        /// HEALTHCHECK [options] CMD command
        ///                       ^^^
        /// ```
        cmd: Keyword,
        /// The command that follows `CMD`.
        ///
        /// ```text
        /// HEALTHCHECK [options] CMD command
        ///                           ^^^^^^^
        /// ```
        arguments: Command<'a>,
    },
    /// `HEALTHCHECK [options] NONE`
    #[non_exhaustive]
    None {
        /// The `NONE` keyword.
        ///
        /// ```text
        /// HEALTHCHECK [options] NONE
        ///                       ^^^^
        /// ```
        none: Keyword,
    },
}
/// A `LABEL` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#label)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct LabelInstruction<'a> {
    /// The `LABEL` keyword.
    ///
    /// ```text
    /// LABEL <key>=<value> [<key>=<value>...]
    /// ^^^^^
    /// ```
    pub label: Keyword,
    /// Everything after the keyword, kept as a single string; it is not yet
    /// split into individual `<key>=<value>` pairs.
    ///
    /// ```text
    /// LABEL <key>=<value> [<key>=<value>...]
    ///       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // TODO: SmallVec<[NameValue<'a>; 1]>
    pub arguments: UnescapedString<'a>,
}
/// A `MAINTAINER` instruction (deprecated).
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#maintainer-deprecated)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct MaintainerInstruction<'a> {
    /// The `MAINTAINER` keyword.
    ///
    /// ```text
    /// MAINTAINER <name>
    /// ^^^^^^^^^^
    /// ```
    pub maintainer: Keyword,
    /// The maintainer name that follows the keyword.
    ///
    /// ```text
    /// MAINTAINER <name>
    ///            ^^^^^^
    /// ```
    pub name: UnescapedString<'a>,
}
/// An `ONBUILD` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#onbuild)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct OnbuildInstruction<'a> {
    /// The `ONBUILD` keyword.
    ///
    /// ```text
    /// ONBUILD <INSTRUCTION>
    /// ^^^^^^^
    /// ```
    pub onbuild: Keyword,
    /// The wrapped instruction. It is boxed because [`Instruction`] contains
    /// this struct, making the type recursive.
    ///
    /// ```text
    /// ONBUILD <INSTRUCTION>
    ///         ^^^^^^^^^^^^^
    /// ```
    pub instruction: Box<Instruction<'a>>,
}
/// A `RUN` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#run)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct RunInstruction<'a> {
    /// The `RUN` keyword.
    ///
    /// ```text
    /// RUN [options] <command> ...
    /// ^^^
    /// ```
    pub run: Keyword,
    /// Option flags written between the keyword and the command.
    ///
    /// ```text
    /// RUN [options] <command> ...
    ///     ^^^^^^^^^
    /// ```
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// The command, in either exec form or shell form (see [`Command`]).
    ///
    /// ```text
    /// RUN [options] <command> ...
    ///               ^^^^^^^^^^^^^
    /// ```
    pub arguments: Command<'a>,
    /// Here-documents attached to the command.
    ///
    /// ```text
    ///   RUN [options] <<EOF
    /// /               ^^^^^
    /// | ...
    /// | EOF
    /// |_^^^
    /// ```
    pub here_docs: Vec<HereDoc<'a>>,
}
/// A `SHELL` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ShellInstruction<'a> {
    /// The `SHELL` keyword.
    ///
    /// ```text
    /// SHELL ["executable", "param"]
    /// ^^^^^
    /// ```
    pub shell: Keyword,
    /// The elements of the array that follows the keyword.
    ///
    /// ```text
    /// SHELL ["executable", "param"]
    ///       ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // Usually at least 2, e.g., ["/bin/sh", "-c"]
    // Common cases are 4, e.g., ["/bin/bash", "-o", "pipefail", "-c"]
    pub arguments: SmallVec<[UnescapedString<'a>; 4]>,
}
/// A `STOPSIGNAL` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#stopsignal)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct StopsignalInstruction<'a> {
    /// The `STOPSIGNAL` keyword.
    ///
    /// ```text
    /// STOPSIGNAL signal
    /// ^^^^^^^^^^
    /// ```
    pub stopsignal: Keyword,
    /// The signal that follows the keyword, as written.
    ///
    /// ```text
    /// STOPSIGNAL signal
    ///            ^^^^^^
    /// ```
    pub arguments: UnescapedString<'a>,
}
/// A `USER` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#user)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct UserInstruction<'a> {
    /// The `USER` keyword.
    ///
    /// ```text
    /// USER <user>[:<group>]
    /// ^^^^
    /// ```
    pub user: Keyword,
    /// The `<user>[:<group>]` argument, kept as a single string.
    ///
    /// ```text
    /// USER <user>[:<group>]
    ///      ^^^^^^^^^^^^^^^^
    /// ```
    pub arguments: UnescapedString<'a>,
}
/// A `VOLUME` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#volume)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct VolumeInstruction<'a> {
    /// The `VOLUME` keyword.
    ///
    /// ```text
    /// VOLUME ["/data"]
    /// ^^^^^^
    /// ```
    pub volume: Keyword,
    /// The volumes, written either as a JSON array or as a space-separated
    /// string (see [`JsonOrStringArray`]).
    ///
    /// ```text
    /// VOLUME ["/data"]
    ///        ^^^^^^^^^
    /// ```
    pub arguments: JsonOrStringArray<'a, 1>,
}
/// A `WORKDIR` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#workdir)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct WorkdirInstruction<'a> {
    /// The `WORKDIR` keyword.
    ///
    /// ```text
    /// WORKDIR /path/to/workdir
    /// ^^^^^^^
    /// ```
    pub workdir: Keyword,
    /// The path that follows the keyword.
    ///
    /// ```text
    /// WORKDIR /path/to/workdir
    ///         ^^^^^^^^^^^^^^^^
    /// ```
    pub arguments: UnescapedString<'a>,
}
791
/// A keyword.
///
/// Only the location of the keyword is recorded; which keyword it is follows
/// from the field or variant that holds it (e.g. `RunInstruction::run`).
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct Keyword {
    /// The span covered by the keyword.
    #[allow(missing_docs)]
    pub span: Span,
}
801
/// An option flag.
///
/// A flag has a name and an optional value, e.g. `--platform=linux/amd64`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct Flag<'a> {
    /// Offset of the first `-` of the flag.
    ///
    /// ```text
    /// --platform=linux/amd64
    /// ^
    /// ```
    flag_start: usize,
    /// The flag name, without the leading `--`.
    ///
    /// ```text
    /// --platform=linux/amd64
    ///   ^^^^^^^^
    /// ```
    pub name: UnescapedString<'a>,
    /// The flag value, or `None` if the flag has no value.
    ///
    /// ```text
    /// --platform=linux/amd64
    ///            ^^^^^^^^^^^
    /// ```
    pub value: Option<UnescapedString<'a>>,
}
823impl Flag<'_> {
824    /// ```text
825    /// --platform=linux/amd64
826    /// ^^^^^^^^^^
827    /// ```
828    #[must_use]
829    pub fn flag_span(&self) -> Span {
830        self.flag_start..self.name.span.end
831    }
832    /// ```text
833    /// --platform=linux/amd64
834    /// ^^^^^^^^^^^^^^^^^^^^^^
835    /// ```
836    #[must_use]
837    pub fn span(&self) -> Span {
838        match &self.value {
839            Some(v) => self.flag_start..v.span.end,
840            None => self.flag_span(),
841        }
842    }
843}
844
/// An unescaped string.
///
/// `value` is a [`Cow`], so it can either borrow from the parsed text or own
/// a newly built string.
#[derive(Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct UnescapedString<'a> {
    /// The span this string covers.
    #[allow(missing_docs)]
    pub span: Span,
    /// The string after unescaping.
    #[allow(missing_docs)]
    pub value: Cow<'a, str>,
}
856
/// A command.
///
/// This is used in the [`RUN`](RunInstruction), [`CMD`](CmdInstruction), and
/// [`ENTRYPOINT`](EntrypointInstruction) instructions.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell-and-exec-form)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum Command<'a> {
    /// Exec-form (JSON array)
    ///
    /// The span covers the whole array; each element is stored as its own
    /// [`UnescapedString`].
    // At least 1
    Exec(Spanned<SmallVec<[UnescapedString<'a>; 1]>>),
    /// Shell-form (space-separated string or here-documents), escape preserved
    ///
    /// The value is a `&str`, so it is a borrowed slice rather than a newly
    /// built string.
    Shell(Spanned<&'a str>),
}
874
// TODO: merge two? It reduces size, but makes it confusing when the array is modified.
/// A JSON array or space-separated string.
///
/// This is used in the [`VOLUME` instruction](VolumeInstruction).
///
/// `N` is the inline capacity of the underlying [`SmallVec`].
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[allow(clippy::exhaustive_enums)]
pub enum JsonOrStringArray<'a, const N: usize> {
    /// JSON array.
    ///
    /// Carries a span for the array as a whole in addition to the elements.
    Json(Spanned<SmallVec<[UnescapedString<'a>; N]>>),
    /// Space-separated string.
    ///
    /// Each element carries its own span; there is no span for the whole list.
    String(SmallVec<[UnescapedString<'a>; N]>),
}
889
/// A here-document.
///
/// `value` is a [`Cow`], so it can either borrow from the parsed text or own
/// a newly built string.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct HereDoc<'a> {
    /// The span of the here-document.
    #[allow(missing_docs)]
    pub span: Span,
    /// `false` if delimiter is quoted.
    pub expand: bool,
    /// The value of the here-document.
    // NOTE(review): presumably the body text without the delimiter lines — confirm
    // against the here-document parsing code.
    #[allow(missing_docs)]
    pub value: Cow<'a, str>,
}
903
/// A spanned value.
///
/// Pairs a value of type `T` with the span it was parsed from.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[allow(clippy::exhaustive_structs)]
pub struct Spanned<T> {
    /// The span of `value`.
    #[allow(missing_docs)]
    pub span: Span,
    /// The value itself.
    #[allow(missing_docs)]
    pub value: T,
}
915
/// A half-open range `start..end` of offsets locating a piece of the parsed text.
// NOTE(review): presumably byte offsets, since the parser operates on `&[u8]` — confirm.
#[allow(missing_docs)]
pub type Span = Range<usize>;
918
919// -----------------------------------------------------------------------------
920// Parsing
921
/// An iterator over instructions.
///
/// This type is returned by [`parse_iter`] function.
#[allow(missing_debug_implementations)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ParseIter<'a> {
    /// The text being parsed, with a leading UTF-8 BOM (if any) removed.
    text: &'a str,
    /// The input bytes that have not been consumed yet; starts as the bytes
    /// of `text`.
    s: &'a [u8],
    /// The escape byte in effect; initialized to `DEFAULT_ESCAPE_BYTE`.
    // NOTE(review): presumably overridden by the `escape` parser directive — confirm.
    escape_byte: u8,
    /// Starts as `false`.
    // NOTE(review): presumably set once a `FROM` has been seen — confirm.
    has_stage: bool,
    /// Starts as `false`.
    // NOTE(review): presumably set while parsing the instruction nested in `ONBUILD` — confirm.
    in_onbuild: bool,
    /// Parser directives collected from the start of the text.
    parser_directives: ParserDirectives<'a>,
}
935impl<'a> ParseIter<'a> {
936    fn new(mut text: &'a str) -> Result<Self> {
937        // https://github.com/moby/moby/pull/23234
938        if text.as_bytes().starts_with(UTF8_BOM) {
939            text = &text[UTF8_BOM.len()..];
940        }
941        let mut p = Self {
942            text,
943            s: text.as_bytes(),
944            escape_byte: DEFAULT_ESCAPE_BYTE,
945            has_stage: false,
946            in_onbuild: false,
947            parser_directives: ParserDirectives {
948                // https://docs.docker.com/reference/dockerfile/#parser-directives
949                syntax: None,
950                escape: None,
951                // https://github.com/moby/buildkit/pull/4962
952                check: None,
953            },
954        };
955
956        parse_parser_directives(&mut p).map_err(|e| e.into_error(&p))?;
957
958        // https://docs.docker.com/reference/dockerfile/#format
959        // > For backward compatibility, leading whitespace before comments (#) and
960        // > instructions (such as RUN) are ignored, but discouraged.
961        skip_comments_and_whitespaces(&mut p.s, p.escape_byte);
962        Ok(p)
963    }
964}
965impl<'a> Iterator for ParseIter<'a> {
966    type Item = Result<Instruction<'a>>;
967    #[inline]
968    fn next(&mut self) -> Option<Self::Item> {
969        let p = self;
970        let mut s = p.s;
971        if let Some((&b, s_next)) = s.split_first() {
972            let instruction = match parse_instruction(p, &mut s, b, s_next) {
973                Ok(i) => i,
974                Err(e) => return Some(Err(e.into_error(p))),
975            };
976            match &instruction {
977                Instruction::From(..) => {
978                    p.has_stage = true;
979                }
980                Instruction::Arg(..) => {}
981                instruction => {
982                    if !p.has_stage {
983                        return Some(Err(ErrorKind::Expected(
984                            "FROM",
985                            instruction.instruction_span().start,
986                        )
987                        .into_error(p)));
988                    }
989                }
990            }
991            skip_comments_and_whitespaces(&mut s, p.escape_byte);
992            p.s = s;
993            return Some(Ok(instruction));
994        }
995        if !p.has_stage {
996            // https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L263
997            return Some(Err(ErrorKind::NoStages.into_error(p)));
998        }
999        None
1000    }
1001}
1002
/// Escape byte used unless the `escape` parser directive selects another one.
const DEFAULT_ESCAPE_BYTE: u8 = b'\\';
1004
1005fn parse_parser_directives(p: &mut ParseIter<'_>) -> Result<(), ErrorKind> {
1006    while let Some((&b'#', s_next)) = p.s.split_first() {
1007        p.s = s_next;
1008        skip_spaces_no_escape(&mut p.s);
1009        let directive_start = p.text.len() - p.s.len();
1010        if token(&mut p.s, b"SYNTAX") {
1011            skip_spaces_no_escape(&mut p.s);
1012            if let Some((&b'=', s_next)) = p.s.split_first() {
1013                p.s = s_next;
1014                if p.parser_directives.syntax.is_some() {
1015                    // > Invalid due to appearing twice
1016                    p.parser_directives.syntax = None;
1017                    p.parser_directives.escape = None;
1018                    p.parser_directives.check = None;
1019                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1020                    skip_this_line_no_escape(&mut p.s);
1021                    break;
1022                }
1023                skip_spaces_no_escape(&mut p.s);
1024                let value_start = p.text.len() - p.s.len();
1025                skip_non_whitespace_no_escape(&mut p.s);
1026                let end = p.text.len() - p.s.len();
1027                let value = p.text[value_start..end].trim_ascii_end();
1028                p.parser_directives.syntax = Some(ParserDirective {
1029                    start: directive_start,
1030                    value: Spanned { span: value_start..value_start + value.len(), value },
1031                });
1032                skip_this_line_no_escape(&mut p.s);
1033                continue;
1034            }
1035        } else if token(&mut p.s, b"CHECK") {
1036            skip_spaces_no_escape(&mut p.s);
1037            if let Some((&b'=', s_next)) = p.s.split_first() {
1038                p.s = s_next;
1039                if p.parser_directives.check.is_some() {
1040                    // > Invalid due to appearing twice
1041                    p.parser_directives.syntax = None;
1042                    p.parser_directives.escape = None;
1043                    p.parser_directives.check = None;
1044                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1045                    skip_this_line_no_escape(&mut p.s);
1046                    break;
1047                }
1048                skip_spaces_no_escape(&mut p.s);
1049                let value_start = p.text.len() - p.s.len();
1050                skip_non_whitespace_no_escape(&mut p.s);
1051                let end = p.text.len() - p.s.len();
1052                let value = p.text[value_start..end].trim_ascii_end();
1053                p.parser_directives.check = Some(ParserDirective {
1054                    start: directive_start,
1055                    value: Spanned { span: value_start..value_start + value.len(), value },
1056                });
1057                skip_this_line_no_escape(&mut p.s);
1058                continue;
1059            }
1060        } else if token(&mut p.s, b"ESCAPE") {
1061            skip_spaces_no_escape(&mut p.s);
1062            if let Some((&b'=', s_next)) = p.s.split_first() {
1063                p.s = s_next;
1064                if p.parser_directives.escape.is_some() {
1065                    // > Invalid due to appearing twice
1066                    p.parser_directives.syntax = None;
1067                    p.parser_directives.escape = None;
1068                    p.parser_directives.check = None;
1069                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1070                    skip_this_line_no_escape(&mut p.s);
1071                    break;
1072                }
1073                skip_spaces_no_escape(&mut p.s);
1074                let value_start = p.text.len() - p.s.len();
1075                skip_non_whitespace_no_escape(&mut p.s);
1076                let end = p.text.len() - p.s.len();
1077                let value = p.text[value_start..end].trim_ascii_end();
1078                match value {
1079                    "`" => p.escape_byte = b'`',
1080                    "\\" => {}
1081                    _ => return Err(ErrorKind::InvalidEscape { escape_start: value_start }),
1082                }
1083                p.parser_directives.escape = Some(ParserDirective {
1084                    start: directive_start,
1085                    value: Spanned {
1086                        span: value_start..value_start + value.len(),
1087                        value: p.escape_byte as char,
1088                    },
1089                });
1090                skip_this_line_no_escape(&mut p.s);
1091                continue;
1092            }
1093        }
1094        skip_this_line_no_escape(&mut p.s);
1095        break;
1096    }
1097    Ok(())
1098}
1099
1100#[inline]
1101fn parse_instruction<'a>(
1102    p: &mut ParseIter<'a>,
1103    s: &mut &'a [u8],
1104    b: u8,
1105    s_next: &'a [u8],
1106) -> Result<Instruction<'a>, ErrorKind> {
1107    let instruction_start = p.text.len() - s.len();
1108    *s = s_next;
1109    // NB: `token_slow` must be called after all `token` calls.
1110    match b & TO_UPPER8 {
1111        b'A' => {
1112            if token(s, &b"ARG"[1..]) {
1113                let instruction_span = instruction_start..p.text.len() - s.len();
1114                if spaces_or_line_end(s, p.escape_byte) {
1115                    return parse_arg(p, s, Keyword { span: instruction_span });
1116                }
1117            } else if token(s, &b"ADD"[1..]) {
1118                let instruction_span = instruction_start..p.text.len() - s.len();
1119                if spaces_or_line_end(s, p.escape_byte) {
1120                    let add = Keyword { span: instruction_span };
1121                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1122                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1123                }
1124            } else if token_slow(s, &b"ARG"[1..], p.escape_byte) {
1125                let instruction_span = instruction_start..p.text.len() - s.len();
1126                if spaces_or_line_end(s, p.escape_byte) {
1127                    return parse_arg(p, s, Keyword { span: instruction_span });
1128                }
1129            } else if token_slow(s, &b"ADD"[1..], p.escape_byte) {
1130                let instruction_span = instruction_start..p.text.len() - s.len();
1131                if spaces_or_line_end(s, p.escape_byte) {
1132                    let add = Keyword { span: instruction_span };
1133                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1134                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1135                }
1136            }
1137        }
1138        b'C' => {
1139            if token(s, &b"COPY"[1..]) {
1140                let instruction_span = instruction_start..p.text.len() - s.len();
1141                if spaces_or_line_end(s, p.escape_byte) {
1142                    let copy = Keyword { span: instruction_span };
1143                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
1144                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1145                }
1146            } else if token(s, &b"CMD"[1..]) {
1147                let instruction_span = instruction_start..p.text.len() - s.len();
1148                if spaces_or_line_end(s, p.escape_byte) {
1149                    return parse_cmd(p, s, Keyword { span: instruction_span });
1150                }
1151            } else if token_slow(s, &b"COPY"[1..], p.escape_byte) {
1152                let instruction_span = instruction_start..p.text.len() - s.len();
1153                if spaces_or_line_end(s, p.escape_byte) {
1154                    let copy = Keyword { span: instruction_span };
1155                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
1156                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1157                }
1158            } else if token_slow(s, &b"CMD"[1..], p.escape_byte) {
1159                let instruction_span = instruction_start..p.text.len() - s.len();
1160                if spaces_or_line_end(s, p.escape_byte) {
1161                    return parse_cmd(p, s, Keyword { span: instruction_span });
1162                }
1163            }
1164        }
1165        b'E' => {
1166            if token(s, &b"ENV"[1..]) {
1167                let instruction_span = instruction_start..p.text.len() - s.len();
1168                if spaces_or_line_end(s, p.escape_byte) {
1169                    return parse_env(p, s, Keyword { span: instruction_span });
1170                }
1171            } else if token(s, &b"EXPOSE"[1..]) {
1172                let instruction_span = instruction_start..p.text.len() - s.len();
1173                if spaces_or_line_end(s, p.escape_byte) {
1174                    return parse_expose(p, s, Keyword { span: instruction_span });
1175                }
1176            } else if token(s, &b"ENTRYPOINT"[1..]) {
1177                let instruction_span = instruction_start..p.text.len() - s.len();
1178                if spaces_or_line_end(s, p.escape_byte) {
1179                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
1180                }
1181            } else if token_slow(s, &b"ENV"[1..], p.escape_byte) {
1182                let instruction_span = instruction_start..p.text.len() - s.len();
1183                if spaces_or_line_end(s, p.escape_byte) {
1184                    return parse_env(p, s, Keyword { span: instruction_span });
1185                }
1186            } else if token_slow(s, &b"EXPOSE"[1..], p.escape_byte) {
1187                let instruction_span = instruction_start..p.text.len() - s.len();
1188                if spaces_or_line_end(s, p.escape_byte) {
1189                    return parse_expose(p, s, Keyword { span: instruction_span });
1190                }
1191            } else if token_slow(s, &b"ENTRYPOINT"[1..], p.escape_byte) {
1192                let instruction_span = instruction_start..p.text.len() - s.len();
1193                if spaces_or_line_end(s, p.escape_byte) {
1194                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
1195                }
1196            }
1197        }
1198        b'F' => {
1199            if token(s, &b"FROM"[1..]) || token_slow(s, &b"FROM"[1..], p.escape_byte) {
1200                let instruction_span = instruction_start..p.text.len() - s.len();
1201                if spaces_or_line_end(s, p.escape_byte) {
1202                    return parse_from(p, s, Keyword { span: instruction_span });
1203                }
1204            }
1205        }
1206        b'H' => {
1207            if token(s, &b"HEALTHCHECK"[1..]) || token_slow(s, &b"HEALTHCHECK"[1..], p.escape_byte)
1208            {
1209                let instruction_span = instruction_start..p.text.len() - s.len();
1210                if spaces_or_line_end(s, p.escape_byte) {
1211                    return parse_healthcheck(p, s, Keyword { span: instruction_span });
1212                }
1213            }
1214        }
1215        b'L' => {
1216            if token(s, &b"LABEL"[1..]) || token_slow(s, &b"LABEL"[1..], p.escape_byte) {
1217                let instruction_span = instruction_start..p.text.len() - s.len();
1218                if spaces_or_line_end(s, p.escape_byte) {
1219                    return parse_label(p, s, Keyword { span: instruction_span });
1220                }
1221            }
1222        }
1223        b'M' => {
1224            if token(s, &b"MAINTAINER"[1..]) || token_slow(s, &b"MAINTAINER"[1..], p.escape_byte) {
1225                let instruction_span = instruction_start..p.text.len() - s.len();
1226                if spaces_or_line_end(s, p.escape_byte) {
1227                    return parse_maintainer(p, s, Keyword { span: instruction_span });
1228                }
1229            }
1230        }
1231        b'O' => {
1232            if token(s, &b"ONBUILD"[1..]) || token_slow(s, &b"ONBUILD"[1..], p.escape_byte) {
1233                let instruction_span = instruction_start..p.text.len() - s.len();
1234                if spaces_or_line_end(s, p.escape_byte) {
1235                    return parse_onbuild(p, s, Keyword { span: instruction_span });
1236                }
1237            }
1238        }
1239        b'R' => {
1240            if token(s, &b"RUN"[1..]) || token_slow(s, &b"RUN"[1..], p.escape_byte) {
1241                let instruction_span = instruction_start..p.text.len() - s.len();
1242                if spaces_or_line_end(s, p.escape_byte) {
1243                    return parse_run(p, s, Keyword { span: instruction_span });
1244                }
1245            }
1246        }
1247        b'S' => {
1248            if token(s, &b"SHELL"[1..]) {
1249                let instruction_span = instruction_start..p.text.len() - s.len();
1250                if spaces_or_line_end(s, p.escape_byte) {
1251                    return parse_shell(p, s, Keyword { span: instruction_span });
1252                }
1253            } else if token(s, &b"STOPSIGNAL"[1..]) {
1254                let instruction_span = instruction_start..p.text.len() - s.len();
1255                if spaces_or_line_end(s, p.escape_byte) {
1256                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
1257                }
1258            } else if token_slow(s, &b"SHELL"[1..], p.escape_byte) {
1259                let instruction_span = instruction_start..p.text.len() - s.len();
1260                if spaces_or_line_end(s, p.escape_byte) {
1261                    return parse_shell(p, s, Keyword { span: instruction_span });
1262                }
1263            } else if token_slow(s, &b"STOPSIGNAL"[1..], p.escape_byte) {
1264                let instruction_span = instruction_start..p.text.len() - s.len();
1265                if spaces_or_line_end(s, p.escape_byte) {
1266                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
1267                }
1268            }
1269        }
1270        b'U' => {
1271            if token(s, &b"USER"[1..]) || token_slow(s, &b"USER"[1..], p.escape_byte) {
1272                let instruction_span = instruction_start..p.text.len() - s.len();
1273                if spaces_or_line_end(s, p.escape_byte) {
1274                    return parse_user(p, s, Keyword { span: instruction_span });
1275                }
1276            }
1277        }
1278        b'V' => {
1279            if token(s, &b"VOLUME"[1..]) || token_slow(s, &b"VOLUME"[1..], p.escape_byte) {
1280                let instruction_span = instruction_start..p.text.len() - s.len();
1281                if spaces_or_line_end(s, p.escape_byte) {
1282                    return parse_volume(p, s, Keyword { span: instruction_span });
1283                }
1284            }
1285        }
1286        b'W' => {
1287            if token(s, &b"WORKDIR"[1..]) || token_slow(s, &b"WORKDIR"[1..], p.escape_byte) {
1288                let instruction_span = instruction_start..p.text.len() - s.len();
1289                if spaces_or_line_end(s, p.escape_byte) {
1290                    return parse_workdir(p, s, Keyword { span: instruction_span });
1291                }
1292            }
1293        }
1294        _ => {}
1295    }
1296    Err(ErrorKind::UnknownInstruction { instruction_start })
1297}
1298
1299#[inline]
1300fn parse_arg<'a>(
1301    p: &mut ParseIter<'a>,
1302    s: &mut &'a [u8],
1303    instruction: Keyword,
1304) -> Result<Instruction<'a>, ErrorKind> {
1305    debug_assert!(token_slow(
1306        &mut p.text[instruction.span.clone()].as_bytes(),
1307        b"ARG",
1308        p.escape_byte,
1309    ));
1310    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1311    if arguments.value.is_empty() {
1312        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1313    }
1314    Ok(Instruction::Arg(ArgInstruction { arg: instruction, arguments }))
1315}
1316
1317#[inline]
1318fn parse_add_or_copy<'a>(
1319    p: &mut ParseIter<'a>,
1320    s: &mut &'a [u8],
1321    instruction: &Keyword,
1322) -> Result<(SmallVec<[Flag<'a>; 1]>, SmallVec<[Source<'a>; 1]>, UnescapedString<'a>), ErrorKind> {
1323    debug_assert!(
1324        token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"ADD", p.escape_byte,)
1325            || token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"COPY", p.escape_byte,)
1326    );
1327    let options = parse_options(s, p.text, p.escape_byte);
1328    if is_maybe_json(s) {
1329        let mut tmp = *s;
1330        if let Ok(((src, dest), _array_span)) = parse_json_array::<(
1331            SmallVec<[Source<'_>; 1]>,
1332            Option<_>,
1333        )>(&mut tmp, p.text, p.escape_byte)
1334        {
1335            debug_assert!(is_line_end(tmp.first()));
1336            if tmp.is_empty() {
1337                *s = &[];
1338            } else {
1339                *s = &tmp[1..];
1340            }
1341            if src.is_empty() {
1342                return Err(ErrorKind::AtLeastTwoArguments {
1343                    instruction_start: instruction.span.start,
1344                });
1345            }
1346            return Ok((options, src, dest.unwrap()));
1347        }
1348    }
1349    let (mut src, dest) = collect_space_separated_unescaped_consume_line::<(
1350        SmallVec<[Source<'_>; 1]>,
1351        Option<_>,
1352    )>(s, p.text, p.escape_byte);
1353    if src.is_empty() {
1354        return Err(ErrorKind::AtLeastTwoArguments { instruction_start: instruction.span.start });
1355    }
1356    for src in &mut src {
1357        let Source::Path(path) = src else { unreachable!() };
1358        let Some(mut delim) = path.value.as_bytes().strip_prefix(b"<<") else { continue };
1359        if delim.is_empty() {
1360            continue;
1361        }
1362        let mut strip_tab = false;
1363        let mut quote = None;
1364        if let Some((&b'-', delim_next)) = delim.split_first() {
1365            strip_tab = true;
1366            delim = delim_next;
1367        }
1368        if let Some((&b, delim_next)) = delim.split_first() {
1369            if matches!(b, b'"' | b'\'') {
1370                quote = Some(b);
1371                delim = delim_next;
1372                if delim.last() != Some(&b) {
1373                    return Err(ErrorKind::ExpectedOwned(
1374                        format!(
1375                            "quote ({}), but found '{}'",
1376                            b as char,
1377                            *delim.last().unwrap_or(&0) as char
1378                        ),
1379                        p.text.len() - s.len(),
1380                    ));
1381                }
1382                delim = &delim[..delim.len() - 1];
1383            }
1384        }
1385        if strip_tab {
1386            let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
1387            *src = Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc });
1388        } else {
1389            let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
1390            *src =
1391                Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc.into() });
1392        }
1393    }
1394    Ok((options, src, dest.unwrap()))
1395}
1396
1397#[allow(clippy::unnecessary_wraps)]
1398#[inline]
1399fn parse_cmd<'a>(
1400    p: &mut ParseIter<'a>,
1401    s: &mut &'a [u8],
1402    instruction: Keyword,
1403) -> Result<Instruction<'a>, ErrorKind> {
1404    debug_assert!(token_slow(
1405        &mut p.text[instruction.span.clone()].as_bytes(),
1406        b"CMD",
1407        p.escape_byte,
1408    ));
1409    if is_maybe_json(s) {
1410        let mut tmp = *s;
1411        if let Ok((arguments, array_span)) =
1412            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1413        {
1414            debug_assert!(is_line_end(tmp.first()));
1415            if tmp.is_empty() {
1416                *s = &[];
1417            } else {
1418                *s = &tmp[1..];
1419            }
1420            // "CMD []" seems to be okay?
1421            // https://github.com/moby/buildkit/blob/6d143f5602a61acef286f39ee75f1cb33c367d44/frontend/dockerfile/parser/testfiles/brimstone-docker-consul/Dockerfile#L3
1422            return Ok(Instruction::Cmd(CmdInstruction {
1423                cmd: instruction,
1424                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1425            }));
1426        }
1427    }
1428    let arguments_start = p.text.len() - s.len();
1429    skip_this_line(s, p.escape_byte);
1430    let end = p.text.len() - s.len();
1431    let arguments = p.text[arguments_start..end].trim_ascii_end();
1432    Ok(Instruction::Cmd(CmdInstruction {
1433        cmd: instruction,
1434        arguments: Command::Shell(Spanned {
1435            span: arguments_start..arguments_start + arguments.len(),
1436            value: arguments,
1437        }),
1438    }))
1439}
1440
1441#[inline]
1442fn parse_env<'a>(
1443    p: &mut ParseIter<'a>,
1444    s: &mut &'a [u8],
1445    instruction: Keyword,
1446) -> Result<Instruction<'a>, ErrorKind> {
1447    debug_assert!(token_slow(
1448        &mut p.text[instruction.span.clone()].as_bytes(),
1449        b"ENV",
1450        p.escape_byte,
1451    ));
1452    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1453    if arguments.value.is_empty() {
1454        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1455    }
1456    Ok(Instruction::Env(EnvInstruction { env: instruction, arguments }))
1457}
1458
1459#[inline]
1460fn parse_expose<'a>(
1461    p: &mut ParseIter<'a>,
1462    s: &mut &'a [u8],
1463    instruction: Keyword,
1464) -> Result<Instruction<'a>, ErrorKind> {
1465    debug_assert!(token_slow(
1466        &mut p.text[instruction.span.clone()].as_bytes(),
1467        b"EXPOSE",
1468        p.escape_byte,
1469    ));
1470    let arguments: SmallVec<[_; 1]> =
1471        collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1472    if arguments.is_empty() {
1473        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1474    }
1475    Ok(Instruction::Expose(ExposeInstruction { expose: instruction, arguments }))
1476}
1477
1478#[inline]
1479fn parse_entrypoint<'a>(
1480    p: &mut ParseIter<'a>,
1481    s: &mut &'a [u8],
1482    instruction: Keyword,
1483) -> Result<Instruction<'a>, ErrorKind> {
1484    debug_assert!(token_slow(
1485        &mut p.text[instruction.span.clone()].as_bytes(),
1486        b"ENTRYPOINT",
1487        p.escape_byte,
1488    ));
1489    if is_maybe_json(s) {
1490        let mut tmp = *s;
1491        if let Ok((arguments, array_span)) =
1492            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1493        {
1494            debug_assert!(is_line_end(tmp.first()));
1495            if tmp.is_empty() {
1496                *s = &[];
1497            } else {
1498                *s = &tmp[1..];
1499            }
1500            if arguments.is_empty() {
1501                return Err(ErrorKind::AtLeastOneArgument {
1502                    instruction_start: instruction.span.start,
1503                });
1504            }
1505            return Ok(Instruction::Entrypoint(EntrypointInstruction {
1506                entrypoint: instruction,
1507                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1508            }));
1509        }
1510    }
1511    let arguments_start = p.text.len() - s.len();
1512    skip_this_line(s, p.escape_byte);
1513    let end = p.text.len() - s.len();
1514    let arguments = p.text[arguments_start..end].trim_ascii_end();
1515    if arguments.is_empty() {
1516        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1517    }
1518    Ok(Instruction::Entrypoint(EntrypointInstruction {
1519        entrypoint: instruction,
1520        arguments: Command::Shell(Spanned {
1521            span: arguments_start..arguments_start + arguments.len(),
1522            value: arguments,
1523        }),
1524    }))
1525}
1526
1527#[inline]
1528fn parse_from<'a>(
1529    p: &mut ParseIter<'a>,
1530    s: &mut &'a [u8],
1531    instruction: Keyword,
1532) -> Result<Instruction<'a>, ErrorKind> {
1533    debug_assert!(token_slow(
1534        &mut p.text[instruction.span.clone()].as_bytes(),
1535        b"FROM",
1536        p.escape_byte,
1537    ));
1538    let options = parse_options(s, p.text, p.escape_byte);
1539    // TODO: https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L302
1540    // > base name (%s) should not be blank
1541    let image = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
1542    if image.value.is_empty() {
1543        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1544    }
1545    let mut as_ = None;
1546    if skip_spaces(s, p.escape_byte) {
1547        let as_start = p.text.len() - s.len();
1548        if token(s, b"AS") || token_slow(s, b"AS", p.escape_byte) {
1549            let as_span = as_start..p.text.len() - s.len();
1550            if !skip_spaces(s, p.escape_byte) {
1551                return Err(ErrorKind::Expected("AS", as_start));
1552            }
1553            let name = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
1554            skip_spaces(s, p.escape_byte);
1555            if !is_line_end(s.first()) {
1556                return Err(ErrorKind::Expected("newline or eof", p.text.len() - s.len()));
1557            }
1558            as_ = Some((Keyword { span: as_span }, name));
1559        } else if !is_line_end(s.first()) {
1560            return Err(ErrorKind::Expected("AS", as_start));
1561        }
1562    }
1563    Ok(Instruction::From(FromInstruction { from: instruction, options, image, as_ }))
1564}
1565
1566#[inline]
1567fn parse_healthcheck<'a>(
1568    p: &mut ParseIter<'a>,
1569    s: &mut &'a [u8],
1570    instruction: Keyword,
1571) -> Result<Instruction<'a>, ErrorKind> {
1572    debug_assert!(token_slow(
1573        &mut p.text[instruction.span.clone()].as_bytes(),
1574        b"HEALTHCHECK",
1575        p.escape_byte,
1576    ));
1577    let options = parse_options(s, p.text, p.escape_byte);
1578    let Some((&b, s_next)) = s.split_first() else {
1579        return Err(ErrorKind::Expected("CMD or NONE", p.text.len() - s.len()));
1580    };
1581    let cmd_or_none_start = p.text.len() - s.len();
1582    match b & TO_UPPER8 {
1583        b'C' => {
1584            *s = s_next;
1585            if token(s, &b"CMD"[1..]) || token_slow(s, &b"CMD"[1..], p.escape_byte) {
1586                let cmd_span = cmd_or_none_start..p.text.len() - s.len();
1587                let cmd_keyword = Keyword { span: cmd_span };
1588                if spaces_or_line_end(s, p.escape_byte) {
1589                    if is_maybe_json(s) {
1590                        let mut tmp = *s;
1591                        if let Ok((arguments, array_span)) =
1592                            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1593                        {
1594                            debug_assert!(is_line_end(tmp.first()));
1595                            if tmp.is_empty() {
1596                                *s = &[];
1597                            } else {
1598                                *s = &tmp[1..];
1599                            }
1600                            if arguments.is_empty() {
1601                                return Err(ErrorKind::Expected(
1602                                    "at least 1 arguments",
1603                                    array_span.start,
1604                                ));
1605                            }
1606                            return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1607                                healthcheck: instruction,
1608                                options,
1609                                arguments: HealthcheckArguments::Cmd {
1610                                    cmd: cmd_keyword,
1611                                    arguments: Command::Exec(Spanned {
1612                                        span: array_span,
1613                                        value: arguments,
1614                                    }),
1615                                },
1616                            }));
1617                        }
1618                    }
1619                    let arguments_start = p.text.len() - s.len();
1620                    skip_this_line(s, p.escape_byte);
1621                    let end = p.text.len() - s.len();
1622                    let arguments = p.text[arguments_start..end].trim_ascii_end();
1623                    return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1624                        healthcheck: instruction,
1625                        options,
1626                        arguments: HealthcheckArguments::Cmd {
1627                            cmd: cmd_keyword,
1628                            arguments: Command::Shell(Spanned {
1629                                span: arguments_start..arguments_start + arguments.len(),
1630                                value: arguments,
1631                            }),
1632                        },
1633                    }));
1634                }
1635            }
1636        }
1637        b'N' => {
1638            *s = s_next;
1639            if token(s, &b"NONE"[1..]) || token_slow(s, &b"NONE"[1..], p.escape_byte) {
1640                let none_span = cmd_or_none_start..p.text.len() - s.len();
1641                skip_spaces(s, p.escape_byte);
1642                if !is_line_end(s.first()) {
1643                    // TODO: error kind
1644                    return Err(ErrorKind::Expected(
1645                        "HEALTHCHECK NONE takes no arguments",
1646                        p.text.len() - s.len(),
1647                    ));
1648                }
1649                // TODO: HEALTHCHECK NONE doesn't support options
1650                let none_keyword = Keyword { span: none_span };
1651                return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1652                    healthcheck: instruction,
1653                    options,
1654                    arguments: HealthcheckArguments::None { none: none_keyword },
1655                }));
1656            }
1657        }
1658        _ => {}
1659    }
1660    Err(ErrorKind::Expected("CMD or NONE", p.text.len() - s.len()))
1661}
1662
1663#[inline]
1664fn parse_label<'a>(
1665    p: &mut ParseIter<'a>,
1666    s: &mut &'a [u8],
1667    instruction: Keyword,
1668) -> Result<Instruction<'a>, ErrorKind> {
1669    debug_assert!(token_slow(
1670        &mut p.text[instruction.span.clone()].as_bytes(),
1671        b"LABEL",
1672        p.escape_byte,
1673    ));
1674    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1675    if arguments.value.is_empty() {
1676        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1677    }
1678    Ok(Instruction::Label(LabelInstruction { label: instruction, arguments }))
1679}
1680
1681#[cold]
1682fn parse_maintainer<'a>(
1683    p: &mut ParseIter<'a>,
1684    s: &mut &'a [u8],
1685    instruction: Keyword,
1686) -> Result<Instruction<'a>, ErrorKind> {
1687    debug_assert!(token_slow(
1688        &mut p.text[instruction.span.clone()].as_bytes(),
1689        b"MAINTAINER",
1690        p.escape_byte,
1691    ));
1692    let name = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1693    if name.value.is_empty() {
1694        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1695    }
1696    Ok(Instruction::Maintainer(MaintainerInstruction { maintainer: instruction, name }))
1697}
1698
1699#[inline]
1700fn parse_onbuild<'a>(
1701    p: &mut ParseIter<'a>,
1702    s: &mut &'a [u8],
1703    instruction: Keyword,
1704) -> Result<Instruction<'a>, ErrorKind> {
1705    debug_assert!(token_slow(
1706        &mut p.text[instruction.span.clone()].as_bytes(),
1707        b"ONBUILD",
1708        p.escape_byte,
1709    ));
1710    // https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1711    if mem::replace(&mut p.in_onbuild, true) {
1712        // TODO: error kind
1713        return Err(ErrorKind::Expected("ONBUILD ONBUILD is not allowed", instruction.span.start));
1714    }
1715    let Some((&b, s_next)) = s.split_first() else {
1716        return Err(ErrorKind::Expected("instruction after ONBUILD", instruction.span.start));
1717    };
1718    // TODO: https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1719    // match b & TO_UPPER8 {
1720    //     b'F' => {
1721    //         if token(s, b"FROM") || token_slow(s, b"FROM", p.escape_byte) {
1722    //             // TODO: error kind
1723    //             return Err(ErrorKind::Expected(
1724    //                 "ONBUILD FROM is not allowed",
1725    //                 instruction.span.start,
1726    //             ));
1727    //         }
1728    //     }
1729    //     b'M' => {
1730    //         if token(s, b"MAINTAINER")
1731    //             || token_slow(s, b"MAINTAINER", p.escape_byte)
1732    //         {
1733    //             // TODO: error kind
1734    //             return Err(ErrorKind::Expected(
1735    //                 "ONBUILD MAINTAINER is not allowed",
1736    //                 instruction.span.start,
1737    //             ));
1738    //         }
1739    //     }
1740    //     _ => {}
1741    // }
1742    let inner_instruction = parse_instruction(p, s, b, s_next)?;
1743    p.in_onbuild = false;
1744    Ok(Instruction::Onbuild(OnbuildInstruction {
1745        onbuild: instruction,
1746        instruction: Box::new(inner_instruction),
1747    }))
1748}
1749
1750#[inline]
1751fn parse_run<'a>(
1752    p: &mut ParseIter<'a>,
1753    s: &mut &'a [u8],
1754    instruction: Keyword,
1755) -> Result<Instruction<'a>, ErrorKind> {
1756    debug_assert!(token_slow(
1757        &mut p.text[instruction.span.clone()].as_bytes(),
1758        b"RUN",
1759        p.escape_byte,
1760    ));
1761    let options = parse_options(s, p.text, p.escape_byte);
1762    if is_maybe_json(s) {
1763        let mut tmp = *s;
1764        if let Ok((arguments, array_span)) =
1765            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1766        {
1767            debug_assert!(is_line_end(tmp.first()));
1768            if tmp.is_empty() {
1769                *s = &[];
1770            } else {
1771                *s = &tmp[1..];
1772            }
1773            if arguments.is_empty() {
1774                return Err(ErrorKind::AtLeastOneArgument {
1775                    instruction_start: instruction.span.start,
1776                });
1777            }
1778            return Ok(Instruction::Run(RunInstruction {
1779                run: instruction,
1780                options,
1781                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1782                // TODO: https://github.com/moby/buildkit/issues/2207
1783                here_docs: vec![],
1784            }));
1785        }
1786    }
1787
1788    // https://docs.docker.com/reference/dockerfile/#here-documents
1789    let mut strip_tab = false;
1790    let mut quote = None;
1791    let mut pos = 2;
1792    // At least 5, <<E\nE
1793    if s.len() >= 5 && s.starts_with(b"<<") && {
1794        if s[pos] == b'-' {
1795            strip_tab = true;
1796            pos += 1;
1797        }
1798        if matches!(s[pos], b'"' | b'\'') {
1799            quote = Some(s[pos]);
1800            pos += 1;
1801        }
1802        // TODO: non-ascii_alphanumeric
1803        s[pos].is_ascii_alphanumeric()
1804    } {
1805        *s = &s[pos..];
1806        let delim_start = p.text.len() - s.len();
1807        // TODO: non-ascii_alphanumeric
1808        while let Some((&b, s_next)) = s.split_first() {
1809            if b.is_ascii_alphanumeric() {
1810                *s = s_next;
1811                continue;
1812            }
1813            break;
1814        }
1815        let delim = &p.text.as_bytes()[delim_start..p.text.len() - s.len()];
1816        if let Some(quote) = quote {
1817            if let Some((&b, s_next)) = s.split_first() {
1818                if b != quote {
1819                    return Err(ErrorKind::ExpectedOwned(
1820                        format!("quote ({}), but found '{}'", quote as char, b as char),
1821                        p.text.len() - s.len(),
1822                    ));
1823                }
1824                *s = s_next;
1825            } else {
1826                return Err(ErrorKind::ExpectedOwned(
1827                    format!("quote ({}), but reached eof", quote as char),
1828                    p.text.len() - s.len(),
1829                ));
1830            }
1831        }
1832        // TODO: skip space
1833        let arguments_start = p.text.len() - s.len();
1834        skip_this_line(s, p.escape_byte);
1835        let end = p.text.len() - s.len();
1836        let arguments = p.text[arguments_start..end].trim_ascii_end();
1837        let here_doc = if strip_tab {
1838            let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
1839            HereDoc { span, expand: quote.is_none(), value: here_doc }
1840        } else {
1841            let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
1842            HereDoc { span, expand: quote.is_none(), value: here_doc.into() }
1843        };
1844        return Ok(Instruction::Run(RunInstruction {
1845            run: instruction,
1846            options,
1847            arguments: Command::Shell(Spanned {
1848                span: arguments_start..arguments_start + arguments.len(),
1849                value: arguments,
1850            }),
1851            // TODO: multiple here-docs
1852            here_docs: vec![here_doc],
1853        }));
1854    }
1855
1856    let arguments_start = p.text.len() - s.len();
1857    skip_this_line(s, p.escape_byte);
1858    let end = p.text.len() - s.len();
1859    let arguments = p.text[arguments_start..end].trim_ascii_end();
1860    Ok(Instruction::Run(RunInstruction {
1861        run: instruction,
1862        options,
1863        arguments: Command::Shell(Spanned {
1864            span: arguments_start..arguments_start + arguments.len(),
1865            value: arguments,
1866        }),
1867        here_docs: vec![],
1868    }))
1869}
1870
1871#[inline]
1872fn parse_shell<'a>(
1873    p: &mut ParseIter<'a>,
1874    s: &mut &'a [u8],
1875    instruction: Keyword,
1876) -> Result<Instruction<'a>, ErrorKind> {
1877    debug_assert!(token_slow(
1878        &mut p.text[instruction.span.clone()].as_bytes(),
1879        b"SHELL",
1880        p.escape_byte,
1881    ));
1882    if !is_maybe_json(s) {
1883        return Err(ErrorKind::Expected("JSON array", p.text.len() - s.len()));
1884    }
1885    match parse_json_array::<SmallVec<[_; 4]>>(s, p.text, p.escape_byte) {
1886        Ok((arguments, _array_span)) => {
1887            if !s.is_empty() {
1888                *s = &s[1..];
1889            }
1890            if arguments.is_empty() {
1891                return Err(ErrorKind::AtLeastOneArgument {
1892                    instruction_start: instruction.span.start,
1893                });
1894            }
1895            Ok(Instruction::Shell(ShellInstruction { shell: instruction, arguments }))
1896        }
1897        Err(array_start) => Err(ErrorKind::Json { arguments_start: array_start }),
1898    }
1899}
1900
1901#[inline]
1902fn parse_stopsignal<'a>(
1903    p: &mut ParseIter<'a>,
1904    s: &mut &'a [u8],
1905    instruction: Keyword,
1906) -> Result<Instruction<'a>, ErrorKind> {
1907    debug_assert!(token_slow(
1908        &mut p.text[instruction.span.clone()].as_bytes(),
1909        b"STOPSIGNAL",
1910        p.escape_byte,
1911    ));
1912    // TODO: space is disallowed?
1913    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1914    if arguments.value.is_empty() {
1915        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1916    }
1917    Ok(Instruction::Stopsignal(StopsignalInstruction { stopsignal: instruction, arguments }))
1918}
1919
1920#[inline]
1921fn parse_user<'a>(
1922    p: &mut ParseIter<'a>,
1923    s: &mut &'a [u8],
1924    instruction: Keyword,
1925) -> Result<Instruction<'a>, ErrorKind> {
1926    debug_assert!(token_slow(
1927        &mut p.text[instruction.span.clone()].as_bytes(),
1928        b"USER",
1929        p.escape_byte,
1930    ));
1931    // TODO: space is disallowed?
1932    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1933    if arguments.value.is_empty() {
1934        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1935    }
1936    Ok(Instruction::User(UserInstruction { user: instruction, arguments }))
1937}
1938
1939#[inline]
1940fn parse_volume<'a>(
1941    p: &mut ParseIter<'a>,
1942    s: &mut &'a [u8],
1943    instruction: Keyword,
1944) -> Result<Instruction<'a>, ErrorKind> {
1945    debug_assert!(token_slow(
1946        &mut p.text[instruction.span.clone()].as_bytes(),
1947        b"VOLUME",
1948        p.escape_byte,
1949    ));
1950    if is_maybe_json(s) {
1951        let mut tmp = *s;
1952        if let Ok((arguments, array_span)) = parse_json_array(&mut tmp, p.text, p.escape_byte) {
1953            debug_assert!(is_line_end(tmp.first()));
1954            if tmp.is_empty() {
1955                *s = &[];
1956            } else {
1957                *s = &tmp[1..];
1958            }
1959            // "VOLUME []" seems to be okay?
1960            return Ok(Instruction::Volume(VolumeInstruction {
1961                volume: instruction,
1962                arguments: JsonOrStringArray::Json(Spanned { span: array_span, value: arguments }),
1963            }));
1964        }
1965    }
1966    let arguments: SmallVec<[_; 1]> =
1967        collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1968    if arguments.is_empty() {
1969        // TODO: "VOLUME" too?
1970        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1971    }
1972    Ok(Instruction::Volume(VolumeInstruction {
1973        volume: instruction,
1974        arguments: JsonOrStringArray::String(arguments),
1975    }))
1976}
1977
1978#[inline]
1979fn parse_workdir<'a>(
1980    p: &mut ParseIter<'a>,
1981    s: &mut &'a [u8],
1982    instruction: Keyword,
1983) -> Result<Instruction<'a>, ErrorKind> {
1984    debug_assert!(token_slow(
1985        &mut p.text[instruction.span.clone()].as_bytes(),
1986        b"WORKDIR",
1987        p.escape_byte,
1988    ));
1989    // TODO: space is disallowed if not escaped/quoted?
1990    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1991    if arguments.value.is_empty() {
1992        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1993    }
1994    Ok(Instruction::Workdir(WorkdirInstruction { workdir: instruction, arguments }))
1995}
1996
1997// -----------------------------------------------------------------------------
1998// Parsing Helpers
1999
// Byte-class bits stored in `TABLE`. A byte may belong to several classes at once.

/// Line terminators: `\r` and `\n`.
const LINE: u8 = 1 << 0;
/// Horizontal blanks: space and `\t`.
const SPACE: u8 = 1 << 1;
/// Union of `SPACE` and `LINE` bytes: space, `\r`, `\n` and `\t`.
const WHITESPACE: u8 = 1 << 2;
/// Comment introducer: `#`.
const COMMENT: u8 = 1 << 3;
/// Double quote: `"`.
const DOUBLE_QUOTE: u8 = 1 << 4;
/// Bytes that may act as the escape character: backslash and backtick.
const POSSIBLE_ESCAPE: u8 = 1 << 5;
/// Equals sign: `=`.
const EQ: u8 = 1 << 6;

/// Lookup table mapping every byte value to the bitwise OR of the classes it belongs to
/// (`0` for bytes in no class).
static TABLE: [u8; 256] = {
    let mut table = [0; 256];
    let mut i = 0;
    while i < table.len() {
        // `i` is below 256 here, so the cast is lossless.
        table[i] = match i as u8 {
            b' ' | b'\t' => WHITESPACE | SPACE,
            b'\n' | b'\r' => WHITESPACE | LINE,
            b'#' => COMMENT,
            b'"' => DOUBLE_QUOTE,
            b'\\' | b'`' => POSSIBLE_ESCAPE,
            b'=' => EQ,
            _ => 0,
        };
        i += 1;
    }
    table
};

/// The three bytes of the UTF-8 byte order mark.
const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
2037
/// Minimal abstraction over collections the parsing helpers can fill: it can be created
/// empty and can have values appended.
trait Store<T>: Sized {
    /// Creates an empty store.
    fn new() -> Self;
    /// Adds `val` to the store.
    fn push(&mut self, val: T);
}
/// `Vec` stores values by appending them.
impl<T> Store<T> for Vec<T> {
    #[inline]
    fn new() -> Self {
        Vec::new()
    }
    #[inline]
    fn push(&mut self, val: T) {
        Vec::push(self, val);
    }
}
2052impl<T, const N: usize> Store<T> for SmallVec<[T; N]> {
2053    #[inline]
2054    fn new() -> Self {
2055        Self::new()
2056    }
2057    #[inline]
2058    fn push(&mut self, val: T) {
2059        self.push(val);
2060    }
2061}
2062impl<'a, const N: usize> Store<UnescapedString<'a>>
2063    for (SmallVec<[Source<'a>; N]>, Option<UnescapedString<'a>>)
2064{
2065    #[inline]
2066    fn new() -> Self {
2067        (SmallVec::new(), None)
2068    }
2069    #[inline]
2070    fn push(&mut self, val: UnescapedString<'a>) {
2071        if let Some(val) = self.1.replace(val) {
2072            self.0.push(Source::Path(val));
2073        }
2074    }
2075}
2076
2077#[inline]
2078fn parse_options<'a, S: Store<Flag<'a>>>(s: &mut &[u8], start: &'a str, escape_byte: u8) -> S {
2079    let mut options = S::new();
2080    'outer: loop {
2081        let Some((&b'-', mut s_next)) = s.split_first() else {
2082            break;
2083        };
2084        loop {
2085            let Some((&b, s_next_next)) = s_next.split_first() else {
2086                break 'outer;
2087            };
2088            if b == b'-' {
2089                s_next = s_next_next;
2090                break;
2091            }
2092            if skip_line_escape(&mut s_next, b, s_next_next, escape_byte) {
2093                skip_line_escape_followup(&mut s_next, escape_byte);
2094                continue;
2095            }
2096            break 'outer;
2097        }
2098        let flag_start = start.len() - s.len();
2099        *s = s_next;
2100        let name = collect_until_unescaped::<{ WHITESPACE | EQ }>(s, start, escape_byte);
2101        let Some((&b'=', s_next)) = s.split_first() else {
2102            options.push(Flag { flag_start, name, value: None });
2103            skip_spaces(s, escape_byte);
2104            continue;
2105        };
2106        *s = s_next;
2107        let value = collect_non_whitespace_unescaped(s, start, escape_byte);
2108        options.push(Flag { flag_start, name, value: Some(value) });
2109        skip_spaces(s, escape_byte);
2110    }
2111    options
2112}
2113
2114fn parse_json_array<'a, S: Store<UnescapedString<'a>>>(
2115    s: &mut &[u8],
2116    start: &'a str,
2117    escape_byte: u8,
2118) -> Result<(S, Span), usize> {
2119    debug_assert_eq!(s.first(), Some(&b'['));
2120    debug_assert_ne!(s.get(1), Some(&b'['));
2121    let mut res = S::new();
2122    let array_start = start.len() - s.len();
2123    *s = &s[1..];
2124    skip_spaces(s, escape_byte);
2125    let (&b, s_next) = s.split_first().ok_or(array_start)?;
2126    match b {
2127        b'"' => {
2128            *s = s_next;
2129            loop {
2130                let full_word_start = start.len() - s.len();
2131                let mut word_start = full_word_start;
2132                let mut buf = String::new();
2133                loop {
2134                    let (&b, s_next) = s.split_first().ok_or(array_start)?;
2135                    if TABLE[b as usize] & (LINE | DOUBLE_QUOTE | POSSIBLE_ESCAPE) == 0 {
2136                        *s = s_next;
2137                        continue;
2138                    }
2139                    match b {
2140                        b'"' => break,
2141                        b'\n' | b'\r' => return Err(array_start),
2142                        _ => {}
2143                    }
2144                    let word_end = start.len() - s.len();
2145                    if skip_line_escape(s, b, s_next, escape_byte) {
2146                        skip_line_escape_followup(s, escape_byte);
2147                        // dockerfile escape
2148                        buf.push_str(&start[word_start..word_end]);
2149                        word_start = start.len() - s.len();
2150                        continue;
2151                    }
2152                    if b == b'\\' {
2153                        // JSON escape
2154                        let word_end = start.len() - s.len();
2155                        buf.push_str(&start[word_start..word_end]);
2156                        *s = s_next;
2157                        let (new, new_start) = match *s.first().ok_or(array_start)? {
2158                            b @ (b'"' | b'\\' | b'/') => (b as char, 1),
2159                            b'b' => ('\x08', 1),
2160                            b'f' => ('\x0c', 1),
2161                            b'n' => ('\n', 1),
2162                            b'r' => ('\r', 1),
2163                            b't' => ('\t', 1),
2164                            b'u' => (parse_json_hex_escape(s, array_start)?, 5),
2165                            _ => return Err(array_start), // invalid escape
2166                        };
2167                        buf.push(new);
2168                        *s = &s[new_start..];
2169                        word_start = start.len() - s.len();
2170                        continue;
2171                    }
2172                    *s = s_next;
2173                }
2174                let word_end = start.len() - s.len();
2175                let value = if buf.is_empty() {
2176                    // no escape
2177                    Cow::Borrowed(&start[word_start..word_end])
2178                } else {
2179                    buf.push_str(&start[word_start..word_end]);
2180                    Cow::Owned(buf)
2181                };
2182                res.push(UnescapedString { span: full_word_start..word_end, value });
2183                *s = &s[1..]; // drop "
2184                skip_spaces(s, escape_byte);
2185                let (&b, s_next) = s.split_first().ok_or(array_start)?;
2186                match b {
2187                    b',' => {
2188                        *s = s_next;
2189                        skip_spaces(s, escape_byte);
2190                        let (&b, s_next) = s.split_first().ok_or(array_start)?;
2191                        if b == b'"' {
2192                            *s = s_next;
2193                            continue;
2194                        }
2195                        return Err(array_start);
2196                    }
2197                    b']' => {
2198                        *s = s_next;
2199                        break;
2200                    }
2201                    _ => return Err(array_start),
2202                }
2203            }
2204        }
2205        b']' => *s = s_next,
2206        _ => return Err(array_start),
2207    }
2208    let array_end = start.len() - s.len();
2209    skip_spaces(s, escape_byte);
2210    if !is_line_end(s.first()) {
2211        return Err(array_start);
2212    }
2213    Ok((res, array_start..array_end))
2214}
2215// Adapted from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/src/read.rs
2216#[cold]
2217fn parse_json_hex_escape(s: &mut &[u8], array_start: usize) -> Result<char, usize> {
2218    fn decode_hex_escape(s: &mut &[u8], array_start: usize) -> Result<u16, usize> {
2219        if s.len() < 4 {
2220            return Err(array_start); // EofWhileParsingString
2221        }
2222
2223        let mut n = 0;
2224        for _ in 0..4 {
2225            let ch = decode_hex_val(s[0]);
2226            *s = &s[1..];
2227            match ch {
2228                None => return Err(array_start), // InvalidEscape
2229                Some(val) => {
2230                    n = (n << 4) + val;
2231                }
2232            }
2233        }
2234        Ok(n)
2235    }
2236
2237    fn decode_hex_val(val: u8) -> Option<u16> {
2238        let n = HEX_DECODE_TABLE[val as usize] as u16;
2239        if n == u8::MAX as u16 {
2240            None
2241        } else {
2242            Some(n)
2243        }
2244    }
2245
2246    let c = match decode_hex_escape(s, array_start)? {
2247        _n @ 0xDC00..=0xDFFF => return Err(array_start), // ErrorCode::LoneLeadingSurrogateInHexEscape)
2248
2249        // Non-BMP characters are encoded as a sequence of two hex
2250        // escapes, representing UTF-16 surrogates. If deserializing a
2251        // utf-8 string the surrogates are required to be paired,
2252        // whereas deserializing a byte string accepts lone surrogates.
2253        n1 @ 0xD800..=0xDBFF => {
2254            if s.first() == Some(&b'\\') {
2255                *s = &s[1..];
2256            } else {
2257                return Err(array_start); // UnexpectedEndOfHexEscape
2258            }
2259
2260            if s.first() == Some(&b'u') {
2261                *s = &s[1..];
2262            } else {
2263                return Err(array_start); // UnexpectedEndOfHexEscape
2264            }
2265
2266            let n2 = decode_hex_escape(s, array_start)?;
2267
2268            if n2 < 0xDC00 || n2 > 0xDFFF {
2269                return Err(array_start); // LoneLeadingSurrogateInHexEscape
2270            }
2271
2272            let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
2273
2274            match char::from_u32(n) {
2275                Some(c) => c,
2276                None => return Err(array_start), // InvalidUnicodeCodePoint
2277            }
2278        }
2279
2280        // Every u16 outside of the surrogate ranges above is guaranteed
2281        // to be a legal char.
2282        n => char::from_u32(n as u32).unwrap(),
2283    };
2284    Ok(c)
2285}
#[allow(clippy::needless_raw_string_hashes)]
#[test]
fn test_parse_json_array() {
    // Parses `text` from its first byte and checks both the parsed strings and the
    // input left unconsumed.
    fn expect_ok(text: &str, expected: &[UnescapedString<'_>], rest: &[u8]) {
        let mut s = text.as_bytes();
        let (values, _span) = parse_json_array::<Vec<_>>(&mut s, text, b'\\').unwrap();
        assert_eq!(values, expected);
        assert_eq!(s, rest);
    }
    // Parses `text` from its first byte and checks that it is rejected with the offset
    // of the opening bracket, leaving `rest` unconsumed.
    fn expect_err(text: &str, rest: &[u8]) {
        let mut s = text.as_bytes();
        assert_eq!(parse_json_array::<Vec<_>>(&mut s, text, b'\\'), Err(0));
        assert_eq!(s, rest);
    }

    // empty
    expect_ok(r#"[]"#, &[], b"");
    expect_ok(r#"[ ]"#, &[], b"");
    // one value
    expect_ok(r#"["abc"]"#, &[UnescapedString { span: 2..5, value: "abc".into() }], b"");
    // multi values
    expect_ok(
        "[\"ab\",\"c\" ,  \"de\" ] \n",
        &[
            UnescapedString { span: 2..4, value: "ab".into() },
            UnescapedString { span: 7..8, value: "c".into() },
            UnescapedString { span: 14..16, value: "de".into() },
        ],
        b"\n",
    );
    // escape
    // TODO: \uXXXX
    expect_ok(
        "[\"a\\\"\\\\\\/\\b\\f\\n\\r\\tbc\"]",
        &[UnescapedString { span: 2..21, value: "a\"\\/\x08\x0c\n\r\tbc".into() }],
        b"",
    );

    // fail (single quote)
    expect_err(r#"['abc']"#, br#"'abc']"#);
    // fail (extra comma)
    expect_err(r#"["abc",]"#, br#"]"#);
    // fail (extra char after array)
    expect_err(r#"["abc"] c"#, br#"c"#);
    // fail (invalid escape)
    expect_err("[\"ab\\c\"]", b"c\"]");
    // TODO: more from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/tests/test.rs#L1060
}
2347
2348/// Skips spaces and tabs, and returns `true` if one or more spaces or tabs ware
2349/// consumed. (not consumes non-spaces/tabs characters.
2350#[inline]
2351fn skip_spaces_no_escape(s: &mut &[u8]) -> bool {
2352    let start = *s;
2353    while let Some((&b, s_next)) = s.split_first() {
2354        if TABLE[b as usize] & SPACE != 0 {
2355            *s = s_next;
2356            continue;
2357        }
2358        break;
2359    }
2360    start.len() != s.len()
2361}
2362/// Skips spaces and tabs, and returns `true` if one or more spaces or tabs ware
2363/// consumed. (not consumes non-space/tab characters.
2364#[inline]
2365fn skip_spaces(s: &mut &[u8], escape_byte: u8) -> bool {
2366    let mut has_space = false;
2367    while let Some((&b, s_next)) = s.split_first() {
2368        let t = TABLE[b as usize];
2369        if t & (SPACE | POSSIBLE_ESCAPE) != 0 {
2370            if t & SPACE != 0 {
2371                *s = s_next;
2372                has_space = true;
2373                continue;
2374            }
2375            if skip_line_escape(s, b, s_next, escape_byte) {
2376                skip_line_escape_followup(s, escape_byte);
2377                continue;
2378            }
2379        }
2380        break;
2381    }
2382    has_space
2383}
2384/// Consumes spaces and tabs, and returns `true` if one or more spaces or tabs ware
2385/// consumed, or reached line end. (not consumes non-space/tab characters.
2386#[inline]
2387fn spaces_or_line_end(s: &mut &[u8], escape_byte: u8) -> bool {
2388    let mut has_space = false;
2389    loop {
2390        let Some((&b, s_next)) = s.split_first() else { return true };
2391        {
2392            let t = TABLE[b as usize];
2393            if t & (WHITESPACE | POSSIBLE_ESCAPE) != 0 {
2394                if t & SPACE != 0 {
2395                    *s = s_next;
2396                    has_space = true;
2397                    continue;
2398                }
2399                if t & LINE != 0 {
2400                    return true;
2401                }
2402                if skip_line_escape(s, b, s_next, escape_byte) {
2403                    skip_line_escape_followup(s, escape_byte);
2404                    continue;
2405                }
2406            }
2407            break;
2408        }
2409    }
2410    has_space
2411}
2412
2413#[inline]
2414fn skip_comments_and_whitespaces(s: &mut &[u8], escape_byte: u8) {
2415    while let Some((&b, s_next)) = s.split_first() {
2416        let t = TABLE[b as usize];
2417        if t & (WHITESPACE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2418            if t & WHITESPACE != 0 {
2419                *s = s_next;
2420                continue;
2421            }
2422            if t & COMMENT != 0 {
2423                *s = s_next;
2424                skip_this_line_no_escape(s);
2425                continue;
2426            }
2427            if skip_line_escape(s, b, s_next, escape_byte) {
2428                skip_line_escape_followup(s, escape_byte);
2429                continue;
2430            }
2431        }
2432        break;
2433    }
2434}
2435
/// Returns `true` if `b` is a line terminator (`\n` or `\r`) or `None` (end of input).
#[inline]
fn is_line_end(b: Option<&u8>) -> bool {
    match b {
        None => true,
        Some(&byte) => byte == b'\n' || byte == b'\r',
    }
}
/// Returns `true` if `s` starts with a single `[`, i.e. `[` not immediately followed by
/// another `[`.
#[inline]
fn is_maybe_json(s: &[u8]) -> bool {
    // ADD/COPY: checking [[ to handle escape of [ https://docs.docker.com/reference/dockerfile/#add
    // Others: TODO: checking [[ to handle [[ -e .. ], but not enough to check [ -e .. ]
    match s {
        [b'[', b'[', ..] => false,
        [b'[', ..] => true,
        _ => false,
    }
}
2446
2447#[inline]
2448fn collect_here_doc_no_strip_tab<'a>(
2449    s: &mut &[u8],
2450    start: &'a str,
2451    _escape_byte: u8,
2452    delim: &[u8],
2453) -> Result<(&'a str, Span), ErrorKind> {
2454    let here_doc_start = start.len() - s.len();
2455    loop {
2456        if s.len() < delim.len() {
2457            return Err(ErrorKind::ExpectedOwned(
2458                str::from_utf8(delim).unwrap().to_owned(),
2459                start.len() - s.len(),
2460            ));
2461        }
2462        if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
2463            break;
2464        }
2465        skip_this_line_no_escape(s);
2466    }
2467    let end = start.len() - s.len();
2468    *s = &s[delim.len()..];
2469    if !s.is_empty() {
2470        *s = &s[1..];
2471    }
2472    let span = here_doc_start..end;
2473    Ok((&start[span.clone()], span))
2474}
2475#[inline]
2476fn collect_here_doc_strip_tab<'a>(
2477    s: &mut &[u8],
2478    start: &'a str,
2479    _escape_byte: u8,
2480    delim: &[u8],
2481) -> Result<(Cow<'a, str>, Span), ErrorKind> {
2482    let here_doc_start = start.len() - s.len();
2483    let mut current_start = here_doc_start;
2484    let mut res = String::new();
2485    loop {
2486        if s.len() < delim.len() {
2487            return Err(ErrorKind::ExpectedOwned(
2488                str::from_utf8(delim).unwrap().to_owned(),
2489                start.len() - s.len(),
2490            ));
2491        }
2492        if let Some((&b'\t', s_next)) = s.split_first() {
2493            let end = start.len() - s.len();
2494            res.push_str(&start[current_start..end]);
2495            *s = s_next;
2496            while let Some((&b'\t', s_next)) = s.split_first() {
2497                *s = s_next;
2498            }
2499            current_start = start.len() - s.len();
2500        }
2501        if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
2502            break;
2503        }
2504        skip_this_line_no_escape(s);
2505    }
2506    let end = start.len() - s.len();
2507    *s = &s[delim.len()..];
2508    if !s.is_empty() {
2509        *s = &s[1..];
2510    }
2511    let span = here_doc_start..end;
2512    if here_doc_start == current_start {
2513        Ok((Cow::Borrowed(&start[span.clone()]), span))
2514    } else {
2515        res.push_str(&start[current_start..end]);
2516        Ok((Cow::Owned(res), span))
2517    }
2518}
2519// TODO: escaped/quoted space?
2520#[inline]
2521fn collect_space_separated_unescaped_consume_line<'a, S: Store<UnescapedString<'a>>>(
2522    s: &mut &[u8],
2523    start: &'a str,
2524    escape_byte: u8,
2525) -> S {
2526    let mut res = S::new();
2527    loop {
2528        let val = collect_non_whitespace_unescaped(s, start, escape_byte);
2529        if !val.value.is_empty() {
2530            res.push(val);
2531            if skip_spaces(s, escape_byte) {
2532                continue;
2533            }
2534        }
2535        debug_assert!(is_line_end(s.first()));
2536        if !s.is_empty() {
2537            *s = &s[1..];
2538        }
2539        break;
2540    }
2541    res
2542}
2543#[inline]
2544fn collect_non_whitespace_unescaped<'a>(
2545    s: &mut &[u8],
2546    start: &'a str,
2547    escape_byte: u8,
2548) -> UnescapedString<'a> {
2549    collect_until_unescaped::<WHITESPACE>(s, start, escape_byte)
2550}
2551#[inline]
2552fn collect_non_line_unescaped_consume_line<'a>(
2553    s: &mut &[u8],
2554    start: &'a str,
2555    escape_byte: u8,
2556) -> UnescapedString<'a> {
2557    let mut val = collect_until_unescaped::<LINE>(s, start, escape_byte);
2558    debug_assert!(is_line_end(s.first()));
2559    if !s.is_empty() {
2560        *s = &s[1..];
2561    }
2562    // trim trailing spaces
2563    match &mut val.value {
2564        Cow::Borrowed(v) => {
2565            while let Some(b' ' | b'\t') = v.as_bytes().last() {
2566                *v = &v[..v.len() - 1];
2567                val.span.end -= 1;
2568            }
2569        }
2570        Cow::Owned(v) => {
2571            while let Some(b' ' | b'\t') = v.as_bytes().last() {
2572                v.pop();
2573                val.span.end -= 1;
2574            }
2575        }
2576    }
2577    val
2578}
2579#[inline]
2580fn collect_until_unescaped<'a, const UNTIL_MASK: u8>(
2581    s: &mut &[u8],
2582    start: &'a str,
2583    escape_byte: u8,
2584) -> UnescapedString<'a> {
2585    let full_word_start = start.len() - s.len();
2586    let mut word_start = full_word_start;
2587    let mut buf = String::new();
2588    while let Some((&b, s_next)) = s.split_first() {
2589        let t = TABLE[b as usize];
2590        if t & (UNTIL_MASK | POSSIBLE_ESCAPE) != 0 {
2591            if t & UNTIL_MASK != 0 {
2592                break;
2593            }
2594            let word_end = start.len() - s.len();
2595            if skip_line_escape(s, b, s_next, escape_byte) {
2596                skip_line_escape_followup(s, escape_byte);
2597                buf.push_str(&start[word_start..word_end]);
2598                word_start = start.len() - s.len();
2599                continue;
2600            }
2601        }
2602        *s = s_next;
2603    }
2604    let word_end = start.len() - s.len();
2605    let value = if buf.is_empty() {
2606        // no escape
2607        Cow::Borrowed(&start[word_start..word_end])
2608    } else {
2609        buf.push_str(&start[word_start..word_end]);
2610        Cow::Owned(buf)
2611    };
2612    UnescapedString { span: full_word_start..word_end, value }
2613}
2614
2615/// Skips non-whitespace (non-`[ \r\n\t]`) characters, and returns `true`
2616/// if one or more non-whitespace characters are present. (not consumes whitespace character).
2617#[inline]
2618fn skip_non_whitespace_no_escape(s: &mut &[u8]) -> bool {
2619    let start = *s;
2620    while let Some((&b, s_next)) = s.split_first() {
2621        if TABLE[b as usize] & WHITESPACE != 0 {
2622            break;
2623        }
2624        *s = s_next;
2625    }
2626    start.len() != s.len()
2627}
2628// #[inline]
2629// fn skip_non_whitespace(s: &mut &[u8], escape_byte: u8) -> bool {
2630//     let mut has_non_whitespace = false;
2631//     while let Some((&b, s_next)) = s.split_first() {
2632//         if TABLE[b as usize] & WHITESPACE != 0 {
2633//             break;
2634//         }
2635//         if is_line_escape(b, s_next, escape_byte) {
2636//             skip_line_escape(s, b, s_next, escape_byte);
2637//             continue;
2638//         }
2639//         *s = s_next;
2640//         has_non_whitespace = true;
2641//         continue;
2642//     }
2643//     has_non_whitespace
2644// }
2645
2646#[inline]
2647fn skip_line_escape<'a>(s: &mut &'a [u8], b: u8, s_next: &'a [u8], escape_byte: u8) -> bool {
2648    if b == escape_byte {
2649        if let Some((&b, mut s_next)) = s_next.split_first() {
2650            if b == b'\n' {
2651                *s = s_next;
2652                return true;
2653            }
2654            if b == b'\r' {
2655                if s_next.first() == Some(&b'\n') {
2656                    *s = &s_next[1..];
2657                } else {
2658                    *s = s_next;
2659                }
2660                return true;
2661            }
2662            // It seems that "\\ \n" is also accepted.
2663            // https://github.com/moby/buildkit/blob/6d143f5602a61acef286f39ee75f1cb33c367d44/frontend/dockerfile/cmd/dockerfile-frontend/Dockerfile#L19C23-L19C24
2664            if TABLE[b as usize] & SPACE != 0 {
2665                skip_spaces_no_escape(&mut s_next);
2666                if let Some((&b, s_next)) = s_next.split_first() {
2667                    if b == b'\n' {
2668                        *s = s_next;
2669                        return true;
2670                    }
2671                    if b == b'\r' {
2672                        if s_next.first() == Some(&b'\n') {
2673                            *s = &s_next[1..];
2674                        } else {
2675                            *s = s_next;
2676                        }
2677                        return true;
2678                    }
2679                }
2680            }
2681        }
2682    }
2683    false
2684}
2685#[inline]
2686fn skip_line_escape_followup(s: &mut &[u8], _escape_byte: u8) {
2687    while let Some((&b, mut s_next)) = s.split_first() {
2688        let t = TABLE[b as usize];
2689        if t & (WHITESPACE | COMMENT) != 0 {
2690            if t & SPACE != 0 {
2691                // TODO: escape after spaces is handled in skip_spaces_no_escape
2692                skip_spaces_no_escape(&mut s_next);
2693                if let Some((&b, s_next)) = s_next.split_first() {
2694                    let t = TABLE[b as usize];
2695                    if t & (COMMENT | LINE) != 0 {
2696                        // comment or empty continuation line
2697                        *s = s_next;
2698                        if t & COMMENT != 0 {
2699                            skip_this_line_no_escape(s);
2700                        }
2701                        continue;
2702                    }
2703                }
2704            } else {
2705                // comment or empty continuation line
2706                *s = s_next;
2707                if t & COMMENT != 0 {
2708                    skip_this_line_no_escape(s);
2709                }
2710                continue;
2711            }
2712        }
2713        break;
2714    }
2715}
2716
2717#[inline]
2718fn skip_this_line_no_escape(s: &mut &[u8]) {
2719    while let Some((&b, s_next)) = s.split_first() {
2720        *s = s_next;
2721        if TABLE[b as usize] & LINE != 0 {
2722            break;
2723        }
2724    }
2725}
2726/// Skips non-line (non-`[\r\n]`) characters. (consumes line character).
2727#[inline]
2728fn skip_this_line(s: &mut &[u8], escape_byte: u8) {
2729    let mut has_space_only = 0;
2730    while let Some((&b, s_next)) = s.split_first() {
2731        let t = TABLE[b as usize];
2732        if t & (LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2733            if t & LINE != 0 {
2734                *s = s_next;
2735                break;
2736            }
2737            if has_space_only != 0 && t & COMMENT != 0 {
2738                *s = s_next;
2739                skip_this_line_no_escape(s);
2740                continue;
2741            }
2742            if skip_line_escape(s, b, s_next, escape_byte) {
2743                skip_line_escape_followup(s, escape_byte);
2744                has_space_only = SPACE;
2745                continue;
2746            }
2747        }
2748        has_space_only &= t;
2749        *s = s_next;
2750    }
2751}
2752
2753#[inline(always)]
2754fn token(s: &mut &[u8], token: &'static [u8]) -> bool {
2755    let matched = starts_with_ignore_ascii_case(s, token);
2756    if matched {
2757        *s = &s[token.len()..];
2758        true
2759    } else {
2760        false
2761    }
2762}
2763#[cold]
2764fn token_slow(s: &mut &[u8], mut token: &'static [u8], escape_byte: u8) -> bool {
2765    debug_assert!(!token.is_empty() && token.iter().all(|&n| n & TO_UPPER8 == n));
2766    if s.len() < token.len() {
2767        return false;
2768    }
2769    let mut tmp = *s;
2770    while let Some((&b, tmp_next)) = tmp.split_first() {
2771        if b & TO_UPPER8 == token[0] {
2772            tmp = tmp_next;
2773            token = &token[1..];
2774            if token.is_empty() {
2775                *s = tmp;
2776                return true;
2777            }
2778            continue;
2779        }
2780        if skip_line_escape(&mut tmp, b, tmp_next, escape_byte) {
2781            skip_line_escape_followup(&mut tmp, escape_byte);
2782            continue;
2783        }
2784        break;
2785    }
2786    false
2787}
2788
/// Mask that clears bit `0x20` of a byte, which maps an ASCII lower-case letter to
/// its upper-case counterpart.
const TO_UPPER8: u8 = 0xDF;
/// `TO_UPPER8` replicated into each of the eight bytes of a `u64`.
const TO_UPPER64: u64 = u64::from_ne_bytes([TO_UPPER8; 8]);
2791
2792#[inline(always)] // Ensure the code getting the length of the needle is inlined.
2793fn starts_with_ignore_ascii_case(mut s: &[u8], mut needle: &'static [u8]) -> bool {
2794    debug_assert!(!needle.is_empty() && needle.iter().all(|&n| n & TO_UPPER8 == n));
2795    if s.len() < needle.len() {
2796        return false;
2797    }
2798    if needle.len() == 1 {
2799        return needle[0] == s[0] & TO_UPPER8;
2800    }
2801    if needle.len() >= 8 {
2802        loop {
2803            if u64::from_ne_bytes(needle[..8].try_into().unwrap())
2804                != u64::from_ne_bytes(s[..8].try_into().unwrap()) & TO_UPPER64
2805            {
2806                return false;
2807            }
2808            needle = &needle[8..];
2809            s = &s[8..];
2810            if needle.len() < 8 {
2811                if needle.is_empty() {
2812                    return true;
2813                }
2814                break;
2815            }
2816        }
2817    }
2818    let s = {
2819        let mut buf = [0; 8];
2820        buf[..needle.len()].copy_from_slice(&s[..needle.len()]);
2821        u64::from_ne_bytes(buf)
2822    };
2823    let needle = {
2824        let mut buf = [0; 8];
2825        buf[..needle.len()].copy_from_slice(needle);
2826        u64::from_ne_bytes(buf)
2827    };
2828    needle == s & TO_UPPER64
2829}
#[test]
fn test_starts_with_ignore_ascii_case() {
    /// Checks that every haystack in `matching` starts with `needle` (ignoring
    /// ASCII case) and that `mismatching` does not.
    fn check(needle: &'static [u8], matching: [&[u8]; 3], mismatching: &[u8]) {
        for haystack in matching.iter() {
            assert!(starts_with_ignore_ascii_case(haystack, needle));
        }
        assert!(!starts_with_ignore_ascii_case(mismatching, needle));
    }

    // Shorter than one 8-byte word.
    check(b"ABC", [b"ABC", b"abc", b"AbC"], b"ABB");
    // Exactly one 8-byte word.
    check(b"ABCDEFGH", [b"ABCDEFGH", b"abcdefgh", b"AbCdEfGh"], b"ABCDEFGc");
    // Several words plus a tail.
    check(
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        [
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
            b"abcdefghijklmnopqrstuvwxyz",
            b"aBcDeFgHiJkLmNoPqRsTuVwXyZ",
        ],
        b"aBcDeFgHiJkLmNoPqRsTuVwXyc",
    );
}
2857
// Lookup table for ascii to hex decoding.
//
// Maps `0`-`9` to 0..=9 and `A`-`F` / `a`-`f` to 10..=15; every other byte maps
// to `u8::MAX`.
static HEX_DECODE_TABLE: [u8; 256] = {
    let mut table = [u8::MAX; 256];
    let mut digit = 0u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }
    let mut letter = 0u8;
    while letter < 6 {
        table[(b'A' + letter) as usize] = 10 + letter;
        table[(b'a' + letter) as usize] = 10 + letter;
        letter += 1;
    }
    table
};
parse_dockerfile/lib.rs

parse_dockerfile/
lib.rs