parse_dockerfile/lib.rs

1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*!
4Dockerfile parser, written in Rust.
5
6### Usage
7
8<!-- Note: Document from sync-markdown-to-rustdoc:start through sync-markdown-to-rustdoc:end
9     is synchronized from README.md. Any changes to that range are not preserved. -->
10<!-- tidy:sync-markdown-to-rustdoc:start -->
11
12To use this crate as a library, add this to your `Cargo.toml`:
13
14```toml
15[dependencies]
16parse-dockerfile = { version = "0.1", default-features = false }
17```
18
19<div class="rustdoc-alert rustdoc-alert-note">
20
21> **ⓘ Note**
22>
23> We recommend disabling default features because they enable CLI-related
24> dependencies which the library part does not use.
25
26</div>
27
28<!-- omit in toc -->
29### Examples
30
31```
32use parse_dockerfile::{parse, Instruction};
33
34let text = r#"
35ARG UBUNTU_VERSION=latest
36
37FROM ubuntu:${UBUNTU_VERSION}
38RUN echo
39"#;
40
41let dockerfile = parse(text).unwrap();
42
43// Iterate over all instructions.
44let mut instructions = dockerfile.instructions.iter();
45assert!(matches!(instructions.next(), Some(Instruction::Arg(..))));
46assert!(matches!(instructions.next(), Some(Instruction::From(..))));
47assert!(matches!(instructions.next(), Some(Instruction::Run(..))));
48assert!(matches!(instructions.next(), None));
49
50// Iterate over global args.
51let mut global_args = dockerfile.global_args();
52let global_arg1 = global_args.next().unwrap();
53assert_eq!(global_arg1.arguments.value, "UBUNTU_VERSION=latest");
54assert!(matches!(global_args.next(), None));
55
56// Iterate over stages.
57let mut stages = dockerfile.stages();
58let stage1 = stages.next().unwrap();
59assert_eq!(stage1.from.image.value, "ubuntu:${UBUNTU_VERSION}");
60let mut stage1_instructions = stage1.instructions.iter();
61assert!(matches!(stage1_instructions.next(), Some(Instruction::Run(..))));
62assert!(matches!(stage1_instructions.next(), None));
63assert!(matches!(stages.next(), None));
64```
65
66<!-- omit in toc -->
67### Optional features
68
69- **`serde`** — Implements [`serde::Serialize`] trait for parse-dockerfile types.
70
71[`serde::Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
72
73<!-- tidy:sync-markdown-to-rustdoc:end -->
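
### Serializing parsed output

A minimal sketch of the `serde` feature, assuming `serde_json` is added as an
extra dependency (this crate does not depend on it):

```ignore
use parse_dockerfile::parse;

let dockerfile = parse("FROM ubuntu:latest\nRUN echo\n").unwrap();
// With the `serde` feature enabled, the parsed types implement `serde::Serialize`,
// so any serde serializer can be used on the result.
let json = serde_json::to_string_pretty(&dockerfile).unwrap();
println!("{json}");
```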
74*/
75
76#![doc(test(
77    no_crate_inject,
78    attr(
79        deny(warnings, rust_2018_idioms, single_use_lifetimes),
80        allow(dead_code, unused_variables)
81    )
82))]
83#![forbid(unsafe_code)]
84#![warn(
85    // Lints that may help when writing a public library.
86    missing_debug_implementations,
87    missing_docs,
88    clippy::alloc_instead_of_core,
89    clippy::exhaustive_enums,
90    clippy::exhaustive_structs,
91    clippy::impl_trait_in_params,
92    // clippy::missing_inline_in_public_items,
93    // clippy::std_instead_of_alloc,
94    clippy::std_instead_of_core,
95)]
96#![allow(clippy::inline_always)]
97
98#[cfg(test)]
99#[path = "gen/tests/assert_impl.rs"]
100mod assert_impl;
101#[cfg(test)]
102#[path = "gen/tests/track_size.rs"]
103mod track_size;
104
105mod error;
106
107use core::{mem, ops::Range, str};
108use std::{borrow::Cow, collections::HashMap};
109
110use smallvec::SmallVec;
111
112pub use self::error::Error;
113use self::error::{ErrorKind, Result};
114
115/// Parses a dockerfile from the given `text`.
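///
/// A quick sketch (inputs assumed) of two of the error cases:
///
/// ```
/// use parse_dockerfile::parse;
///
/// // Two stages with the same name are rejected.
/// assert!(parse("FROM alpine AS a\nFROM alpine AS a\n").is_err());
/// // An input that has no stage at all is rejected as well.
/// assert!(parse("ARG VERSION=latest\n").is_err());
/// ```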
116#[allow(clippy::missing_panics_doc)]
117pub fn parse(text: &str) -> Result<Dockerfile<'_>> {
118    let mut p = ParseIter::new(text)?;
119    let mut s = p.s;
120
121    let mut instructions = Vec::with_capacity(p.text.len() / 60);
122    let mut stages = Vec::with_capacity(1);
123    let mut named_stages = 0;
124    let mut current_stage = None;
125    while let Some((&b, s_next)) = s.split_first() {
126        let instruction =
127            parse_instruction(&mut p, &mut s, b, s_next).map_err(|e| e.into_error(&p))?;
128        match instruction {
129            Instruction::From(from) => {
130                named_stages += from.as_.is_some() as usize;
131                let new_stage = instructions.len();
132                if let Some(prev_stage) = current_stage.replace(new_stage) {
133                    stages.push(prev_stage..new_stage);
134                }
135                instructions.push(Instruction::From(from));
136            }
137            arg @ Instruction::Arg(..) => instructions.push(arg),
138            instruction => {
139                if current_stage.is_none() {
140                    return Err(ErrorKind::Expected("FROM", instruction.instruction_span().start)
141                        .into_error(&p));
142                }
143                instructions.push(instruction);
144            }
145        }
146        skip_comments_and_whitespaces(&mut s, p.escape_byte);
147    }
148    if let Some(current_stage) = current_stage {
149        stages.push(current_stage..instructions.len());
150    }
151
152    if stages.is_empty() {
153        // https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L263
154        return Err(ErrorKind::NoStages.into_error(&p));
155    }
156    // TODO: https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L302
157    // > base name (%s) should not be blank
158
159    let mut stages_by_name = HashMap::with_capacity(named_stages);
160    for (i, stage) in stages.iter().enumerate() {
161        let Instruction::From(from) = &instructions[stage.start] else { unreachable!() };
162        if let Some((_as, name)) = &from.as_ {
163            if let Some(first_occurrence) = stages_by_name.insert(name.value.clone(), i) {
164                let Instruction::From(from) = &instructions[stages[first_occurrence].start] else {
165                    unreachable!()
166                };
167                let first = from.as_.as_ref().unwrap().1.span.clone();
168                let second = name.span.clone();
169                return Err(ErrorKind::DuplicateName { first, second }.into_error(&p));
170            }
171        }
172    }
173
174    Ok(Dockerfile { parser_directives: p.parser_directives, instructions, stages, stages_by_name })
175}
176
177/// Returns an iterator over instructions in the given `text`.
178///
179/// Unlike the [`parse`] function, the returned iterator does not error on
180/// duplicate stage names.
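///
/// A small sketch (input assumed) of driving the iterator:
///
/// ```
/// use parse_dockerfile::{parse_iter, Instruction};
///
/// let mut instructions = parse_iter("FROM alpine\nRUN echo hello\n").unwrap();
/// assert!(matches!(instructions.next(), Some(Ok(Instruction::From(..)))));
/// assert!(matches!(instructions.next(), Some(Ok(Instruction::Run(..)))));
/// assert!(matches!(instructions.next(), None));
/// ```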
181pub fn parse_iter(text: &str) -> Result<ParseIter<'_>> {
182    ParseIter::new(text)
183}
184
185/// A dockerfile.
186#[derive(Debug)]
187#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
188#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
189pub struct Dockerfile<'a> {
190    /// Parser directives.
191    pub parser_directives: ParserDirectives<'a>,
192    /// Instructions.
193    pub instructions: Vec<Instruction<'a>>,
194    #[cfg_attr(feature = "serde", serde(skip))]
195    stages: Vec<Range<usize>>,
196    #[cfg_attr(feature = "serde", serde(skip))]
197    stages_by_name: HashMap<Cow<'a, str>, usize>,
198}
199impl<'a> Dockerfile<'a> {
200    /// Returns an iterator over global args.
201    #[allow(clippy::missing_panics_doc)] // self.stages is not empty
202    #[must_use]
203    pub fn global_args<'b>(&'b self) -> impl ExactSizeIterator<Item = &'b ArgInstruction<'a>> {
204        self.instructions[..self.stages.first().unwrap().start].iter().map(|arg| {
205            let Instruction::Arg(arg) = arg else { unreachable!() };
206            arg
207        })
208    }
209    /// Gets a stage by name.
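    ///
    /// A small sketch (stage names assumed) of looking a stage up:
    ///
    /// ```
    /// use parse_dockerfile::parse;
    ///
    /// let dockerfile = parse("FROM alpine AS builder\nRUN echo\nFROM alpine\n").unwrap();
    /// // Named stages can be looked up; unknown names yield `None`.
    /// assert!(dockerfile.stage("builder").is_some());
    /// assert!(dockerfile.stage("missing").is_none());
    /// ```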
210    #[must_use]
211    pub fn stage<'b>(&'b self, name: &str) -> Option<Stage<'a, 'b>> {
212        let i = *self.stages_by_name.get(name)?;
213        let stage = &self.stages[i];
214        let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
215        Some(Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] })
216    }
217    /// Returns an iterator over stages.
218    #[must_use]
219    pub fn stages<'b>(&'b self) -> impl ExactSizeIterator<Item = Stage<'a, 'b>> {
220        self.stages.iter().map(move |stage| {
221            let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
222            Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] }
223        })
224    }
225}
226/// A stage.
227#[derive(Debug)]
228#[non_exhaustive]
229pub struct Stage<'a, 'b> {
230    /// The `FROM` instruction.
231    pub from: &'b FromInstruction<'a>,
232    /// The remaining instructions.
233    pub instructions: &'b [Instruction<'a>],
234}
235
236/// Parser directives.
237///
238/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#parser-directives)
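///
/// A small sketch (directive values assumed) of reading a parsed directive:
///
/// ```
/// use parse_dockerfile::parse;
///
/// let dockerfile = parse("# escape=`\nFROM alpine\n").unwrap();
/// // The `escape` directive switches the escape character to a backtick.
/// let escape = dockerfile.parser_directives.escape.as_ref().unwrap();
/// assert_eq!(escape.value.value, '`');
/// assert!(dockerfile.parser_directives.syntax.is_none());
/// ```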
239#[derive(Debug)]
240#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
241#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
242#[non_exhaustive]
243pub struct ParserDirectives<'a> {
244    /// `syntax` parser directive.
245    ///
246    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#syntax)
247    pub syntax: Option<ParserDirective<&'a str>>,
248    /// `escape` parser directive.
249    ///
250    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#escape)
251    pub escape: Option<ParserDirective<char>>,
252    /// `check` parser directive.
253    ///
254    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#check)
255    pub check: Option<ParserDirective<&'a str>>,
256}
257/// A parser directive.
258#[derive(Debug)]
259#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
260#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
261pub struct ParserDirective<T> {
262    /// ```text
263    /// syntax=value
264    /// ^
265    /// ```
266    start: usize,
267    /// ```text
268    /// syntax=value
269    ///        ^^^^^
270    /// ```
271    pub value: Spanned<T>,
272}
273impl<T> ParserDirective<T> {
274    /// ```text
275    /// syntax=value
276    /// ^^^^^^^^^^^^
277    /// ```
278    #[must_use]
279    pub fn span(&self) -> Span {
280        self.start..self.value.span.end
281    }
282}
283
284/// An instruction.
285#[derive(Debug)]
286#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
287#[cfg_attr(feature = "serde", serde(tag = "kind"))]
288#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
289#[non_exhaustive]
290pub enum Instruction<'a> {
291    /// `ADD` instruction.
292    Add(AddInstruction<'a>),
293    /// `ARG` instruction.
294    Arg(ArgInstruction<'a>),
295    /// `CMD` instruction.
296    Cmd(CmdInstruction<'a>),
297    /// `COPY` instruction.
298    Copy(CopyInstruction<'a>),
299    /// `ENTRYPOINT` instruction.
300    Entrypoint(EntrypointInstruction<'a>),
301    /// `ENV` instruction.
302    Env(EnvInstruction<'a>),
303    /// `EXPOSE` instruction.
304    Expose(ExposeInstruction<'a>),
305    /// `FROM` instruction.
306    From(FromInstruction<'a>),
307    /// `HEALTHCHECK` instruction.
308    Healthcheck(HealthcheckInstruction<'a>),
309    /// `LABEL` instruction.
310    Label(LabelInstruction<'a>),
311    /// `MAINTAINER` instruction (deprecated).
312    Maintainer(MaintainerInstruction<'a>),
313    /// `ONBUILD` instruction.
314    Onbuild(OnbuildInstruction<'a>),
315    /// `RUN` instruction.
316    Run(RunInstruction<'a>),
317    /// `SHELL` instruction.
318    Shell(ShellInstruction<'a>),
319    /// `STOPSIGNAL` instruction.
320    Stopsignal(StopsignalInstruction<'a>),
321    /// `USER` instruction.
322    User(UserInstruction<'a>),
323    /// `VOLUME` instruction.
324    Volume(VolumeInstruction<'a>),
325    /// `WORKDIR` instruction.
326    Workdir(WorkdirInstruction<'a>),
327}
328impl Instruction<'_> {
329    fn instruction_span(&self) -> Span {
330        match self {
331            Instruction::Add(instruction) => instruction.add.span.clone(),
332            Instruction::Arg(instruction) => instruction.arg.span.clone(),
333            Instruction::Cmd(instruction) => instruction.cmd.span.clone(),
334            Instruction::Copy(instruction) => instruction.copy.span.clone(),
335            Instruction::Entrypoint(instruction) => instruction.entrypoint.span.clone(),
336            Instruction::Env(instruction) => instruction.env.span.clone(),
337            Instruction::Expose(instruction) => instruction.expose.span.clone(),
338            Instruction::From(instruction) => instruction.from.span.clone(),
339            Instruction::Healthcheck(instruction) => instruction.healthcheck.span.clone(),
340            Instruction::Label(instruction) => instruction.label.span.clone(),
341            Instruction::Maintainer(instruction) => instruction.maintainer.span.clone(),
342            Instruction::Onbuild(instruction) => instruction.onbuild.span.clone(),
343            Instruction::Run(instruction) => instruction.run.span.clone(),
344            Instruction::Shell(instruction) => instruction.shell.span.clone(),
345            Instruction::Stopsignal(instruction) => instruction.stopsignal.span.clone(),
346            Instruction::User(instruction) => instruction.user.span.clone(),
347            Instruction::Volume(instruction) => instruction.volume.span.clone(),
348            Instruction::Workdir(instruction) => instruction.workdir.span.clone(),
349        }
350    }
351}
352/// An `ADD` instruction.
353///
354/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#add)
355#[derive(Debug)]
356#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
357#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
358#[non_exhaustive]
359pub struct AddInstruction<'a> {
360    /// ```text
361    /// ADD [options] <src> ... <dest>
362    /// ^^^
363    /// ```
364    pub add: Keyword,
365    /// ```text
366    /// ADD [options] <src> ... <dest>
367    ///     ^^^^^^^^^
368    /// ```
369    pub options: SmallVec<[Flag<'a>; 1]>,
370    /// ```text
371    /// ADD [options] <src> ... <dest>
372    ///               ^^^^^^^^^
373    /// ```
374    // At least 1
375    pub src: SmallVec<[Source<'a>; 1]>,
376    /// ```text
377    /// ADD [options] <src> ... <dest>
378    ///                         ^^^^^^
379    /// ```
380    pub dest: UnescapedString<'a>,
381}
382/// An `ARG` instruction.
383///
384/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#arg)
385#[derive(Debug)]
386#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
387#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
388#[non_exhaustive]
389pub struct ArgInstruction<'a> {
390    /// ```text
391    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
392    /// ^^^
393    /// ```
394    pub arg: Keyword,
395    /// ```text
396    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
397    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
398    /// ```
399    // TODO: SmallVec<[NameOptValue<'a>; 1]>
400    pub arguments: UnescapedString<'a>,
401}
402/// A `CMD` instruction.
403///
404/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#cmd)
405#[derive(Debug)]
406#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
407#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
408#[non_exhaustive]
409pub struct CmdInstruction<'a> {
410    /// ```text
411    /// CMD ["executable", "param"]
412    /// ^^^
413    /// ```
414    pub cmd: Keyword,
415    /// ```text
416    /// CMD ["executable", "param"]
417    ///     ^^^^^^^^^^^^^^^^^^^^^^^
418    /// ```
419    pub arguments: Command<'a>,
420}
421/// A `COPY` instruction.
422///
423/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#copy)
424#[derive(Debug)]
425#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
426#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
427#[non_exhaustive]
428pub struct CopyInstruction<'a> {
429    /// ```text
430    /// COPY [options] <src> ... <dest>
431    /// ^^^^
432    /// ```
433    pub copy: Keyword,
434    /// ```text
435    /// COPY [options] <src> ... <dest>
436    ///      ^^^^^^^^^
437    /// ```
438    pub options: SmallVec<[Flag<'a>; 1]>,
439    /// ```text
440    /// COPY [options] <src> ... <dest>
441    ///                ^^^^^^^^^
442    /// ```
443    // At least 1
444    pub src: SmallVec<[Source<'a>; 1]>,
445    /// ```text
446    /// COPY [options] <src> ... <dest>
447    ///                          ^^^^^^
448    /// ```
449    pub dest: UnescapedString<'a>,
450}
451/// An enum that represents a source value of the [`ADD` instruction](AddInstruction) and
452/// the [`COPY` instruction](CopyInstruction).
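///
/// A rough sketch (paths assumed) of how sources appear on a parsed `COPY`:
///
/// ```
/// use parse_dockerfile::{parse, Instruction, Source};
///
/// let dockerfile = parse("FROM alpine\nCOPY a.txt b.txt /dst/\n").unwrap();
/// let stage = dockerfile.stages().next().unwrap();
/// let Some(Instruction::Copy(copy)) = stage.instructions.first() else { unreachable!() };
/// // The last argument is the destination; everything before it is a source.
/// assert_eq!(copy.src.len(), 2);
/// assert!(matches!(copy.src[0], Source::Path(..)));
/// assert_eq!(copy.dest.value, "/dst/");
/// ```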
453#[derive(Debug)]
454#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
455#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
456#[non_exhaustive]
457pub enum Source<'a> {
458    /// Path or URL.
459    Path(UnescapedString<'a>),
460    /// Here-document.
461    HereDoc(HereDoc<'a>),
462}
463/// An `ENTRYPOINT` instruction.
464///
465/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#entrypoint)
466#[derive(Debug)]
467#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
468#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
469#[non_exhaustive]
470pub struct EntrypointInstruction<'a> {
471    /// ```text
472    /// ENTRYPOINT ["executable", "param"]
473    /// ^^^^^^^^^^
474    /// ```
475    pub entrypoint: Keyword,
476    /// ```text
477    /// ENTRYPOINT ["executable", "param"]
478    ///            ^^^^^^^^^^^^^^^^^^^^^^^
479    /// ```
480    pub arguments: Command<'a>,
481}
482/// An `ENV` instruction.
483///
484/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#env)
485#[derive(Debug)]
486#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
487#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
488#[non_exhaustive]
489pub struct EnvInstruction<'a> {
490    /// ```text
491    /// ENV <key>=<value> [<key>=<value>...]
492    /// ^^^
493    /// ```
494    pub env: Keyword,
495    /// ```text
496    /// ENV <key>=<value> [<key>=<value>...]
497    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
498    /// ```
499    // TODO: SmallVec<[NameValue<'a>; 1]>
500    pub arguments: UnescapedString<'a>,
501}
502/// An `EXPOSE` instruction.
503///
504/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#expose)
505#[derive(Debug)]
506#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
507#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
508#[non_exhaustive]
509pub struct ExposeInstruction<'a> {
510    /// ```text
511    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
512    /// ^^^^^^
513    /// ```
514    pub expose: Keyword,
515    /// ```text
516    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
517    ///        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
518    /// ```
519    pub arguments: SmallVec<[UnescapedString<'a>; 1]>,
520}
521/// A `FROM` instruction.
522///
523/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#from)
524#[derive(Debug)]
525#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
526#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
527#[non_exhaustive]
528pub struct FromInstruction<'a> {
529    /// ```text
530    /// FROM [--platform=<platform>] <image> [AS <name>]
531    /// ^^^^
532    /// ```
533    pub from: Keyword,
534    /// ```text
535    /// FROM [--platform=<platform>] <image> [AS <name>]
536    ///      ^^^^^^^^^^^^^^^^^^^^^^^
537    /// ```
538    pub options: Vec<Flag<'a>>,
539    /// ```text
540    /// FROM [--platform=<platform>] <image> [AS <name>]
541    ///                              ^^^^^^^
542    /// ```
543    pub image: UnescapedString<'a>,
544    /// ```text
545    /// FROM [--platform=<platform>] <image> [AS <name>]
546    ///                                      ^^^^^^^^^^^
547    /// ```
548    pub as_: Option<(Keyword, UnescapedString<'a>)>,
549}
550/// A `HEALTHCHECK` instruction.
551///
552/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#healthcheck)
553#[derive(Debug)]
554#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
555#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
556#[non_exhaustive]
557pub struct HealthcheckInstruction<'a> {
558    /// ```text
559    /// HEALTHCHECK [options] CMD command
560    /// ^^^^^^^^^^^
561    /// ```
562    pub healthcheck: Keyword,
563    /// ```text
564    /// HEALTHCHECK [options] CMD command
565    ///             ^^^^^^^^^
566    /// ```
567    pub options: Vec<Flag<'a>>,
568    /// ```text
569    /// HEALTHCHECK [options] CMD command
570    ///                       ^^^^^^^^^^^
571    /// ```
572    pub arguments: HealthcheckArguments<'a>,
573}
574/// Arguments of the [`HEALTHCHECK` instruction](HealthcheckInstruction).
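///
/// A short sketch (commands assumed) showing the two accepted forms:
///
/// ```
/// use parse_dockerfile::{parse, HealthcheckArguments, Instruction};
///
/// let text = "FROM alpine\nHEALTHCHECK NONE\nFROM alpine\nHEALTHCHECK CMD curl -f http://localhost/\n";
/// let dockerfile = parse(text).unwrap();
/// let mut healthchecks = dockerfile.instructions.iter().filter_map(|i| match i {
///     Instruction::Healthcheck(h) => Some(&h.arguments),
///     _ => None,
/// });
/// assert!(matches!(healthchecks.next(), Some(HealthcheckArguments::None { .. })));
/// assert!(matches!(healthchecks.next(), Some(HealthcheckArguments::Cmd { .. })));
/// ```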
575#[derive(Debug)]
576#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
577#[cfg_attr(feature = "serde", serde(tag = "kind"))]
578#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
579#[non_exhaustive]
580pub enum HealthcheckArguments<'a> {
581    /// `HEALTHCHECK [options] CMD ...`
582    #[non_exhaustive]
583    Cmd {
584        /// ```text
585        /// HEALTHCHECK [options] CMD command
586        ///                       ^^^
587        /// ```
588        cmd: Keyword,
589        /// ```text
590        /// HEALTHCHECK [options] CMD command
591        ///                           ^^^^^^^
592        /// ```
593        arguments: Command<'a>,
594    },
595    /// `HEALTHCHECK [options] NONE`
596    #[non_exhaustive]
597    None {
598        /// ```text
599        /// HEALTHCHECK [options] NONE
600        ///                       ^^^^
601        /// ```
602        none: Keyword,
603    },
604}
605/// A `LABEL` instruction.
606///
607/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#label)
608#[derive(Debug)]
609#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
610#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
611#[non_exhaustive]
612pub struct LabelInstruction<'a> {
613    /// ```text
614    /// LABEL <key>=<value> [<key>=<value>...]
615    /// ^^^^^
616    /// ```
617    pub label: Keyword,
618    /// ```text
619    /// LABEL <key>=<value> [<key>=<value>...]
620    ///       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
621    /// ```
622    // TODO: SmallVec<[NameValue<'a>; 1]>
623    pub arguments: UnescapedString<'a>,
624}
625/// A `MAINTAINER` instruction (deprecated).
626///
627/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#maintainer-deprecated)
628#[derive(Debug)]
629#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
630#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
631#[non_exhaustive]
632pub struct MaintainerInstruction<'a> {
633    /// ```text
634    /// MAINTAINER <name>
635    /// ^^^^^^^^^^
636    /// ```
637    pub maintainer: Keyword,
638    /// ```text
639    /// MAINTAINER <name>
640    ///            ^^^^^^
641    /// ```
642    pub name: UnescapedString<'a>,
643}
644/// An `ONBUILD` instruction.
645///
646/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#onbuild)
647#[derive(Debug)]
648#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
649#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
650#[non_exhaustive]
651pub struct OnbuildInstruction<'a> {
652    /// ```text
653    /// ONBUILD <INSTRUCTION>
654    /// ^^^^^^^
655    /// ```
656    pub onbuild: Keyword,
657    /// ```text
658    /// ONBUILD <INSTRUCTION>
659    ///         ^^^^^^^^^^^^^
660    /// ```
661    pub instruction: Box<Instruction<'a>>,
662}
663/// A `RUN` instruction.
664///
665/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#run)
666#[derive(Debug)]
667#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
668#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
669#[non_exhaustive]
670pub struct RunInstruction<'a> {
671    /// ```text
672    /// RUN [options] <command> ...
673    /// ^^^
674    /// ```
675    pub run: Keyword,
676    /// ```text
677    /// RUN [options] <command> ...
678    ///     ^^^^^^^^^
679    /// ```
680    pub options: SmallVec<[Flag<'a>; 1]>,
681    /// ```text
682    /// RUN [options] <command> ...
683    ///               ^^^^^^^^^^^^^
684    /// ```
685    pub arguments: Command<'a>,
686    /// ```text
687    ///   RUN [options] <<EOF
688    /// /               ^^^^^
689    /// | ...
690    /// | EOF
691    /// |_^^^
692    /// ```
693    pub here_docs: Vec<HereDoc<'a>>,
694}
695/// A `SHELL` instruction.
696///
697/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell)
698#[derive(Debug)]
699#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
700#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
701#[non_exhaustive]
702pub struct ShellInstruction<'a> {
703    /// ```text
704    /// SHELL ["executable", "param"]
705    /// ^^^^^
706    /// ```
707    pub shell: Keyword,
708    /// ```text
709    /// SHELL ["executable", "param"]
710    ///       ^^^^^^^^^^^^^^^^^^^^^^^
711    /// ```
712    // Usually at least 2, e.g., ["/bin/sh", "-c"]
713    // A common case is 4, e.g., ["/bin/bash", "-o", "pipefail", "-c"]
714    pub arguments: SmallVec<[UnescapedString<'a>; 4]>,
715}
716/// A `STOPSIGNAL` instruction.
717///
718/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#stopsignal)
719#[derive(Debug)]
720#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
721#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
722#[non_exhaustive]
723pub struct StopsignalInstruction<'a> {
724    /// ```text
725    /// STOPSIGNAL signal
726    /// ^^^^^^^^^^
727    /// ```
728    pub stopsignal: Keyword,
729    /// ```text
730    /// STOPSIGNAL signal
731    ///            ^^^^^^
732    /// ```
733    pub arguments: UnescapedString<'a>,
734}
735/// A `USER` instruction.
736///
737/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#user)
738#[derive(Debug)]
739#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
740#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
741#[non_exhaustive]
742pub struct UserInstruction<'a> {
743    /// ```text
744    /// USER <user>[:<group>]
745    /// ^^^^
746    /// ```
747    pub user: Keyword,
748    /// ```text
749    /// USER <user>[:<group>]
750    ///      ^^^^^^^^^^^^^^^^
751    /// ```
752    pub arguments: UnescapedString<'a>,
753}
754/// A `VOLUME` instruction.
755///
756/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#volume)
757#[derive(Debug)]
758#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
759#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
760#[non_exhaustive]
761pub struct VolumeInstruction<'a> {
762    /// ```text
763    /// VOLUME ["/data"]
764    /// ^^^^^^
765    /// ```
766    pub volume: Keyword,
767    /// ```text
768    /// VOLUME ["/data"]
769    ///        ^^^^^^^^^
770    /// ```
771    pub arguments: JsonOrStringArray<'a, 1>,
772}
773/// A `WORKDIR` instruction.
774///
775/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#workdir)
776#[derive(Debug)]
777#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
778#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
779#[non_exhaustive]
780pub struct WorkdirInstruction<'a> {
781    /// ```text
782    /// WORKDIR /path/to/workdir
783    /// ^^^^^^^
784    /// ```
785    pub workdir: Keyword,
786    /// ```text
787    /// WORKDIR /path/to/workdir
788    ///         ^^^^^^^^^^^^^^^^
789    /// ```
790    pub arguments: UnescapedString<'a>,
791}
792
793/// A keyword.
794#[derive(Debug)]
795#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
796#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
797#[non_exhaustive]
798pub struct Keyword {
799    #[allow(missing_docs)]
800    pub span: Span,
801}
802
803/// An option flag.
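///
/// A small sketch (flag text assumed) of inspecting a parsed `--platform` flag:
///
/// ```
/// use parse_dockerfile::parse;
///
/// let text = "FROM --platform=linux/amd64 alpine\n";
/// let dockerfile = parse(text).unwrap();
/// let stage = dockerfile.stages().next().unwrap();
/// let flag = &stage.from.options[0];
/// assert_eq!(flag.name.value, "platform");
/// assert_eq!(flag.value.as_ref().unwrap().value, "linux/amd64");
/// // `span` covers the whole flag and indexes into the original text.
/// assert_eq!(&text[flag.span()], "--platform=linux/amd64");
/// ```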
804#[derive(Debug)]
805#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
806#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
807pub struct Flag<'a> {
808    /// ```text
809    /// --platform=linux/amd64
810    /// ^
811    /// ```
812    flag_start: usize,
813    /// ```text
814    /// --platform=linux/amd64
815    ///   ^^^^^^^^
816    /// ```
817    pub name: UnescapedString<'a>,
818    /// ```text
819    /// --platform=linux/amd64
820    ///            ^^^^^^^^^^^
821    /// ```
822    pub value: Option<UnescapedString<'a>>,
823}
824impl Flag<'_> {
825    /// ```text
826    /// --platform=linux/amd64
827    /// ^^^^^^^^^^
828    /// ```
829    #[must_use]
830    pub fn flag_span(&self) -> Span {
831        self.flag_start..self.name.span.end
832    }
833    /// ```text
834    /// --platform=linux/amd64
835    /// ^^^^^^^^^^^^^^^^^^^^^^
836    /// ```
837    #[must_use]
838    pub fn span(&self) -> Span {
839        match &self.value {
840            Some(v) => self.flag_start..v.span.end,
841            None => self.flag_span(),
842        }
843    }
844}
845
846/// An unescaped string.
847#[derive(Debug, PartialEq)]
848#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
849#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
850#[non_exhaustive]
851pub struct UnescapedString<'a> {
852    #[allow(missing_docs)]
853    pub span: Span,
854    #[allow(missing_docs)]
855    pub value: Cow<'a, str>,
856}
857
858/// A command.
859///
860/// This is used in the [`RUN`](RunInstruction), [`CMD`](CmdInstruction), and
861/// [`ENTRYPOINT`](EntrypointInstruction) instructions.
862///
863/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell-and-exec-form)
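///
/// A short sketch (commands assumed) contrasting the two forms:
///
/// ```
/// use parse_dockerfile::{parse, Command, Instruction};
///
/// let dockerfile = parse("FROM alpine\nRUN echo hi\nCMD [\"echo\", \"hi\"]\n").unwrap();
/// let stage = dockerfile.stages().next().unwrap();
/// let Some(Instruction::Run(run)) = stage.instructions.first() else { unreachable!() };
/// // Shell form keeps the raw command line.
/// assert!(matches!(run.arguments, Command::Shell(..)));
/// let Some(Instruction::Cmd(cmd)) = stage.instructions.get(1) else { unreachable!() };
/// // Exec form is parsed as a JSON array.
/// assert!(matches!(cmd.arguments, Command::Exec(..)));
/// ```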
864#[derive(Debug)]
865#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
866#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
867#[non_exhaustive]
868pub enum Command<'a> {
869    /// Exec-form (JSON array)
870    // At least 1
871    Exec(Spanned<SmallVec<[UnescapedString<'a>; 1]>>),
872    /// Shell-form (space-separated string or here-documents), with escapes preserved
873    Shell(Spanned<&'a str>),
874}
875
876// TODO: merge the two? It would reduce size, but make it confusing when the array is modified.
877/// A JSON array or space-separated string.
878///
879/// This is used in the [`VOLUME` instruction](VolumeInstruction).
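///
/// A small sketch (paths assumed) showing both accepted forms:
///
/// ```
/// use parse_dockerfile::{parse, Instruction, JsonOrStringArray};
///
/// let dockerfile = parse("FROM alpine\nVOLUME [\"/data\"]\nVOLUME /logs\n").unwrap();
/// let mut volumes = dockerfile.instructions.iter().filter_map(|i| match i {
///     Instruction::Volume(v) => Some(&v.arguments),
///     _ => None,
/// });
/// assert!(matches!(volumes.next(), Some(JsonOrStringArray::Json(..))));
/// assert!(matches!(volumes.next(), Some(JsonOrStringArray::String(..))));
/// ```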
880#[derive(Debug)]
881#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
882#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
883#[allow(clippy::exhaustive_enums)]
884pub enum JsonOrStringArray<'a, const N: usize> {
885    /// JSON array.
886    Json(Spanned<SmallVec<[UnescapedString<'a>; N]>>),
887    /// Space-separated string.
888    String(SmallVec<[UnescapedString<'a>; N]>),
889}
890
891/// A here-document.
892#[derive(Debug)]
893#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
894#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
895#[non_exhaustive]
896pub struct HereDoc<'a> {
897    #[allow(missing_docs)]
898    pub span: Span,
899    /// `false` if the delimiter is quoted.
900    pub expand: bool,
901    #[allow(missing_docs)]
902    pub value: Cow<'a, str>,
903}
904
905/// A spanned value.
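///
/// A small sketch (input assumed) showing that the span points back into the
/// original text:
///
/// ```
/// use parse_dockerfile::{parse, Command, Instruction};
///
/// let text = "FROM alpine\nCMD echo hi\n";
/// let dockerfile = parse(text).unwrap();
/// let stage = dockerfile.stages().next().unwrap();
/// let Some(Instruction::Cmd(cmd)) = stage.instructions.first() else { unreachable!() };
/// let Command::Shell(shell) = &cmd.arguments else { unreachable!() };
/// assert_eq!(shell.value, "echo hi");
/// assert_eq!(&text[shell.span.clone()], "echo hi");
/// ```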
906#[derive(Debug)]
907#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
908#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
909#[allow(clippy::exhaustive_structs)]
910pub struct Spanned<T> {
911    #[allow(missing_docs)]
912    pub span: Span,
913    #[allow(missing_docs)]
914    pub value: T,
915}
916
917#[allow(missing_docs)]
918pub type Span = Range<usize>;
919
920// -----------------------------------------------------------------------------
921// Parsing
922
923/// An iterator over instructions.
924///
925/// This type is returned by [`parse_iter`] function.
926#[allow(missing_debug_implementations)]
927#[must_use = "iterators are lazy and do nothing unless consumed"]
928pub struct ParseIter<'a> {
929    text: &'a str,
930    s: &'a [u8],
931    escape_byte: u8,
932    has_stage: bool,
933    in_onbuild: bool,
934    parser_directives: ParserDirectives<'a>,
935}
936impl<'a> ParseIter<'a> {
937    fn new(mut text: &'a str) -> Result<Self> {
938        // https://github.com/moby/moby/pull/23234
939        if text.as_bytes().starts_with(UTF8_BOM) {
940            text = &text[UTF8_BOM.len()..];
941        }
942        let mut p = Self {
943            text,
944            s: text.as_bytes(),
945            escape_byte: DEFAULT_ESCAPE_BYTE,
946            has_stage: false,
947            in_onbuild: false,
948            parser_directives: ParserDirectives {
949                // https://docs.docker.com/reference/dockerfile/#parser-directives
950                syntax: None,
951                escape: None,
952                // https://github.com/moby/buildkit/pull/4962
953                check: None,
954            },
955        };
956
957        parse_parser_directives(&mut p).map_err(|e| e.into_error(&p))?;
958
959        // https://docs.docker.com/reference/dockerfile/#format
960        // > For backward compatibility, leading whitespace before comments (#) and
961        // > instructions (such as RUN) are ignored, but discouraged.
962        skip_comments_and_whitespaces(&mut p.s, p.escape_byte);
963        Ok(p)
964    }
965}
966impl<'a> Iterator for ParseIter<'a> {
967    type Item = Result<Instruction<'a>>;
968    #[inline]
969    fn next(&mut self) -> Option<Self::Item> {
970        let p = self;
971        let mut s = p.s;
972        if let Some((&b, s_next)) = s.split_first() {
973            let instruction = match parse_instruction(p, &mut s, b, s_next) {
974                Ok(i) => i,
975                Err(e) => return Some(Err(e.into_error(p))),
976            };
977            match &instruction {
978                Instruction::From(..) => {
979                    p.has_stage = true;
980                }
981                Instruction::Arg(..) => {}
982                instruction => {
983                    if !p.has_stage {
984                        return Some(Err(ErrorKind::Expected(
985                            "FROM",
986                            instruction.instruction_span().start,
987                        )
988                        .into_error(p)));
989                    }
990                }
991            }
992            skip_comments_and_whitespaces(&mut s, p.escape_byte);
993            p.s = s;
994            return Some(Ok(instruction));
995        }
996        if !p.has_stage {
997            // https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L263
998            return Some(Err(ErrorKind::NoStages.into_error(p)));
999        }
1000        None
1001    }
1002}
1003
1004const DEFAULT_ESCAPE_BYTE: u8 = b'\\';
1005
1006fn parse_parser_directives(p: &mut ParseIter<'_>) -> Result<(), ErrorKind> {
1007    while let Some((&b'#', s_next)) = p.s.split_first() {
1008        p.s = s_next;
1009        skip_spaces_no_escape(&mut p.s);
1010        let directive_start = p.text.len() - p.s.len();
1011        if token(&mut p.s, b"SYNTAX") {
1012            skip_spaces_no_escape(&mut p.s);
1013            if let Some((&b'=', s_next)) = p.s.split_first() {
1014                p.s = s_next;
1015                if p.parser_directives.syntax.is_some() {
1016                    // > Invalid due to appearing twice
1017                    p.parser_directives.syntax = None;
1018                    p.parser_directives.escape = None;
1019                    p.parser_directives.check = None;
1020                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1021                    skip_this_line_no_escape(&mut p.s);
1022                    break;
1023                }
1024                skip_spaces_no_escape(&mut p.s);
1025                let value_start = p.text.len() - p.s.len();
1026                skip_non_whitespace_no_escape(&mut p.s);
1027                let end = p.text.len() - p.s.len();
1028                let value = p.text[value_start..end].trim_ascii_end();
1029                p.parser_directives.syntax = Some(ParserDirective {
1030                    start: directive_start,
1031                    value: Spanned { span: value_start..value_start + value.len(), value },
1032                });
1033                skip_this_line_no_escape(&mut p.s);
1034                continue;
1035            }
1036        } else if token(&mut p.s, b"CHECK") {
1037            skip_spaces_no_escape(&mut p.s);
1038            if let Some((&b'=', s_next)) = p.s.split_first() {
1039                p.s = s_next;
1040                if p.parser_directives.check.is_some() {
1041                    // > Invalid due to appearing twice
1042                    p.parser_directives.syntax = None;
1043                    p.parser_directives.escape = None;
1044                    p.parser_directives.check = None;
1045                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1046                    skip_this_line_no_escape(&mut p.s);
1047                    break;
1048                }
1049                skip_spaces_no_escape(&mut p.s);
1050                let value_start = p.text.len() - p.s.len();
1051                skip_non_whitespace_no_escape(&mut p.s);
1052                let end = p.text.len() - p.s.len();
1053                let value = p.text[value_start..end].trim_ascii_end();
1054                p.parser_directives.check = Some(ParserDirective {
1055                    start: directive_start,
1056                    value: Spanned { span: value_start..value_start + value.len(), value },
1057                });
1058                skip_this_line_no_escape(&mut p.s);
1059                continue;
1060            }
1061        } else if token(&mut p.s, b"ESCAPE") {
1062            skip_spaces_no_escape(&mut p.s);
1063            if let Some((&b'=', s_next)) = p.s.split_first() {
1064                p.s = s_next;
1065                if p.parser_directives.escape.is_some() {
1066                    // > Invalid due to appearing twice
1067                    p.parser_directives.syntax = None;
1068                    p.parser_directives.escape = None;
1069                    p.parser_directives.check = None;
1070                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1071                    skip_this_line_no_escape(&mut p.s);
1072                    break;
1073                }
1074                skip_spaces_no_escape(&mut p.s);
1075                let value_start = p.text.len() - p.s.len();
1076                skip_non_whitespace_no_escape(&mut p.s);
1077                let end = p.text.len() - p.s.len();
1078                let value = p.text[value_start..end].trim_ascii_end();
1079                match value {
1080                    "`" => p.escape_byte = b'`',
1081                    "\\" => {}
1082                    _ => return Err(ErrorKind::InvalidEscape { escape_start: value_start }),
1083                }
1084                p.parser_directives.escape = Some(ParserDirective {
1085                    start: directive_start,
1086                    value: Spanned {
1087                        span: value_start..value_start + value.len(),
1088                        value: p.escape_byte as char,
1089                    },
1090                });
1091                skip_this_line_no_escape(&mut p.s);
1092                continue;
1093            }
1094        }
1095        skip_this_line_no_escape(&mut p.s);
1096        break;
1097    }
1098    Ok(())
1099}
1100
1101#[inline]
1102fn parse_instruction<'a>(
1103    p: &mut ParseIter<'a>,
1104    s: &mut &'a [u8],
1105    b: u8,
1106    s_next: &'a [u8],
1107) -> Result<Instruction<'a>, ErrorKind> {
1108    let instruction_start = p.text.len() - s.len();
1109    *s = s_next;
1110    // NB: `token_slow` must be called after all `token` calls.
1111    match b & TO_UPPER8 {
1112        b'A' => {
1113            if token(s, &b"ARG"[1..]) {
1114                let instruction_span = instruction_start..p.text.len() - s.len();
1115                if spaces_or_line_end(s, p.escape_byte) {
1116                    return parse_arg(p, s, Keyword { span: instruction_span });
1117                }
1118            } else if token(s, &b"ADD"[1..]) {
1119                let instruction_span = instruction_start..p.text.len() - s.len();
1120                if spaces_or_line_end(s, p.escape_byte) {
1121                    let add = Keyword { span: instruction_span };
1122                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1123                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1124                }
1125            } else if token_slow(s, &b"ARG"[1..], p.escape_byte) {
1126                let instruction_span = instruction_start..p.text.len() - s.len();
1127                if spaces_or_line_end(s, p.escape_byte) {
1128                    return parse_arg(p, s, Keyword { span: instruction_span });
1129                }
1130            } else if token_slow(s, &b"ADD"[1..], p.escape_byte) {
1131                let instruction_span = instruction_start..p.text.len() - s.len();
1132                if spaces_or_line_end(s, p.escape_byte) {
1133                    let add = Keyword { span: instruction_span };
1134                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1135                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1136                }
1137            }
1138        }
1139        b'C' => {
1140            if token(s, &b"COPY"[1..]) {
1141                let instruction_span = instruction_start..p.text.len() - s.len();
1142                if spaces_or_line_end(s, p.escape_byte) {
1143                    let copy = Keyword { span: instruction_span };
1144                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
1145                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1146                }
1147            } else if token(s, &b"CMD"[1..]) {
1148                let instruction_span = instruction_start..p.text.len() - s.len();
1149                if spaces_or_line_end(s, p.escape_byte) {
1150                    return parse_cmd(p, s, Keyword { span: instruction_span });
1151                }
1152            } else if token_slow(s, &b"COPY"[1..], p.escape_byte) {
1153                let instruction_span = instruction_start..p.text.len() - s.len();
1154                if spaces_or_line_end(s, p.escape_byte) {
1155                    let copy = Keyword { span: instruction_span };
1156                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
1157                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1158                }
1159            } else if token_slow(s, &b"CMD"[1..], p.escape_byte) {
1160                let instruction_span = instruction_start..p.text.len() - s.len();
1161                if spaces_or_line_end(s, p.escape_byte) {
1162                    return parse_cmd(p, s, Keyword { span: instruction_span });
1163                }
1164            }
1165        }
1166        b'E' => {
1167            if token(s, &b"ENV"[1..]) {
1168                let instruction_span = instruction_start..p.text.len() - s.len();
1169                if spaces_or_line_end(s, p.escape_byte) {
1170                    return parse_env(p, s, Keyword { span: instruction_span });
1171                }
1172            } else if token(s, &b"EXPOSE"[1..]) {
1173                let instruction_span = instruction_start..p.text.len() - s.len();
1174                if spaces_or_line_end(s, p.escape_byte) {
1175                    return parse_expose(p, s, Keyword { span: instruction_span });
1176                }
1177            } else if token(s, &b"ENTRYPOINT"[1..]) {
1178                let instruction_span = instruction_start..p.text.len() - s.len();
1179                if spaces_or_line_end(s, p.escape_byte) {
1180                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
1181                }
1182            } else if token_slow(s, &b"ENV"[1..], p.escape_byte) {
1183                let instruction_span = instruction_start..p.text.len() - s.len();
1184                if spaces_or_line_end(s, p.escape_byte) {
1185                    return parse_env(p, s, Keyword { span: instruction_span });
1186                }
1187            } else if token_slow(s, &b"EXPOSE"[1..], p.escape_byte) {
1188                let instruction_span = instruction_start..p.text.len() - s.len();
1189                if spaces_or_line_end(s, p.escape_byte) {
1190                    return parse_expose(p, s, Keyword { span: instruction_span });
1191                }
1192            } else if token_slow(s, &b"ENTRYPOINT"[1..], p.escape_byte) {
1193                let instruction_span = instruction_start..p.text.len() - s.len();
1194                if spaces_or_line_end(s, p.escape_byte) {
1195                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
1196                }
1197            }
1198        }
1199        b'F' => {
1200            if token(s, &b"FROM"[1..]) || token_slow(s, &b"FROM"[1..], p.escape_byte) {
1201                let instruction_span = instruction_start..p.text.len() - s.len();
1202                if spaces_or_line_end(s, p.escape_byte) {
1203                    return parse_from(p, s, Keyword { span: instruction_span });
1204                }
1205            }
1206        }
1207        b'H' => {
1208            if token(s, &b"HEALTHCHECK"[1..]) || token_slow(s, &b"HEALTHCHECK"[1..], p.escape_byte)
1209            {
1210                let instruction_span = instruction_start..p.text.len() - s.len();
1211                if spaces_or_line_end(s, p.escape_byte) {
1212                    return parse_healthcheck(p, s, Keyword { span: instruction_span });
1213                }
1214            }
1215        }
1216        b'L' => {
1217            if token(s, &b"LABEL"[1..]) || token_slow(s, &b"LABEL"[1..], p.escape_byte) {
1218                let instruction_span = instruction_start..p.text.len() - s.len();
1219                if spaces_or_line_end(s, p.escape_byte) {
1220                    return parse_label(p, s, Keyword { span: instruction_span });
1221                }
1222            }
1223        }
1224        b'M' => {
1225            if token(s, &b"MAINTAINER"[1..]) || token_slow(s, &b"MAINTAINER"[1..], p.escape_byte) {
1226                let instruction_span = instruction_start..p.text.len() - s.len();
1227                if spaces_or_line_end(s, p.escape_byte) {
1228                    return parse_maintainer(p, s, Keyword { span: instruction_span });
1229                }
1230            }
1231        }
1232        b'O' => {
1233            if token(s, &b"ONBUILD"[1..]) || token_slow(s, &b"ONBUILD"[1..], p.escape_byte) {
1234                let instruction_span = instruction_start..p.text.len() - s.len();
1235                if spaces_or_line_end(s, p.escape_byte) {
1236                    return parse_onbuild(p, s, Keyword { span: instruction_span });
1237                }
1238            }
1239        }
1240        b'R' => {
1241            if token(s, &b"RUN"[1..]) || token_slow(s, &b"RUN"[1..], p.escape_byte) {
1242                let instruction_span = instruction_start..p.text.len() - s.len();
1243                if spaces_or_line_end(s, p.escape_byte) {
1244                    return parse_run(p, s, Keyword { span: instruction_span });
1245                }
1246            }
1247        }
1248        b'S' => {
1249            if token(s, &b"SHELL"[1..]) {
1250                let instruction_span = instruction_start..p.text.len() - s.len();
1251                if spaces_or_line_end(s, p.escape_byte) {
1252                    return parse_shell(p, s, Keyword { span: instruction_span });
1253                }
1254            } else if token(s, &b"STOPSIGNAL"[1..]) {
1255                let instruction_span = instruction_start..p.text.len() - s.len();
1256                if spaces_or_line_end(s, p.escape_byte) {
1257                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
1258                }
1259            } else if token_slow(s, &b"SHELL"[1..], p.escape_byte) {
1260                let instruction_span = instruction_start..p.text.len() - s.len();
1261                if spaces_or_line_end(s, p.escape_byte) {
1262                    return parse_shell(p, s, Keyword { span: instruction_span });
1263                }
1264            } else if token_slow(s, &b"STOPSIGNAL"[1..], p.escape_byte) {
1265                let instruction_span = instruction_start..p.text.len() - s.len();
1266                if spaces_or_line_end(s, p.escape_byte) {
1267                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
1268                }
1269            }
1270        }
1271        b'U' => {
1272            if token(s, &b"USER"[1..]) || token_slow(s, &b"USER"[1..], p.escape_byte) {
1273                let instruction_span = instruction_start..p.text.len() - s.len();
1274                if spaces_or_line_end(s, p.escape_byte) {
1275                    return parse_user(p, s, Keyword { span: instruction_span });
1276                }
1277            }
1278        }
1279        b'V' => {
1280            if token(s, &b"VOLUME"[1..]) || token_slow(s, &b"VOLUME"[1..], p.escape_byte) {
1281                let instruction_span = instruction_start..p.text.len() - s.len();
1282                if spaces_or_line_end(s, p.escape_byte) {
1283                    return parse_volume(p, s, Keyword { span: instruction_span });
1284                }
1285            }
1286        }
1287        b'W' => {
1288            if token(s, &b"WORKDIR"[1..]) || token_slow(s, &b"WORKDIR"[1..], p.escape_byte) {
1289                let instruction_span = instruction_start..p.text.len() - s.len();
1290                if spaces_or_line_end(s, p.escape_byte) {
1291                    return parse_workdir(p, s, Keyword { span: instruction_span });
1292                }
1293            }
1294        }
1295        _ => {}
1296    }
1297    Err(ErrorKind::UnknownInstruction { instruction_start })
1298}
1299
1300#[inline]
1301fn parse_arg<'a>(
1302    p: &mut ParseIter<'a>,
1303    s: &mut &'a [u8],
1304    instruction: Keyword,
1305) -> Result<Instruction<'a>, ErrorKind> {
1306    debug_assert!(token_slow(
1307        &mut p.text[instruction.span.clone()].as_bytes(),
1308        b"ARG",
1309        p.escape_byte,
1310    ));
1311    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1312    if arguments.value.is_empty() {
1313        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1314    }
1315    Ok(Instruction::Arg(ArgInstruction { arg: instruction, arguments }))
1316}
1317
1318#[inline]
1319fn parse_add_or_copy<'a>(
1320    p: &mut ParseIter<'a>,
1321    s: &mut &'a [u8],
1322    instruction: &Keyword,
1323) -> Result<(SmallVec<[Flag<'a>; 1]>, SmallVec<[Source<'a>; 1]>, UnescapedString<'a>), ErrorKind> {
1324    debug_assert!(
1325        token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"ADD", p.escape_byte,)
1326            || token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"COPY", p.escape_byte,)
1327    );
1328    let options = parse_options(s, p.text, p.escape_byte);
1329    if is_maybe_json(s) {
1330        let mut tmp = *s;
1331        if let Ok(((src, dest), _array_span)) = parse_json_array::<(
1332            SmallVec<[Source<'_>; 1]>,
1333            Option<_>,
1334        )>(&mut tmp, p.text, p.escape_byte)
1335        {
1336            debug_assert!(is_line_end(tmp.first()));
1337            if tmp.is_empty() {
1338                *s = &[];
1339            } else {
1340                *s = &tmp[1..];
1341            }
1342            if src.is_empty() {
1343                return Err(ErrorKind::AtLeastTwoArguments {
1344                    instruction_start: instruction.span.start,
1345                });
1346            }
1347            return Ok((options, src, dest.unwrap()));
1348        }
1349    }
1350    let (mut src, dest) = collect_space_separated_unescaped_consume_line::<(
1351        SmallVec<[Source<'_>; 1]>,
1352        Option<_>,
1353    )>(s, p.text, p.escape_byte);
1354    if src.is_empty() {
1355        return Err(ErrorKind::AtLeastTwoArguments { instruction_start: instruction.span.start });
1356    }
1357    for src in &mut src {
1358        let Source::Path(path) = src else { unreachable!() };
1359        let Some(mut delim) = path.value.as_bytes().strip_prefix(b"<<") else { continue };
1360        if delim.is_empty() {
1361            continue;
1362        }
1363        let mut strip_tab = false;
1364        let mut quote = None;
1365        if let Some((&b'-', delim_next)) = delim.split_first() {
1366            strip_tab = true;
1367            delim = delim_next;
1368        }
1369        if let Some((&b, delim_next)) = delim.split_first() {
1370            if matches!(b, b'"' | b'\'') {
1371                quote = Some(b);
1372                delim = delim_next;
1373                if delim.last() != Some(&b) {
1374                    return Err(ErrorKind::ExpectedOwned(
1375                        format!(
1376                            "quote ({}), but found '{}'",
1377                            b as char,
1378                            *delim.last().unwrap_or(&0) as char
1379                        ),
1380                        p.text.len() - s.len(),
1381                    ));
1382                }
1383                delim = &delim[..delim.len() - 1];
1384            }
1385        }
1386        if strip_tab {
1387            let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
1388            *src = Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc });
1389        } else {
1390            let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
1391            *src =
1392                Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc.into() });
1393        }
1394    }
1395    Ok((options, src, dest.unwrap()))
1396}
1397
1398#[allow(clippy::unnecessary_wraps)]
1399#[inline]
1400fn parse_cmd<'a>(
1401    p: &mut ParseIter<'a>,
1402    s: &mut &'a [u8],
1403    instruction: Keyword,
1404) -> Result<Instruction<'a>, ErrorKind> {
1405    debug_assert!(token_slow(
1406        &mut p.text[instruction.span.clone()].as_bytes(),
1407        b"CMD",
1408        p.escape_byte,
1409    ));
1410    if is_maybe_json(s) {
1411        let mut tmp = *s;
1412        if let Ok((arguments, array_span)) =
1413            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1414        {
1415            debug_assert!(is_line_end(tmp.first()));
1416            if tmp.is_empty() {
1417                *s = &[];
1418            } else {
1419                *s = &tmp[1..];
1420            }
1421            // "CMD []" seems to be okay?
1422            // https://github.com/moby/buildkit/blob/6d143f5602a61acef286f39ee75f1cb33c367d44/frontend/dockerfile/parser/testfiles/brimstone-docker-consul/Dockerfile#L3
1423            return Ok(Instruction::Cmd(CmdInstruction {
1424                cmd: instruction,
1425                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1426            }));
1427        }
1428    }
1429    let arguments_start = p.text.len() - s.len();
1430    skip_this_line(s, p.escape_byte);
1431    let end = p.text.len() - s.len();
1432    let arguments = p.text[arguments_start..end].trim_ascii_end();
1433    Ok(Instruction::Cmd(CmdInstruction {
1434        cmd: instruction,
1435        arguments: Command::Shell(Spanned {
1436            span: arguments_start..arguments_start + arguments.len(),
1437            value: arguments,
1438        }),
1439    }))
1440}
1441
1442#[inline]
1443fn parse_env<'a>(
1444    p: &mut ParseIter<'a>,
1445    s: &mut &'a [u8],
1446    instruction: Keyword,
1447) -> Result<Instruction<'a>, ErrorKind> {
1448    debug_assert!(token_slow(
1449        &mut p.text[instruction.span.clone()].as_bytes(),
1450        b"ENV",
1451        p.escape_byte,
1452    ));
1453    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1454    if arguments.value.is_empty() {
1455        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1456    }
1457    Ok(Instruction::Env(EnvInstruction { env: instruction, arguments }))
1458}
1459
1460#[inline]
1461fn parse_expose<'a>(
1462    p: &mut ParseIter<'a>,
1463    s: &mut &'a [u8],
1464    instruction: Keyword,
1465) -> Result<Instruction<'a>, ErrorKind> {
1466    debug_assert!(token_slow(
1467        &mut p.text[instruction.span.clone()].as_bytes(),
1468        b"EXPOSE",
1469        p.escape_byte,
1470    ));
1471    let arguments: SmallVec<[_; 1]> =
1472        collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1473    if arguments.is_empty() {
1474        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1475    }
1476    Ok(Instruction::Expose(ExposeInstruction { expose: instruction, arguments }))
1477}
1478
1479#[inline]
1480fn parse_entrypoint<'a>(
1481    p: &mut ParseIter<'a>,
1482    s: &mut &'a [u8],
1483    instruction: Keyword,
1484) -> Result<Instruction<'a>, ErrorKind> {
1485    debug_assert!(token_slow(
1486        &mut p.text[instruction.span.clone()].as_bytes(),
1487        b"ENTRYPOINT",
1488        p.escape_byte,
1489    ));
1490    if is_maybe_json(s) {
1491        let mut tmp = *s;
1492        if let Ok((arguments, array_span)) =
1493            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1494        {
1495            debug_assert!(is_line_end(tmp.first()));
1496            if tmp.is_empty() {
1497                *s = &[];
1498            } else {
1499                *s = &tmp[1..];
1500            }
1501            if arguments.is_empty() {
1502                return Err(ErrorKind::AtLeastOneArgument {
1503                    instruction_start: instruction.span.start,
1504                });
1505            }
1506            return Ok(Instruction::Entrypoint(EntrypointInstruction {
1507                entrypoint: instruction,
1508                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1509            }));
1510        }
1511    }
1512    let arguments_start = p.text.len() - s.len();
1513    skip_this_line(s, p.escape_byte);
1514    let end = p.text.len() - s.len();
1515    let arguments = p.text[arguments_start..end].trim_ascii_end();
1516    if arguments.is_empty() {
1517        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1518    }
1519    Ok(Instruction::Entrypoint(EntrypointInstruction {
1520        entrypoint: instruction,
1521        arguments: Command::Shell(Spanned {
1522            span: arguments_start..arguments_start + arguments.len(),
1523            value: arguments,
1524        }),
1525    }))
1526}
1527
1528#[inline]
1529fn parse_from<'a>(
1530    p: &mut ParseIter<'a>,
1531    s: &mut &'a [u8],
1532    instruction: Keyword,
1533) -> Result<Instruction<'a>, ErrorKind> {
1534    debug_assert!(token_slow(
1535        &mut p.text[instruction.span.clone()].as_bytes(),
1536        b"FROM",
1537        p.escape_byte,
1538    ));
1539    let options = parse_options(s, p.text, p.escape_byte);
1540    // TODO: https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L302
1541    // > base name (%s) should not be blank
1542    let image = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
1543    if image.value.is_empty() {
1544        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1545    }
1546    let mut as_ = None;
1547    if skip_spaces(s, p.escape_byte) {
1548        let as_start = p.text.len() - s.len();
1549        if token(s, b"AS") || token_slow(s, b"AS", p.escape_byte) {
1550            let as_span = as_start..p.text.len() - s.len();
1551            if !skip_spaces(s, p.escape_byte) {
1552                return Err(ErrorKind::Expected("AS", as_start));
1553            }
1554            let name = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
1555            skip_spaces(s, p.escape_byte);
1556            if !is_line_end(s.first()) {
1557                return Err(ErrorKind::Expected("newline or eof", p.text.len() - s.len()));
1558            }
1559            as_ = Some((Keyword { span: as_span }, name));
1560        } else if !is_line_end(s.first()) {
1561            return Err(ErrorKind::Expected("AS", as_start));
1562        }
1563    }
1564    Ok(Instruction::From(FromInstruction { from: instruction, options, image, as_ }))
1565}
1566
1567#[inline]
1568fn parse_healthcheck<'a>(
1569    p: &mut ParseIter<'a>,
1570    s: &mut &'a [u8],
1571    instruction: Keyword,
1572) -> Result<Instruction<'a>, ErrorKind> {
1573    debug_assert!(token_slow(
1574        &mut p.text[instruction.span.clone()].as_bytes(),
1575        b"HEALTHCHECK",
1576        p.escape_byte,
1577    ));
1578    let options = parse_options(s, p.text, p.escape_byte);
1579    let Some((&b, s_next)) = s.split_first() else {
1580        return Err(ErrorKind::Expected("CMD or NONE", p.text.len() - s.len()));
1581    };
1582    let cmd_or_none_start = p.text.len() - s.len();
1583    match b & TO_UPPER8 {
1584        b'C' => {
1585            *s = s_next;
1586            if token(s, &b"CMD"[1..]) || token_slow(s, &b"CMD"[1..], p.escape_byte) {
1587                let cmd_span = cmd_or_none_start..p.text.len() - s.len();
1588                let cmd_keyword = Keyword { span: cmd_span };
1589                if spaces_or_line_end(s, p.escape_byte) {
1590                    if is_maybe_json(s) {
1591                        let mut tmp = *s;
1592                        if let Ok((arguments, array_span)) =
1593                            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1594                        {
1595                            debug_assert!(is_line_end(tmp.first()));
1596                            if tmp.is_empty() {
1597                                *s = &[];
1598                            } else {
1599                                *s = &tmp[1..];
1600                            }
1601                            if arguments.is_empty() {
1602                                return Err(ErrorKind::Expected(
1603                                    "at least 1 arguments",
1604                                    array_span.start,
1605                                ));
1606                            }
1607                            return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1608                                healthcheck: instruction,
1609                                options,
1610                                arguments: HealthcheckArguments::Cmd {
1611                                    cmd: cmd_keyword,
1612                                    arguments: Command::Exec(Spanned {
1613                                        span: array_span,
1614                                        value: arguments,
1615                                    }),
1616                                },
1617                            }));
1618                        }
1619                    }
1620                    let arguments_start = p.text.len() - s.len();
1621                    skip_this_line(s, p.escape_byte);
1622                    let end = p.text.len() - s.len();
1623                    let arguments = p.text[arguments_start..end].trim_ascii_end();
1624                    return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1625                        healthcheck: instruction,
1626                        options,
1627                        arguments: HealthcheckArguments::Cmd {
1628                            cmd: cmd_keyword,
1629                            arguments: Command::Shell(Spanned {
1630                                span: arguments_start..arguments_start + arguments.len(),
1631                                value: arguments,
1632                            }),
1633                        },
1634                    }));
1635                }
1636            }
1637        }
1638        b'N' => {
1639            *s = s_next;
1640            if token(s, &b"NONE"[1..]) || token_slow(s, &b"NONE"[1..], p.escape_byte) {
1641                let none_span = cmd_or_none_start..p.text.len() - s.len();
1642                skip_spaces(s, p.escape_byte);
1643                if !is_line_end(s.first()) {
1644                    // TODO: error kind
1645                    return Err(ErrorKind::Expected(
1646                        "HEALTHCHECK NONE takes no arguments",
1647                        p.text.len() - s.len(),
1648                    ));
1649                }
1650                // TODO: HEALTHCHECK NONE doesn't support options
1651                let none_keyword = Keyword { span: none_span };
1652                return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1653                    healthcheck: instruction,
1654                    options,
1655                    arguments: HealthcheckArguments::None { none: none_keyword },
1656                }));
1657            }
1658        }
1659        _ => {}
1660    }
1661    Err(ErrorKind::Expected("CMD or NONE", p.text.len() - s.len()))
1662}
1663
1664#[inline]
1665fn parse_label<'a>(
1666    p: &mut ParseIter<'a>,
1667    s: &mut &'a [u8],
1668    instruction: Keyword,
1669) -> Result<Instruction<'a>, ErrorKind> {
1670    debug_assert!(token_slow(
1671        &mut p.text[instruction.span.clone()].as_bytes(),
1672        b"LABEL",
1673        p.escape_byte,
1674    ));
1675    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1676    if arguments.value.is_empty() {
1677        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1678    }
1679    Ok(Instruction::Label(LabelInstruction { label: instruction, arguments }))
1680}
1681
1682#[cold]
1683fn parse_maintainer<'a>(
1684    p: &mut ParseIter<'a>,
1685    s: &mut &'a [u8],
1686    instruction: Keyword,
1687) -> Result<Instruction<'a>, ErrorKind> {
1688    debug_assert!(token_slow(
1689        &mut p.text[instruction.span.clone()].as_bytes(),
1690        b"MAINTAINER",
1691        p.escape_byte,
1692    ));
1693    let name = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1694    if name.value.is_empty() {
1695        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1696    }
1697    Ok(Instruction::Maintainer(MaintainerInstruction { maintainer: instruction, name }))
1698}
1699
1700#[inline]
1701fn parse_onbuild<'a>(
1702    p: &mut ParseIter<'a>,
1703    s: &mut &'a [u8],
1704    instruction: Keyword,
1705) -> Result<Instruction<'a>, ErrorKind> {
1706    debug_assert!(token_slow(
1707        &mut p.text[instruction.span.clone()].as_bytes(),
1708        b"ONBUILD",
1709        p.escape_byte,
1710    ));
1711    // https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1712    if mem::replace(&mut p.in_onbuild, true) {
1713        // TODO: error kind
1714        return Err(ErrorKind::Expected("ONBUILD ONBUILD is not allowed", instruction.span.start));
1715    }
1716    let Some((&b, s_next)) = s.split_first() else {
1717        return Err(ErrorKind::Expected("instruction after ONBUILD", instruction.span.start));
1718    };
1719    // TODO: https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1720    // match b & TO_UPPER8 {
1721    //     b'F' => {
1722    //         if token(s, b"FROM") || token_slow(s, b"FROM", p.escape_byte) {
1723    //             // TODO: error kind
1724    //             return Err(ErrorKind::Expected(
1725    //                 "ONBUILD FROM is not allowed",
1726    //                 instruction.span.start,
1727    //             ));
1728    //         }
1729    //     }
1730    //     b'M' => {
1731    //         if token(s, b"MAINTAINER")
1732    //             || token_slow(s, b"MAINTAINER", p.escape_byte)
1733    //         {
1734    //             // TODO: error kind
1735    //             return Err(ErrorKind::Expected(
1736    //                 "ONBUILD MAINTAINER is not allowed",
1737    //                 instruction.span.start,
1738    //             ));
1739    //         }
1740    //     }
1741    //     _ => {}
1742    // }
1743    let inner_instruction = parse_instruction(p, s, b, s_next)?;
1744    p.in_onbuild = false;
1745    Ok(Instruction::Onbuild(OnbuildInstruction {
1746        onbuild: instruction,
1747        instruction: Box::new(inner_instruction),
1748    }))
1749}
1750
1751#[inline]
1752fn parse_run<'a>(
1753    p: &mut ParseIter<'a>,
1754    s: &mut &'a [u8],
1755    instruction: Keyword,
1756) -> Result<Instruction<'a>, ErrorKind> {
1757    debug_assert!(token_slow(
1758        &mut p.text[instruction.span.clone()].as_bytes(),
1759        b"RUN",
1760        p.escape_byte,
1761    ));
1762    let options = parse_options(s, p.text, p.escape_byte);
1763    if is_maybe_json(s) {
1764        let mut tmp = *s;
1765        if let Ok((arguments, array_span)) =
1766            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1767        {
1768            debug_assert!(is_line_end(tmp.first()));
1769            if tmp.is_empty() {
1770                *s = &[];
1771            } else {
1772                *s = &tmp[1..];
1773            }
1774            if arguments.is_empty() {
1775                return Err(ErrorKind::AtLeastOneArgument {
1776                    instruction_start: instruction.span.start,
1777                });
1778            }
1779            return Ok(Instruction::Run(RunInstruction {
1780                run: instruction,
1781                options,
1782                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1783                // TODO: https://github.com/moby/buildkit/issues/2207
1784                here_docs: vec![],
1785            }));
1786        }
1787    }
1788
1789    // https://docs.docker.com/reference/dockerfile/#here-documents
1790    let mut strip_tab = false;
1791    let mut quote = None;
1792    let mut pos = 2;
    // Needs at least 5 bytes: the shortest possible here-doc is <<E\nE.
1794    if s.len() >= 5 && s.starts_with(b"<<") && {
1795        if s[pos] == b'-' {
1796            strip_tab = true;
1797            pos += 1;
1798        }
1799        if matches!(s[pos], b'"' | b'\'') {
1800            quote = Some(s[pos]);
1801            pos += 1;
1802        }
1803        // TODO: non-ascii_alphanumeric
1804        s[pos].is_ascii_alphanumeric()
1805    } {
1806        *s = &s[pos..];
1807        let delim_start = p.text.len() - s.len();
1808        // TODO: non-ascii_alphanumeric
1809        while let Some((&b, s_next)) = s.split_first() {
1810            if b.is_ascii_alphanumeric() {
1811                *s = s_next;
1812                continue;
1813            }
1814            break;
1815        }
1816        let delim = &p.text.as_bytes()[delim_start..p.text.len() - s.len()];
1817        if let Some(quote) = quote {
1818            if let Some((&b, s_next)) = s.split_first() {
1819                if b != quote {
1820                    return Err(ErrorKind::ExpectedOwned(
1821                        format!("quote ({}), but found '{}'", quote as char, b as char),
1822                        p.text.len() - s.len(),
1823                    ));
1824                }
1825                *s = s_next;
1826            } else {
1827                return Err(ErrorKind::ExpectedOwned(
1828                    format!("quote ({}), but reached eof", quote as char),
1829                    p.text.len() - s.len(),
1830                ));
1831            }
1832        }
1833        // TODO: skip space
1834        let arguments_start = p.text.len() - s.len();
1835        skip_this_line(s, p.escape_byte);
1836        let end = p.text.len() - s.len();
1837        let arguments = p.text[arguments_start..end].trim_ascii_end();
1838        let here_doc = if strip_tab {
1839            let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
1840            HereDoc { span, expand: quote.is_none(), value: here_doc }
1841        } else {
1842            let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
1843            HereDoc { span, expand: quote.is_none(), value: here_doc.into() }
1844        };
1845        return Ok(Instruction::Run(RunInstruction {
1846            run: instruction,
1847            options,
1848            arguments: Command::Shell(Spanned {
1849                span: arguments_start..arguments_start + arguments.len(),
1850                value: arguments,
1851            }),
1852            // TODO: multiple here-docs
1853            here_docs: vec![here_doc],
1854        }));
1855    }
1856
1857    let arguments_start = p.text.len() - s.len();
1858    skip_this_line(s, p.escape_byte);
1859    let end = p.text.len() - s.len();
1860    let arguments = p.text[arguments_start..end].trim_ascii_end();
1861    Ok(Instruction::Run(RunInstruction {
1862        run: instruction,
1863        options,
1864        arguments: Command::Shell(Spanned {
1865            span: arguments_start..arguments_start + arguments.len(),
1866            value: arguments,
1867        }),
1868        here_docs: vec![],
1869    }))
1870}
1871
1872#[inline]
1873fn parse_shell<'a>(
1874    p: &mut ParseIter<'a>,
1875    s: &mut &'a [u8],
1876    instruction: Keyword,
1877) -> Result<Instruction<'a>, ErrorKind> {
1878    debug_assert!(token_slow(
1879        &mut p.text[instruction.span.clone()].as_bytes(),
1880        b"SHELL",
1881        p.escape_byte,
1882    ));
1883    if !is_maybe_json(s) {
1884        return Err(ErrorKind::Expected("JSON array", p.text.len() - s.len()));
1885    }
1886    match parse_json_array::<SmallVec<[_; 4]>>(s, p.text, p.escape_byte) {
1887        Ok((arguments, _array_span)) => {
1888            if !s.is_empty() {
1889                *s = &s[1..];
1890            }
1891            if arguments.is_empty() {
1892                return Err(ErrorKind::AtLeastOneArgument {
1893                    instruction_start: instruction.span.start,
1894                });
1895            }
1896            Ok(Instruction::Shell(ShellInstruction { shell: instruction, arguments }))
1897        }
1898        Err(array_start) => Err(ErrorKind::Json { arguments_start: array_start }),
1899    }
1900}
1901
1902#[inline]
1903fn parse_stopsignal<'a>(
1904    p: &mut ParseIter<'a>,
1905    s: &mut &'a [u8],
1906    instruction: Keyword,
1907) -> Result<Instruction<'a>, ErrorKind> {
1908    debug_assert!(token_slow(
1909        &mut p.text[instruction.span.clone()].as_bytes(),
1910        b"STOPSIGNAL",
1911        p.escape_byte,
1912    ));
1913    // TODO: space is disallowed?
1914    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1915    if arguments.value.is_empty() {
1916        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1917    }
1918    Ok(Instruction::Stopsignal(StopsignalInstruction { stopsignal: instruction, arguments }))
1919}
1920
1921#[inline]
1922fn parse_user<'a>(
1923    p: &mut ParseIter<'a>,
1924    s: &mut &'a [u8],
1925    instruction: Keyword,
1926) -> Result<Instruction<'a>, ErrorKind> {
1927    debug_assert!(token_slow(
1928        &mut p.text[instruction.span.clone()].as_bytes(),
1929        b"USER",
1930        p.escape_byte,
1931    ));
1932    // TODO: space is disallowed?
1933    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1934    if arguments.value.is_empty() {
1935        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1936    }
1937    Ok(Instruction::User(UserInstruction { user: instruction, arguments }))
1938}
1939
1940#[inline]
1941fn parse_volume<'a>(
1942    p: &mut ParseIter<'a>,
1943    s: &mut &'a [u8],
1944    instruction: Keyword,
1945) -> Result<Instruction<'a>, ErrorKind> {
1946    debug_assert!(token_slow(
1947        &mut p.text[instruction.span.clone()].as_bytes(),
1948        b"VOLUME",
1949        p.escape_byte,
1950    ));
1951    if is_maybe_json(s) {
1952        let mut tmp = *s;
1953        if let Ok((arguments, array_span)) = parse_json_array(&mut tmp, p.text, p.escape_byte) {
1954            debug_assert!(is_line_end(tmp.first()));
1955            if tmp.is_empty() {
1956                *s = &[];
1957            } else {
1958                *s = &tmp[1..];
1959            }
1960            // "VOLUME []" seems to be okay?
1961            return Ok(Instruction::Volume(VolumeInstruction {
1962                volume: instruction,
1963                arguments: JsonOrStringArray::Json(Spanned { span: array_span, value: arguments }),
1964            }));
1965        }
1966    }
1967    let arguments: SmallVec<[_; 1]> =
1968        collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1969    if arguments.is_empty() {
1970        // TODO: "VOLUME" too?
1971        return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1972    }
1973    Ok(Instruction::Volume(VolumeInstruction {
1974        volume: instruction,
1975        arguments: JsonOrStringArray::String(arguments),
1976    }))
1977}
1978
1979#[inline]
1980fn parse_workdir<'a>(
1981    p: &mut ParseIter<'a>,
1982    s: &mut &'a [u8],
1983    instruction: Keyword,
1984) -> Result<Instruction<'a>, ErrorKind> {
1985    debug_assert!(token_slow(
1986        &mut p.text[instruction.span.clone()].as_bytes(),
1987        b"WORKDIR",
1988        p.escape_byte,
1989    ));
1990    // TODO: space is disallowed if not escaped/quoted?
1991    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1992    if arguments.value.is_empty() {
1993        return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1994    }
1995    Ok(Instruction::Workdir(WorkdirInstruction { workdir: instruction, arguments }))
1996}
1997
1998// -----------------------------------------------------------------------------
1999// Parsing Helpers
2000
2001// [\r\n]
2002const LINE: u8 = 1 << 0;
2003// [ \t]
2004const SPACE: u8 = 1 << 1;
2005// [ \r\n\t]
2006const WHITESPACE: u8 = 1 << 2;
2007// [#]
2008const COMMENT: u8 = 1 << 3;
2009// ["]
2010const DOUBLE_QUOTE: u8 = 1 << 4;
2011// [\`]
2012const POSSIBLE_ESCAPE: u8 = 1 << 5;
2013// [=]
2014const EQ: u8 = 1 << 6;
2015
2016static TABLE: [u8; 256] = {
2017    let mut table = [0; 256];
2018    let mut i = 0;
2019    loop {
2020        match i {
2021            b' ' | b'\t' => table[i as usize] = WHITESPACE | SPACE,
2022            b'\n' | b'\r' => table[i as usize] = WHITESPACE | LINE,
2023            b'#' => table[i as usize] = COMMENT,
2024            b'"' => table[i as usize] = DOUBLE_QUOTE,
2025            b'\\' | b'`' => table[i as usize] = POSSIBLE_ESCAPE,
2026            b'=' => table[i as usize] = EQ,
2027            _ => {}
2028        }
2029        if i == u8::MAX {
2030            break;
2031        }
2032        i += 1;
2033    }
2034    table
2035};
2036
2037const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
2038
2039trait Store<T>: Sized {
2040    fn new() -> Self;
2041    fn push(&mut self, val: T);
2042}
2043impl<T> Store<T> for Vec<T> {
2044    #[inline]
2045    fn new() -> Self {
2046        Self::new()
2047    }
2048    #[inline]
2049    fn push(&mut self, val: T) {
2050        self.push(val);
2051    }
2052}
2053impl<T, const N: usize> Store<T> for SmallVec<[T; N]> {
2054    #[inline]
2055    fn new() -> Self {
2056        Self::new()
2057    }
2058    #[inline]
2059    fn push(&mut self, val: T) {
2060        self.push(val);
2061    }
2062}
2063impl<'a, const N: usize> Store<UnescapedString<'a>>
2064    for (SmallVec<[Source<'a>; N]>, Option<UnescapedString<'a>>)
2065{
2066    #[inline]
2067    fn new() -> Self {
2068        (SmallVec::new(), None)
2069    }
2070    #[inline]
2071    fn push(&mut self, val: UnescapedString<'a>) {
2072        if let Some(val) = self.1.replace(val) {
2073            self.0.push(Source::Path(val));
2074        }
2075    }
2076}
2077
2078#[inline]
2079fn parse_options<'a, S: Store<Flag<'a>>>(s: &mut &[u8], start: &'a str, escape_byte: u8) -> S {
2080    let mut options = S::new();
2081    'outer: loop {
2082        let Some((&b'-', mut s_next)) = s.split_first() else {
2083            break;
2084        };
2085        loop {
2086            let Some((&b, s_next_next)) = s_next.split_first() else {
2087                break 'outer;
2088            };
2089            if b == b'-' {
2090                s_next = s_next_next;
2091                break;
2092            }
2093            if skip_line_escape(&mut s_next, b, s_next_next, escape_byte) {
2094                skip_line_escape_followup(&mut s_next, escape_byte);
2095                continue;
2096            }
2097            break 'outer;
2098        }
2099        let flag_start = start.len() - s.len();
2100        *s = s_next;
2101        let name = collect_until_unescaped::<{ WHITESPACE | EQ }>(s, start, escape_byte);
2102        let Some((&b'=', s_next)) = s.split_first() else {
2103            options.push(Flag { flag_start, name, value: None });
2104            skip_spaces(s, escape_byte);
2105            continue;
2106        };
2107        *s = s_next;
2108        let value = collect_non_whitespace_unescaped(s, start, escape_byte);
2109        options.push(Flag { flag_start, name, value: Some(value) });
2110        skip_spaces(s, escape_byte);
2111    }
2112    options
2113}
2114
2115fn parse_json_array<'a, S: Store<UnescapedString<'a>>>(
2116    s: &mut &[u8],
2117    start: &'a str,
2118    escape_byte: u8,
2119) -> Result<(S, Span), usize> {
2120    debug_assert_eq!(s.first(), Some(&b'['));
2121    debug_assert_ne!(s.get(1), Some(&b'['));
2122    let mut res = S::new();
2123    let array_start = start.len() - s.len();
2124    *s = &s[1..];
2125    skip_spaces(s, escape_byte);
2126    let (&b, s_next) = s.split_first().ok_or(array_start)?;
2127    match b {
2128        b'"' => {
2129            *s = s_next;
2130            loop {
2131                let full_word_start = start.len() - s.len();
2132                let mut word_start = full_word_start;
2133                let mut buf = String::new();
2134                loop {
2135                    let (&b, s_next) = s.split_first().ok_or(array_start)?;
2136                    if TABLE[b as usize] & (LINE | DOUBLE_QUOTE | POSSIBLE_ESCAPE) == 0 {
2137                        *s = s_next;
2138                        continue;
2139                    }
2140                    match b {
2141                        b'"' => break,
2142                        b'\n' | b'\r' => return Err(array_start),
2143                        _ => {}
2144                    }
2145                    let word_end = start.len() - s.len();
2146                    if skip_line_escape(s, b, s_next, escape_byte) {
2147                        skip_line_escape_followup(s, escape_byte);
2148                        // dockerfile escape
2149                        buf.push_str(&start[word_start..word_end]);
2150                        word_start = start.len() - s.len();
2151                        continue;
2152                    }
2153                    if b == b'\\' {
2154                        // JSON escape
2155                        let word_end = start.len() - s.len();
2156                        buf.push_str(&start[word_start..word_end]);
2157                        *s = s_next;
2158                        let (new, new_start) = match *s.first().ok_or(array_start)? {
2159                            b @ (b'"' | b'\\' | b'/') => (b as char, 1),
2160                            b'b' => ('\x08', 1),
2161                            b'f' => ('\x0c', 1),
2162                            b'n' => ('\n', 1),
2163                            b'r' => ('\r', 1),
2164                            b't' => ('\t', 1),
2165                            b'u' => (parse_json_hex_escape(s, array_start)?, 5),
2166                            _ => return Err(array_start), // invalid escape
2167                        };
2168                        buf.push(new);
2169                        *s = &s[new_start..];
2170                        word_start = start.len() - s.len();
2171                        continue;
2172                    }
2173                    *s = s_next;
2174                }
2175                let word_end = start.len() - s.len();
2176                let value = if buf.is_empty() {
2177                    // no escape
2178                    Cow::Borrowed(&start[word_start..word_end])
2179                } else {
2180                    buf.push_str(&start[word_start..word_end]);
2181                    Cow::Owned(buf)
2182                };
2183                res.push(UnescapedString { span: full_word_start..word_end, value });
2184                *s = &s[1..]; // drop "
2185                skip_spaces(s, escape_byte);
2186                let (&b, s_next) = s.split_first().ok_or(array_start)?;
2187                match b {
2188                    b',' => {
2189                        *s = s_next;
2190                        skip_spaces(s, escape_byte);
2191                        let (&b, s_next) = s.split_first().ok_or(array_start)?;
2192                        if b == b'"' {
2193                            *s = s_next;
2194                            continue;
2195                        }
2196                        return Err(array_start);
2197                    }
2198                    b']' => {
2199                        *s = s_next;
2200                        break;
2201                    }
2202                    _ => return Err(array_start),
2203                }
2204            }
2205        }
2206        b']' => *s = s_next,
2207        _ => return Err(array_start),
2208    }
2209    let array_end = start.len() - s.len();
2210    skip_spaces(s, escape_byte);
2211    if !is_line_end(s.first()) {
2212        return Err(array_start);
2213    }
2214    Ok((res, array_start..array_end))
2215}
2216// Adapted from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/src/read.rs
2217#[cold]
2218fn parse_json_hex_escape(s: &mut &[u8], array_start: usize) -> Result<char, usize> {
2219    fn decode_hex_escape(s: &mut &[u8], array_start: usize) -> Result<u16, usize> {
2220        if s.len() < 4 {
2221            return Err(array_start); // EofWhileParsingString
2222        }
2223
2224        let mut n = 0;
2225        for _ in 0..4 {
2226            let ch = decode_hex_val(s[0]);
2227            *s = &s[1..];
2228            match ch {
2229                None => return Err(array_start), // InvalidEscape
2230                Some(val) => {
2231                    n = (n << 4) + val;
2232                }
2233            }
2234        }
2235        Ok(n)
2236    }
2237
2238    fn decode_hex_val(val: u8) -> Option<u16> {
2239        let n = HEX_DECODE_TABLE[val as usize] as u16;
2240        if n == u8::MAX as u16 { None } else { Some(n) }
2241    }
2242
2243    let c = match decode_hex_escape(s, array_start)? {
2244        _n @ 0xDC00..=0xDFFF => return Err(array_start), // ErrorCode::LoneLeadingSurrogateInHexEscape)
2245
2246        // Non-BMP characters are encoded as a sequence of two hex
2247        // escapes, representing UTF-16 surrogates. If deserializing a
2248        // utf-8 string the surrogates are required to be paired,
2249        // whereas deserializing a byte string accepts lone surrogates.
2250        n1 @ 0xD800..=0xDBFF => {
2251            if s.first() == Some(&b'\\') {
2252                *s = &s[1..];
2253            } else {
2254                return Err(array_start); // UnexpectedEndOfHexEscape
2255            }
2256
2257            if s.first() == Some(&b'u') {
2258                *s = &s[1..];
2259            } else {
2260                return Err(array_start); // UnexpectedEndOfHexEscape
2261            }
2262
2263            let n2 = decode_hex_escape(s, array_start)?;
2264
2265            if n2 < 0xDC00 || n2 > 0xDFFF {
2266                return Err(array_start); // LoneLeadingSurrogateInHexEscape
2267            }
2268
2269            let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
2270
2271            match char::from_u32(n) {
2272                Some(c) => c,
2273                None => return Err(array_start), // InvalidUnicodeCodePoint
2274            }
2275        }
2276
2277        // Every u16 outside of the surrogate ranges above is guaranteed
2278        // to be a legal char.
2279        n => char::from_u32(n as u32).unwrap(),
2280    };
2281    Ok(c)
2282}
2283#[allow(clippy::needless_raw_string_hashes)]
2284#[test]
2285fn test_parse_json_array() {
2286    // empty
2287    let t = r#"[]"#;
2288    let mut s = t.as_bytes();
2289    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
2290    assert_eq!(s, b"");
2291    let t = r#"[ ]"#;
2292    let mut s = t.as_bytes();
2293    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
2294    assert_eq!(s, b"");
2295    // one value
2296    let t = r#"["abc"]"#;
2297    let mut s = t.as_bytes();
2298    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
2299        span: 2..5,
2300        value: "abc".into()
2301    }]);
2302    assert_eq!(s, b"");
2303    // multi values
2304    let t = "[\"ab\",\"c\" ,  \"de\" ] \n";
2305    let mut s = t.as_bytes();
2306    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[
2307        UnescapedString { span: 2..4, value: "ab".into() },
2308        UnescapedString { span: 7..8, value: "c".into() },
2309        UnescapedString { span: 14..16, value: "de".into() },
2310    ]);
2311    assert_eq!(s, b"\n");
2312    // escape
2313    // TODO: \uXXXX
2314    let t = "[\"a\\\"\\\\\\/\\b\\f\\n\\r\\tbc\"]";
2315    let mut s = t.as_bytes();
2316    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
2317        span: 2..21,
2318        value: "a\"\\/\x08\x0c\n\r\tbc".into()
2319    }]);
2320    assert_eq!(s, b"");
2321
2322    // fail (single quote)
2323    let t = r#"['abc']"#;
2324    let mut s = t.as_bytes();
2325    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
2326    assert_eq!(s, br#"'abc']"#);
2327    // fail (extra comma)
2328    let t = r#"["abc",]"#;
2329    let mut s = t.as_bytes();
2330    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
2331    assert_eq!(s, br#"]"#);
2332    // fail (extra char after array)
2333    let t = r#"["abc"] c"#;
2334    let mut s = t.as_bytes();
2335    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
2336    assert_eq!(s, br#"c"#);
2337    // fail (invalid escape)
2338    let t = "[\"ab\\c\"]";
2339    let mut s = t.as_bytes();
2340    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
2341    assert_eq!(s, b"c\"]");
2342    // TODO: more from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/tests/test.rs#L1060
2343}
2344
/// Skips spaces and tabs, and returns `true` if one or more spaces or tabs were
/// consumed. (Does not consume non-space/tab characters.)
2347#[inline]
2348fn skip_spaces_no_escape(s: &mut &[u8]) -> bool {
2349    let start = *s;
2350    while let Some((&b, s_next)) = s.split_first() {
2351        if TABLE[b as usize] & SPACE != 0 {
2352            *s = s_next;
2353            continue;
2354        }
2355        break;
2356    }
2357    start.len() != s.len()
2358}
/// Skips spaces and tabs, and returns `true` if one or more spaces or tabs were
/// consumed. (Does not consume non-space/tab characters.)
2361#[inline]
2362fn skip_spaces(s: &mut &[u8], escape_byte: u8) -> bool {
2363    let mut has_space = false;
2364    while let Some((&b, s_next)) = s.split_first() {
2365        let t = TABLE[b as usize];
2366        if t & (SPACE | POSSIBLE_ESCAPE) != 0 {
2367            if t & SPACE != 0 {
2368                *s = s_next;
2369                has_space = true;
2370                continue;
2371            }
2372            if skip_line_escape(s, b, s_next, escape_byte) {
2373                skip_line_escape_followup(s, escape_byte);
2374                continue;
2375            }
2376        }
2377        break;
2378    }
2379    has_space
2380}
/// Skips spaces and tabs, and returns `true` if one or more spaces or tabs were
/// consumed or a line end was reached. (Does not consume non-space/tab characters.)
2383#[inline]
2384fn spaces_or_line_end(s: &mut &[u8], escape_byte: u8) -> bool {
2385    let mut has_space = false;
2386    loop {
2387        let Some((&b, s_next)) = s.split_first() else { return true };
2388        {
2389            let t = TABLE[b as usize];
2390            if t & (WHITESPACE | POSSIBLE_ESCAPE) != 0 {
2391                if t & SPACE != 0 {
2392                    *s = s_next;
2393                    has_space = true;
2394                    continue;
2395                }
2396                if t & LINE != 0 {
2397                    return true;
2398                }
2399                if skip_line_escape(s, b, s_next, escape_byte) {
2400                    skip_line_escape_followup(s, escape_byte);
2401                    continue;
2402                }
2403            }
2404            break;
2405        }
2406    }
2407    has_space
2408}
2409
2410#[inline]
2411fn skip_comments_and_whitespaces(s: &mut &[u8], escape_byte: u8) {
2412    while let Some((&b, s_next)) = s.split_first() {
2413        let t = TABLE[b as usize];
2414        if t & (WHITESPACE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2415            if t & WHITESPACE != 0 {
2416                *s = s_next;
2417                continue;
2418            }
2419            if t & COMMENT != 0 {
2420                *s = s_next;
2421                skip_this_line_no_escape(s);
2422                continue;
2423            }
2424            if skip_line_escape(s, b, s_next, escape_byte) {
2425                skip_line_escape_followup(s, escape_byte);
2426                continue;
2427            }
2428        }
2429        break;
2430    }
2431}
2432
2433#[inline]
2434fn is_line_end(b: Option<&u8>) -> bool {
2435    matches!(b, Some(b'\n' | b'\r') | None)
2436}
2437#[inline]
2438fn is_maybe_json(s: &[u8]) -> bool {
    // ADD/COPY: check for [[ to handle an escaped [ (https://docs.docker.com/reference/dockerfile/#add).
    // Others: TODO: checking for [[ handles `[[ -e .. ]]`, but is not enough to detect `[ -e .. ]`.
2441    s.first() == Some(&b'[') && s.get(1) != Some(&b'[')
2442}
2443
2444#[inline]
2445fn collect_here_doc_no_strip_tab<'a>(
2446    s: &mut &[u8],
2447    start: &'a str,
2448    _escape_byte: u8,
2449    delim: &[u8],
2450) -> Result<(&'a str, Span), ErrorKind> {
2451    let here_doc_start = start.len() - s.len();
2452    loop {
2453        if s.len() < delim.len() {
2454            return Err(ErrorKind::ExpectedOwned(
2455                str::from_utf8(delim).unwrap().to_owned(),
2456                start.len() - s.len(),
2457            ));
2458        }
2459        if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
2460            break;
2461        }
2462        skip_this_line_no_escape(s);
2463    }
2464    let end = start.len() - s.len();
2465    *s = &s[delim.len()..];
2466    if !s.is_empty() {
2467        *s = &s[1..];
2468    }
2469    let span = here_doc_start..end;
2470    Ok((&start[span.clone()], span))
2471}
2472#[inline]
2473fn collect_here_doc_strip_tab<'a>(
2474    s: &mut &[u8],
2475    start: &'a str,
2476    _escape_byte: u8,
2477    delim: &[u8],
2478) -> Result<(Cow<'a, str>, Span), ErrorKind> {
2479    let here_doc_start = start.len() - s.len();
2480    let mut current_start = here_doc_start;
2481    let mut res = String::new();
2482    loop {
2483        if s.len() < delim.len() {
2484            return Err(ErrorKind::ExpectedOwned(
2485                str::from_utf8(delim).unwrap().to_owned(),
2486                start.len() - s.len(),
2487            ));
2488        }
2489        if let Some((&b'\t', s_next)) = s.split_first() {
2490            let end = start.len() - s.len();
2491            res.push_str(&start[current_start..end]);
2492            *s = s_next;
2493            while let Some((&b'\t', s_next)) = s.split_first() {
2494                *s = s_next;
2495            }
2496            current_start = start.len() - s.len();
2497        }
2498        if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
2499            break;
2500        }
2501        skip_this_line_no_escape(s);
2502    }
2503    let end = start.len() - s.len();
2504    *s = &s[delim.len()..];
2505    if !s.is_empty() {
2506        *s = &s[1..];
2507    }
2508    let span = here_doc_start..end;
2509    if here_doc_start == current_start {
2510        Ok((Cow::Borrowed(&start[span.clone()]), span))
2511    } else {
2512        res.push_str(&start[current_start..end]);
2513        Ok((Cow::Owned(res), span))
2514    }
2515}
2516// TODO: escaped/quoted space?
2517#[inline]
2518fn collect_space_separated_unescaped_consume_line<'a, S: Store<UnescapedString<'a>>>(
2519    s: &mut &[u8],
2520    start: &'a str,
2521    escape_byte: u8,
2522) -> S {
2523    let mut res = S::new();
2524    loop {
2525        let val = collect_non_whitespace_unescaped(s, start, escape_byte);
2526        if !val.value.is_empty() {
2527            res.push(val);
2528            if skip_spaces(s, escape_byte) {
2529                continue;
2530            }
2531        }
2532        debug_assert!(is_line_end(s.first()));
2533        if !s.is_empty() {
2534            *s = &s[1..];
2535        }
2536        break;
2537    }
2538    res
2539}
2540#[inline]
2541fn collect_non_whitespace_unescaped<'a>(
2542    s: &mut &[u8],
2543    start: &'a str,
2544    escape_byte: u8,
2545) -> UnescapedString<'a> {
2546    collect_until_unescaped::<WHITESPACE>(s, start, escape_byte)
2547}
2548#[inline]
2549fn collect_non_line_unescaped_consume_line<'a>(
2550    s: &mut &[u8],
2551    start: &'a str,
2552    escape_byte: u8,
2553) -> UnescapedString<'a> {
2554    let mut val = collect_until_unescaped::<LINE>(s, start, escape_byte);
2555    debug_assert!(is_line_end(s.first()));
2556    if !s.is_empty() {
2557        *s = &s[1..];
2558    }
2559    // trim trailing spaces
2560    match &mut val.value {
2561        Cow::Borrowed(v) => {
2562            while let Some(b' ' | b'\t') = v.as_bytes().last() {
2563                *v = &v[..v.len() - 1];
2564                val.span.end -= 1;
2565            }
2566        }
2567        Cow::Owned(v) => {
2568            while let Some(b' ' | b'\t') = v.as_bytes().last() {
2569                v.pop();
2570                val.span.end -= 1;
2571            }
2572        }
2573    }
2574    val
2575}
2576#[inline]
2577fn collect_until_unescaped<'a, const UNTIL_MASK: u8>(
2578    s: &mut &[u8],
2579    start: &'a str,
2580    escape_byte: u8,
2581) -> UnescapedString<'a> {
2582    let full_word_start = start.len() - s.len();
2583    let mut word_start = full_word_start;
2584    let mut buf = String::new();
2585    while let Some((&b, s_next)) = s.split_first() {
2586        let t = TABLE[b as usize];
2587        if t & (UNTIL_MASK | POSSIBLE_ESCAPE) != 0 {
2588            if t & UNTIL_MASK != 0 {
2589                break;
2590            }
2591            let word_end = start.len() - s.len();
2592            if skip_line_escape(s, b, s_next, escape_byte) {
2593                skip_line_escape_followup(s, escape_byte);
2594                buf.push_str(&start[word_start..word_end]);
2595                word_start = start.len() - s.len();
2596                continue;
2597            }
2598        }
2599        *s = s_next;
2600    }
2601    let word_end = start.len() - s.len();
2602    let value = if buf.is_empty() {
2603        // no escape
2604        Cow::Borrowed(&start[word_start..word_end])
2605    } else {
2606        buf.push_str(&start[word_start..word_end]);
2607        Cow::Owned(buf)
2608    };
2609    UnescapedString { span: full_word_start..word_end, value }
2610}
2611
/// Skips non-whitespace (non-`[ \r\n\t]`) characters, and returns `true` if one or more
/// non-whitespace characters were consumed. (Does not consume whitespace characters.)
2614#[inline]
2615fn skip_non_whitespace_no_escape(s: &mut &[u8]) -> bool {
2616    let start = *s;
2617    while let Some((&b, s_next)) = s.split_first() {
2618        if TABLE[b as usize] & WHITESPACE != 0 {
2619            break;
2620        }
2621        *s = s_next;
2622    }
2623    start.len() != s.len()
2624}
2625// #[inline]
2626// fn skip_non_whitespace(s: &mut &[u8], escape_byte: u8) -> bool {
2627//     let mut has_non_whitespace = false;
2628//     while let Some((&b, s_next)) = s.split_first() {
2629//         if TABLE[b as usize] & WHITESPACE != 0 {
2630//             break;
2631//         }
2632//         if is_line_escape(b, s_next, escape_byte) {
2633//             skip_line_escape(s, b, s_next, escape_byte);
2634//             continue;
2635//         }
2636//         *s = s_next;
2637//         has_non_whitespace = true;
2638//         continue;
2639//     }
2640//     has_non_whitespace
2641// }
2642
2643#[inline]
2644fn skip_line_escape<'a>(s: &mut &'a [u8], b: u8, s_next: &'a [u8], escape_byte: u8) -> bool {
2645    if b == escape_byte {
2646        if let Some((&b, mut s_next)) = s_next.split_first() {
2647            if b == b'\n' {
2648                *s = s_next;
2649                return true;
2650            }
2651            if b == b'\r' {
2652                if s_next.first() == Some(&b'\n') {
2653                    *s = &s_next[1..];
2654                } else {
2655                    *s = s_next;
2656                }
2657                return true;
2658            }
2659            // It seems that "\\ \n" is also accepted.
2660            // https://github.com/moby/buildkit/blob/6d143f5602a61acef286f39ee75f1cb33c367d44/frontend/dockerfile/cmd/dockerfile-frontend/Dockerfile#L19C23-L19C24
2661            if TABLE[b as usize] & SPACE != 0 {
2662                skip_spaces_no_escape(&mut s_next);
2663                if let Some((&b, s_next)) = s_next.split_first() {
2664                    if b == b'\n' {
2665                        *s = s_next;
2666                        return true;
2667                    }
2668                    if b == b'\r' {
2669                        if s_next.first() == Some(&b'\n') {
2670                            *s = &s_next[1..];
2671                        } else {
2672                            *s = s_next;
2673                        }
2674                        return true;
2675                    }
2676                }
2677            }
2678        }
2679    }
2680    false
2681}
2682#[inline]
2683fn skip_line_escape_followup(s: &mut &[u8], _escape_byte: u8) {
2684    while let Some((&b, mut s_next)) = s.split_first() {
2685        let t = TABLE[b as usize];
2686        if t & (WHITESPACE | COMMENT) != 0 {
2687            if t & SPACE != 0 {
2688                // TODO: escape after spaces is handled in skip_spaces_no_escape
2689                skip_spaces_no_escape(&mut s_next);
2690                if let Some((&b, s_next)) = s_next.split_first() {
2691                    let t = TABLE[b as usize];
2692                    if t & (COMMENT | LINE) != 0 {
2693                        // comment or empty continuation line
2694                        *s = s_next;
2695                        if t & COMMENT != 0 {
2696                            skip_this_line_no_escape(s);
2697                        }
2698                        continue;
2699                    }
2700                }
2701            } else {
2702                // comment or empty continuation line
2703                *s = s_next;
2704                if t & COMMENT != 0 {
2705                    skip_this_line_no_escape(s);
2706                }
2707                continue;
2708            }
2709        }
2710        break;
2711    }
2712}
2713
2714#[inline]
2715fn skip_this_line_no_escape(s: &mut &[u8]) {
2716    while let Some((&b, s_next)) = s.split_first() {
2717        *s = s_next;
2718        if TABLE[b as usize] & LINE != 0 {
2719            break;
2720        }
2721    }
2722}
/// Skips non-line (non-`[\r\n]`) characters. (Consumes the terminating line character, if any.)
2724#[inline]
2725fn skip_this_line(s: &mut &[u8], escape_byte: u8) {
2726    let mut has_space_only = 0;
2727    while let Some((&b, s_next)) = s.split_first() {
2728        let t = TABLE[b as usize];
2729        if t & (LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2730            if t & LINE != 0 {
2731                *s = s_next;
2732                break;
2733            }
2734            if has_space_only != 0 && t & COMMENT != 0 {
2735                *s = s_next;
2736                skip_this_line_no_escape(s);
2737                continue;
2738            }
2739            if skip_line_escape(s, b, s_next, escape_byte) {
2740                skip_line_escape_followup(s, escape_byte);
2741                has_space_only = SPACE;
2742                continue;
2743            }
2744        }
2745        has_space_only &= t;
2746        *s = s_next;
2747    }
2748}
2749
2750#[inline(always)]
2751fn token(s: &mut &[u8], token: &'static [u8]) -> bool {
2752    let matched = starts_with_ignore_ascii_case(s, token);
2753    if matched {
2754        *s = &s[token.len()..];
2755        true
2756    } else {
2757        false
2758    }
2759}
2760#[cold]
2761fn token_slow(s: &mut &[u8], mut token: &'static [u8], escape_byte: u8) -> bool {
2762    debug_assert!(!token.is_empty() && token.iter().all(|&n| n & TO_UPPER8 == n));
2763    if s.len() < token.len() {
2764        return false;
2765    }
2766    let mut tmp = *s;
2767    while let Some((&b, tmp_next)) = tmp.split_first() {
2768        if b & TO_UPPER8 == token[0] {
2769            tmp = tmp_next;
2770            token = &token[1..];
2771            if token.is_empty() {
2772                *s = tmp;
2773                return true;
2774            }
2775            continue;
2776        }
2777        if skip_line_escape(&mut tmp, b, tmp_next, escape_byte) {
2778            skip_line_escape_followup(&mut tmp, escape_byte);
2779            continue;
2780        }
2781        break;
2782    }
2783    false
2784}
2785
2786const TO_UPPER8: u8 = 0xDF;
2787const TO_UPPER64: u64 = 0xDFDFDFDFDFDFDFDF;
2788
2789#[inline(always)] // Ensure the code getting the length of the needle is inlined.
2790fn starts_with_ignore_ascii_case(mut s: &[u8], mut needle: &'static [u8]) -> bool {
2791    debug_assert!(!needle.is_empty() && needle.iter().all(|&n| n & TO_UPPER8 == n));
2792    if s.len() < needle.len() {
2793        return false;
2794    }
2795    if needle.len() == 1 {
2796        return needle[0] == s[0] & TO_UPPER8;
2797    }
2798    if needle.len() >= 8 {
2799        loop {
2800            if u64::from_ne_bytes(needle[..8].try_into().unwrap())
2801                != u64::from_ne_bytes(s[..8].try_into().unwrap()) & TO_UPPER64
2802            {
2803                return false;
2804            }
2805            needle = &needle[8..];
2806            s = &s[8..];
2807            if needle.len() < 8 {
2808                if needle.is_empty() {
2809                    return true;
2810                }
2811                break;
2812            }
2813        }
2814    }
2815    let s = {
2816        let mut buf = [0; 8];
2817        buf[..needle.len()].copy_from_slice(&s[..needle.len()]);
2818        u64::from_ne_bytes(buf)
2819    };
2820    let needle = {
2821        let mut buf = [0; 8];
2822        buf[..needle.len()].copy_from_slice(needle);
2823        u64::from_ne_bytes(buf)
2824    };
2825    needle == s & TO_UPPER64
2826}
2827#[test]
2828fn test_starts_with_ignore_ascii_case() {
2829    assert!(starts_with_ignore_ascii_case(b"ABC", b"ABC"));
2830    assert!(starts_with_ignore_ascii_case(b"abc", b"ABC"));
2831    assert!(starts_with_ignore_ascii_case(b"AbC", b"ABC"));
2832    assert!(!starts_with_ignore_ascii_case(b"ABB", b"ABC"));
2833    assert!(starts_with_ignore_ascii_case(b"ABCDEFGH", b"ABCDEFGH"));
2834    assert!(starts_with_ignore_ascii_case(b"abcdefgh", b"ABCDEFGH"));
2835    assert!(starts_with_ignore_ascii_case(b"AbCdEfGh", b"ABCDEFGH"));
2836    assert!(!starts_with_ignore_ascii_case(b"ABCDEFGc", b"ABCDEFGH"));
2837    assert!(starts_with_ignore_ascii_case(
2838        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
2839        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2840    ));
2841    assert!(starts_with_ignore_ascii_case(
2842        b"abcdefghijklmnopqrstuvwxyz",
2843        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2844    ));
2845    assert!(starts_with_ignore_ascii_case(
2846        b"aBcDeFgHiJkLmNoPqRsTuVwXyZ",
2847        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2848    ));
2849    assert!(!starts_with_ignore_ascii_case(
2850        b"aBcDeFgHiJkLmNoPqRsTuVwXyc",
2851        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2852    ));
2853}
2854
2855// Lookup table for ascii to hex decoding.
2856#[rustfmt::skip]
2857static HEX_DECODE_TABLE: [u8; 256] = {
2858    const __: u8 = u8::MAX;
2859    [
2860        //  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E  _F
2861        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0_
2862        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1_
2863        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2_
2864         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, __, __, __, __, __, __, // 3_
2865        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4_
2866        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5_
2867        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6_
2868        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7_
2869        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8_
2870        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9_
2871        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A_
2872        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B_
2873        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C_
2874        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D_
2875        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E_
2876        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F_
2877    ]
2878};