Skip to main content

parse_dockerfile/
lib.rs

1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*!
4Dockerfile parser, written in Rust.
5
6### Usage
7
8<!-- Note: Document from sync-markdown-to-rustdoc:start through sync-markdown-to-rustdoc:end
9     is synchronized from README.md. Any changes to that range are not preserved. -->
10<!-- tidy:sync-markdown-to-rustdoc:start -->
11
12To use this crate as a library, add this to your `Cargo.toml`:
13
14```toml
15[dependencies]
16parse-dockerfile = { version = "0.1", default-features = false }
17```
18
19<div class="rustdoc-alert rustdoc-alert-note">
20
21> **ⓘ Note**
22>
23> We recommend disabling default features because they enable CLI-related
24> dependencies which the library part does not use.
25
26</div>
27
28<!-- omit in toc -->
29### Examples
30
31```
32use parse_dockerfile::{parse, Instruction};
33
34let text = "
35ARG UBUNTU_VERSION=latest
36
37FROM ubuntu:${UBUNTU_VERSION}
38RUN echo
39";
40
41let dockerfile = parse(text).unwrap();
42
43// Iterate over all instructions.
44let mut instructions = dockerfile.instructions.iter();
45assert!(matches!(instructions.next(), Some(Instruction::Arg(..))));
46assert!(matches!(instructions.next(), Some(Instruction::From(..))));
47assert!(matches!(instructions.next(), Some(Instruction::Run(..))));
48assert!(instructions.next().is_none());
49
50// Iterate over global args.
51let mut global_args = dockerfile.global_args();
52let global_arg1 = global_args.next().unwrap();
53assert_eq!(global_arg1.arguments.value, "UBUNTU_VERSION=latest");
54assert!(global_args.next().is_none());
55
56// Iterate over stages.
57let mut stages = dockerfile.stages();
58let stage1 = stages.next().unwrap();
59assert_eq!(stage1.from.image.value, "ubuntu:${UBUNTU_VERSION}");
60let mut stage1_instructions = stage1.instructions.iter();
61assert!(matches!(stage1_instructions.next(), Some(Instruction::Run(..))));
62assert!(stage1_instructions.next().is_none());
63assert!(stages.next().is_none());
64```
65
66<!-- omit in toc -->
67### Optional features
68
69- **`serde`** — Implements [`serde::Serialize`] trait for parse-dockerfile types.
70
71[`serde::Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
72
73<!-- tidy:sync-markdown-to-rustdoc:end -->
74*/
75
76#![no_std]
77#![doc(test(
78    no_crate_inject,
79    attr(allow(
80        dead_code,
81        unused_variables,
82        clippy::undocumented_unsafe_blocks,
83        clippy::unused_trait_names,
84    ))
85))]
86#![forbid(unsafe_code)]
87#![warn(
88    // Lints that may help when writing public library.
89    missing_debug_implementations,
90    missing_docs,
91    clippy::alloc_instead_of_core,
92    clippy::exhaustive_enums,
93    clippy::exhaustive_structs,
94    clippy::impl_trait_in_params,
95    clippy::std_instead_of_alloc,
96    clippy::std_instead_of_core,
97    // clippy::missing_inline_in_public_items,
98)]
99#![allow(clippy::inline_always)]
100
101extern crate alloc;
102extern crate std;
103
104#[cfg(test)]
105#[path = "gen/tests/assert_impl.rs"]
106mod assert_impl;
107#[cfg(test)]
108#[path = "gen/tests/track_size.rs"]
109mod track_size;
110
111mod error;
112
113use alloc::{borrow::Cow, boxed::Box, string::String, vec, vec::Vec};
114use core::{ops::Range, str};
115use std::collections::HashMap;
116
117use smallvec::SmallVec;
118
119pub use self::error::Error;
120use self::error::{ErrorKind, Result};
121
122/// Parses dockerfile from the given `text`.
///
/// Instructions are collected in source order and grouped into stages, where
/// every stage begins at a `FROM` instruction. `ARG` instructions are also
/// accepted before the first `FROM`.
///
/// # Errors
///
/// Returns an error if:
///
/// - the parser cannot be created from `text` or an instruction fails to parse,
/// - an instruction other than `ARG` or `FROM` appears before the first `FROM`,
/// - the input contains no `FROM` instruction (i.e. no stage), or
/// - two stages are declared with the same name.
123#[allow(clippy::missing_panics_doc)]
124pub fn parse(text: &str) -> Result<Dockerfile<'_>> {
125    let mut p = ParseIter::new(text)?;
126    let mut s = p.s;
127
    // Capacity heuristic: one instruction per 60 bytes of input, capped at 1024.
128    let mut instructions = Vec::with_capacity((p.text.len() / 60).min(1024));
129    let mut stages = Vec::with_capacity(1);
130    let mut named_stages = 0;
    // Index (into `instructions`) of the `FROM` that opened the stage being built.
131    let mut current_stage = None;
132    while let Some((&b, s_next)) = s.split_first() {
133        let instruction =
134            parse_instruction(&mut p, &mut s, b, s_next).map_err(|e| e.into_error(&p))?;
135        match instruction {
136            Instruction::From(from) => {
                // Counted so that `stages_by_name` can be preallocated below.
137                named_stages += from.as_.is_some() as usize;
138                let new_stage = instructions.len();
                // A new stage starts here; the previous one (if any) ends at this index.
139                if let Some(prev_stage) = current_stage.replace(new_stage) {
140                    stages.push(prev_stage..new_stage);
141                }
142                instructions.push(Instruction::From(from));
143            }
            // `ARG` is accepted both before the first `FROM` and inside a stage.
144            arg @ Instruction::Arg(..) => instructions.push(arg),
145            instruction => {
                // Every other instruction must belong to a stage.
146                if current_stage.is_none() {
147                    return Err(error::expected("FROM", instruction.instruction_span().start)
148                        .into_error(&p));
149                }
150                instructions.push(instruction);
151            }
152        }
153        consume_comments_and_whitespaces(&mut s, p.escape_byte);
154    }
    // Close the last stage, which extends to the end of the instruction list.
155    if let Some(current_stage) = current_stage {
156        stages.push(current_stage..instructions.len());
157    }
158
159    if stages.is_empty() {
160        // https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/dockerfile2llb/convert.go#L278
161        return Err(error::no_stage().into_error(&p));
162    }
163    // TODO: https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/dockerfile2llb/convert.go#L413
164    // > base name (%s) should not be blank
165
    // Map each stage name to its index in `stages`, rejecting duplicates.
166    let mut stages_by_name = HashMap::with_capacity(named_stages);
167    for (i, stage) in stages.iter().enumerate() {
        // Every stage range starts at the `FROM` that opened it (see the loop above).
168        let Instruction::From(from) = &instructions[stage.start] else { unreachable!() };
169        if let Some((_as, name)) = &from.as_ {
            // `insert` returns the previously stored index when the name was already taken.
170            if let Some(first_occurrence) = stages_by_name.insert(name.value.clone(), i) {
171                let Instruction::From(from) = &instructions[stages[first_occurrence].start] else {
172                    unreachable!()
173                };
                // The first occurrence was only inserted because its `as_` was `Some`.
174                let first_start = from.as_.as_ref().unwrap().1.span.start;
175                let second_start = name.span.start;
176                return Err(error::duplicate_name(first_start, second_start).into_error(&p));
177            }
178        }
179    }
180
181    Ok(Dockerfile { parser_directives: p.parser_directives, instructions, stages, stages_by_name })
182}
183
184/// Returns an iterator over instructions in the given `text`.
185///
186/// Unlike [`parse`] function, the returned iterator doesn't error on
187/// duplicate stage names.
188///
189/// # Errors
190///
/// Creating the iterator is itself fallible, which is why a `Result` is returned.
///
191/// When `.next()` on the returned iterator has returned an error, the behavior
192/// of subsequent `.next()` calls is unspecified.
193pub fn parse_iter(text: &str) -> Result<ParseIter<'_>> {
    // Thin wrapper: all setup happens in `ParseIter::new`.
194    ParseIter::new(text)
195}
196
197/// A dockerfile.
///
/// Values of this type are produced by [`parse`].
198#[derive(Debug)]
199#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
200#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
201pub struct Dockerfile<'a> {
202    /// Parser directives.
203    pub parser_directives: ParserDirectives<'a>,
204    /// Instructions.
    ///
    /// All instructions in source order, including `ARG` instructions that
    /// precede the first `FROM`.
205    pub instructions: Vec<Instruction<'a>>,
    // Index ranges into `instructions`, one per stage. Each range starts at the
    // stage's `FROM` instruction. Not serialized.
206    #[cfg_attr(feature = "serde", serde(skip))]
207    stages: Vec<Range<usize>>,
    // Maps a stage name (`FROM ... AS <name>`) to its index in `stages`.
    // Not serialized.
208    #[cfg_attr(feature = "serde", serde(skip))]
209    stages_by_name: HashMap<Cow<'a, str>, usize>,
210}
211impl<'a> Dockerfile<'a> {
212    /// Returns an iterator over global args.
213    #[allow(clippy::missing_panics_doc)] // self.stages is not empty
214    #[must_use]
215    pub fn global_args<'b>(&'b self) -> impl ExactSizeIterator<Item = &'b ArgInstruction<'a>> {
216        self.instructions[..self.stages.first().unwrap().start].iter().map(|arg| {
217            let Instruction::Arg(arg) = arg else { unreachable!() };
218            arg
219        })
220    }
221    /// Gets a stage by name.
222    #[must_use]
223    pub fn stage<'b>(&'b self, name: &str) -> Option<Stage<'a, 'b>> {
224        let i = *self.stages_by_name.get(name)?;
225        let stage = &self.stages[i];
226        let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
227        Some(Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] })
228    }
229    /// Returns an iterator over stages.
230    #[must_use]
231    pub fn stages<'b>(&'b self) -> impl ExactSizeIterator<Item = Stage<'a, 'b>> {
232        self.stages.iter().map(move |stage| {
233            let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
234            Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] }
235        })
236    }
237}
238/// A stage.
///
/// A borrowed view into a [`Dockerfile`]: the `FROM` instruction that opens the
/// stage plus the instructions that follow it.
239#[derive(Debug)]
240#[non_exhaustive]
241pub struct Stage<'a, 'b> {
242    /// The `FROM` instruction.
243    pub from: &'b FromInstruction<'a>,
244    /// The remaining instructions.
    ///
    /// These are the instructions after `from`, up to the end of this stage.
245    pub instructions: &'b [Instruction<'a>],
246}
247
248/// Parser directives.
249///
250/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#parser-directives)
// NOTE(review): each field is presumably `None` when the corresponding
// directive is absent from the input — the code that fills them is not
// visible here; confirm.
251#[derive(Debug)]
252#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
253#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
254#[non_exhaustive]
255pub struct ParserDirectives<'a> {
256    /// `syntax` parser directive.
257    ///
258    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#syntax)
259    pub syntax: Option<ParserDirective<&'a str>>,
260    /// `escape` parser directive.
261    ///
262    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#escape)
263    pub escape: Option<ParserDirective<char>>,
264    /// `check` parser directive.
265    ///
266    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#check)
267    pub check: Option<ParserDirective<&'a str>>,
268}
269/// A parser directive.
///
/// `T` is the type of the directive's value.
270#[derive(Debug)]
271#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
272#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
273pub struct ParserDirective<T> {
    // Private: only the start position of the directive name is stored; the
    // full range is exposed through `span()`.
274    /// ```text
275    /// syntax=value
276    /// ^
277    /// ```
278    start: usize,
279    /// ```text
280    /// syntax=value
281    ///        ^^^^^
282    /// ```
283    pub value: Spanned<T>,
284}
285impl<T> ParserDirective<T> {
286    /// ```text
287    /// syntax=value
288    /// ^^^^^^^^^^^^
289    /// ```
290    #[must_use]
291    pub fn span(&self) -> Span {
292        self.start..self.value.span.end
293    }
294}
295
296/// An instruction.
///
/// With the `serde` feature enabled, this is serialized with an internal
/// `kind` tag whose value is the variant name in `SCREAMING_SNAKE_CASE`.
297#[derive(Debug)]
298#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
299#[cfg_attr(feature = "serde", serde(tag = "kind"))]
300#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
301#[non_exhaustive]
302// NB: When adding new variants, update ALL_INST in tests/test.rs.
303pub enum Instruction<'a> {
304    /// `ADD` instruction.
305    Add(AddInstruction<'a>),
306    /// `ARG` instruction.
307    Arg(ArgInstruction<'a>),
308    /// `CMD` instruction.
309    Cmd(CmdInstruction<'a>),
310    /// `COPY` instruction.
311    Copy(CopyInstruction<'a>),
312    /// `ENTRYPOINT` instruction.
313    Entrypoint(EntrypointInstruction<'a>),
314    /// `ENV` instruction.
315    Env(EnvInstruction<'a>),
316    /// `EXPOSE` instruction.
317    Expose(ExposeInstruction<'a>),
318    /// `FROM` instruction.
319    From(FromInstruction<'a>),
320    /// `HEALTHCHECK` instruction.
321    Healthcheck(HealthcheckInstruction<'a>),
322    /// `LABEL` instruction.
323    Label(LabelInstruction<'a>),
324    /// `MAINTAINER` instruction (deprecated).
325    Maintainer(MaintainerInstruction<'a>),
326    /// `ONBUILD` instruction.
327    Onbuild(OnbuildInstruction<'a>),
328    /// `RUN` instruction.
329    Run(RunInstruction<'a>),
330    /// `SHELL` instruction.
331    Shell(ShellInstruction<'a>),
332    /// `STOPSIGNAL` instruction.
333    Stopsignal(StopsignalInstruction<'a>),
334    /// `USER` instruction.
335    User(UserInstruction<'a>),
336    /// `VOLUME` instruction.
337    Volume(VolumeInstruction<'a>),
338    /// `WORKDIR` instruction.
339    Workdir(WorkdirInstruction<'a>),
340}
341impl Instruction<'_> {
342    fn instruction_span(&self) -> Span {
343        match self {
344            Instruction::Add(instruction) => instruction.add.span.clone(),
345            Instruction::Arg(instruction) => instruction.arg.span.clone(),
346            Instruction::Cmd(instruction) => instruction.cmd.span.clone(),
347            Instruction::Copy(instruction) => instruction.copy.span.clone(),
348            Instruction::Entrypoint(instruction) => instruction.entrypoint.span.clone(),
349            Instruction::Env(instruction) => instruction.env.span.clone(),
350            Instruction::Expose(instruction) => instruction.expose.span.clone(),
351            Instruction::From(instruction) => instruction.from.span.clone(),
352            Instruction::Healthcheck(instruction) => instruction.healthcheck.span.clone(),
353            Instruction::Label(instruction) => instruction.label.span.clone(),
354            Instruction::Maintainer(instruction) => instruction.maintainer.span.clone(),
355            Instruction::Onbuild(instruction) => instruction.onbuild.span.clone(),
356            Instruction::Run(instruction) => instruction.run.span.clone(),
357            Instruction::Shell(instruction) => instruction.shell.span.clone(),
358            Instruction::Stopsignal(instruction) => instruction.stopsignal.span.clone(),
359            Instruction::User(instruction) => instruction.user.span.clone(),
360            Instruction::Volume(instruction) => instruction.volume.span.clone(),
361            Instruction::Workdir(instruction) => instruction.workdir.span.clone(),
362        }
363    }
364}
365/// An `ADD` instruction.
366///
367/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#add)
///
/// Syntax: `ADD [options] <src> ... <dest>`
368#[derive(Debug)]
369#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
370#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
371#[non_exhaustive]
372pub struct AddInstruction<'a> {
373    /// ```text
374    /// ADD [options] <src> ... <dest>
375    /// ^^^
376    /// ```
377    pub add: Keyword,
378    /// ```text
379    /// ADD [options] <src> ... <dest>
380    ///     ^^^^^^^^^
381    /// ```
382    pub options: SmallVec<[Flag<'a>; 1]>,
383    /// ```text
384    /// ADD [options] <src> ... <dest>
385    ///               ^^^^^^^^^
386    /// ```
387    // Expected to hold at least one source.
388    pub src: SmallVec<[Source<'a>; 1]>,
389    /// ```text
390    /// ADD [options] <src> ... <dest>
391    ///                         ^^^^^^
392    /// ```
393    pub dest: UnescapedString<'a>,
394}
395/// An `ARG` instruction.
396///
397/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#arg)
///
/// Syntax: `ARG <name>[=<default value>] [<name>[=<default value>]...]`
398#[derive(Debug)]
399#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
400#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
401#[non_exhaustive]
402pub struct ArgInstruction<'a> {
403    /// ```text
404    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
405    /// ^^^
406    /// ```
407    pub arg: Keyword,
408    /// ```text
409    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
410    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
411    /// ```
    ///
    /// Currently kept as a single string (e.g. `UBUNTU_VERSION=latest`)
    /// rather than being split into individual name/value pairs.
412    // TODO: SmallVec<[NameOptValue<'a>; 1]>
413    pub arguments: UnescapedString<'a>,
414}
415/// A `CMD` instruction.
416///
417/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#cmd)
418#[derive(Debug)]
419#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
420#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
421#[non_exhaustive]
422pub struct CmdInstruction<'a> {
423    /// ```text
424    /// CMD ["executable", "param"]
425    /// ^^^
426    /// ```
427    pub cmd: Keyword,
428    /// ```text
429    /// CMD ["executable", "param"]
430    ///     ^^^^^^^^^^^^^^^^^^^^^^^
431    /// ```
    ///
    /// The diagram shows the exec form; see [`Command`] for the shell form.
432    pub arguments: Command<'a>,
433}
434/// A `COPY` instruction.
435///
436/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#copy)
///
/// Syntax: `COPY [options] <src> ... <dest>`
437#[derive(Debug)]
438#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
439#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
440#[non_exhaustive]
441pub struct CopyInstruction<'a> {
442    /// ```text
443    /// COPY [options] <src> ... <dest>
444    /// ^^^^
445    /// ```
446    pub copy: Keyword,
447    /// ```text
448    /// COPY [options] <src> ... <dest>
449    ///      ^^^^^^^^^
450    /// ```
451    pub options: SmallVec<[Flag<'a>; 1]>,
452    /// ```text
453    /// COPY [options] <src> ... <dest>
454    ///                ^^^^^^^^^
455    /// ```
456    // Expected to hold at least one source.
457    pub src: SmallVec<[Source<'a>; 1]>,
458    /// ```text
459    /// COPY [options] <src> ... <dest>
460    ///                          ^^^^^^
461    /// ```
462    pub dest: UnescapedString<'a>,
463}
464/// An enum that represents a source value of the [`ADD` instruction](AddInstruction) and
465/// the [`COPY` instruction](CopyInstruction).
466#[derive(Debug)]
467#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
468#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
469#[non_exhaustive]
470pub enum Source<'a> {
471    /// Path or URL.
472    Path(UnescapedString<'a>),
473    /// Here-document.
474    HereDoc(HereDoc<'a>),
475}
476/// An `ENTRYPOINT` instruction.
477///
478/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#entrypoint)
479#[derive(Debug)]
480#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
481#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
482#[non_exhaustive]
483pub struct EntrypointInstruction<'a> {
484    /// ```text
485    /// ENTRYPOINT ["executable", "param"]
486    /// ^^^^^^^^^^
487    /// ```
488    pub entrypoint: Keyword,
489    /// ```text
490    /// ENTRYPOINT ["executable", "param"]
491    ///            ^^^^^^^^^^^^^^^^^^^^^^^
492    /// ```
    ///
    /// The diagram shows the exec form; see [`Command`] for the shell form.
493    pub arguments: Command<'a>,
494}
495/// An `ENV` instruction.
496///
497/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#env)
///
/// Syntax: `ENV <key>=<value> [<key>=<value>...]`
498#[derive(Debug)]
499#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
500#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
501#[non_exhaustive]
502pub struct EnvInstruction<'a> {
503    /// ```text
504    /// ENV <key>=<value> [<key>=<value>...]
505    /// ^^^
506    /// ```
507    pub env: Keyword,
508    /// ```text
509    /// ENV <key>=<value> [<key>=<value>...]
510    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
511    /// ```
    ///
    /// Currently kept as a single string rather than being split into
    /// individual key/value pairs.
512    // TODO: SmallVec<[NameValue<'a>; 1]>
513    pub arguments: UnescapedString<'a>,
514}
515/// An `EXPOSE` instruction.
516///
517/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#expose)
///
/// Syntax: `EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]`
518#[derive(Debug)]
519#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
520#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
521#[non_exhaustive]
522pub struct ExposeInstruction<'a> {
523    /// ```text
524    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
525    /// ^^^^^^
526    /// ```
527    pub expose: Keyword,
528    /// ```text
529    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
530    ///        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
531    /// ```
    // NOTE(review): presumably one element per `<port>[/<protocol>]` item —
    // confirm against the parser.
532    pub arguments: SmallVec<[UnescapedString<'a>; 1]>,
533}
534/// A `FROM` instruction.
535///
536/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#from)
///
/// Syntax: `FROM [--platform=<platform>] <image> [AS <name>]`
537#[derive(Debug)]
538#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
539#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
540#[non_exhaustive]
541pub struct FromInstruction<'a> {
542    /// ```text
543    /// FROM [--platform=<platform>] <image> [AS <name>]
544    /// ^^^^
545    /// ```
546    pub from: Keyword,
547    /// ```text
548    /// FROM [--platform=<platform>] <image> [AS <name>]
549    ///      ^^^^^^^^^^^^^^^^^^^^^^^
550    /// ```
551    pub options: Vec<Flag<'a>>,
552    /// ```text
553    /// FROM [--platform=<platform>] <image> [AS <name>]
554    ///                              ^^^^^^^
555    /// ```
556    pub image: UnescapedString<'a>,
557    /// ```text
558    /// FROM [--platform=<platform>] <image> [AS <name>]
559    ///                                      ^^^^^^^^^^^
560    /// ```
    ///
    /// When present, a pair of the `AS` keyword and the stage name.
561    pub as_: Option<(Keyword, UnescapedString<'a>)>,
562}
563/// A `HEALTHCHECK` instruction.
564///
565/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#healthcheck)
566#[derive(Debug)]
567#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
568#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
569#[non_exhaustive]
570pub struct HealthcheckInstruction<'a> {
571    /// ```text
572    /// HEALTHCHECK [options] CMD command
573    /// ^^^^^^^^^^^
574    /// ```
575    pub healthcheck: Keyword,
576    /// ```text
577    /// HEALTHCHECK [options] CMD command
578    ///             ^^^^^^^^^
579    /// ```
580    pub options: Vec<Flag<'a>>,
581    /// ```text
582    /// HEALTHCHECK [options] CMD command
583    ///                       ^^^^^^^^^^^
584    /// ```
    ///
    /// The diagram shows the `CMD` form; see [`HealthcheckArguments`] for the
    /// `NONE` form.
585    pub arguments: HealthcheckArguments<'a>,
586}
587/// Arguments of the [`HEALTHCHECK` instruction](HealthcheckInstruction).
///
/// With the `serde` feature enabled, this is serialized with an internal
/// `kind` tag whose value is the variant name in `SCREAMING_SNAKE_CASE`.
588#[derive(Debug)]
589#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
590#[cfg_attr(feature = "serde", serde(tag = "kind"))]
591#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
592#[non_exhaustive]
593pub enum HealthcheckArguments<'a> {
594    /// `HEALTHCHECK [options] CMD ...`
595    #[non_exhaustive]
596    Cmd {
597        /// ```text
598        /// HEALTHCHECK [options] CMD command
599        ///                       ^^^
600        /// ```
601        cmd: Keyword,
602        /// ```text
603        /// HEALTHCHECK [options] CMD command
604        ///                           ^^^^^^^
605        /// ```
606        arguments: Command<'a>,
607    },
608    /// `HEALTHCHECK [options] NONE`
609    #[non_exhaustive]
610    None {
611        /// ```text
612        /// HEALTHCHECK [options] NONE
613        ///                       ^^^^
614        /// ```
615        none: Keyword,
616    },
617}
618/// A `LABEL` instruction.
619///
620/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#label)
///
/// Syntax: `LABEL <key>=<value> [<key>=<value>...]`
621#[derive(Debug)]
622#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
623#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
624#[non_exhaustive]
625pub struct LabelInstruction<'a> {
626    /// ```text
627    /// LABEL <key>=<value> [<key>=<value>...]
628    /// ^^^^^
629    /// ```
630    pub label: Keyword,
631    /// ```text
632    /// LABEL <key>=<value> [<key>=<value>...]
633    ///       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
634    /// ```
    ///
    /// Currently kept as a single string rather than being split into
    /// individual key/value pairs.
635    // TODO: SmallVec<[NameValue<'a>; 1]>
636    pub arguments: UnescapedString<'a>,
637}
638/// A `MAINTAINER` instruction (deprecated).
639///
640/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#maintainer-deprecated)
///
/// Syntax: `MAINTAINER <name>`
641#[derive(Debug)]
642#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
643#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
644#[non_exhaustive]
645pub struct MaintainerInstruction<'a> {
646    /// ```text
647    /// MAINTAINER <name>
648    /// ^^^^^^^^^^
649    /// ```
650    pub maintainer: Keyword,
651    /// ```text
652    /// MAINTAINER <name>
653    ///            ^^^^^^
654    /// ```
655    pub name: UnescapedString<'a>,
656}
657/// An `ONBUILD` instruction.
658///
659/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#onbuild)
///
/// Syntax: `ONBUILD <INSTRUCTION>`
660#[derive(Debug)]
661#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
662#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
663#[non_exhaustive]
664pub struct OnbuildInstruction<'a> {
665    /// ```text
666    /// ONBUILD <INSTRUCTION>
667    /// ^^^^^^^
668    /// ```
669    pub onbuild: Keyword,
670    /// ```text
671    /// ONBUILD <INSTRUCTION>
672    ///         ^^^^^^^^^^^^^
673    /// ```
    // Boxed because `Instruction` contains `OnbuildInstruction`, which makes
    // the type recursive.
674    pub instruction: Box<Instruction<'a>>,
675}
676/// A `RUN` instruction.
677///
678/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#run)
///
/// Syntax: `RUN [options] <command> ...`
679#[derive(Debug)]
680#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
681#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
682#[non_exhaustive]
683pub struct RunInstruction<'a> {
684    /// ```text
685    /// RUN [options] <command> ...
686    /// ^^^
687    /// ```
688    pub run: Keyword,
689    /// ```text
690    /// RUN [options] <command> ...
691    ///     ^^^^^^^^^
692    /// ```
693    pub options: SmallVec<[Flag<'a>; 1]>,
694    /// ```text
695    /// RUN [options] <command> ...
696    ///               ^^^^^^^^^^^^^
697    /// ```
    ///
    /// Either exec form or shell form; see [`Command`].
698    pub arguments: Command<'a>,
    /// Here-documents attached to this instruction.
    ///
699    /// ```text
700    ///   RUN [options] <<EOF
701    /// /               ^^^^^
702    /// | ...
703    /// | EOF
704    /// |_^^^
705    /// ```
706    pub here_docs: Vec<HereDoc<'a>>,
707}
708/// A `SHELL` instruction.
709///
710/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell)
///
/// Syntax: `SHELL ["executable", "param"]`
711#[derive(Debug)]
712#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
713#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
714#[non_exhaustive]
715pub struct ShellInstruction<'a> {
716    /// ```text
717    /// SHELL ["executable", "param"]
718    /// ^^^^^
719    /// ```
720    pub shell: Keyword,
721    /// ```text
722    /// SHELL ["executable", "param"]
723    ///       ^^^^^^^^^^^^^^^^^^^^^^^
724    /// ```
    // The inline capacity of 4 is chosen to fit the common cases below.
725    // Usually at least 2, e.g., ["/bin/sh", "-c"]
726    // Common cases are 4, e.g., ["/bin/bash", "-o", "pipefail", "-c"]
727    pub arguments: SmallVec<[UnescapedString<'a>; 4]>,
728}
729/// A `STOPSIGNAL` instruction.
730///
731/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#stopsignal)
///
/// Syntax: `STOPSIGNAL signal`
732#[derive(Debug)]
733#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
734#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
735#[non_exhaustive]
736pub struct StopsignalInstruction<'a> {
737    /// ```text
738    /// STOPSIGNAL signal
739    /// ^^^^^^^^^^
740    /// ```
741    pub stopsignal: Keyword,
742    /// ```text
743    /// STOPSIGNAL signal
744    ///            ^^^^^^
745    /// ```
746    pub arguments: UnescapedString<'a>,
747}
748/// A `USER` instruction.
749///
750/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#user)
///
/// Syntax: `USER <user>[:<group>]`
751#[derive(Debug)]
752#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
753#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
754#[non_exhaustive]
755pub struct UserInstruction<'a> {
756    /// ```text
757    /// USER <user>[:<group>]
758    /// ^^^^
759    /// ```
760    pub user: Keyword,
761    /// ```text
762    /// USER <user>[:<group>]
763    ///      ^^^^^^^^^^^^^^^^
764    /// ```
    ///
    /// The user and the optional group are kept together in one string.
765    pub arguments: UnescapedString<'a>,
766}
767/// A `VOLUME` instruction.
768///
769/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#volume)
770#[derive(Debug)]
771#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
772#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
773#[non_exhaustive]
774pub struct VolumeInstruction<'a> {
775    /// ```text
776    /// VOLUME ["/data"]
777    /// ^^^^^^
778    /// ```
779    pub volume: Keyword,
780    /// ```text
781    /// VOLUME ["/data"]
782    ///        ^^^^^^^^^
783    /// ```
    ///
    /// The diagram shows the JSON array form; see [`JsonOrStringArray`] for
    /// the space-separated string form.
784    pub arguments: JsonOrStringArray<'a, 1>,
785}
786/// A `WORKDIR` instruction.
787///
788/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#workdir)
///
/// Syntax: `WORKDIR /path/to/workdir`
789#[derive(Debug)]
790#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
791#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
792#[non_exhaustive]
793pub struct WorkdirInstruction<'a> {
794    /// ```text
795    /// WORKDIR /path/to/workdir
796    /// ^^^^^^^
797    /// ```
798    pub workdir: Keyword,
799    /// ```text
800    /// WORKDIR /path/to/workdir
801    ///         ^^^^^^^^^^^^^^^^
802    /// ```
803    pub arguments: UnescapedString<'a>,
804}
805
806/// A keyword.
///
/// Only the position is stored; the keyword text itself is not kept.
807#[derive(Debug)]
808#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
809#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
810#[non_exhaustive]
811pub struct Keyword {
    // Range covering the keyword.
    // NOTE(review): presumably byte offsets into the parsed text — confirm.
812    #[allow(missing_docs)]
813    pub span: Span,
814}
815
816/// An option flag.
///
/// Syntax: `--<name>[=<value>]`
817#[derive(Debug)]
818#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
819#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
820pub struct Flag<'a> {
    // Private: position of the leading `--`; the ranges derived from it are
    // exposed through `flag_span()` and `span()`.
821    /// ```text
822    /// --platform=linux/amd64
823    /// ^
824    /// ```
825    flag_start: usize,
826    /// ```text
827    /// --platform=linux/amd64
828    ///   ^^^^^^^^
829    /// ```
830    pub name: UnescapedString<'a>,
831    /// ```text
832    /// --platform=linux/amd64
833    ///            ^^^^^^^^^^^
834    /// ```
    ///
    /// `None` when the flag has no value part.
835    pub value: Option<UnescapedString<'a>>,
836}
837impl Flag<'_> {
838    /// ```text
839    /// --platform=linux/amd64
840    /// ^^^^^^^^^^
841    /// ```
842    #[must_use]
843    pub fn flag_span(&self) -> Span {
844        self.flag_start..self.name.span.end
845    }
846    /// ```text
847    /// --platform=linux/amd64
848    /// ^^^^^^^^^^^^^^^^^^^^^^
849    /// ```
850    #[must_use]
851    pub fn span(&self) -> Span {
852        match &self.value {
853            Some(v) => self.flag_start..v.span.end,
854            None => self.flag_span(),
855        }
856    }
857}
858
859/// An unescaped string.
///
/// Pairs a string value with the span it was read from.
860#[derive(Debug, PartialEq)]
861#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
862#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
863#[non_exhaustive]
864pub struct UnescapedString<'a> {
    // Range of the string in the source.
865    #[allow(missing_docs)]
866    pub span: Span,
    // The string value.
    // NOTE(review): presumably `Cow::Borrowed` when the source text can be
    // used as-is and `Cow::Owned` when unescaping changed it — confirm
    // against the parser.
867    #[allow(missing_docs)]
868    pub value: Cow<'a, str>,
869}
870impl UnescapedString<'_> {
871    #[inline]
872    fn trim_end(&mut self) {
873        // trim trailing spaces of the value
874        match &mut self.value {
875            Cow::Borrowed(v) => {
876                while let Some(&b) = v.as_bytes().last() {
877                    if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) == 0 {
878                        break;
879                    }
880                    *v = &v[..v.len() - 1];
881                    self.span.end -= 1;
882                }
883            }
884            Cow::Owned(v) => {
885                while let Some(&b) = v.as_bytes().last() {
886                    if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) == 0 {
887                        break;
888                    }
889                    v.pop();
890                    self.span.end -= 1;
891                }
892            }
893        }
894    }
895}
896
897/// A command.
898///
899/// This is used in the [`RUN`](RunInstruction), [`CMD`](CmdInstruction), and
900/// [`ENTRYPOINT`](EntrypointInstruction) instructions.
///
/// It is also used for the command of the `CMD` form of
/// [`HEALTHCHECK`](HealthcheckArguments).
901///
902/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell-and-exec-form)
903#[derive(Debug)]
904#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
905#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
906#[non_exhaustive]
907pub enum Command<'a> {
908    /// Exec-form (JSON array)
909    // Expected to hold at least one element.
910    Exec(Spanned<SmallVec<[UnescapedString<'a>; 1]>>),
911    /// Shell-form (space-separated string or here-documents), escape preserved
    // Borrowed directly from the input, which is why escapes are preserved.
912    Shell(Spanned<&'a str>),
913}
914
915// TODO: merge the two variants? It would reduce the size, but would be confusing when the array is modified.
916/// A JSON array or space-separated string.
917///
918/// This is used in the [`VOLUME` instruction](VolumeInstruction).
///
/// `N` is the inline capacity of the underlying [`SmallVec`].
919#[derive(Debug)]
920#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
921#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
922#[allow(clippy::exhaustive_enums)]
923pub enum JsonOrStringArray<'a, const N: usize> {
924    /// JSON array.
925    Json(Spanned<SmallVec<[UnescapedString<'a>; N]>>),
926    /// Space-separated string.
927    String(SmallVec<[UnescapedString<'a>; N]>),
928}
929
/// A here-document.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct HereDoc<'a> {
    /// Location of the here-document in the source text.
    // NOTE(review): the exact extent (body only, or including the delimiter
    // lines) is decided by the here-document collector — confirm there.
    #[allow(missing_docs)]
    pub span: Span,
    /// `false` if delimiter is quoted.
    pub expand: bool,
    /// The collected contents of the here-document.
    #[allow(missing_docs)]
    pub value: Cow<'a, str>,
}
943
/// A spanned value.
///
/// Pairs a parsed value with the range of the source text it was parsed from.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[allow(clippy::exhaustive_structs)]
pub struct Spanned<T> {
    /// Where the value is located in the source text.
    #[allow(missing_docs)]
    pub span: Span,
    /// The value itself.
    #[allow(missing_docs)]
    pub value: T,
}
955
/// A half-open range (`start..end`) of byte offsets into the parsed text.
///
/// The parser computes offsets against the text after a leading UTF-8 BOM,
/// if any, has been removed.
#[allow(missing_docs)]
pub type Span = Range<usize>;
958
959// -----------------------------------------------------------------------------
960// Parsing
961
/// An iterator over instructions.
///
/// This type is returned by [`parse_iter`] function.
///
/// Each item is either the next parsed [`Instruction`] or the error that was
/// hit while parsing it.
#[allow(missing_debug_implementations)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ParseIter<'a> {
    // The whole input, with a leading UTF-8 BOM (if any) already removed.
    // Span offsets are computed as `text.len() - s.len()`.
    text: &'a str,
    // The not-yet-consumed tail of `text`, as bytes.
    s: &'a [u8],
    // The active escape byte: `DEFAULT_ESCAPE_BYTE` unless the `escape`
    // parser directive selected '`'.
    escape_byte: u8,
    // Whether a build stage has been started. The iterator sets it when it
    // yields a `FROM` and rejects anything other than `ARG`/`FROM` while it
    // is `false`.
    has_stage: bool,
    // NOTE(review): initialized to `false` by the constructor; presumably
    // tracks whether the parser is inside an `ONBUILD` instruction — confirm
    // against `parse_onbuild`.
    in_onbuild: bool,
    // The `syntax`, `escape` and `check` parser directives found at the top
    // of the input, if any.
    parser_directives: ParserDirectives<'a>,
}
975impl<'a> ParseIter<'a> {
976    fn new(mut text: &'a str) -> Result<Self> {
977        // https://github.com/moby/moby/pull/23234
978        if text.as_bytes().starts_with(UTF8_BOM) {
979            text = &text[UTF8_BOM.len()..];
980        }
981        let mut p = Self {
982            text,
983            s: text.as_bytes(),
984            escape_byte: DEFAULT_ESCAPE_BYTE,
985            has_stage: false,
986            in_onbuild: false,
987            parser_directives: ParserDirectives {
988                // https://docs.docker.com/reference/dockerfile/#parser-directives
989                syntax: None,
990                escape: None,
991                check: None,
992            },
993        };
994
995        parse_parser_directives(&mut p).map_err(|e| e.into_error(&p))?;
996
997        // https://docs.docker.com/reference/dockerfile/#format
998        // > For backward compatibility, leading whitespace before comments (#) and
999        // > instructions (such as RUN) are ignored, but discouraged.
1000        consume_comments_and_whitespaces(&mut p.s, p.escape_byte);
1001        Ok(p)
1002    }
1003}
1004impl<'a> Iterator for ParseIter<'a> {
1005    type Item = Result<Instruction<'a>>;
1006    #[inline]
1007    fn next(&mut self) -> Option<Self::Item> {
1008        let p = self;
1009        let mut s = p.s;
1010        if let Some((&b, s_next)) = s.split_first() {
1011            let instruction = match parse_instruction(p, &mut s, b, s_next) {
1012                Ok(i) => i,
1013                Err(e) => return Some(Err(e.into_error(p))),
1014            };
1015            match &instruction {
1016                Instruction::From(..) => {
1017                    p.has_stage = true;
1018                }
1019                Instruction::Arg(..) => {}
1020                instruction => {
1021                    if !p.has_stage {
1022                        return Some(Err(error::expected(
1023                            "FROM",
1024                            instruction.instruction_span().start,
1025                        )
1026                        .into_error(p)));
1027                    }
1028                }
1029            }
1030            consume_comments_and_whitespaces(&mut s, p.escape_byte);
1031            p.s = s;
1032            return Some(Ok(instruction));
1033        }
1034        if !p.has_stage {
1035            // https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/dockerfile2llb/convert.go#L278
1036            return Some(Err(error::no_stage().into_error(p)));
1037        }
1038        None
1039    }
1040}
1041
// The escape byte in effect unless the `escape` parser directive selects '`'.
const DEFAULT_ESCAPE_BYTE: u8 = b'\\';
1043
1044fn parse_parser_directives(p: &mut ParseIter<'_>) -> Result<(), ErrorKind> {
1045    while let Some((&b'#', s_next)) = p.s.split_first() {
1046        p.s = s_next;
1047        consume_whitespaces_no_line_continuation(&mut p.s);
1048        let directive_start = p.text.len() - p.s.len();
1049        if token(&mut p.s, b"SYNTAX") {
1050            consume_whitespaces_no_line_continuation(&mut p.s);
1051            if let Some((&b'=', s_next)) = p.s.split_first() {
1052                p.s = s_next;
1053                if p.parser_directives.syntax.is_some() {
1054                    // > Invalid due to appearing twice
1055                    p.parser_directives.syntax = None;
1056                    p.parser_directives.escape = None;
1057                    p.parser_directives.check = None;
1058                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1059                    consume_current_line_no_line_continuation(&mut p.s);
1060                    break;
1061                }
1062                consume_whitespaces_no_line_continuation(&mut p.s);
1063                let value_start = p.text.len() - p.s.len();
1064                consume_until_whitespaces_or_line_no_line_continuation(&mut p.s);
1065                let end = p.text.len() - p.s.len();
1066                let value = trim_end(p.text, value_start, end);
1067                p.parser_directives.syntax = Some(ParserDirective {
1068                    start: directive_start,
1069                    value: Spanned { span: value_start..value_start + value.len(), value },
1070                });
1071                consume_current_line_no_line_continuation(&mut p.s);
1072                continue;
1073            }
1074        } else if token(&mut p.s, b"CHECK") {
1075            consume_whitespaces_no_line_continuation(&mut p.s);
1076            if let Some((&b'=', s_next)) = p.s.split_first() {
1077                p.s = s_next;
1078                if p.parser_directives.check.is_some() {
1079                    // > Invalid due to appearing twice
1080                    p.parser_directives.syntax = None;
1081                    p.parser_directives.escape = None;
1082                    p.parser_directives.check = None;
1083                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1084                    consume_current_line_no_line_continuation(&mut p.s);
1085                    break;
1086                }
1087                consume_whitespaces_no_line_continuation(&mut p.s);
1088                let value_start = p.text.len() - p.s.len();
1089                consume_until_whitespaces_or_line_no_line_continuation(&mut p.s);
1090                let end = p.text.len() - p.s.len();
1091                let value = trim_end(p.text, value_start, end);
1092                p.parser_directives.check = Some(ParserDirective {
1093                    start: directive_start,
1094                    value: Spanned { span: value_start..value_start + value.len(), value },
1095                });
1096                consume_current_line_no_line_continuation(&mut p.s);
1097                continue;
1098            }
1099        } else if token(&mut p.s, b"ESCAPE") {
1100            consume_whitespaces_no_line_continuation(&mut p.s);
1101            if let Some((&b'=', s_next)) = p.s.split_first() {
1102                p.s = s_next;
1103                if p.parser_directives.escape.is_some() {
1104                    // > Invalid due to appearing twice
1105                    p.parser_directives.syntax = None;
1106                    p.parser_directives.escape = None;
1107                    p.parser_directives.check = None;
1108                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1109                    consume_current_line_no_line_continuation(&mut p.s);
1110                    break;
1111                }
1112                consume_whitespaces_no_line_continuation(&mut p.s);
1113                let value_start = p.text.len() - p.s.len();
1114                consume_until_whitespaces_or_line_no_line_continuation(&mut p.s);
1115                let end = p.text.len() - p.s.len();
1116                let value = trim_end(p.text, value_start, end);
1117                match value {
1118                    "`" => p.escape_byte = b'`',
1119                    "\\" => {}
1120                    _ => return Err(error::invalid_escape(value_start)),
1121                }
1122                p.parser_directives.escape = Some(ParserDirective {
1123                    start: directive_start,
1124                    value: Spanned {
1125                        span: value_start..value_start + value.len(),
1126                        value: p.escape_byte as char,
1127                    },
1128                });
1129                consume_current_line_no_line_continuation(&mut p.s);
1130                continue;
1131            }
1132        }
1133        consume_current_line_no_line_continuation(&mut p.s);
1134        break;
1135    }
1136    Ok(())
1137}
1138
/// Parses one instruction and dispatches to the matching `parse_*` function.
///
/// `s` points at the start of the instruction when this is called; `b` is its
/// first byte and `s_next` is everything after that byte (the caller obtains
/// both from `s.split_first()`). On entry `s` is advanced past `b`, so the
/// keyword matchers below only have to match the rest of each keyword
/// (`&b"ARG"[1..]` and so on).
///
/// A keyword is accepted only if `consume_whitespaces_or_is_empty_line`
/// returns `true` after it. The keyword's span, from the start of the
/// instruction to the end of the keyword, is passed on as a [`Keyword`].
///
/// # Errors
///
/// Returns `error::unknown_instruction` at the start of the instruction if no
/// keyword is accepted, or whatever error the dispatched `parse_*` function
/// returns.
#[inline]
fn parse_instruction<'a>(
    p: &mut ParseIter<'a>,
    s: &mut &'a [u8],
    b: u8,
    s_next: &'a [u8],
) -> Result<Instruction<'a>, ErrorKind> {
    let instruction_start = p.text.len() - s.len();
    *s = s_next;
    // NB: `token_slow` must be called after all `token` calls.
    // NOTE(review): `token_slow` additionally takes the escape byte, so it
    // presumably handles escape sequences inside the keyword — confirm at its
    // definition.
    // Masking with `TO_UPPER8` lets the arms below be written against the
    // upper-case letter only (presumably it folds ASCII lower case to upper
    // case — confirm at its definition).
    match b & TO_UPPER8 {
        b'A' => {
            if token(s, &b"ARG"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_arg(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"ADD"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    let add = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
                }
            } else if token_slow(s, &b"ARG"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_arg(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"ADD"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    let add = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
                }
            }
        }
        b'C' => {
            if token(s, &b"COPY"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    let copy = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
                }
            } else if token(s, &b"CMD"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_cmd(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"COPY"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    let copy = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
                }
            } else if token_slow(s, &b"CMD"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_cmd(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'E' => {
            if token(s, &b"ENV"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_env(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"EXPOSE"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_expose(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"ENTRYPOINT"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"ENV"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_env(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"EXPOSE"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_expose(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"ENTRYPOINT"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
                }
            }
        }
        // For first letters with a single candidate keyword, `token` and
        // `token_slow` are simply tried in that order.
        b'F' => {
            cold_path();
            if token(s, &b"FROM"[1..]) || token_slow(s, &b"FROM"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_from(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'H' => {
            cold_path();
            if token(s, &b"HEALTHCHECK"[1..]) || token_slow(s, &b"HEALTHCHECK"[1..], p.escape_byte)
            {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_healthcheck(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'L' => {
            cold_path();
            if token(s, &b"LABEL"[1..]) || token_slow(s, &b"LABEL"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_label(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'M' => {
            cold_path();
            if token(s, &b"MAINTAINER"[1..]) || token_slow(s, &b"MAINTAINER"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_maintainer(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'O' => {
            cold_path();
            if token(s, &b"ONBUILD"[1..]) || token_slow(s, &b"ONBUILD"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_onbuild(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'R' => {
            if token(s, &b"RUN"[1..]) || token_slow(s, &b"RUN"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_run(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'S' => {
            cold_path();
            if token(s, &b"SHELL"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_shell(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"STOPSIGNAL"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"SHELL"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_shell(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"STOPSIGNAL"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'U' => {
            cold_path();
            if token(s, &b"USER"[1..]) || token_slow(s, &b"USER"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_user(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'V' => {
            cold_path();
            if token(s, &b"VOLUME"[1..]) || token_slow(s, &b"VOLUME"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_volume(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'W' => {
            cold_path();
            if token(s, &b"WORKDIR"[1..]) || token_slow(s, &b"WORKDIR"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
                    return parse_workdir(p, s, Keyword { span: instruction_span });
                }
            }
        }
        _ => {}
    }
    // Reached when no keyword matched, or when a keyword matched but
    // `consume_whitespaces_or_is_empty_line` returned `false` after it.
    Err(error::unknown_instruction(instruction_start))
}
1346
1347#[inline]
1348fn parse_arg<'a>(
1349    p: &mut ParseIter<'a>,
1350    s: &mut &'a [u8],
1351    instruction: Keyword,
1352) -> Result<Instruction<'a>, ErrorKind> {
1353    debug_assert!(token_slow(
1354        &mut p.text[instruction.span.clone()].as_bytes(),
1355        b"ARG",
1356        p.escape_byte,
1357    ));
1358    let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1359    arguments.trim_end();
1360    if arguments.value.is_empty() {
1361        return Err(error::at_least_one_argument(instruction.span.start));
1362    }
1363    Ok(Instruction::Arg(ArgInstruction { arg: instruction, arguments }))
1364}
1365
1366#[inline]
1367fn parse_add_or_copy<'a>(
1368    p: &mut ParseIter<'a>,
1369    s: &mut &'a [u8],
1370    instruction: &Keyword,
1371) -> Result<(SmallVec<[Flag<'a>; 1]>, SmallVec<[Source<'a>; 1]>, UnescapedString<'a>), ErrorKind> {
1372    debug_assert!(
1373        token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"ADD", p.escape_byte,)
1374            || token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"COPY", p.escape_byte,)
1375    );
1376    let options = parse_options(s, p.text, p.escape_byte);
1377    if is_maybe_json(s) {
1378        let mut tmp = *s;
1379        if let Ok(((src, dest), _array_span)) = parse_json_array::<(
1380            SmallVec<[Source<'_>; 1]>,
1381            Option<_>,
1382        )>(&mut tmp, p.text, p.escape_byte)
1383        {
1384            if let Some((&b, s_next)) = tmp.split_first() {
1385                let consumed = consume_newline(b, s, s_next);
1386                debug_assert!(consumed);
1387            } else {
1388                *s = &[];
1389            }
1390            if src.is_empty() {
1391                return Err(error::at_least_two_arguments(instruction.span.start));
1392            }
1393            return Ok((options, src, dest.unwrap()));
1394        }
1395    }
1396    let (mut src, dest) = collect_space_separated_consume_line::<(
1397        SmallVec<[Source<'_>; 1]>,
1398        Option<_>,
1399    )>(s, p.text, p.escape_byte);
1400    if src.is_empty() {
1401        return Err(error::at_least_two_arguments(instruction.span.start));
1402    }
1403    for src in &mut src {
1404        let Source::Path(path) = src else { unreachable!() };
1405        let mut val = path.value.as_bytes();
1406        let Some(val_next) = val.strip_prefix(b"<<") else { continue };
1407        let Some((delim, strip_tab, expand)) =
1408            collect_here_doc_delim(&mut val, val_next, &path.value)?
1409        else {
1410            continue;
1411        };
1412        debug_assert!(val.is_empty()); // because of collect_space_separated_unescaped_consume_line
1413        let (here_doc, span) = collect_here_doc(s, p.text, &delim, strip_tab)?;
1414        *src = Source::HereDoc(HereDoc { span, expand, value: here_doc });
1415    }
1416    Ok((options, src, dest.unwrap()))
1417}
1418
1419#[allow(clippy::unnecessary_wraps)]
1420#[inline]
1421fn parse_cmd<'a>(
1422    p: &mut ParseIter<'a>,
1423    s: &mut &'a [u8],
1424    instruction: Keyword,
1425) -> Result<Instruction<'a>, ErrorKind> {
1426    debug_assert!(token_slow(
1427        &mut p.text[instruction.span.clone()].as_bytes(),
1428        b"CMD",
1429        p.escape_byte,
1430    ));
1431    if is_maybe_json(s) {
1432        let mut tmp = *s;
1433        if let Ok((arguments, array_span)) =
1434            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1435        {
1436            if let Some((&b, s_next)) = tmp.split_first() {
1437                let consumed = consume_newline(b, s, s_next);
1438                debug_assert!(consumed);
1439            } else {
1440                *s = &[];
1441            }
1442            // "CMD []" seems to be okay?
1443            // https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/parser/testfiles/brimstone-docker-consul/Dockerfile#L3
1444            return Ok(Instruction::Cmd(CmdInstruction {
1445                cmd: instruction,
1446                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1447            }));
1448        }
1449    }
1450    let arguments_start = p.text.len() - s.len();
1451    consume_current_line(s, p.escape_byte);
1452    let end = p.text.len() - s.len();
1453    let arguments = trim_end(p.text, arguments_start, end);
1454    Ok(Instruction::Cmd(CmdInstruction {
1455        cmd: instruction,
1456        arguments: Command::Shell(Spanned {
1457            span: arguments_start..arguments_start + arguments.len(),
1458            value: arguments,
1459        }),
1460    }))
1461}
1462
1463#[inline]
1464fn parse_env<'a>(
1465    p: &mut ParseIter<'a>,
1466    s: &mut &'a [u8],
1467    instruction: Keyword,
1468) -> Result<Instruction<'a>, ErrorKind> {
1469    debug_assert!(token_slow(
1470        &mut p.text[instruction.span.clone()].as_bytes(),
1471        b"ENV",
1472        p.escape_byte,
1473    ));
1474    let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1475    arguments.trim_end();
1476    if arguments.value.is_empty() {
1477        return Err(error::at_least_one_argument(instruction.span.start));
1478    }
1479    Ok(Instruction::Env(EnvInstruction { env: instruction, arguments }))
1480}
1481
1482#[inline]
1483fn parse_expose<'a>(
1484    p: &mut ParseIter<'a>,
1485    s: &mut &'a [u8],
1486    instruction: Keyword,
1487) -> Result<Instruction<'a>, ErrorKind> {
1488    debug_assert!(token_slow(
1489        &mut p.text[instruction.span.clone()].as_bytes(),
1490        b"EXPOSE",
1491        p.escape_byte,
1492    ));
1493    let arguments: SmallVec<[_; 1]> =
1494        collect_space_separated_consume_line(s, p.text, p.escape_byte);
1495    if arguments.is_empty() {
1496        return Err(error::at_least_one_argument(instruction.span.start));
1497    }
1498    Ok(Instruction::Expose(ExposeInstruction { expose: instruction, arguments }))
1499}
1500
1501#[inline]
1502fn parse_entrypoint<'a>(
1503    p: &mut ParseIter<'a>,
1504    s: &mut &'a [u8],
1505    instruction: Keyword,
1506) -> Result<Instruction<'a>, ErrorKind> {
1507    debug_assert!(token_slow(
1508        &mut p.text[instruction.span.clone()].as_bytes(),
1509        b"ENTRYPOINT",
1510        p.escape_byte,
1511    ));
1512    if is_maybe_json(s) {
1513        let mut tmp = *s;
1514        if let Ok((arguments, array_span)) =
1515            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1516        {
1517            if let Some((&b, s_next)) = tmp.split_first() {
1518                let consumed = consume_newline(b, s, s_next);
1519                debug_assert!(consumed);
1520            } else {
1521                *s = &[];
1522            }
1523            if arguments.is_empty() {
1524                return Err(error::at_least_one_argument(instruction.span.start));
1525            }
1526            return Ok(Instruction::Entrypoint(EntrypointInstruction {
1527                entrypoint: instruction,
1528                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1529            }));
1530        }
1531    }
1532    let arguments_start = p.text.len() - s.len();
1533    consume_current_line(s, p.escape_byte);
1534    let end = p.text.len() - s.len();
1535    let arguments = trim_end(p.text, arguments_start, end);
1536    if arguments.is_empty() {
1537        return Err(error::at_least_one_argument(instruction.span.start));
1538    }
1539    Ok(Instruction::Entrypoint(EntrypointInstruction {
1540        entrypoint: instruction,
1541        arguments: Command::Shell(Spanned {
1542            span: arguments_start..arguments_start + arguments.len(),
1543            value: arguments,
1544        }),
1545    }))
1546}
1547
1548#[inline]
1549fn parse_from<'a>(
1550    p: &mut ParseIter<'a>,
1551    s: &mut &'a [u8],
1552    instruction: Keyword,
1553) -> Result<Instruction<'a>, ErrorKind> {
1554    debug_assert!(token_slow(
1555        &mut p.text[instruction.span.clone()].as_bytes(),
1556        b"FROM",
1557        p.escape_byte,
1558    ));
1559    let options = parse_options(s, p.text, p.escape_byte);
1560    // TODO: https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/dockerfile2llb/convert.go#L413
1561    // > base name (%s) should not be blank
1562    let image = collect_non_whitespace(s, p.text, p.escape_byte);
1563    if image.value.is_empty() {
1564        return Err(error::at_least_one_argument(instruction.span.start));
1565    }
1566    let mut as_ = None;
1567    if consume_whitespaces(s, p.escape_byte) {
1568        let as_start = p.text.len() - s.len();
1569        if token(s, b"AS") || token_slow(s, b"AS", p.escape_byte) {
1570            let as_span = as_start..p.text.len() - s.len();
1571            if !consume_whitespaces(s, p.escape_byte) {
1572                return Err(error::expected("AS", as_start));
1573            }
1574            let name = collect_non_whitespace(s, p.text, p.escape_byte);
1575            consume_whitespaces(s, p.escape_byte);
1576            if !is_line_end(s.first()) {
1577                return Err(error::expected("newline or eof", p.text.len() - s.len()));
1578            }
1579            as_ = Some((Keyword { span: as_span }, name));
1580        } else if !is_line_end(s.first()) {
1581            return Err(error::expected("AS", as_start));
1582        }
1583    }
1584    Ok(Instruction::From(FromInstruction { from: instruction, options, image, as_ }))
1585}
1586
1587#[inline]
1588fn parse_healthcheck<'a>(
1589    p: &mut ParseIter<'a>,
1590    s: &mut &'a [u8],
1591    instruction: Keyword,
1592) -> Result<Instruction<'a>, ErrorKind> {
1593    debug_assert!(token_slow(
1594        &mut p.text[instruction.span.clone()].as_bytes(),
1595        b"HEALTHCHECK",
1596        p.escape_byte,
1597    ));
1598    let options = parse_options(s, p.text, p.escape_byte);
1599    let Some((&b, s_next)) = s.split_first() else {
1600        return Err(error::expected("CMD or NONE", p.text.len() - s.len()));
1601    };
1602    let cmd_or_none_start = p.text.len() - s.len();
1603    match b & TO_UPPER8 {
1604        b'C' => {
1605            *s = s_next;
1606            if token(s, &b"CMD"[1..]) || token_slow(s, &b"CMD"[1..], p.escape_byte) {
1607                let cmd_span = cmd_or_none_start..p.text.len() - s.len();
1608                let cmd_keyword = Keyword { span: cmd_span };
1609                if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1610                    if is_maybe_json(s) {
1611                        let mut tmp = *s;
1612                        if let Ok((arguments, array_span)) =
1613                            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1614                        {
1615                            if let Some((&b, s_next)) = tmp.split_first() {
1616                                let consumed = consume_newline(b, s, s_next);
1617                                debug_assert!(consumed);
1618                            } else {
1619                                *s = &[];
1620                            }
1621                            if arguments.is_empty() {
1622                                return Err(error::at_least_one_argument(instruction.span.start));
1623                            }
1624                            return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1625                                healthcheck: instruction,
1626                                options,
1627                                arguments: HealthcheckArguments::Cmd {
1628                                    cmd: cmd_keyword,
1629                                    arguments: Command::Exec(Spanned {
1630                                        span: array_span,
1631                                        value: arguments,
1632                                    }),
1633                                },
1634                            }));
1635                        }
1636                    }
1637                    let arguments_start = p.text.len() - s.len();
1638                    consume_current_line(s, p.escape_byte);
1639                    let end = p.text.len() - s.len();
1640                    let arguments = trim_end(p.text, arguments_start, end);
1641                    return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1642                        healthcheck: instruction,
1643                        options,
1644                        arguments: HealthcheckArguments::Cmd {
1645                            cmd: cmd_keyword,
1646                            arguments: Command::Shell(Spanned {
1647                                span: arguments_start..arguments_start + arguments.len(),
1648                                value: arguments,
1649                            }),
1650                        },
1651                    }));
1652                }
1653            }
1654        }
1655        b'N' => {
1656            *s = s_next;
1657            if token(s, &b"NONE"[1..]) || token_slow(s, &b"NONE"[1..], p.escape_byte) {
1658                let none_span = cmd_or_none_start..p.text.len() - s.len();
1659                consume_whitespaces(s, p.escape_byte);
1660                if !is_line_end(s.first()) {
1661                    return Err(error::other(
1662                        "HEALTHCHECK NONE does not accept arguments",
1663                        p.text.len() - s.len(),
1664                    ));
1665                }
1666                // TODO: HEALTHCHECK NONE doesn't support options
1667                let none_keyword = Keyword { span: none_span };
1668                return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1669                    healthcheck: instruction,
1670                    options,
1671                    arguments: HealthcheckArguments::None { none: none_keyword },
1672                }));
1673            }
1674        }
1675        _ => {}
1676    }
1677    Err(error::expected("CMD or NONE", p.text.len() - s.len()))
1678}
1679
1680#[inline]
1681fn parse_label<'a>(
1682    p: &mut ParseIter<'a>,
1683    s: &mut &'a [u8],
1684    instruction: Keyword,
1685) -> Result<Instruction<'a>, ErrorKind> {
1686    debug_assert!(token_slow(
1687        &mut p.text[instruction.span.clone()].as_bytes(),
1688        b"LABEL",
1689        p.escape_byte,
1690    ));
1691    let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1692    arguments.trim_end();
1693    if arguments.value.is_empty() {
1694        return Err(error::at_least_one_argument(instruction.span.start));
1695    }
1696    Ok(Instruction::Label(LabelInstruction { label: instruction, arguments }))
1697}
1698
1699#[cold]
1700fn parse_maintainer<'a>(
1701    p: &mut ParseIter<'a>,
1702    s: &mut &'a [u8],
1703    instruction: Keyword,
1704) -> Result<Instruction<'a>, ErrorKind> {
1705    debug_assert!(token_slow(
1706        &mut p.text[instruction.span.clone()].as_bytes(),
1707        b"MAINTAINER",
1708        p.escape_byte,
1709    ));
1710    let mut name = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1711    name.trim_end();
1712    if name.value.is_empty() {
1713        return Err(error::exactly_one_argument(instruction.span.start));
1714    }
1715    Ok(Instruction::Maintainer(MaintainerInstruction { maintainer: instruction, name }))
1716}
1717
1718#[inline]
1719fn parse_onbuild<'a>(
1720    p: &mut ParseIter<'a>,
1721    s: &mut &'a [u8],
1722    instruction: Keyword,
1723) -> Result<Instruction<'a>, ErrorKind> {
1724    debug_assert!(token_slow(
1725        &mut p.text[instruction.span.clone()].as_bytes(),
1726        b"ONBUILD",
1727        p.escape_byte,
1728    ));
1729    // https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1730    if p.in_onbuild {
1731        return Err(error::other("ONBUILD ONBUILD is not allowed", instruction.span.start));
1732    }
1733    p.in_onbuild = true;
1734    let Some((&b, s_next)) = s.split_first() else {
1735        return Err(error::expected("instruction after ONBUILD", instruction.span.start));
1736    };
1737    // TODO: https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1738    // match b & TO_UPPER8 {
1739    //     b'F' => {
1740    //         if token(s, b"FROM") || token_slow(s, b"FROM", p.escape_byte) {
1741    //             return Err(error::other(p,
1742    //                 "ONBUILD FROM is not allowed",
1743    //                 instruction.span.start,
1744    //             ));
1745    //         }
1746    //     }
1747    //     b'M' => {
1748    //         if token(s, b"MAINTAINER")
1749    //             || token_slow(s, b"MAINTAINER", p.escape_byte)
1750    //         {
1751    //             return Err(error::other(p,
1752    //                 "ONBUILD MAINTAINER is not allowed",
1753    //                 instruction.span.start,
1754    //             ));
1755    //         }
1756    //     }
1757    //     _ => {}
1758    // }
1759    let inner_instruction = parse_instruction(p, s, b, s_next)?;
1760    p.in_onbuild = false;
1761    Ok(Instruction::Onbuild(OnbuildInstruction {
1762        onbuild: instruction,
1763        instruction: Box::new(inner_instruction),
1764    }))
1765}
1766
1767#[inline]
1768fn parse_run<'a>(
1769    p: &mut ParseIter<'a>,
1770    s: &mut &'a [u8],
1771    instruction: Keyword,
1772) -> Result<Instruction<'a>, ErrorKind> {
1773    debug_assert!(token_slow(
1774        &mut p.text[instruction.span.clone()].as_bytes(),
1775        b"RUN",
1776        p.escape_byte,
1777    ));
1778    let options = parse_options(s, p.text, p.escape_byte);
1779    if is_maybe_json(s) {
1780        let mut tmp = *s;
1781        if let Ok((arguments, array_span)) =
1782            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1783        {
1784            if let Some((&b, s_next)) = tmp.split_first() {
1785                let consumed = consume_newline(b, s, s_next);
1786                debug_assert!(consumed);
1787            } else {
1788                *s = &[];
1789            }
1790            if arguments.is_empty() {
1791                return Err(error::at_least_one_argument(instruction.span.start));
1792            }
1793            return Ok(Instruction::Run(RunInstruction {
1794                run: instruction,
1795                options,
1796                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1797                // TODO: https://github.com/moby/buildkit/issues/2207
1798                here_docs: vec![],
1799            }));
1800        }
1801    }
1802
1803    // https://docs.docker.com/reference/dockerfile/#here-documents
1804    // At least 5, <<E\nE
1805    if s.len() >= 5 {
1806        if let Some(s_next) = s.strip_prefix(b"<<") {
1807            if let Some((delim, strip_tab, expand)) = collect_here_doc_delim(s, s_next, p.text)? {
1808                // TODO: skip space
1809                let arguments_start = p.text.len() - s.len();
1810                consume_current_line(s, p.escape_byte);
1811                let end = p.text.len() - s.len();
1812                let arguments = trim_end(p.text, arguments_start, end);
1813                let (here_doc, span) = collect_here_doc(s, p.text, &delim, strip_tab)?;
1814                let here_doc = HereDoc { span, expand, value: here_doc };
1815                return Ok(Instruction::Run(RunInstruction {
1816                    run: instruction,
1817                    options,
1818                    arguments: Command::Shell(Spanned {
1819                        span: arguments_start..arguments_start + arguments.len(),
1820                        value: arguments,
1821                    }),
1822                    // TODO: multiple here-docs
1823                    here_docs: vec![here_doc],
1824                }));
1825            }
1826        }
1827    }
1828
1829    let arguments_start = p.text.len() - s.len();
1830    consume_current_line(s, p.escape_byte);
1831    let end = p.text.len() - s.len();
1832    let arguments = trim_end(p.text, arguments_start, end);
1833    Ok(Instruction::Run(RunInstruction {
1834        run: instruction,
1835        options,
1836        arguments: Command::Shell(Spanned {
1837            span: arguments_start..arguments_start + arguments.len(),
1838            value: arguments,
1839        }),
1840        here_docs: vec![],
1841    }))
1842}
1843
1844#[inline]
1845fn parse_shell<'a>(
1846    p: &mut ParseIter<'a>,
1847    s: &mut &'a [u8],
1848    instruction: Keyword,
1849) -> Result<Instruction<'a>, ErrorKind> {
1850    debug_assert!(token_slow(
1851        &mut p.text[instruction.span.clone()].as_bytes(),
1852        b"SHELL",
1853        p.escape_byte,
1854    ));
1855    if !is_maybe_json(s) {
1856        return Err(error::expected("JSON array", p.text.len() - s.len()));
1857    }
1858    let (arguments, _array_span) =
1859        parse_json_array::<SmallVec<[_; 4]>>(s, p.text, p.escape_byte).map_err(error::json)?;
1860    if let Some((&b, s_next)) = s.split_first() {
1861        let consumed = consume_newline(b, s, s_next);
1862        debug_assert!(consumed);
1863    }
1864    if arguments.is_empty() {
1865        return Err(error::at_least_one_argument(instruction.span.start));
1866    }
1867    Ok(Instruction::Shell(ShellInstruction { shell: instruction, arguments }))
1868}
1869
1870#[inline]
1871fn parse_stopsignal<'a>(
1872    p: &mut ParseIter<'a>,
1873    s: &mut &'a [u8],
1874    instruction: Keyword,
1875) -> Result<Instruction<'a>, ErrorKind> {
1876    debug_assert!(token_slow(
1877        &mut p.text[instruction.span.clone()].as_bytes(),
1878        b"STOPSIGNAL",
1879        p.escape_byte,
1880    ));
1881    // TODO: space is disallowed?
1882    let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1883    arguments.trim_end();
1884    if arguments.value.is_empty() {
1885        return Err(error::exactly_one_argument(instruction.span.start));
1886    }
1887    Ok(Instruction::Stopsignal(StopsignalInstruction { stopsignal: instruction, arguments }))
1888}
1889
1890#[inline]
1891fn parse_user<'a>(
1892    p: &mut ParseIter<'a>,
1893    s: &mut &'a [u8],
1894    instruction: Keyword,
1895) -> Result<Instruction<'a>, ErrorKind> {
1896    debug_assert!(token_slow(
1897        &mut p.text[instruction.span.clone()].as_bytes(),
1898        b"USER",
1899        p.escape_byte,
1900    ));
1901    // TODO: space is disallowed?
1902    let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1903    arguments.trim_end();
1904    if arguments.value.is_empty() {
1905        return Err(error::exactly_one_argument(instruction.span.start));
1906    }
1907    Ok(Instruction::User(UserInstruction { user: instruction, arguments }))
1908}
1909
1910#[inline]
1911fn parse_volume<'a>(
1912    p: &mut ParseIter<'a>,
1913    s: &mut &'a [u8],
1914    instruction: Keyword,
1915) -> Result<Instruction<'a>, ErrorKind> {
1916    debug_assert!(token_slow(
1917        &mut p.text[instruction.span.clone()].as_bytes(),
1918        b"VOLUME",
1919        p.escape_byte,
1920    ));
1921    if is_maybe_json(s) {
1922        let mut tmp = *s;
1923        if let Ok((arguments, array_span)) = parse_json_array(&mut tmp, p.text, p.escape_byte) {
1924            if let Some((&b, s_next)) = tmp.split_first() {
1925                let consumed = consume_newline(b, s, s_next);
1926                debug_assert!(consumed);
1927            } else {
1928                *s = &[];
1929            }
1930            // "VOLUME []" seems to be okay?
1931            return Ok(Instruction::Volume(VolumeInstruction {
1932                volume: instruction,
1933                arguments: JsonOrStringArray::Json(Spanned { span: array_span, value: arguments }),
1934            }));
1935        }
1936    }
1937    let arguments: SmallVec<[_; 1]> =
1938        collect_space_separated_consume_line(s, p.text, p.escape_byte);
1939    if arguments.is_empty() {
1940        // TODO: "VOLUME" too?
1941        return Err(error::at_least_one_argument(instruction.span.start));
1942    }
1943    Ok(Instruction::Volume(VolumeInstruction {
1944        volume: instruction,
1945        arguments: JsonOrStringArray::String(arguments),
1946    }))
1947}
1948
1949#[inline]
1950fn parse_workdir<'a>(
1951    p: &mut ParseIter<'a>,
1952    s: &mut &'a [u8],
1953    instruction: Keyword,
1954) -> Result<Instruction<'a>, ErrorKind> {
1955    debug_assert!(token_slow(
1956        &mut p.text[instruction.span.clone()].as_bytes(),
1957        b"WORKDIR",
1958        p.escape_byte,
1959    ));
1960    // TODO: space is disallowed if not escaped/quoted?
1961    let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1962    arguments.trim_end();
1963    if arguments.value.is_empty() {
1964        return Err(error::exactly_one_argument(instruction.span.start));
1965    }
1966    Ok(Instruction::Workdir(WorkdirInstruction { workdir: instruction, arguments }))
1967}
1968
1969// -----------------------------------------------------------------------------
1970// Parsing Helpers
1971
/// Byte class `[\r\n]`.
const POSSIBLE_LINE: u8 = 1 << 0;
/// Byte class `[ \t]`.
const SPACE: u8 = 1 << 1;
/// Byte class `[ \r\t\v\f]`, where `\v` = `\x0B` and `\f` = `\x0C`.
///
/// <https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/parser/parser.go#L120>
const WHITESPACE: u8 = 1 << 2;
/// Byte class `[#]`.
const COMMENT: u8 = 1 << 3;
/// Byte class `["]`.
const DOUBLE_QUOTE: u8 = 1 << 4;
/// Byte class of the two possible escape characters, `\` and `` ` ``.
const POSSIBLE_ESCAPE: u8 = 1 << 5;
/// Byte class `[=]`.
const EQ: u8 = 1 << 6;
/// Bytes below `0x20`.
const CONTROL: u8 = 1 << 7;

/// Per-byte bit set of the classes above, indexed by the byte value.
static TABLE: [u8; 256] = {
    let mut table = [0_u8; 256];
    let mut index = 0_usize;
    while index < 256 {
        // `index` is below 256 here, so the cast is lossless.
        let byte = index as u8;
        let class = match byte {
            b' ' | b'\t' => WHITESPACE | SPACE,
            b'\x0B' | b'\x0C' => WHITESPACE,
            b'\r' => WHITESPACE | POSSIBLE_LINE,
            b'\n' => POSSIBLE_LINE,
            b'#' => COMMENT,
            b'"' => DOUBLE_QUOTE,
            b'\\' | b'`' => POSSIBLE_ESCAPE,
            b'=' => EQ,
            _ => 0,
        };
        table[index] = if byte < 0x20 { class | CONTROL } else { class };
        index += 1;
    }
    table
};
2018
// Lookup table for ascii to hex decoding.
//
// Entries for the ASCII hex digits (`0-9`, `A-F`, `a-f`) hold the digit's
// value; every other entry holds `u8::MAX` as the "not a hex digit" marker.
static HEX_DECODE_TABLE: [u8; 256] = {
    let mut table = [u8::MAX; 256];
    // '0'..='9' => 0..=9
    let mut digit = 0_u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }
    // 'A'..='F' and 'a'..='f' => 10..=15
    let mut offset = 0_u8;
    while offset < 6 {
        table[(b'A' + offset) as usize] = 10 + offset;
        table[(b'a' + offset) as usize] = 10 + offset;
        offset += 1;
    }
    table
};
2043
/// The three-byte UTF-8 byte order mark (`EF BB BF`).
const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";
2045
/// Minimal "create empty, then add values" abstraction over the containers
/// the parsing helpers collect their results into.
trait Store<T>: Sized {
    /// Creates an empty store.
    fn new() -> Self;
    /// Adds `val` to the store.
    fn push(&mut self, val: T);
}
/// `Vec` stores values by appending them.
impl<T> Store<T> for Vec<T> {
    #[inline]
    fn new() -> Self {
        Vec::new()
    }
    #[inline]
    fn push(&mut self, val: T) {
        Vec::push(self, val);
    }
}
2060impl<T, const N: usize> Store<T> for SmallVec<[T; N]> {
2061    #[inline]
2062    fn new() -> Self {
2063        Self::new()
2064    }
2065    #[inline]
2066    fn push(&mut self, val: T) {
2067        self.push(val);
2068    }
2069}
2070impl<'a, const N: usize> Store<UnescapedString<'a>>
2071    for (SmallVec<[Source<'a>; N]>, Option<UnescapedString<'a>>)
2072{
2073    #[inline]
2074    fn new() -> Self {
2075        (SmallVec::new(), None)
2076    }
2077    #[inline]
2078    fn push(&mut self, val: UnescapedString<'a>) {
2079        if let Some(val) = self.1.replace(val) {
2080            self.0.push(Source::Path(val));
2081        }
2082    }
2083}
2084
// Equivalent to core::hint::cold_path, but compatible with pre-1.95 rustc.
//
// The body is intentionally empty; presumably a call to this `#[cold]`
// function is what marks the calling branch as unlikely.
#[inline(always)]
#[cold]
fn cold_path() {}
2089
/// Returns `true` when `b` is a line feed or there is no byte left at all.
///
/// Note: must be called after consume_whitespaces
#[inline]
fn is_line_end(b: Option<&u8>) -> bool {
    match b {
        None | Some(b'\n') => true,
        Some(_) => false,
    }
}
2095
2096#[inline]
2097fn parse_options<'a, S: Store<Flag<'a>>>(s: &mut &[u8], start: &'a str, escape_byte: u8) -> S {
2098    let mut options = S::new();
2099    'outer: while let Some((&b'-', mut s_next)) = s.split_first() {
2100        loop {
2101            let Some((&b, s_next_next)) = s_next.split_first() else {
2102                break 'outer;
2103            };
2104            if b == b'-' {
2105                s_next = s_next_next;
2106                break;
2107            }
2108            if consume_line_continuation(&mut s_next, b, s_next_next, escape_byte) {
2109                continue;
2110            }
2111            break 'outer;
2112        }
2113        let flag_start = start.len() - s.len();
2114        *s = s_next;
2115        let name = collect_until::<{ WHITESPACE | POSSIBLE_LINE | EQ }>(s, start, escape_byte);
2116        let Some((&b'=', s_next)) = s.split_first() else {
2117            options.push(Flag { flag_start, name, value: None });
2118            consume_whitespaces(s, escape_byte);
2119            continue;
2120        };
2121        *s = s_next;
2122        let value = collect_non_whitespace(s, start, escape_byte);
2123        options.push(Flag { flag_start, name, value: Some(value) });
2124        consume_whitespaces(s, escape_byte);
2125    }
2126    options
2127}
2128
/// Cheap pre-check for a JSON array: `s` starts with exactly one `[`.
#[inline]
fn is_maybe_json(s: &[u8]) -> bool {
    // ADD/COPY: checking [[ to handle escape of [ https://docs.docker.com/reference/dockerfile/#add
    // Others: TODO: checking [[ to handle [[ -e .. ], but not enough to check [ -e .. ]
    matches!(s, [b'[', rest @ ..] if rest.first() != Some(&b'['))
}
/// Parses a JSON array of strings (e.g. `["a", "b"]`) from the front of `s`.
///
/// `start` is the full source text; all offsets and spans are relative to it.
/// Besides JSON string escapes, Dockerfile line continuations (using
/// `escape_byte`) are accepted inside strings.
///
/// On success returns the collected strings together with the span covering
/// the array from `[` through `]`; trailing whitespace has been consumed and
/// `s` is left at the end of the line (or at the end of input).
///
/// # Errors
///
/// Every failure is reported as `Err(array_start)`, the offset of the opening
/// `[`. `s` may have been advanced partway into the array at that point.
fn parse_json_array<'a, S: Store<UnescapedString<'a>>>(
    s: &mut &[u8],
    start: &'a str,
    escape_byte: u8,
) -> Result<(S, Span), usize> {
    debug_assert!(is_maybe_json(s));
    let mut res = S::new();
    let array_start = start.len() - s.len();
    *s = &s[1..];
    consume_whitespaces(s, escape_byte);
    let (&b, s_next) = s.split_first().ok_or(array_start)?;
    match b {
        b'"' => {
            *s = s_next;
            // One iteration per string element; `s` is just past the opening quote.
            loop {
                let full_word_start = start.len() - s.len();
                // Start of the not-yet-copied tail of the current string. It only
                // moves away from `full_word_start` once an escape or a line
                // continuation has been seen.
                let mut word_start = full_word_start;
                let mut buf = String::new();
                loop {
                    let (&b, s_next) = s.split_first().ok_or(array_start)?;
                    // Fast path: bytes that need no special handling.
                    if TABLE[b as usize] & (DOUBLE_QUOTE | POSSIBLE_ESCAPE | CONTROL) == 0 {
                        *s = s_next;
                        continue;
                    }
                    match b {
                        b'"' => break,
                        // Raw control characters are rejected inside strings.
                        _ if b < 0x20 => return Err(array_start),
                        _ => {}
                    }
                    let word_end = start.len() - s.len();
                    if consume_line_continuation(s, b, s_next, escape_byte) {
                        // dockerfile escape
                        buf.push_str(&start[word_start..word_end]);
                        word_start = start.len() - s.len();
                        continue;
                    }
                    if b == b'\\' {
                        // JSON escape
                        let word_end = start.len() - s.len();
                        buf.push_str(&start[word_start..word_end]);
                        *s = s_next;
                        // A line continuation may sit between the backslash and
                        // the escape character.
                        if let Some((&b, s_next)) = s.split_first() {
                            consume_line_continuation(s, b, s_next, escape_byte);
                        }
                        let (&b, s_next) = s.split_first().ok_or(array_start)?;
                        *s = s_next;
                        let new = match b {
                            b'"' | b'\\' | b'/' => b as char,
                            b'b' => '\x08',
                            b'f' => '\x0c',
                            b'n' => '\n',
                            b'r' => '\r',
                            b't' => '\t',
                            b'u' => parse_json_hex_escape(s, escape_byte, array_start)?,
                            _ => return Err(array_start), // invalid escape
                        };
                        buf.push(new);
                        word_start = start.len() - s.len();
                        continue;
                    }
                    // A possible-escape byte that is neither of the above: plain data.
                    *s = s_next;
                }
                let word_end = start.len() - s.len();
                let value = if full_word_start == word_start {
                    // no escape
                    Cow::Borrowed(&start[word_start..word_end])
                } else {
                    buf.push_str(&start[word_start..word_end]);
                    Cow::Owned(buf)
                };
                // The span covers the string contents without the quotes.
                res.push(UnescapedString { span: full_word_start..word_end, value });
                *s = &s[1..]; // drop "
                consume_whitespaces(s, escape_byte);
                let (&b, s_next) = s.split_first().ok_or(array_start)?;
                match b {
                    b',' => {
                        *s = s_next;
                        consume_whitespaces(s, escape_byte);
                        let (&b, s_next) = s.split_first().ok_or(array_start)?;
                        // A comma must be followed by another string.
                        if b == b'"' {
                            *s = s_next;
                            continue;
                        }
                        return Err(array_start);
                    }
                    b']' => {
                        *s = s_next;
                        break;
                    }
                    _ => return Err(array_start),
                }
            }
        }
        // Empty array.
        b']' => *s = s_next,
        _ => return Err(array_start),
    }
    let array_end = start.len() - s.len();
    consume_whitespaces(s, escape_byte);
    // Nothing but whitespace may follow the array on its line.
    if !is_line_end(s.first()) {
        return Err(array_start);
    }
    Ok((res, array_start..array_end))
}
2238// Adapted from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/src/read.rs
2239#[cold]
2240fn parse_json_hex_escape(
2241    s: &mut &[u8],
2242    escape_byte: u8,
2243    array_start: usize,
2244) -> Result<char, usize> {
2245    fn decode_hex_escape(s: &mut &[u8], escape_byte: u8, array_start: usize) -> Result<u16, usize> {
2246        if s.len() < 4 {
2247            return Err(array_start); // EofWhileParsingString
2248        }
2249
2250        let mut n = 0;
2251        for _ in 0..4 {
2252            if let Some((&b, s_next)) = s.split_first() {
2253                consume_line_continuation(s, b, s_next, escape_byte);
2254            }
2255            let (&b, s_next) = s.split_first().ok_or(array_start)?;
2256            *s = s_next;
2257            match decode_hex_val(b) {
2258                None => return Err(array_start), // InvalidEscape
2259                Some(val) => {
2260                    n = (n << 4) + val;
2261                }
2262            }
2263        }
2264        Ok(n)
2265    }
2266
2267    fn decode_hex_val(val: u8) -> Option<u16> {
2268        let n = HEX_DECODE_TABLE[val as usize] as u16;
2269        if n == u8::MAX as u16 { None } else { Some(n) }
2270    }
2271
2272    let c = match decode_hex_escape(s, escape_byte, array_start)? {
2273        _n @ 0xDC00..=0xDFFF => return Err(array_start), // ErrorCode::LoneLeadingSurrogateInHexEscape)
2274
2275        // Non-BMP characters are encoded as a sequence of two hex
2276        // escapes, representing UTF-16 surrogates. If deserializing a
2277        // utf-8 string the surrogates are required to be paired,
2278        // whereas deserializing a byte string accepts lone surrogates.
2279        n1 @ 0xD800..=0xDBFF => {
2280            if let Some((&b, s_next)) = s.split_first() {
2281                consume_line_continuation(s, b, s_next, escape_byte);
2282            }
2283            let Some((&b'\\', s_next)) = s.split_first() else {
2284                return Err(array_start); // UnexpectedEndOfHexEscape
2285            };
2286            *s = s_next;
2287
2288            if let Some((&b, s_next)) = s.split_first() {
2289                consume_line_continuation(s, b, s_next, escape_byte);
2290            }
2291            let Some((&b'u', s_next)) = s.split_first() else {
2292                return Err(array_start); // UnexpectedEndOfHexEscape
2293            };
2294            *s = s_next;
2295
2296            let n2 = decode_hex_escape(s, escape_byte, array_start)?;
2297
2298            if n2 < 0xDC00 || n2 > 0xDFFF {
2299                return Err(array_start); // LoneLeadingSurrogateInHexEscape
2300            }
2301
2302            let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
2303
2304            match char::from_u32(n) {
2305                Some(c) => c,
2306                None => return Err(array_start), // InvalidUnicodeCodePoint
2307            }
2308        }
2309
2310        // Every u16 outside of the surrogate ranges above is guaranteed
2311        // to be a legal char.
2312        n => char::from_u32(n as u32).unwrap(),
2313    };
2314    Ok(c)
2315}
// Exercises `parse_json_array` on accepted and rejected inputs. Every case
// checks both the returned value and how much of the input is left in `s`;
// all rejected inputs are expected to yield `Err(0)`, the offset of the `[`.
//
// Raw strings with hashes are used uniformly for readability, even where the
// hashes are not strictly needed, hence the `allow`.
#[allow(clippy::needless_raw_string_hashes)]
#[test]
fn test_parse_json_array() {
    // empty
    let t = r#"[]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
    assert_eq!(s, b"");
    let t = r#"[ ]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
    assert_eq!(s, b"");
    // one value
    let t = r#"["abc"]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
        span: 2..5,
        value: "abc".into()
    }]);
    assert_eq!(s, b"");
    // multi values
    // (trailing whitespace is consumed, the newline itself is left in `s`)
    let t = "[\"ab\",\"c\" ,  \"de\" ] \n";
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[
        UnescapedString { span: 2..4, value: "ab".into() },
        UnescapedString { span: 7..8, value: "c".into() },
        UnescapedString { span: 14..16, value: "de".into() },
    ]);
    assert_eq!(s, b"\n");
    // escape
    // (JSON escapes, \u escapes including a surrogate pair, and the same
    // surrogate pair again with a line continuation before every byte)
    let t = "[\"a\\\"\\\\\\/\\b\\f\\n\\r\\tbc\\u12ab\\uAB12\\uD83C\\uDF95\\\n\\\\\nu\\\nD\\\n8\\\n3\\\nC\\\n\\\\\nu\\\nD\\\nF\\\n9\\\n5\\\n\"]";
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
        span: 2..83,
        value: "a\"\\/\x08\x0c\n\r\tbc\u{12ab}\u{AB12}\u{1F395}\u{1F395}".into()
    }]);
    assert_eq!(s, b"");

    // fail (no ending)
    let t = r#"["]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#""#);
    let t = r#"["a]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#""#);
    // fail (single quote)
    let t = r#"['abc']"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"'abc']"#);
    // fail (extra comma)
    let t = r#"["abc",]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"]"#);
    // fail (extra char after string)
    let t = r#"["abc"d]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"d]"#);
    // fail (extra char after array)
    let t = r#"["abc"] c"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"c"#);
    // fail (invalid escape)
    let t = "[\"ab\\c\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\"]");
    // fail (invalid escape)
    // (high surrogate followed by a \u escape that is not a low surrogate)
    let t = "[\"\\uD83C\\uFFFF\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\"]");
    // fail (control)
    let t = "[\"a\nb\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\nb\"]");
    // fail (control)
    let t = "[\"a\x1Fb\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\x1Fb\"]");
    // TODO: more from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/tests/test.rs#L1079
}
2405
/// Parses the delimiter word of a here-document.
///
/// `s_next` is the input positioned at the start of the delimiter word
/// (presumably just after `<<`; confirm against the caller). `start` is the
/// full source text and is only used to convert "bytes remaining" into byte
/// offsets (`start.len() - remaining.len()`).
///
/// A leading `-` is consumed and reported as `strip_tab`. Quote characters
/// (`"` or `'`) and `\` escape characters are removed from the returned
/// delimiter; seeing any quote clears `expand`. The word ends at the first
/// unquoted byte classified as `WHITESPACE` or `POSSIBLE_LINE`, or at the end
/// of the input.
///
/// Returns `Ok(None)` without touching `s` when the resulting delimiter is
/// empty. Otherwise `s` is advanced past the word and
/// `(delim, strip_tab, expand)` is returned; `delim` borrows from `start`
/// unless a quote or escape had to be removed.
///
/// # Errors
///
/// Returns an error for a `\` with nothing after it, and for a quote that is
/// never closed.
#[inline]
fn collect_here_doc_delim<'a>(
    s: &mut &'a [u8],
    mut s_next: &'a [u8],
    start: &'a str,
) -> Result<Option<(Cow<'a, [u8]>, bool, bool)>, ErrorKind> {
    // `<<-` form: remember that leading tabs must be stripped from the body.
    let strip_tab = if let Some((&b'-', s_next_next)) = s_next.split_first() {
        s_next = s_next_next;
        true
    } else {
        false
    };
    let delim_start = start.len() - s_next.len();
    // Start of the pending segment that has not been copied into `buf` yet.
    // It only moves once a quote or escape character has been dropped.
    let mut current_start = delim_start;
    let mut expand = true;
    // The currently open quote character, if any.
    let mut quote = None;
    let mut buf = vec![];
    while let Some((&b, s_next_next)) = s_next.split_first() {
        match b {
            b'"' | b'\'' => {
                if let Some(q) = quote {
                    // Only the matching quote closes; the other quote
                    // character is kept as a literal byte.
                    if b == q {
                        quote = None;
                        let end = start.len() - s_next.len();
                        buf.extend_from_slice(&start.as_bytes()[current_start..end]);
                        current_start = start.len() - s_next_next.len();
                    }
                } else {
                    quote = Some(b);
                    expand = false;
                    let end = start.len() - s_next.len();
                    buf.extend_from_slice(&start.as_bytes()[current_start..end]);
                    current_start = start.len() - s_next_next.len();
                }
            }
            b'\\' => {
                // here-doc escape is always \ https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/parser/parser.go#L482
                let end = start.len() - s_next.len();
                buf.extend_from_slice(&start.as_bytes()[current_start..end]);
                // The next segment starts right after the backslash, so the
                // escaped byte itself is kept verbatim.
                current_start = start.len() - s_next_next.len();
                let Some((_, s_next_next)) = s_next_next.split_first() else {
                    return Err(error::other("unterminated escape", start.len() - s_next.len()));
                };
                // Skip the escaped byte without interpreting it.
                s_next = s_next_next;
                continue;
            }
            _ if quote.is_none() && TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) != 0 => break,
            _ => {}
        }
        s_next = s_next_next;
    }
    if let Some(quote) = quote {
        return Err(error::expected_quote(quote, None, start.len() - s_next.len()));
    }
    let end = start.len() - s_next.len();
    // `current_start` is unchanged only if nothing was removed, in which case
    // the delimiter can be borrowed directly from the source.
    let delim = if delim_start == current_start {
        Cow::Borrowed(&start.as_bytes()[delim_start..end])
    } else {
        buf.extend_from_slice(&start.as_bytes()[current_start..end]);
        Cow::Owned(buf)
    };
    if delim.is_empty() {
        return Ok(None);
    }
    *s = s_next;
    Ok(Some((delim, strip_tab, expand)))
}
/// Collects the body of a here-document up to its terminating `delim` line.
///
/// `s` is the remaining input, positioned at the first line of the body, and
/// `start` is the full source text used to compute byte offsets. When
/// `strip_tab` is set, the leading tabs of every line (including the
/// terminator line) are dropped from the returned text.
///
/// The terminator is a line that begins with `delim` (after optional tab
/// stripping) and is immediately followed by a newline or the end of input.
/// `s` is advanced past that line.
///
/// Returns the body text together with the source span
/// `here_doc_start..end`, where `end` is the offset at which the terminating
/// delimiter begins. The text borrows from `start` unless tabs were stripped.
///
/// # Errors
///
/// Returns an error when fewer than `delim.len()` bytes remain before a
/// terminator line has been found.
#[inline]
fn collect_here_doc<'a>(
    s: &mut &[u8],
    start: &'a str,
    delim: &[u8],
    strip_tab: bool,
) -> Result<(Cow<'a, str>, Span), ErrorKind> {
    let here_doc_start = start.len() - s.len();
    // Start of the pending segment not yet copied into `buf`; it only moves
    // when leading tabs are stripped.
    let mut current_start = here_doc_start;
    let mut buf = String::new();
    let mut end;
    // Each iteration starts at the beginning of a line.
    loop {
        if strip_tab {
            // trim leading tabs
            if let Some((&b'\t', mut s_next)) = s.split_first() {
                // Flush everything up to the tabs, then skip the whole run.
                let end = start.len() - s.len();
                buf.push_str(&start[current_start..end]);
                while let Some((&b'\t', s_next_next)) = s_next.split_first() {
                    s_next = s_next_next;
                }
                *s = s_next;
                current_start = start.len() - s.len();
            }
        }
        if s.len() < delim.len() {
            return Err(error::expected_here_doc_end(delim, start.len() - s.len()));
        }
        if s.starts_with(delim) {
            let s_next = &s[delim.len()..];
            end = start.len() - s.len();
            // The delimiter only terminates the body when nothing else
            // follows it on the line.
            if let Some((&b, s_next)) = s_next.split_first() {
                if consume_newline(b, s, s_next) {
                    break;
                }
            } else {
                *s = s_next;
                break;
            }
        }
        consume_current_line_no_line_continuation(s);
    }
    let span = here_doc_start..end;
    if here_doc_start == current_start {
        // No tabs were stripped, so the body is a plain slice of the source.
        Ok((Cow::Borrowed(&start[span.clone()]), span))
    } else {
        buf.push_str(&start[current_start..end]);
        Ok((Cow::Owned(buf), span))
    }
}
2522
2523// TODO: escaped/quoted space?
2524#[inline]
2525fn collect_space_separated_consume_line<'a, S: Store<UnescapedString<'a>>>(
2526    s: &mut &[u8],
2527    start: &'a str,
2528    escape_byte: u8,
2529) -> S {
2530    let mut res = S::new();
2531    loop {
2532        let val = collect_non_whitespace(s, start, escape_byte);
2533        if !val.value.is_empty() {
2534            res.push(val);
2535            if consume_whitespaces(s, escape_byte) {
2536                continue;
2537            }
2538        }
2539        if let Some((&b, s_next)) = s.split_first() {
2540            let consumed = consume_newline(b, s, s_next);
2541            debug_assert!(consumed);
2542        }
2543        break;
2544    }
2545    res
2546}
2547#[inline]
2548fn collect_non_whitespace<'a>(
2549    s: &mut &[u8],
2550    start: &'a str,
2551    escape_byte: u8,
2552) -> UnescapedString<'a> {
2553    collect_until::<{ WHITESPACE | POSSIBLE_LINE }>(s, start, escape_byte)
2554}
2555#[inline]
2556fn collect_until<'a, const UNTIL_MASK: u8>(
2557    s: &mut &[u8],
2558    start: &'a str,
2559    escape_byte: u8,
2560) -> UnescapedString<'a> {
2561    let full_word_start = start.len() - s.len();
2562    let mut word_start = full_word_start;
2563    let mut buf = String::new();
2564    while let Some((&b, s_next)) = s.split_first() {
2565        let t = TABLE[b as usize];
2566        if t & (UNTIL_MASK | POSSIBLE_ESCAPE) != 0 {
2567            if t & UNTIL_MASK != 0 {
2568                break;
2569            }
2570            let word_end = start.len() - s.len();
2571            if consume_line_continuation(s, b, s_next, escape_byte) {
2572                buf.push_str(&start[word_start..word_end]);
2573                word_start = start.len() - s.len();
2574                continue;
2575            }
2576        }
2577        *s = s_next;
2578    }
2579    let word_end = start.len() - s.len();
2580    let value = if full_word_start == word_start {
2581        // no escape
2582        Cow::Borrowed(&start[word_start..word_end])
2583    } else {
2584        buf.push_str(&start[word_start..word_end]);
2585        Cow::Owned(buf)
2586    };
2587    UnescapedString { span: full_word_start..word_end, value }
2588}
2589#[inline]
2590fn collect_until_line_consume_newline<'a>(
2591    s: &mut &[u8],
2592    start: &'a str,
2593    escape_byte: u8,
2594) -> UnescapedString<'a> {
2595    let full_word_start = start.len() - s.len();
2596    let mut word_start = full_word_start;
2597    let mut buf = String::new();
2598    let word_end;
2599    loop {
2600        let Some((&b, s_next)) = s.split_first() else {
2601            word_end = start.len() - s.len();
2602            break;
2603        };
2604        let t = TABLE[b as usize];
2605        if t & (POSSIBLE_LINE | POSSIBLE_ESCAPE) != 0 {
2606            match b {
2607                b'\n' => {
2608                    word_end = start.len() - s.len();
2609                    *s = s_next;
2610                    break;
2611                }
2612                b'\r' => {
2613                    if s_next.first() == Some(&b'\n') {
2614                        word_end = start.len() - s.len();
2615                        *s = &s_next[1..];
2616                        break;
2617                    }
2618                }
2619                _ => {
2620                    let word_end = start.len() - s.len();
2621                    if consume_line_continuation(s, b, s_next, escape_byte) {
2622                        buf.push_str(&start[word_start..word_end]);
2623                        word_start = start.len() - s.len();
2624                        continue;
2625                    }
2626                }
2627            }
2628        }
2629        *s = s_next;
2630    }
2631    let value = if full_word_start == word_start {
2632        // no escape
2633        Cow::Borrowed(&start[word_start..word_end])
2634    } else {
2635        buf.push_str(&start[word_start..word_end]);
2636        Cow::Owned(buf)
2637    };
2638    UnescapedString { span: full_word_start..word_end, value }
2639}
2640
/// Consumes a newline (`\n` or `\r\n`) if present and returns `true` if one
/// was consumed.
///
/// `b` is the first byte of `*s` and `s_next` is the remainder after it.
/// Nothing is consumed when the input does not start with a newline; in
/// particular a `\r` that is not followed by `\n` is left in place.
#[inline(always)]
fn consume_newline<'a>(b: u8, s: &mut &'a [u8], s_next: &'a [u8]) -> bool {
    let after_newline = match (b, s_next) {
        (b'\n', _) => s_next,
        (b'\r', [b'\n', tail @ ..]) => tail,
        _ => return false,
    };
    *s = after_newline;
    true
}
2660
2661/// Consumes a line continuation if present and returns `true` if consumed.
2662#[inline]
2663fn consume_line_continuation<'a>(
2664    s: &mut &'a [u8],
2665    b: u8,
2666    s_next: &'a [u8],
2667    escape_byte: u8,
2668) -> bool {
2669    #[inline]
2670    fn followup(s: &mut &[u8], _escape_byte: u8) {
2671        while let Some((&b, mut s_next)) = s.split_first() {
2672            let t = TABLE[b as usize];
2673            if t & (WHITESPACE | POSSIBLE_LINE | COMMENT) == 0 {
2674                break;
2675            }
2676            if t & WHITESPACE != 0 {
2677                // TODO: escape after spaces is handled in consume_whitespaces_no_line_continuation
2678                consume_whitespaces_no_line_continuation(&mut s_next);
2679                let Some((&b, s_next_next)) = s_next.split_first() else { break };
2680                let t = TABLE[b as usize];
2681                if t & (COMMENT | POSSIBLE_LINE) == 0 {
2682                    break;
2683                }
2684                s_next = s_next_next;
2685            }
2686            *s = s_next;
2687            // comment or empty continuation line
2688            // \r is handled in the above consume_whitespaces_no_line_continuation
2689            if b != b'\n' {
2690                consume_current_line_no_line_continuation(s);
2691            }
2692        }
2693    }
2694
2695    if b == escape_byte {
2696        cold_path();
2697        if let Some((&b, mut s_next)) = s_next.split_first() {
2698            if consume_newline(b, s, s_next) {
2699                followup(s, escape_byte);
2700                return true;
2701            }
2702            // "\\[ \t]\n" is also accepted.
2703            // https://github.com/moby/buildkit/blob/v0.30/frontend/dockerfile/parser/parser.go#L168
2704            if TABLE[b as usize] & SPACE != 0 {
2705                cold_path();
2706                consume_whitespaces_no_line_continuation(&mut s_next);
2707                if let Some((&b, s_next)) = s_next.split_first() {
2708                    if consume_newline(b, s, s_next) {
2709                        followup(s, escape_byte);
2710                        return true;
2711                    }
2712                }
2713            }
2714        }
2715    }
2716    false
2717}
2718
2719/// Consumes until whitespace/line character found without line continuation handling,
2720/// and returns `true` if one or more non-whitespace characters are present.
2721/// (not consumes whitespace/line character)
2722#[inline]
2723fn consume_until_whitespaces_or_line_no_line_continuation(s: &mut &[u8]) -> bool {
2724    let start = *s;
2725    while let Some((&b, s_next)) = s.split_first() {
2726        if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) != 0 {
2727            break;
2728        }
2729        *s = s_next;
2730    }
2731    start.len() != s.len()
2732}
2733
/// Consumes the rest of the current line without line continuation handling.
/// (The newline that ends the line is consumed too; if there is none, the
/// whole remaining input is consumed.)
#[inline]
fn consume_current_line_no_line_continuation(s: &mut &[u8]) {
    // Both `\n` and `\r\n` end right after the first `\n`, so locating that
    // byte is enough; a `\r` without a following `\n` is ordinary content.
    let consumed = match s.iter().position(|&b| b == b'\n') {
        Some(newline_at) => newline_at + 1,
        None => s.len(),
    };
    *s = &s[consumed..];
}
/// Consumes the current logical line, following line continuations.
/// (consumes newline characters of the current line)
///
/// After a line continuation, a line that consists of optional whitespace
/// followed by a comment marker is skipped as a comment line and scanning
/// goes on with the line after it.
#[inline]
fn consume_current_line(s: &mut &[u8], escape_byte: u8) {
    // Bitmask state: non-zero only directly after a line continuation, and
    // only while every byte seen since then had the WHITESPACE bit set.
    // It starts at 0, so a comment byte on the first physical line is never
    // treated as the start of a comment line.
    let mut has_whitespace_only = 0;
    while let Some((&b, s_next)) = s.split_first() {
        let t = TABLE[b as usize];
        if t & (POSSIBLE_LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
            // An unescaped newline ends the logical line.
            if consume_newline(b, s, s_next) {
                break;
            }
            // Comment line inside a continued instruction: drop it entirely
            // and keep going; `has_whitespace_only` is deliberately left as
            // is so that consecutive comment lines are skipped as well.
            if has_whitespace_only != 0 && t & COMMENT != 0 {
                *s = s_next;
                consume_current_line_no_line_continuation(s);
                continue;
            }
            if consume_line_continuation(s, b, s_next, escape_byte) {
                has_whitespace_only = WHITESPACE;
                continue;
            }
        }
        // Stays non-zero only if this byte is whitespace too.
        has_whitespace_only &= t;
        *s = s_next;
    }
}
2770
2771/// Consumes whitespaces without line continuation handling, and returns `true`
2772/// if one or more whitespaces ware consumed.
2773/// (not consumes non-whitespace characters)
2774#[inline]
2775fn consume_whitespaces_no_line_continuation(s: &mut &[u8]) -> bool {
2776    let start = *s;
2777    while let Some((&b, s_next)) = s.split_first() {
2778        if TABLE[b as usize] & WHITESPACE != 0 {
2779            *s = s_next;
2780            continue;
2781        }
2782        break;
2783    }
2784    start.len() != s.len()
2785}
2786/// Consumes whitespaces, and returns `true`
2787/// if one or more whitespaces ware consumed.
2788/// (not consumes non-whitespace characters)
2789#[inline]
2790fn consume_whitespaces(s: &mut &[u8], escape_byte: u8) -> bool {
2791    let mut has_space = false;
2792    while let Some((&b, s_next)) = s.split_first() {
2793        let t = TABLE[b as usize];
2794        if t & (WHITESPACE | POSSIBLE_ESCAPE) != 0 {
2795            if t & WHITESPACE != 0 {
2796                *s = s_next;
2797                has_space = true;
2798                continue;
2799            }
2800            if consume_line_continuation(s, b, s_next, escape_byte) {
2801                continue;
2802            }
2803        }
2804        break;
2805    }
2806    has_space
2807}
2808/// Consumes whitespaces, and returns `true`
2809/// if one or more whitespaces ware consumed or reached line end.
2810/// (not consumes non-whitespace characters)
2811#[inline]
2812fn consume_whitespaces_or_is_empty_line(s: &mut &[u8], escape_byte: u8) -> bool {
2813    let mut has_space = false;
2814    loop {
2815        let Some((&b, s_next)) = s.split_first() else { return true };
2816        {
2817            let t = TABLE[b as usize];
2818            if t & (WHITESPACE | POSSIBLE_ESCAPE | POSSIBLE_LINE) != 0 {
2819                if t & WHITESPACE != 0 {
2820                    *s = s_next;
2821                    has_space = true;
2822                    continue;
2823                }
2824                if b == b'\n' {
2825                    return true;
2826                }
2827                if consume_line_continuation(s, b, s_next, escape_byte) {
2828                    continue;
2829                }
2830            }
2831            break;
2832        }
2833    }
2834    has_space
2835}
2836/// Consumes whitespaces, whitespace/empty lines, and comment lines.
2837#[inline]
2838fn consume_comments_and_whitespaces(s: &mut &[u8], escape_byte: u8) {
2839    while let Some((&b, s_next)) = s.split_first() {
2840        let t = TABLE[b as usize];
2841        if t & (WHITESPACE | POSSIBLE_LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2842            if t & (WHITESPACE | POSSIBLE_LINE) != 0 {
2843                *s = s_next;
2844                continue;
2845            }
2846            if t & COMMENT != 0 {
2847                *s = s_next;
2848                consume_current_line_no_line_continuation(s);
2849                continue;
2850            }
2851            if consume_line_continuation(s, b, s_next, escape_byte) {
2852                continue;
2853            }
2854        }
2855        break;
2856    }
2857}
2858
2859#[inline]
2860#[track_caller]
2861fn trim_end(text: &str, start: usize, mut end: usize) -> &str {
2862    while start < end {
2863        let next_end = end - 1;
2864        if let Some(&b) = text.as_bytes().get(next_end) {
2865            if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) != 0 {
2866                end = next_end;
2867                continue;
2868            }
2869        }
2870        break;
2871    }
2872    &text[start..end]
2873}
2874
2875#[inline(always)]
2876fn token(s: &mut &[u8], token: &'static [u8]) -> bool {
2877    let matched = starts_with_ignore_ascii_case(s, token);
2878    if matched {
2879        *s = &s[token.len()..];
2880        true
2881    } else {
2882        false
2883    }
2884}
2885#[cold]
2886fn token_slow(s: &mut &[u8], mut token: &'static [u8], escape_byte: u8) -> bool {
2887    debug_assert!(!token.is_empty() && token.iter().all(|&n| n & TO_UPPER8 == n));
2888    if s.len() < token.len() {
2889        return false;
2890    }
2891    let mut tmp = *s;
2892    while let Some((&b, tmp_next)) = tmp.split_first() {
2893        if b & TO_UPPER8 == token[0] {
2894            tmp = tmp_next;
2895            token = &token[1..];
2896            if token.is_empty() {
2897                *s = tmp;
2898                return true;
2899            }
2900            continue;
2901        }
2902        if consume_line_continuation(&mut tmp, b, tmp_next, escape_byte) {
2903            continue;
2904        }
2905        break;
2906    }
2907    false
2908}
2909
/// Mask that clears bit 5 of a byte, mapping ASCII lower-case letters onto
/// their upper-case counterparts.
const TO_UPPER8: u8 = 0xDF;
/// `TO_UPPER8` replicated into each of the eight bytes of a `u64`.
const TO_UPPER64: u64 = u64::from_ne_bytes([TO_UPPER8; 8]);

/// Returns `true` if `s` starts with `needle`, ignoring ASCII case.
///
/// `needle` must be non-empty and must consist only of bytes that are
/// unchanged by `TO_UPPER8` (e.g. upper-case ASCII letters); this is checked
/// in debug builds. The comparison works on up to eight bytes at a time.
#[inline(always)] // Ensure the code getting the length of the needle is inlined.
fn starts_with_ignore_ascii_case(mut s: &[u8], mut needle: &'static [u8]) -> bool {
    debug_assert!(!needle.is_empty() && needle.iter().all(|&n| n & TO_UPPER8 == n));
    if s.len() < needle.len() {
        return false;
    }
    if let [only] = needle {
        return *only == s[0] & TO_UPPER8;
    }
    // Compare full 8-byte words first.
    while needle.len() >= 8 {
        let (needle_word, needle_rest) = needle.split_at(8);
        let (s_word, s_rest) = s.split_at(8);
        let expected = u64::from_ne_bytes(needle_word.try_into().unwrap());
        let actual = u64::from_ne_bytes(s_word.try_into().unwrap());
        if expected != actual & TO_UPPER64 {
            return false;
        }
        needle = needle_rest;
        s = s_rest;
    }
    if needle.is_empty() {
        return true;
    }
    // Compare the remaining 1..=7 bytes, zero-padded to a full word.
    let tail_len = needle.len();
    let mut expected = [0; 8];
    expected[..tail_len].copy_from_slice(needle);
    let mut actual = [0; 8];
    actual[..tail_len].copy_from_slice(&s[..tail_len]);
    u64::from_ne_bytes(expected) == u64::from_ne_bytes(actual) & TO_UPPER64
}
/// Exercises `starts_with_ignore_ascii_case` on every internal path: the
/// length guard, the single-byte fast path, the sub-word tail comparison, and
/// the 8-byte word loop with and without a tail.
#[test]
fn test_starts_with_ignore_ascii_case() {
    // Haystack shorter than the needle.
    assert!(!starts_with_ignore_ascii_case(b"", b"A"));
    assert!(!starts_with_ignore_ascii_case(b"AB", b"ABC"));
    // Single-byte needle.
    assert!(starts_with_ignore_ascii_case(b"A", b"A"));
    assert!(starts_with_ignore_ascii_case(b"a", b"A"));
    assert!(!starts_with_ignore_ascii_case(b"b", b"A"));
    // Needle shorter than one word.
    assert!(starts_with_ignore_ascii_case(b"ABC", b"ABC"));
    assert!(starts_with_ignore_ascii_case(b"abc", b"ABC"));
    assert!(starts_with_ignore_ascii_case(b"AbC", b"ABC"));
    assert!(!starts_with_ignore_ascii_case(b"ABB", b"ABC"));
    // Haystack longer than the needle: only the prefix is compared.
    assert!(starts_with_ignore_ascii_case(b"from ubuntu", b"FROM"));
    // Needle of exactly one word.
    assert!(starts_with_ignore_ascii_case(b"ABCDEFGH", b"ABCDEFGH"));
    assert!(starts_with_ignore_ascii_case(b"abcdefgh", b"ABCDEFGH"));
    assert!(starts_with_ignore_ascii_case(b"AbCdEfGh", b"ABCDEFGH"));
    assert!(!starts_with_ignore_ascii_case(b"ABCDEFGc", b"ABCDEFGH"));
    // One word plus a tail.
    assert!(starts_with_ignore_ascii_case(b"abcdefghij rest", b"ABCDEFGHIJ"));
    assert!(!starts_with_ignore_ascii_case(b"abcdefghix rest", b"ABCDEFGHIJ"));
    // Mismatch inside a later full word.
    assert!(!starts_with_ignore_ascii_case(b"ABCDEFGHIJKLMNOx", b"ABCDEFGHIJKLMNOP"));
    // Several words plus a tail.
    assert!(starts_with_ignore_ascii_case(
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    ));
    assert!(starts_with_ignore_ascii_case(
        b"abcdefghijklmnopqrstuvwxyz",
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    ));
    assert!(starts_with_ignore_ascii_case(
        b"aBcDeFgHiJkLmNoPqRsTuVwXyZ",
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    ));
    assert!(!starts_with_ignore_ascii_case(
        b"aBcDeFgHiJkLmNoPqRsTuVwXyc",
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    ));
}