// parse_dockerfile/src/lib.rs
1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*!
4Dockerfile parser, written in Rust.
5
6### Usage
7
8<!-- Note: Document from sync-markdown-to-rustdoc:start through sync-markdown-to-rustdoc:end
9     is synchronized from README.md. Any changes to that range are not preserved. -->
10<!-- tidy:sync-markdown-to-rustdoc:start -->
11
12To use this crate as a library, add this to your `Cargo.toml`:
13
14```toml
15[dependencies]
16parse-dockerfile = { version = "0.1", default-features = false }
17```
18
19<div class="rustdoc-alert rustdoc-alert-note">
20
21> **ⓘ Note**
22>
23> We recommend disabling default features because they enable CLI-related
24> dependencies which the library part does not use.
25
26</div>
27
28<!-- omit in toc -->
29### Examples
30
31```
32use parse_dockerfile::{parse, Instruction};
33
34let text = "
35ARG UBUNTU_VERSION=latest
36
37FROM ubuntu:${UBUNTU_VERSION}
38RUN echo
39";
40
41let dockerfile = parse(text).unwrap();
42
43// Iterate over all instructions.
44let mut instructions = dockerfile.instructions.iter();
45assert!(matches!(instructions.next(), Some(Instruction::Arg(..))));
46assert!(matches!(instructions.next(), Some(Instruction::From(..))));
47assert!(matches!(instructions.next(), Some(Instruction::Run(..))));
48assert!(instructions.next().is_none());
49
50// Iterate over global args.
51let mut global_args = dockerfile.global_args();
52let global_arg1 = global_args.next().unwrap();
53assert_eq!(global_arg1.arguments.value, "UBUNTU_VERSION=latest");
54assert!(global_args.next().is_none());
55
56// Iterate over stages.
57let mut stages = dockerfile.stages();
58let stage1 = stages.next().unwrap();
59assert_eq!(stage1.from.image.value, "ubuntu:${UBUNTU_VERSION}");
60let mut stage1_instructions = stage1.instructions.iter();
61assert!(matches!(stage1_instructions.next(), Some(Instruction::Run(..))));
62assert!(stage1_instructions.next().is_none());
63assert!(stages.next().is_none());
64```
65
66<!-- omit in toc -->
67### Optional features
68
69- **`serde`** — Implements [`serde::Serialize`] trait for parse-dockerfile types.
70
71[`serde::Serialize`]: https://docs.rs/serde/latest/serde/trait.Serialize.html
72
73<!-- tidy:sync-markdown-to-rustdoc:end -->
74*/
75
76#![no_std]
77#![doc(test(
78    no_crate_inject,
79    attr(allow(
80        dead_code,
81        unused_variables,
82        clippy::undocumented_unsafe_blocks,
83        clippy::unused_trait_names,
84    ))
85))]
86#![forbid(unsafe_code)]
87#![warn(
88    // Lints that may help when writing public library.
89    missing_debug_implementations,
90    missing_docs,
91    clippy::alloc_instead_of_core,
92    clippy::exhaustive_enums,
93    clippy::exhaustive_structs,
94    clippy::impl_trait_in_params,
95    // clippy::missing_inline_in_public_items,
96    clippy::std_instead_of_alloc,
97    clippy::std_instead_of_core,
98)]
99#![allow(clippy::inline_always)]
100
101extern crate alloc;
102extern crate std;
103
104#[cfg(test)]
105#[path = "gen/tests/assert_impl.rs"]
106mod assert_impl;
107#[cfg(test)]
108#[path = "gen/tests/track_size.rs"]
109mod track_size;
110
111mod error;
112
113use alloc::{borrow::Cow, boxed::Box, string::String, vec, vec::Vec};
114use core::{mem, ops::Range, str};
115use std::collections::HashMap;
116
117use smallvec::SmallVec;
118
119pub use self::error::Error;
120use self::error::{ErrorKind, Result};
121
/// Parses dockerfile from the given `text`.
///
/// # Errors
///
/// Returns an error if `text` is not a valid dockerfile: the parser directives
/// or an instruction fail to parse, there is no stage (no `FROM`), an
/// instruction other than `ARG` appears before the first `FROM`, or two stages
/// share the same name.
#[allow(clippy::missing_panics_doc)]
pub fn parse(text: &str) -> Result<Dockerfile<'_>> {
    let mut p = ParseIter::new(text)?;
    let mut s = p.s;

    // Rough pre-allocation guess: roughly one instruction per ~60 bytes of input.
    let mut instructions = Vec::with_capacity(p.text.len() / 60);
    let mut stages = Vec::with_capacity(1);
    // Count of `FROM ... AS <name>` seen; used to size `stages_by_name` below.
    let mut named_stages = 0;
    // Index into `instructions` where the stage being parsed starts (its `FROM`).
    let mut current_stage = None;
    while let Some((&b, s_next)) = s.split_first() {
        let instruction =
            parse_instruction(&mut p, &mut s, b, s_next).map_err(|e| e.into_error(&p))?;
        match instruction {
            Instruction::From(from) => {
                // A `FROM` closes the previous stage (if any) and opens a new one.
                named_stages += from.as_.is_some() as usize;
                let new_stage = instructions.len();
                if let Some(prev_stage) = current_stage.replace(new_stage) {
                    stages.push(prev_stage..new_stage);
                }
                instructions.push(Instruction::From(from));
            }
            // `ARG` is the only instruction allowed before the first `FROM`
            // (a global arg); inside a stage it is kept like any other.
            arg @ Instruction::Arg(..) => instructions.push(arg),
            instruction => {
                // Any other instruction must belong to a stage.
                if current_stage.is_none() {
                    return Err(error::expected("FROM", instruction.instruction_span().start)
                        .into_error(&p));
                }
                instructions.push(instruction);
            }
        }
        skip_comments_and_whitespaces(&mut s, p.escape_byte);
    }
    // Close the final stage; it extends to the end of the instruction list.
    if let Some(current_stage) = current_stage {
        stages.push(current_stage..instructions.len());
    }

    if stages.is_empty() {
        // https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L263
        return Err(error::no_stage().into_error(&p));
    }
    // TODO: https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L302
    // > base name (%s) should not be blank

    // Build the name -> stage-index map, rejecting duplicate stage names.
    let mut stages_by_name = HashMap::with_capacity(named_stages);
    for (i, stage) in stages.iter().enumerate() {
        // By construction above, every stage range starts at its `FROM`.
        let Instruction::From(from) = &instructions[stage.start] else { unreachable!() };
        if let Some((_as, name)) = &from.as_ {
            if let Some(first_occurrence) = stages_by_name.insert(name.value.clone(), i) {
                let Instruction::From(from) = &instructions[stages[first_occurrence].start] else {
                    unreachable!()
                };
                // Report both occurrences: where the name was first declared
                // and where it was re-declared.
                let first_start = from.as_.as_ref().unwrap().1.span.start;
                let second_start = name.span.start;
                return Err(error::duplicate_name(first_start, second_start).into_error(&p));
            }
        }
    }

    Ok(Dockerfile { parser_directives: p.parser_directives, instructions, stages, stages_by_name })
}
183
/// Returns an iterator over instructions in the given `text`.
///
/// Unlike [`parse`] function, the returned iterator doesn't error on
/// duplicate stage names.
///
/// # Errors
///
/// Returns an error if the parser directives at the top of `text` are invalid;
/// errors in the instructions themselves are yielded lazily by the iterator.
pub fn parse_iter(text: &str) -> Result<ParseIter<'_>> {
    ParseIter::new(text)
}
191
/// A dockerfile.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct Dockerfile<'a> {
    /// Parser directives.
    pub parser_directives: ParserDirectives<'a>,
    /// Instructions.
    pub instructions: Vec<Instruction<'a>>,
    /// Index ranges into `instructions`, one per stage; each range starts at
    /// the stage's `FROM` instruction.
    #[cfg_attr(feature = "serde", serde(skip))]
    stages: Vec<Range<usize>>,
    /// Maps a stage name (`FROM ... AS <name>`) to its index in `stages`.
    #[cfg_attr(feature = "serde", serde(skip))]
    stages_by_name: HashMap<Cow<'a, str>, usize>,
}
206impl<'a> Dockerfile<'a> {
207    /// Returns an iterator over global args.
208    #[allow(clippy::missing_panics_doc)] // self.stages is not empty
209    #[must_use]
210    pub fn global_args<'b>(&'b self) -> impl ExactSizeIterator<Item = &'b ArgInstruction<'a>> {
211        self.instructions[..self.stages.first().unwrap().start].iter().map(|arg| {
212            let Instruction::Arg(arg) = arg else { unreachable!() };
213            arg
214        })
215    }
216    /// Gets a stage by name.
217    #[must_use]
218    pub fn stage<'b>(&'b self, name: &str) -> Option<Stage<'a, 'b>> {
219        let i = *self.stages_by_name.get(name)?;
220        let stage = &self.stages[i];
221        let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
222        Some(Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] })
223    }
224    /// Returns an iterator over stages.
225    #[must_use]
226    pub fn stages<'b>(&'b self) -> impl ExactSizeIterator<Item = Stage<'a, 'b>> {
227        self.stages.iter().map(move |stage| {
228            let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
229            Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] }
230        })
231    }
232}
/// A stage.
///
/// This is a borrowed view into a [`Dockerfile`]'s instruction list.
#[derive(Debug)]
#[non_exhaustive]
pub struct Stage<'a, 'b> {
    /// The `FROM` instruction.
    pub from: &'b FromInstruction<'a>,
    /// The remaining instructions.
    pub instructions: &'b [Instruction<'a>],
}
242
/// Parser directives.
///
/// Every directive is optional; a field is `None` when the directive is
/// absent from the dockerfile.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#parser-directives)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ParserDirectives<'a> {
    /// `syntax` parser directive.
    ///
    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#syntax)
    pub syntax: Option<ParserDirective<&'a str>>,
    /// `escape` parser directive.
    ///
    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#escape)
    pub escape: Option<ParserDirective<char>>,
    /// `check` parser directive.
    ///
    /// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#check)
    pub check: Option<ParserDirective<&'a str>>,
}
/// A parser directive.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct ParserDirective<T> {
    /// Start offset of the directive name in the source.
    ///
    /// ```text
    /// syntax=value
    /// ^
    /// ```
    start: usize,
    /// ```text
    /// syntax=value
    ///        ^^^^^
    /// ```
    pub value: Spanned<T>,
}
280impl<T> ParserDirective<T> {
281    /// ```text
282    /// syntax=value
283    /// ^^^^^^^^^^^^
284    /// ```
285    #[must_use]
286    pub fn span(&self) -> Span {
287        self.start..self.value.span.end
288    }
289}
290
/// An instruction.
///
/// Each variant wraps the payload type for that instruction kind.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
#[non_exhaustive]
// NB: When adding new variants, update ALL_INST in tests/test.rs.
pub enum Instruction<'a> {
    /// `ADD` instruction.
    Add(AddInstruction<'a>),
    /// `ARG` instruction.
    Arg(ArgInstruction<'a>),
    /// `CMD` instruction.
    Cmd(CmdInstruction<'a>),
    /// `COPY` instruction.
    Copy(CopyInstruction<'a>),
    /// `ENTRYPOINT` instruction.
    Entrypoint(EntrypointInstruction<'a>),
    /// `ENV` instruction.
    Env(EnvInstruction<'a>),
    /// `EXPOSE` instruction.
    Expose(ExposeInstruction<'a>),
    /// `FROM` instruction.
    From(FromInstruction<'a>),
    /// `HEALTHCHECK` instruction.
    Healthcheck(HealthcheckInstruction<'a>),
    /// `LABEL` instruction.
    Label(LabelInstruction<'a>),
    /// `MAINTAINER` instruction (deprecated).
    Maintainer(MaintainerInstruction<'a>),
    /// `ONBUILD` instruction.
    Onbuild(OnbuildInstruction<'a>),
    /// `RUN` instruction.
    Run(RunInstruction<'a>),
    /// `SHELL` instruction.
    Shell(ShellInstruction<'a>),
    /// `STOPSIGNAL` instruction.
    Stopsignal(StopsignalInstruction<'a>),
    /// `USER` instruction.
    User(UserInstruction<'a>),
    /// `VOLUME` instruction.
    Volume(VolumeInstruction<'a>),
    /// `WORKDIR` instruction.
    Workdir(WorkdirInstruction<'a>),
}
336impl Instruction<'_> {
337    fn instruction_span(&self) -> Span {
338        match self {
339            Instruction::Add(instruction) => instruction.add.span.clone(),
340            Instruction::Arg(instruction) => instruction.arg.span.clone(),
341            Instruction::Cmd(instruction) => instruction.cmd.span.clone(),
342            Instruction::Copy(instruction) => instruction.copy.span.clone(),
343            Instruction::Entrypoint(instruction) => instruction.entrypoint.span.clone(),
344            Instruction::Env(instruction) => instruction.env.span.clone(),
345            Instruction::Expose(instruction) => instruction.expose.span.clone(),
346            Instruction::From(instruction) => instruction.from.span.clone(),
347            Instruction::Healthcheck(instruction) => instruction.healthcheck.span.clone(),
348            Instruction::Label(instruction) => instruction.label.span.clone(),
349            Instruction::Maintainer(instruction) => instruction.maintainer.span.clone(),
350            Instruction::Onbuild(instruction) => instruction.onbuild.span.clone(),
351            Instruction::Run(instruction) => instruction.run.span.clone(),
352            Instruction::Shell(instruction) => instruction.shell.span.clone(),
353            Instruction::Stopsignal(instruction) => instruction.stopsignal.span.clone(),
354            Instruction::User(instruction) => instruction.user.span.clone(),
355            Instruction::Volume(instruction) => instruction.volume.span.clone(),
356            Instruction::Workdir(instruction) => instruction.workdir.span.clone(),
357        }
358    }
359}
/// An `ADD` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#add)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct AddInstruction<'a> {
    /// ```text
    /// ADD [options] <src> ... <dest>
    /// ^^^
    /// ```
    pub add: Keyword,
    /// ```text
    /// ADD [options] <src> ... <dest>
    ///     ^^^^^^^^^
    /// ```
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// ```text
    /// ADD [options] <src> ... <dest>
    ///               ^^^^^^^^^
    /// ```
    // Invariant: contains at least one source.
    pub src: SmallVec<[Source<'a>; 1]>,
    /// ```text
    /// ADD [options] <src> ... <dest>
    ///                         ^^^^^^
    /// ```
    pub dest: UnescapedString<'a>,
}
/// An `ARG` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#arg)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ArgInstruction<'a> {
    /// ```text
    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
    /// ^^^
    /// ```
    pub arg: Keyword,
    /// ```text
    /// ARG <name>[=<default value>] [<name>[=<default value>]...]
    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // Currently kept as one raw string covering every name/value pair.
    // TODO: parse into SmallVec<[NameOptValue<'a>; 1]>
    pub arguments: UnescapedString<'a>,
}
/// A `CMD` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#cmd)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct CmdInstruction<'a> {
    /// ```text
    /// CMD ["executable", "param"]
    /// ^^^
    /// ```
    pub cmd: Keyword,
    /// ```text
    /// CMD ["executable", "param"]
    ///     ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // See `Command` for the exec-form (JSON array) vs shell-form distinction.
    pub arguments: Command<'a>,
}
/// A `COPY` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#copy)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct CopyInstruction<'a> {
    /// ```text
    /// COPY [options] <src> ... <dest>
    /// ^^^^
    /// ```
    pub copy: Keyword,
    /// ```text
    /// COPY [options] <src> ... <dest>
    ///      ^^^^^^^^^
    /// ```
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// ```text
    /// COPY [options] <src> ... <dest>
    ///                ^^^^^^^^^
    /// ```
    // Invariant: contains at least one source.
    pub src: SmallVec<[Source<'a>; 1]>,
    /// ```text
    /// COPY [options] <src> ... <dest>
    ///                          ^^^^^^
    /// ```
    pub dest: UnescapedString<'a>,
}
/// An enum that represents a source value of the [`ADD` instruction](AddInstruction)
/// and the [`COPY` instruction](CopyInstruction).
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum Source<'a> {
    /// Path or URL.
    Path(UnescapedString<'a>),
    /// Here-document.
    HereDoc(HereDoc<'a>),
}
/// An `ENTRYPOINT` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#entrypoint)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct EntrypointInstruction<'a> {
    /// ```text
    /// ENTRYPOINT ["executable", "param"]
    /// ^^^^^^^^^^
    /// ```
    pub entrypoint: Keyword,
    /// ```text
    /// ENTRYPOINT ["executable", "param"]
    ///            ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // See `Command` for the exec-form (JSON array) vs shell-form distinction.
    pub arguments: Command<'a>,
}
/// An `ENV` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#env)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct EnvInstruction<'a> {
    /// ```text
    /// ENV <key>=<value> [<key>=<value>...]
    /// ^^^
    /// ```
    pub env: Keyword,
    /// ```text
    /// ENV <key>=<value> [<key>=<value>...]
    ///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // Currently kept as one raw string covering every key/value pair.
    // TODO: parse into SmallVec<[NameValue<'a>; 1]>
    pub arguments: UnescapedString<'a>,
}
/// An `EXPOSE` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#expose)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ExposeInstruction<'a> {
    /// ```text
    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
    /// ^^^^^^
    /// ```
    pub expose: Keyword,
    /// ```text
    /// EXPOSE <port>[/<protocol>] [<port>[/<protocol>]...]
    ///        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // One element per `<port>[/<protocol>]` argument.
    pub arguments: SmallVec<[UnescapedString<'a>; 1]>,
}
/// A `FROM` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#from)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct FromInstruction<'a> {
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    /// ^^^^
    /// ```
    pub from: Keyword,
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    ///      ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    pub options: Vec<Flag<'a>>,
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    ///                              ^^^^^^^
    /// ```
    pub image: UnescapedString<'a>,
    /// The `AS` keyword and the stage name, when the stage is named.
    ///
    /// ```text
    /// FROM [--platform=<platform>] <image> [AS <name>]
    ///                                      ^^^^^^^^^^^
    /// ```
    pub as_: Option<(Keyword, UnescapedString<'a>)>,
}
/// A `HEALTHCHECK` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#healthcheck)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct HealthcheckInstruction<'a> {
    /// ```text
    /// HEALTHCHECK [options] CMD command
    /// ^^^^^^^^^^^
    /// ```
    pub healthcheck: Keyword,
    /// ```text
    /// HEALTHCHECK [options] CMD command
    ///             ^^^^^^^^^
    /// ```
    pub options: Vec<Flag<'a>>,
    /// The `CMD ...` or `NONE` part; see [`HealthcheckArguments`].
    ///
    /// ```text
    /// HEALTHCHECK [options] CMD command
    ///                       ^^^^^^^^^^^
    /// ```
    pub arguments: HealthcheckArguments<'a>,
}
/// Arguments of the [`HEALTHCHECK` instruction](HealthcheckInstruction).
///
/// The instruction takes exactly one of the two forms below.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
#[non_exhaustive]
pub enum HealthcheckArguments<'a> {
    /// `HEALTHCHECK [options] CMD ...`
    #[non_exhaustive]
    Cmd {
        /// ```text
        /// HEALTHCHECK [options] CMD command
        ///                       ^^^
        /// ```
        cmd: Keyword,
        /// ```text
        /// HEALTHCHECK [options] CMD command
        ///                           ^^^^^^^
        /// ```
        arguments: Command<'a>,
    },
    /// `HEALTHCHECK [options] NONE`
    #[non_exhaustive]
    None {
        /// ```text
        /// HEALTHCHECK [options] NONE
        ///                       ^^^^
        /// ```
        none: Keyword,
    },
}
/// A `LABEL` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#label)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct LabelInstruction<'a> {
    /// ```text
    /// LABEL <key>=<value> [<key>=<value>...]
    /// ^^^^^
    /// ```
    pub label: Keyword,
    /// ```text
    /// LABEL <key>=<value> [<key>=<value>...]
    ///       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // Currently kept as one raw string covering every key/value pair.
    // TODO: parse into SmallVec<[NameValue<'a>; 1]>
    pub arguments: UnescapedString<'a>,
}
/// A `MAINTAINER` instruction (deprecated).
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#maintainer-deprecated)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct MaintainerInstruction<'a> {
    /// ```text
    /// MAINTAINER <name>
    /// ^^^^^^^^^^
    /// ```
    pub maintainer: Keyword,
    /// The maintainer name, i.e., the whole remainder of the line.
    ///
    /// ```text
    /// MAINTAINER <name>
    ///            ^^^^^^
    /// ```
    pub name: UnescapedString<'a>,
}
/// An `ONBUILD` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#onbuild)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct OnbuildInstruction<'a> {
    /// ```text
    /// ONBUILD <INSTRUCTION>
    /// ^^^^^^^
    /// ```
    pub onbuild: Keyword,
    /// The wrapped instruction.
    ///
    /// Boxed because the type is recursive (`Instruction` contains
    /// `OnbuildInstruction`), which requires indirection.
    ///
    /// ```text
    /// ONBUILD <INSTRUCTION>
    ///         ^^^^^^^^^^^^^
    /// ```
    pub instruction: Box<Instruction<'a>>,
}
/// A `RUN` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#run)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct RunInstruction<'a> {
    /// ```text
    /// RUN [options] <command> ...
    /// ^^^
    /// ```
    pub run: Keyword,
    /// ```text
    /// RUN [options] <command> ...
    ///     ^^^^^^^^^
    /// ```
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// ```text
    /// RUN [options] <command> ...
    ///               ^^^^^^^^^^^^^
    /// ```
    pub arguments: Command<'a>,
    /// Here-documents attached to the command; empty when none are used.
    ///
    /// ```text
    ///   RUN [options] <<EOF
    /// /               ^^^^^
    /// | ...
    /// | EOF
    /// |_^^^
    /// ```
    pub here_docs: Vec<HereDoc<'a>>,
}
/// A `SHELL` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ShellInstruction<'a> {
    /// ```text
    /// SHELL ["executable", "param"]
    /// ^^^^^
    /// ```
    pub shell: Keyword,
    /// ```text
    /// SHELL ["executable", "param"]
    ///       ^^^^^^^^^^^^^^^^^^^^^^^
    /// ```
    // Inline capacity of 4 chosen because the argument list is usually at
    // least 2 elements, e.g., ["/bin/sh", "-c"], and commonly 4, e.g.,
    // ["/bin/bash", "-o", "pipefail", "-c"].
    pub arguments: SmallVec<[UnescapedString<'a>; 4]>,
}
/// A `STOPSIGNAL` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#stopsignal)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct StopsignalInstruction<'a> {
    /// ```text
    /// STOPSIGNAL signal
    /// ^^^^^^^^^^
    /// ```
    pub stopsignal: Keyword,
    /// The signal argument, kept as the raw (unescaped) string.
    ///
    /// ```text
    /// STOPSIGNAL signal
    ///            ^^^^^^
    /// ```
    pub arguments: UnescapedString<'a>,
}
/// A `USER` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#user)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct UserInstruction<'a> {
    /// ```text
    /// USER <user>[:<group>]
    /// ^^^^
    /// ```
    pub user: Keyword,
    /// The whole `<user>[:<group>]` argument; the optional group part is not
    /// split off.
    ///
    /// ```text
    /// USER <user>[:<group>]
    ///      ^^^^^^^^^^^^^^^^
    /// ```
    pub arguments: UnescapedString<'a>,
}
/// A `VOLUME` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#volume)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct VolumeInstruction<'a> {
    /// ```text
    /// VOLUME ["/data"]
    /// ^^^^^^
    /// ```
    pub volume: Keyword,
    /// Either a JSON array (`VOLUME ["/data"]`) or a space-separated string
    /// form; see [`JsonOrStringArray`].
    ///
    /// ```text
    /// VOLUME ["/data"]
    ///        ^^^^^^^^^
    /// ```
    pub arguments: JsonOrStringArray<'a, 1>,
}
/// A `WORKDIR` instruction.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#workdir)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct WorkdirInstruction<'a> {
    /// ```text
    /// WORKDIR /path/to/workdir
    /// ^^^^^^^
    /// ```
    pub workdir: Keyword,
    /// The path argument, covering the remainder of the line (see diagram).
    ///
    /// ```text
    /// WORKDIR /path/to/workdir
    ///         ^^^^^^^^^^^^^^^^
    /// ```
    pub arguments: UnescapedString<'a>,
}
800
/// A keyword.
///
/// Only the source span is stored; the keyword text itself is implied by the
/// field this `Keyword` appears in (e.g., `FromInstruction::from`).
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct Keyword {
    #[allow(missing_docs)]
    pub span: Span,
}
810
/// An option flag.
// The private `flag_start` field makes this struct non-constructible outside
// the crate, so it effectively behaves like `#[non_exhaustive]`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct Flag<'a> {
    /// Start offset of the leading dashes.
    ///
    /// ```text
    /// --platform=linux/amd64
    /// ^
    /// ```
    flag_start: usize,
    /// ```text
    /// --platform=linux/amd64
    ///   ^^^^^^^^
    /// ```
    pub name: UnescapedString<'a>,
    /// The flag value; `None` when no `=value` part is present.
    ///
    /// ```text
    /// --platform=linux/amd64
    ///            ^^^^^^^^^^^
    /// ```
    pub value: Option<UnescapedString<'a>>,
}
832impl Flag<'_> {
833    /// ```text
834    /// --platform=linux/amd64
835    /// ^^^^^^^^^^
836    /// ```
837    #[must_use]
838    pub fn flag_span(&self) -> Span {
839        self.flag_start..self.name.span.end
840    }
841    /// ```text
842    /// --platform=linux/amd64
843    /// ^^^^^^^^^^^^^^^^^^^^^^
844    /// ```
845    #[must_use]
846    pub fn span(&self) -> Span {
847        match &self.value {
848            Some(v) => self.flag_start..v.span.end,
849            None => self.flag_span(),
850        }
851    }
852}
853
/// An unescaped string.
#[derive(Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct UnescapedString<'a> {
    /// Span of the original (still-escaped) text in the source.
    #[allow(missing_docs)]
    pub span: Span,
    /// The unescaped value.
    // `Cow` allows borrowing from the input when no unescaping is needed.
    #[allow(missing_docs)]
    pub value: Cow<'a, str>,
}
865
/// A command.
///
/// This is used in the [`RUN`](RunInstruction), [`CMD`](CmdInstruction), and
/// [`ENTRYPOINT`](EntrypointInstruction) instructions.
///
/// [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#shell-and-exec-form)
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum Command<'a> {
    /// Exec-form (JSON array)
    // Invariant: contains at least one element.
    Exec(Spanned<SmallVec<[UnescapedString<'a>; 1]>>),
    /// Shell-form (space-separated string or here-documents), escape preserved
    Shell(Spanned<&'a str>),
}
883
// TODO: merge the two variants? That would reduce the type's size, but would
// be confusing when the array is modified.
/// A JSON array or space-separated string.
///
/// This is used in the [`VOLUME` instruction](VolumeInstruction).
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[allow(clippy::exhaustive_enums)]
pub enum JsonOrStringArray<'a, const N: usize> {
    /// JSON array (carries the span of the whole array literal).
    Json(Spanned<SmallVec<[UnescapedString<'a>; N]>>),
    /// Space-separated string.
    String(SmallVec<[UnescapedString<'a>; N]>),
}
898
/// A here-document.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct HereDoc<'a> {
    #[allow(missing_docs)]
    pub span: Span,
    /// `false` if delimiter is quoted.
    pub expand: bool,
    /// The here-document body.
    // `Cow` allows borrowing the body directly from the input.
    #[allow(missing_docs)]
    pub value: Cow<'a, str>,
}
912
/// A spanned value.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
// Intentionally exhaustive: this is a plain span + value pair.
#[allow(clippy::exhaustive_structs)]
pub struct Spanned<T> {
    #[allow(missing_docs)]
    pub span: Span,
    #[allow(missing_docs)]
    pub value: T,
}
924
/// A byte-offset range (`start..end`) into the source text.
#[allow(missing_docs)]
pub type Span = Range<usize>;
927
928// -----------------------------------------------------------------------------
929// Parsing
930
/// An iterator over instructions.
///
/// This type is returned by [`parse_iter`] function.
#[allow(missing_debug_implementations)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ParseIter<'a> {
    /// The full input text; spans are byte offsets into this.
    text: &'a str,
    /// Remaining unparsed input; always a suffix of `text`'s bytes, so the
    /// current offset is `text.len() - s.len()`.
    s: &'a [u8],
    /// Current line-continuation escape byte: `\` by default, `` ` `` if
    /// selected via the `escape` parser directive.
    escape_byte: u8,
    /// Whether a `FROM` instruction has been seen; non-`ARG` instructions
    /// before the first `FROM` are rejected.
    has_stage: bool,
    /// Whether we are inside an `ONBUILD` argument.
    // NOTE(review): not read or written in this chunk — presumably used by
    // `parse_onbuild` to reject nested ONBUILD; confirm.
    in_onbuild: bool,
    /// Parser directives (`syntax`, `escape`, `check`) collected from the
    /// leading comment lines.
    parser_directives: ParserDirectives<'a>,
}
944impl<'a> ParseIter<'a> {
945    fn new(mut text: &'a str) -> Result<Self> {
946        // https://github.com/moby/moby/pull/23234
947        if text.as_bytes().starts_with(UTF8_BOM) {
948            text = &text[UTF8_BOM.len()..];
949        }
950        let mut p = Self {
951            text,
952            s: text.as_bytes(),
953            escape_byte: DEFAULT_ESCAPE_BYTE,
954            has_stage: false,
955            in_onbuild: false,
956            parser_directives: ParserDirectives {
957                // https://docs.docker.com/reference/dockerfile/#parser-directives
958                syntax: None,
959                escape: None,
960                // https://github.com/moby/buildkit/pull/4962
961                check: None,
962            },
963        };
964
965        parse_parser_directives(&mut p).map_err(|e| e.into_error(&p))?;
966
967        // https://docs.docker.com/reference/dockerfile/#format
968        // > For backward compatibility, leading whitespace before comments (#) and
969        // > instructions (such as RUN) are ignored, but discouraged.
970        skip_comments_and_whitespaces(&mut p.s, p.escape_byte);
971        Ok(p)
972    }
973}
impl<'a> Iterator for ParseIter<'a> {
    type Item = Result<Instruction<'a>>;
    /// Parses and returns the next instruction.
    ///
    /// Yields `Err` for a malformed instruction, for a non-`ARG` instruction
    /// appearing before the first `FROM`, and (at end of input) for a
    /// Dockerfile that contains no stage at all.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let p = self;
        let mut s = p.s;
        if let Some((&b, s_next)) = s.split_first() {
            // Parse one instruction starting at the first remaining byte.
            // NOTE(review): on `Err`, `p.s` is not advanced, so a subsequent
            // call re-parses the same input and yields the same error —
            // confirm callers stop at the first `Err`.
            let instruction = match parse_instruction(p, &mut s, b, s_next) {
                Ok(i) => i,
                Err(e) => return Some(Err(e.into_error(p))),
            };
            match &instruction {
                Instruction::From(..) => {
                    p.has_stage = true;
                }
                // ARG is the only instruction permitted before the first FROM
                // (global args).
                Instruction::Arg(..) => {}
                instruction => {
                    if !p.has_stage {
                        return Some(Err(error::expected(
                            "FROM",
                            instruction.instruction_span().start,
                        )
                        .into_error(p)));
                    }
                }
            }
            // Advance past trailing comments/whitespace so the next call
            // starts directly at the next instruction (or end of input).
            skip_comments_and_whitespaces(&mut s, p.escape_byte);
            p.s = s;
            return Some(Ok(instruction));
        }
        if !p.has_stage {
            // A Dockerfile without any FROM (stage) is an error, as in BuildKit:
            // https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L263
            return Some(Err(error::no_stage().into_error(p)));
        }
        None
    }
}
1011
1012const DEFAULT_ESCAPE_BYTE: u8 = b'\\';
1013
1014fn parse_parser_directives(p: &mut ParseIter<'_>) -> Result<(), ErrorKind> {
1015    while let Some((&b'#', s_next)) = p.s.split_first() {
1016        p.s = s_next;
1017        skip_spaces_no_escape(&mut p.s);
1018        let directive_start = p.text.len() - p.s.len();
1019        if token(&mut p.s, b"SYNTAX") {
1020            skip_spaces_no_escape(&mut p.s);
1021            if let Some((&b'=', s_next)) = p.s.split_first() {
1022                p.s = s_next;
1023                if p.parser_directives.syntax.is_some() {
1024                    // > Invalid due to appearing twice
1025                    p.parser_directives.syntax = None;
1026                    p.parser_directives.escape = None;
1027                    p.parser_directives.check = None;
1028                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1029                    skip_this_line_no_escape(&mut p.s);
1030                    break;
1031                }
1032                skip_spaces_no_escape(&mut p.s);
1033                let value_start = p.text.len() - p.s.len();
1034                skip_non_whitespace_no_escape(&mut p.s);
1035                let end = p.text.len() - p.s.len();
1036                let value = p.text[value_start..end].trim_ascii_end();
1037                p.parser_directives.syntax = Some(ParserDirective {
1038                    start: directive_start,
1039                    value: Spanned { span: value_start..value_start + value.len(), value },
1040                });
1041                skip_this_line_no_escape(&mut p.s);
1042                continue;
1043            }
1044        } else if token(&mut p.s, b"CHECK") {
1045            skip_spaces_no_escape(&mut p.s);
1046            if let Some((&b'=', s_next)) = p.s.split_first() {
1047                p.s = s_next;
1048                if p.parser_directives.check.is_some() {
1049                    // > Invalid due to appearing twice
1050                    p.parser_directives.syntax = None;
1051                    p.parser_directives.escape = None;
1052                    p.parser_directives.check = None;
1053                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1054                    skip_this_line_no_escape(&mut p.s);
1055                    break;
1056                }
1057                skip_spaces_no_escape(&mut p.s);
1058                let value_start = p.text.len() - p.s.len();
1059                skip_non_whitespace_no_escape(&mut p.s);
1060                let end = p.text.len() - p.s.len();
1061                let value = p.text[value_start..end].trim_ascii_end();
1062                p.parser_directives.check = Some(ParserDirective {
1063                    start: directive_start,
1064                    value: Spanned { span: value_start..value_start + value.len(), value },
1065                });
1066                skip_this_line_no_escape(&mut p.s);
1067                continue;
1068            }
1069        } else if token(&mut p.s, b"ESCAPE") {
1070            skip_spaces_no_escape(&mut p.s);
1071            if let Some((&b'=', s_next)) = p.s.split_first() {
1072                p.s = s_next;
1073                if p.parser_directives.escape.is_some() {
1074                    // > Invalid due to appearing twice
1075                    p.parser_directives.syntax = None;
1076                    p.parser_directives.escape = None;
1077                    p.parser_directives.check = None;
1078                    p.escape_byte = DEFAULT_ESCAPE_BYTE;
1079                    skip_this_line_no_escape(&mut p.s);
1080                    break;
1081                }
1082                skip_spaces_no_escape(&mut p.s);
1083                let value_start = p.text.len() - p.s.len();
1084                skip_non_whitespace_no_escape(&mut p.s);
1085                let end = p.text.len() - p.s.len();
1086                let value = p.text[value_start..end].trim_ascii_end();
1087                match value {
1088                    "`" => p.escape_byte = b'`',
1089                    "\\" => {}
1090                    _ => return Err(error::invalid_escape(value_start)),
1091                }
1092                p.parser_directives.escape = Some(ParserDirective {
1093                    start: directive_start,
1094                    value: Spanned {
1095                        span: value_start..value_start + value.len(),
1096                        value: p.escape_byte as char,
1097                    },
1098                });
1099                skip_this_line_no_escape(&mut p.s);
1100                continue;
1101            }
1102        }
1103        skip_this_line_no_escape(&mut p.s);
1104        break;
1105    }
1106    Ok(())
1107}
1108
/// Dispatches on the first byte of an instruction keyword and parses one
/// complete instruction.
///
/// `b` is the first byte of the keyword and `s_next` is the input just after
/// it; matching is ASCII case-insensitive (`b & TO_UPPER8` folds the case,
/// so only the remaining keyword bytes, e.g. `&b"ARG"[1..]`, are matched
/// here). Each keyword is first tried with the fast `token` path and then
/// retried with `token_slow`, which additionally handles keywords split by
/// escaped newlines.
///
/// Returns an "unknown instruction" error when no keyword matches, or when a
/// matched keyword is not followed by whitespace or end of line.
#[inline]
fn parse_instruction<'a>(
    p: &mut ParseIter<'a>,
    s: &mut &'a [u8],
    b: u8,
    s_next: &'a [u8],
) -> Result<Instruction<'a>, ErrorKind> {
    let instruction_start = p.text.len() - s.len();
    *s = s_next;
    // NB: `token_slow` must be called after all `token` calls.
    match b & TO_UPPER8 {
        // ARG / ADD
        b'A' => {
            if token(s, &b"ARG"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_arg(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"ADD"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    let add = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
                }
            } else if token_slow(s, &b"ARG"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_arg(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"ADD"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    let add = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
                    return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
                }
            }
        }
        // COPY / CMD
        b'C' => {
            if token(s, &b"COPY"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    let copy = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
                }
            } else if token(s, &b"CMD"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_cmd(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"COPY"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    let copy = Keyword { span: instruction_span };
                    let (options, src, dest) = parse_add_or_copy(p, s, &copy)?;
                    return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
                }
            } else if token_slow(s, &b"CMD"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_cmd(p, s, Keyword { span: instruction_span });
                }
            }
        }
        // ENV / EXPOSE / ENTRYPOINT
        b'E' => {
            if token(s, &b"ENV"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_env(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"EXPOSE"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_expose(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"ENTRYPOINT"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"ENV"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_env(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"EXPOSE"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_expose(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"ENTRYPOINT"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_entrypoint(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'F' => {
            if token(s, &b"FROM"[1..]) || token_slow(s, &b"FROM"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_from(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'H' => {
            if token(s, &b"HEALTHCHECK"[1..]) || token_slow(s, &b"HEALTHCHECK"[1..], p.escape_byte)
            {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_healthcheck(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'L' => {
            if token(s, &b"LABEL"[1..]) || token_slow(s, &b"LABEL"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_label(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'M' => {
            if token(s, &b"MAINTAINER"[1..]) || token_slow(s, &b"MAINTAINER"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_maintainer(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'O' => {
            if token(s, &b"ONBUILD"[1..]) || token_slow(s, &b"ONBUILD"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_onbuild(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'R' => {
            if token(s, &b"RUN"[1..]) || token_slow(s, &b"RUN"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_run(p, s, Keyword { span: instruction_span });
                }
            }
        }
        // SHELL / STOPSIGNAL
        b'S' => {
            if token(s, &b"SHELL"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_shell(p, s, Keyword { span: instruction_span });
                }
            } else if token(s, &b"STOPSIGNAL"[1..]) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"SHELL"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_shell(p, s, Keyword { span: instruction_span });
                }
            } else if token_slow(s, &b"STOPSIGNAL"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_stopsignal(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'U' => {
            if token(s, &b"USER"[1..]) || token_slow(s, &b"USER"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_user(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'V' => {
            if token(s, &b"VOLUME"[1..]) || token_slow(s, &b"VOLUME"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_volume(p, s, Keyword { span: instruction_span });
                }
            }
        }
        b'W' => {
            if token(s, &b"WORKDIR"[1..]) || token_slow(s, &b"WORKDIR"[1..], p.escape_byte) {
                let instruction_span = instruction_start..p.text.len() - s.len();
                if spaces_or_line_end(s, p.escape_byte) {
                    return parse_workdir(p, s, Keyword { span: instruction_span });
                }
            }
        }
        _ => {}
    }
    Err(error::unknown_instruction(instruction_start))
}
1307
1308#[inline]
1309fn parse_arg<'a>(
1310    p: &mut ParseIter<'a>,
1311    s: &mut &'a [u8],
1312    instruction: Keyword,
1313) -> Result<Instruction<'a>, ErrorKind> {
1314    debug_assert!(token_slow(
1315        &mut p.text[instruction.span.clone()].as_bytes(),
1316        b"ARG",
1317        p.escape_byte,
1318    ));
1319    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1320    if arguments.value.is_empty() {
1321        return Err(error::at_least_one_argument(instruction.span.start));
1322    }
1323    Ok(Instruction::Arg(ArgInstruction { arg: instruction, arguments }))
1324}
1325
/// Parses the arguments of an `ADD` or `COPY` instruction:
/// `[options] <src>... <dest>`, where the sources and destination may be
/// given as a JSON array, and shell-form sources may be here-documents
/// (`<<DELIM`, `<<-DELIM`, quoted delimiters allowed).
///
/// Returns the parsed options, sources, and destination.
#[inline]
fn parse_add_or_copy<'a>(
    p: &mut ParseIter<'a>,
    s: &mut &'a [u8],
    instruction: &Keyword,
) -> Result<(SmallVec<[Flag<'a>; 1]>, SmallVec<[Source<'a>; 1]>, UnescapedString<'a>), ErrorKind> {
    debug_assert!(
        token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"ADD", p.escape_byte,)
            || token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"COPY", p.escape_byte,)
    );
    let options = parse_options(s, p.text, p.escape_byte);
    // JSON form: `ADD ["src", ..., "dest"]`. Parsed on a temporary slice so
    // that a failed JSON parse falls back to the shell form below.
    if is_maybe_json(s) {
        let mut tmp = *s;
        if let Ok(((src, dest), _array_span)) = parse_json_array::<(
            SmallVec<[Source<'_>; 1]>,
            Option<_>,
        )>(&mut tmp, p.text, p.escape_byte)
        {
            debug_assert!(is_line_end(tmp.first()));
            // Consume the line terminator, if any.
            if tmp.is_empty() {
                *s = &[];
            } else {
                *s = &tmp[1..];
            }
            // `src` empty means fewer than two arguments were given.
            if src.is_empty() {
                return Err(error::at_least_two_arguments(instruction.span.start));
            }
            return Ok((options, src, dest.unwrap()));
        }
    }
    // Shell form: whitespace-separated arguments; the last one is `dest`.
    let (mut src, dest) = collect_space_separated_unescaped_consume_line::<(
        SmallVec<[Source<'_>; 1]>,
        Option<_>,
    )>(s, p.text, p.escape_byte);
    if src.is_empty() {
        return Err(error::at_least_two_arguments(instruction.span.start));
    }
    // Rewrite any `<<DELIM` source into a here-document by reading its body
    // from the following lines.
    for src in &mut src {
        // The collector only produces `Source::Path`; here-doc variants are
        // introduced by this loop.
        let Source::Path(path) = src else { unreachable!() };
        let Some(mut delim) = path.value.as_bytes().strip_prefix(b"<<") else { continue };
        // A bare `<<` with no delimiter is left as a literal path.
        if delim.is_empty() {
            continue;
        }
        let mut strip_tab = false;
        let mut quote = None;
        // `<<-DELIM`: strip leading tabs from each body line.
        if let Some((&b'-', delim_next)) = delim.split_first() {
            strip_tab = true;
            delim = delim_next;
        }
        // A quoted delimiter (`<<"EOF"` or `<<'EOF'`) must be closed by the
        // same quote; quoting disables expansion (`expand: false` below).
        if let Some((&b, delim_next)) = delim.split_first() {
            if matches!(b, b'"' | b'\'') {
                quote = Some(b);
                delim = delim_next;
                if delim.last() != Some(&b) {
                    return Err(error::expected_quote(
                        b,
                        delim.last().copied(),
                        p.text.len() - s.len(),
                    ));
                }
                delim = &delim[..delim.len() - 1];
            }
        }
        if strip_tab {
            let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
            *src = Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc });
        } else {
            let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
            *src =
                Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc.into() });
        }
    }
    // NOTE(review): `dest` is assumed `Some` whenever `src` is non-empty
    // (the collector fills `dest` with the last argument) — confirm.
    Ok((options, src, dest.unwrap()))
}
1400
1401#[allow(clippy::unnecessary_wraps)]
1402#[inline]
1403fn parse_cmd<'a>(
1404    p: &mut ParseIter<'a>,
1405    s: &mut &'a [u8],
1406    instruction: Keyword,
1407) -> Result<Instruction<'a>, ErrorKind> {
1408    debug_assert!(token_slow(
1409        &mut p.text[instruction.span.clone()].as_bytes(),
1410        b"CMD",
1411        p.escape_byte,
1412    ));
1413    if is_maybe_json(s) {
1414        let mut tmp = *s;
1415        if let Ok((arguments, array_span)) =
1416            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1417        {
1418            debug_assert!(is_line_end(tmp.first()));
1419            if tmp.is_empty() {
1420                *s = &[];
1421            } else {
1422                *s = &tmp[1..];
1423            }
1424            // "CMD []" seems to be okay?
1425            // https://github.com/moby/buildkit/blob/6d143f5602a61acef286f39ee75f1cb33c367d44/frontend/dockerfile/parser/testfiles/brimstone-docker-consul/Dockerfile#L3
1426            return Ok(Instruction::Cmd(CmdInstruction {
1427                cmd: instruction,
1428                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1429            }));
1430        }
1431    }
1432    let arguments_start = p.text.len() - s.len();
1433    skip_this_line(s, p.escape_byte);
1434    let end = p.text.len() - s.len();
1435    let arguments = p.text[arguments_start..end].trim_ascii_end();
1436    Ok(Instruction::Cmd(CmdInstruction {
1437        cmd: instruction,
1438        arguments: Command::Shell(Spanned {
1439            span: arguments_start..arguments_start + arguments.len(),
1440            value: arguments,
1441        }),
1442    }))
1443}
1444
1445#[inline]
1446fn parse_env<'a>(
1447    p: &mut ParseIter<'a>,
1448    s: &mut &'a [u8],
1449    instruction: Keyword,
1450) -> Result<Instruction<'a>, ErrorKind> {
1451    debug_assert!(token_slow(
1452        &mut p.text[instruction.span.clone()].as_bytes(),
1453        b"ENV",
1454        p.escape_byte,
1455    ));
1456    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1457    if arguments.value.is_empty() {
1458        return Err(error::at_least_one_argument(instruction.span.start));
1459    }
1460    Ok(Instruction::Env(EnvInstruction { env: instruction, arguments }))
1461}
1462
1463#[inline]
1464fn parse_expose<'a>(
1465    p: &mut ParseIter<'a>,
1466    s: &mut &'a [u8],
1467    instruction: Keyword,
1468) -> Result<Instruction<'a>, ErrorKind> {
1469    debug_assert!(token_slow(
1470        &mut p.text[instruction.span.clone()].as_bytes(),
1471        b"EXPOSE",
1472        p.escape_byte,
1473    ));
1474    let arguments: SmallVec<[_; 1]> =
1475        collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1476    if arguments.is_empty() {
1477        return Err(error::at_least_one_argument(instruction.span.start));
1478    }
1479    Ok(Instruction::Expose(ExposeInstruction { expose: instruction, arguments }))
1480}
1481
1482#[inline]
1483fn parse_entrypoint<'a>(
1484    p: &mut ParseIter<'a>,
1485    s: &mut &'a [u8],
1486    instruction: Keyword,
1487) -> Result<Instruction<'a>, ErrorKind> {
1488    debug_assert!(token_slow(
1489        &mut p.text[instruction.span.clone()].as_bytes(),
1490        b"ENTRYPOINT",
1491        p.escape_byte,
1492    ));
1493    if is_maybe_json(s) {
1494        let mut tmp = *s;
1495        if let Ok((arguments, array_span)) =
1496            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1497        {
1498            debug_assert!(is_line_end(tmp.first()));
1499            if tmp.is_empty() {
1500                *s = &[];
1501            } else {
1502                *s = &tmp[1..];
1503            }
1504            if arguments.is_empty() {
1505                return Err(error::at_least_one_argument(instruction.span.start));
1506            }
1507            return Ok(Instruction::Entrypoint(EntrypointInstruction {
1508                entrypoint: instruction,
1509                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1510            }));
1511        }
1512    }
1513    let arguments_start = p.text.len() - s.len();
1514    skip_this_line(s, p.escape_byte);
1515    let end = p.text.len() - s.len();
1516    let arguments = p.text[arguments_start..end].trim_ascii_end();
1517    if arguments.is_empty() {
1518        return Err(error::at_least_one_argument(instruction.span.start));
1519    }
1520    Ok(Instruction::Entrypoint(EntrypointInstruction {
1521        entrypoint: instruction,
1522        arguments: Command::Shell(Spanned {
1523            span: arguments_start..arguments_start + arguments.len(),
1524            value: arguments,
1525        }),
1526    }))
1527}
1528
/// Parses the arguments of a `FROM` instruction:
/// `[options] <image> [AS <name>]`.
#[inline]
fn parse_from<'a>(
    p: &mut ParseIter<'a>,
    s: &mut &'a [u8],
    instruction: Keyword,
) -> Result<Instruction<'a>, ErrorKind> {
    debug_assert!(token_slow(
        &mut p.text[instruction.span.clone()].as_bytes(),
        b"FROM",
        p.escape_byte,
    ));
    // Options (e.g. --platform) come before the image reference.
    let options = parse_options(s, p.text, p.escape_byte);
    // TODO: https://github.com/moby/buildkit/blob/e83d79a51fb49aeb921d8a2348ae14a58701c98c/frontend/dockerfile/dockerfile2llb/convert.go#L302
    // > base name (%s) should not be blank
    let image = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
    if image.value.is_empty() {
        return Err(error::at_least_one_argument(instruction.span.start));
    }
    // Optional `AS <name>` clause naming the stage.
    let mut as_ = None;
    if skip_spaces(s, p.escape_byte) {
        let as_start = p.text.len() - s.len();
        if token(s, b"AS") || token_slow(s, b"AS", p.escape_byte) {
            let as_span = as_start..p.text.len() - s.len();
            if !skip_spaces(s, p.escape_byte) {
                // NOTE(review): this fires when `AS` is not followed by a
                // space (i.e. the stage name is missing) but reuses the
                // "expected AS" message — a stage-name message might be
                // clearer; confirm intended.
                return Err(error::expected("AS", as_start));
            }
            let name = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
            // Nothing but whitespace may follow the stage name.
            skip_spaces(s, p.escape_byte);
            if !is_line_end(s.first()) {
                return Err(error::expected("newline or eof", p.text.len() - s.len()));
            }
            as_ = Some((Keyword { span: as_span }, name));
        } else if !is_line_end(s.first()) {
            // Trailing text after the image that is not `AS` is an error.
            return Err(error::expected("AS", as_start));
        }
    }
    Ok(Instruction::From(FromInstruction { from: instruction, options, image, as_ }))
}
1567
/// Parses the arguments of a `HEALTHCHECK` instruction:
/// `[options]` followed by either `CMD <command>` (exec or shell form) or
/// `NONE`.
#[inline]
fn parse_healthcheck<'a>(
    p: &mut ParseIter<'a>,
    s: &mut &'a [u8],
    instruction: Keyword,
) -> Result<Instruction<'a>, ErrorKind> {
    debug_assert!(token_slow(
        &mut p.text[instruction.span.clone()].as_bytes(),
        b"HEALTHCHECK",
        p.escape_byte,
    ));
    // Options (e.g. --interval) come before the CMD/NONE keyword.
    let options = parse_options(s, p.text, p.escape_byte);
    let Some((&b, s_next)) = s.split_first() else {
        return Err(error::expected("CMD or NONE", p.text.len() - s.len()));
    };
    let cmd_or_none_start = p.text.len() - s.len();
    // ASCII case-insensitive dispatch on the first byte of CMD/NONE.
    match b & TO_UPPER8 {
        b'C' => {
            *s = s_next;
            if token(s, &b"CMD"[1..]) || token_slow(s, &b"CMD"[1..], p.escape_byte) {
                let cmd_span = cmd_or_none_start..p.text.len() - s.len();
                let cmd_keyword = Keyword { span: cmd_span };
                if spaces_or_line_end(s, p.escape_byte) {
                    // Exec form: a JSON array after CMD. Attempted on a copy
                    // so a failed JSON parse falls through to the shell form.
                    if is_maybe_json(s) {
                        let mut tmp = *s;
                        if let Ok((arguments, array_span)) =
                            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
                        {
                            debug_assert!(is_line_end(tmp.first()));
                            // Consume the line terminator, if any.
                            if tmp.is_empty() {
                                *s = &[];
                            } else {
                                *s = &tmp[1..];
                            }
                            if arguments.is_empty() {
                                return Err(error::at_least_one_argument(instruction.span.start));
                            }
                            return Ok(Instruction::Healthcheck(HealthcheckInstruction {
                                healthcheck: instruction,
                                options,
                                arguments: HealthcheckArguments::Cmd {
                                    cmd: cmd_keyword,
                                    arguments: Command::Exec(Spanned {
                                        span: array_span,
                                        value: arguments,
                                    }),
                                },
                            }));
                        }
                    }
                    // Shell form: rest of the (possibly escaped) line, with
                    // trailing whitespace trimmed.
                    let arguments_start = p.text.len() - s.len();
                    skip_this_line(s, p.escape_byte);
                    let end = p.text.len() - s.len();
                    let arguments = p.text[arguments_start..end].trim_ascii_end();
                    return Ok(Instruction::Healthcheck(HealthcheckInstruction {
                        healthcheck: instruction,
                        options,
                        arguments: HealthcheckArguments::Cmd {
                            cmd: cmd_keyword,
                            arguments: Command::Shell(Spanned {
                                span: arguments_start..arguments_start + arguments.len(),
                                value: arguments,
                            }),
                        },
                    }));
                }
            }
        }
        b'N' => {
            *s = s_next;
            if token(s, &b"NONE"[1..]) || token_slow(s, &b"NONE"[1..], p.escape_byte) {
                let none_span = cmd_or_none_start..p.text.len() - s.len();
                // NONE must be the last token on the line.
                skip_spaces(s, p.escape_byte);
                if !is_line_end(s.first()) {
                    return Err(error::other(
                        "HEALTHCHECK NONE does not accept arguments",
                        p.text.len() - s.len(),
                    ));
                }
                // TODO: HEALTHCHECK NONE doesn't support options
                let none_keyword = Keyword { span: none_span };
                return Ok(Instruction::Healthcheck(HealthcheckInstruction {
                    healthcheck: instruction,
                    options,
                    arguments: HealthcheckArguments::None { none: none_keyword },
                }));
            }
        }
        _ => {}
    }
    Err(error::expected("CMD or NONE", p.text.len() - s.len()))
}
1660
1661#[inline]
1662fn parse_label<'a>(
1663    p: &mut ParseIter<'a>,
1664    s: &mut &'a [u8],
1665    instruction: Keyword,
1666) -> Result<Instruction<'a>, ErrorKind> {
1667    debug_assert!(token_slow(
1668        &mut p.text[instruction.span.clone()].as_bytes(),
1669        b"LABEL",
1670        p.escape_byte,
1671    ));
1672    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1673    if arguments.value.is_empty() {
1674        return Err(error::at_least_one_argument(instruction.span.start));
1675    }
1676    Ok(Instruction::Label(LabelInstruction { label: instruction, arguments }))
1677}
1678
1679#[cold]
1680fn parse_maintainer<'a>(
1681    p: &mut ParseIter<'a>,
1682    s: &mut &'a [u8],
1683    instruction: Keyword,
1684) -> Result<Instruction<'a>, ErrorKind> {
1685    debug_assert!(token_slow(
1686        &mut p.text[instruction.span.clone()].as_bytes(),
1687        b"MAINTAINER",
1688        p.escape_byte,
1689    ));
1690    let name = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1691    if name.value.is_empty() {
1692        return Err(error::exactly_one_argument(instruction.span.start));
1693    }
1694    Ok(Instruction::Maintainer(MaintainerInstruction { maintainer: instruction, name }))
1695}
1696
1697#[inline]
1698fn parse_onbuild<'a>(
1699    p: &mut ParseIter<'a>,
1700    s: &mut &'a [u8],
1701    instruction: Keyword,
1702) -> Result<Instruction<'a>, ErrorKind> {
1703    debug_assert!(token_slow(
1704        &mut p.text[instruction.span.clone()].as_bytes(),
1705        b"ONBUILD",
1706        p.escape_byte,
1707    ));
1708    // https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1709    if mem::replace(&mut p.in_onbuild, true) {
1710        return Err(error::other("ONBUILD ONBUILD is not allowed", instruction.span.start));
1711    }
1712    let Some((&b, s_next)) = s.split_first() else {
1713        return Err(error::expected("instruction after ONBUILD", instruction.span.start));
1714    };
1715    // TODO: https://docs.docker.com/reference/dockerfile/#onbuild-limitations
1716    // match b & TO_UPPER8 {
1717    //     b'F' => {
1718    //         if token(s, b"FROM") || token_slow(s, b"FROM", p.escape_byte) {
1719    //             return Err(error::other(p,
1720    //                 "ONBUILD FROM is not allowed",
1721    //                 instruction.span.start,
1722    //             ));
1723    //         }
1724    //     }
1725    //     b'M' => {
1726    //         if token(s, b"MAINTAINER")
1727    //             || token_slow(s, b"MAINTAINER", p.escape_byte)
1728    //         {
1729    //             return Err(error::other(p,
1730    //                 "ONBUILD MAINTAINER is not allowed",
1731    //                 instruction.span.start,
1732    //             ));
1733    //         }
1734    //     }
1735    //     _ => {}
1736    // }
1737    let inner_instruction = parse_instruction(p, s, b, s_next)?;
1738    p.in_onbuild = false;
1739    Ok(Instruction::Onbuild(OnbuildInstruction {
1740        onbuild: instruction,
1741        instruction: Box::new(inner_instruction),
1742    }))
1743}
1744
#[inline]
fn parse_run<'a>(
    p: &mut ParseIter<'a>,
    s: &mut &'a [u8],
    instruction: Keyword,
) -> Result<Instruction<'a>, ErrorKind> {
    // Parses the arguments of a RUN instruction. Three forms are recognized,
    // tried in this order:
    //   1. exec form:  RUN ["exe", "arg"]   (JSON array)
    //   2. here-doc:   RUN <<EOF ... EOF
    //   3. shell form: RUN command arg
    debug_assert!(token_slow(
        &mut p.text[instruction.span.clone()].as_bytes(),
        b"RUN",
        p.escape_byte,
    ));
    // Flags such as --mount=... come before the command.
    let options = parse_options(s, p.text, p.escape_byte);
    if is_maybe_json(s) {
        // Try the JSON exec form on a copy of the cursor; on failure the same
        // bytes are re-parsed below as shell form.
        let mut tmp = *s;
        if let Ok((arguments, array_span)) =
            parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
        {
            debug_assert!(is_line_end(tmp.first()));
            // Commit the temporary cursor, consuming the line terminator.
            if tmp.is_empty() {
                *s = &[];
            } else {
                *s = &tmp[1..];
            }
            if arguments.is_empty() {
                return Err(error::at_least_one_argument(instruction.span.start));
            }
            return Ok(Instruction::Run(RunInstruction {
                run: instruction,
                options,
                arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
                // TODO: https://github.com/moby/buildkit/issues/2207
                here_docs: vec![],
            }));
        }
    }

    // https://docs.docker.com/reference/dockerfile/#here-documents
    let mut strip_tab = false;
    let mut quote = None;
    let mut pos = 2; // cursor just past the "<<"
    // At least 5, <<E\nE
    if s.len() >= 5 && s.starts_with(b"<<") && {
        // `<<-` strips leading tabs from each here-doc line.
        if s[pos] == b'-' {
            strip_tab = true;
            pos += 1;
        }
        // A quoted delimiter (<<"EOF" / <<'EOF') disables expansion.
        if matches!(s[pos], b'"' | b'\'') {
            quote = Some(s[pos]);
            pos += 1;
        }
        // TODO: non-ascii_alphanumeric
        s[pos].is_ascii_alphanumeric()
    } {
        *s = &s[pos..];
        let delim_start = p.text.len() - s.len();
        // Collect the delimiter word (a run of ASCII alphanumerics).
        // TODO: non-ascii_alphanumeric
        while let Some((&b, s_next)) = s.split_first() {
            if b.is_ascii_alphanumeric() {
                *s = s_next;
                continue;
            }
            break;
        }
        let delim = &p.text.as_bytes()[delim_start..p.text.len() - s.len()];
        // A quoted delimiter must be closed by the same quote character.
        if let Some(quote) = quote {
            if let Some((&b, s_next)) = s.split_first() {
                if b != quote {
                    return Err(error::expected_quote(quote, Some(b), p.text.len() - s.len()));
                }
                *s = s_next;
            } else {
                return Err(error::expected_quote(quote, None, p.text.len() - s.len()));
            }
        }
        // The rest of the opening line is kept as the (shell-form) command.
        // TODO: skip space
        let arguments_start = p.text.len() - s.len();
        skip_this_line(s, p.escape_byte);
        let end = p.text.len() - s.len();
        let arguments = p.text[arguments_start..end].trim_ascii_end();
        // Collect the here-doc body up to the delimiter line; `expand` is
        // disabled when the delimiter was quoted.
        let here_doc = if strip_tab {
            let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
            HereDoc { span, expand: quote.is_none(), value: here_doc }
        } else {
            let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
            HereDoc { span, expand: quote.is_none(), value: here_doc.into() }
        };
        return Ok(Instruction::Run(RunInstruction {
            run: instruction,
            options,
            arguments: Command::Shell(Spanned {
                span: arguments_start..arguments_start + arguments.len(),
                value: arguments,
            }),
            // TODO: multiple here-docs
            here_docs: vec![here_doc],
        }));
    }

    // Shell form: everything up to the end of the (possibly escaped) line,
    // with trailing ASCII whitespace trimmed.
    let arguments_start = p.text.len() - s.len();
    skip_this_line(s, p.escape_byte);
    let end = p.text.len() - s.len();
    let arguments = p.text[arguments_start..end].trim_ascii_end();
    Ok(Instruction::Run(RunInstruction {
        run: instruction,
        options,
        arguments: Command::Shell(Spanned {
            span: arguments_start..arguments_start + arguments.len(),
            value: arguments,
        }),
        here_docs: vec![],
    }))
}
1857
1858#[inline]
1859fn parse_shell<'a>(
1860    p: &mut ParseIter<'a>,
1861    s: &mut &'a [u8],
1862    instruction: Keyword,
1863) -> Result<Instruction<'a>, ErrorKind> {
1864    debug_assert!(token_slow(
1865        &mut p.text[instruction.span.clone()].as_bytes(),
1866        b"SHELL",
1867        p.escape_byte,
1868    ));
1869    if !is_maybe_json(s) {
1870        return Err(error::expected("JSON array", p.text.len() - s.len()));
1871    }
1872    match parse_json_array::<SmallVec<[_; 4]>>(s, p.text, p.escape_byte) {
1873        Ok((arguments, _array_span)) => {
1874            if !s.is_empty() {
1875                *s = &s[1..];
1876            }
1877            if arguments.is_empty() {
1878                return Err(error::at_least_one_argument(instruction.span.start));
1879            }
1880            Ok(Instruction::Shell(ShellInstruction { shell: instruction, arguments }))
1881        }
1882        Err(array_start) => Err(error::json(array_start)),
1883    }
1884}
1885
1886#[inline]
1887fn parse_stopsignal<'a>(
1888    p: &mut ParseIter<'a>,
1889    s: &mut &'a [u8],
1890    instruction: Keyword,
1891) -> Result<Instruction<'a>, ErrorKind> {
1892    debug_assert!(token_slow(
1893        &mut p.text[instruction.span.clone()].as_bytes(),
1894        b"STOPSIGNAL",
1895        p.escape_byte,
1896    ));
1897    // TODO: space is disallowed?
1898    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1899    if arguments.value.is_empty() {
1900        return Err(error::exactly_one_argument(instruction.span.start));
1901    }
1902    Ok(Instruction::Stopsignal(StopsignalInstruction { stopsignal: instruction, arguments }))
1903}
1904
1905#[inline]
1906fn parse_user<'a>(
1907    p: &mut ParseIter<'a>,
1908    s: &mut &'a [u8],
1909    instruction: Keyword,
1910) -> Result<Instruction<'a>, ErrorKind> {
1911    debug_assert!(token_slow(
1912        &mut p.text[instruction.span.clone()].as_bytes(),
1913        b"USER",
1914        p.escape_byte,
1915    ));
1916    // TODO: space is disallowed?
1917    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1918    if arguments.value.is_empty() {
1919        return Err(error::exactly_one_argument(instruction.span.start));
1920    }
1921    Ok(Instruction::User(UserInstruction { user: instruction, arguments }))
1922}
1923
1924#[inline]
1925fn parse_volume<'a>(
1926    p: &mut ParseIter<'a>,
1927    s: &mut &'a [u8],
1928    instruction: Keyword,
1929) -> Result<Instruction<'a>, ErrorKind> {
1930    debug_assert!(token_slow(
1931        &mut p.text[instruction.span.clone()].as_bytes(),
1932        b"VOLUME",
1933        p.escape_byte,
1934    ));
1935    if is_maybe_json(s) {
1936        let mut tmp = *s;
1937        if let Ok((arguments, array_span)) = parse_json_array(&mut tmp, p.text, p.escape_byte) {
1938            debug_assert!(is_line_end(tmp.first()));
1939            if tmp.is_empty() {
1940                *s = &[];
1941            } else {
1942                *s = &tmp[1..];
1943            }
1944            // "VOLUME []" seems to be okay?
1945            return Ok(Instruction::Volume(VolumeInstruction {
1946                volume: instruction,
1947                arguments: JsonOrStringArray::Json(Spanned { span: array_span, value: arguments }),
1948            }));
1949        }
1950    }
1951    let arguments: SmallVec<[_; 1]> =
1952        collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1953    if arguments.is_empty() {
1954        // TODO: "VOLUME" too?
1955        return Err(error::at_least_one_argument(instruction.span.start));
1956    }
1957    Ok(Instruction::Volume(VolumeInstruction {
1958        volume: instruction,
1959        arguments: JsonOrStringArray::String(arguments),
1960    }))
1961}
1962
1963#[inline]
1964fn parse_workdir<'a>(
1965    p: &mut ParseIter<'a>,
1966    s: &mut &'a [u8],
1967    instruction: Keyword,
1968) -> Result<Instruction<'a>, ErrorKind> {
1969    debug_assert!(token_slow(
1970        &mut p.text[instruction.span.clone()].as_bytes(),
1971        b"WORKDIR",
1972        p.escape_byte,
1973    ));
1974    // TODO: space is disallowed if not escaped/quoted?
1975    let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1976    if arguments.value.is_empty() {
1977        return Err(error::exactly_one_argument(instruction.span.start));
1978    }
1979    Ok(Instruction::Workdir(WorkdirInstruction { workdir: instruction, arguments }))
1980}
1981
1982// -----------------------------------------------------------------------------
1983// Parsing Helpers
1984
1985// [\r\n]
1986const LINE: u8 = 1 << 0;
1987// [ \t]
1988const SPACE: u8 = 1 << 1;
1989// [ \r\n\t]
1990const WHITESPACE: u8 = 1 << 2;
1991// [#]
1992const COMMENT: u8 = 1 << 3;
1993// ["]
1994const DOUBLE_QUOTE: u8 = 1 << 4;
1995// [\`]
1996const POSSIBLE_ESCAPE: u8 = 1 << 5;
1997// [=]
1998const EQ: u8 = 1 << 6;
1999
2000static TABLE: [u8; 256] = {
2001    let mut table = [0; 256];
2002    let mut i = 0;
2003    loop {
2004        match i {
2005            b' ' | b'\t' => table[i as usize] = WHITESPACE | SPACE,
2006            b'\n' | b'\r' => table[i as usize] = WHITESPACE | LINE,
2007            b'#' => table[i as usize] = COMMENT,
2008            b'"' => table[i as usize] = DOUBLE_QUOTE,
2009            b'\\' | b'`' => table[i as usize] = POSSIBLE_ESCAPE,
2010            b'=' => table[i as usize] = EQ,
2011            _ => {}
2012        }
2013        if i == u8::MAX {
2014            break;
2015        }
2016        i += 1;
2017    }
2018    table
2019};
2020
2021const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
2022
// Minimal push-only collection abstraction so parsing helpers can fill a
// `Vec`, a `SmallVec`, or a more specialized accumulator through one generic
// interface.
trait Store<T>: Sized {
    fn new() -> Self;
    fn push(&mut self, val: T);
}
impl<T> Store<T> for Vec<T> {
    #[inline]
    fn new() -> Self {
        Self::new()
    }
    #[inline]
    fn push(&mut self, val: T) {
        self.push(val);
    }
}
impl<T, const N: usize> Store<T> for SmallVec<[T; N]> {
    #[inline]
    fn new() -> Self {
        Self::new()
    }
    #[inline]
    fn push(&mut self, val: T) {
        self.push(val);
    }
}
// Accumulator that keeps the most recently pushed value apart (in `.1`) and
// demotes every earlier value to `Source::Path` in `.0`.
// NOTE(review): presumably used for `src... dest` argument lists (ADD/COPY),
// where the final value is the destination — confirm at the call sites, which
// are outside this chunk.
impl<'a, const N: usize> Store<UnescapedString<'a>>
    for (SmallVec<[Source<'a>; N]>, Option<UnescapedString<'a>>)
{
    #[inline]
    fn new() -> Self {
        (SmallVec::new(), None)
    }
    #[inline]
    fn push(&mut self, val: UnescapedString<'a>) {
        // The previous "last value" candidate becomes a source.
        if let Some(val) = self.1.replace(val) {
            self.0.push(Source::Path(val));
        }
    }
}
2061
// Parses zero or more `--name[=value]` flags, stopping at the first token
// that does not begin with `--`. `start` is the full source text; offsets are
// computed as `start.len() - s.len()`. Never fails: a lone `-` simply ends
// the flag list with `s` left untouched at it.
#[inline]
fn parse_options<'a, S: Store<Flag<'a>>>(s: &mut &[u8], start: &'a str, escape_byte: u8) -> S {
    let mut options = S::new();
    'outer: while let Some((&b'-', mut s_next)) = s.split_first() {
        // Look for the second '-', tolerating escaped line breaks between the
        // two dashes.
        loop {
            let Some((&b, s_next_next)) = s_next.split_first() else {
                break 'outer;
            };
            if b == b'-' {
                s_next = s_next_next;
                break;
            }
            if skip_line_escape(&mut s_next, b, s_next_next, escape_byte) {
                skip_line_escape_followup(&mut s_next, escape_byte);
                continue;
            }
            // Single '-' (not a flag): leave `s` at it and stop scanning.
            break 'outer;
        }
        let flag_start = start.len() - s.len();
        *s = s_next;
        // The flag name runs until unescaped whitespace or '='.
        let name = collect_until_unescaped::<{ WHITESPACE | EQ }>(s, start, escape_byte);
        let Some((&b'=', s_next)) = s.split_first() else {
            // Value-less flag (`--foo`).
            options.push(Flag { flag_start, name, value: None });
            skip_spaces(s, escape_byte);
            continue;
        };
        *s = s_next;
        let value = collect_non_whitespace_unescaped(s, start, escape_byte);
        options.push(Flag { flag_start, name, value: Some(value) });
        skip_spaces(s, escape_byte);
    }
    options
}
2095
2096fn parse_json_array<'a, S: Store<UnescapedString<'a>>>(
2097    s: &mut &[u8],
2098    start: &'a str,
2099    escape_byte: u8,
2100) -> Result<(S, Span), usize> {
2101    debug_assert_eq!(s.first(), Some(&b'['));
2102    debug_assert_ne!(s.get(1), Some(&b'['));
2103    let mut res = S::new();
2104    let array_start = start.len() - s.len();
2105    *s = &s[1..];
2106    skip_spaces(s, escape_byte);
2107    let (&b, s_next) = s.split_first().ok_or(array_start)?;
2108    match b {
2109        b'"' => {
2110            *s = s_next;
2111            loop {
2112                let full_word_start = start.len() - s.len();
2113                let mut word_start = full_word_start;
2114                let mut buf = String::new();
2115                loop {
2116                    let (&b, s_next) = s.split_first().ok_or(array_start)?;
2117                    if TABLE[b as usize] & (LINE | DOUBLE_QUOTE | POSSIBLE_ESCAPE) == 0 {
2118                        *s = s_next;
2119                        continue;
2120                    }
2121                    match b {
2122                        b'"' => break,
2123                        b'\n' | b'\r' => return Err(array_start),
2124                        _ => {}
2125                    }
2126                    let word_end = start.len() - s.len();
2127                    if skip_line_escape(s, b, s_next, escape_byte) {
2128                        skip_line_escape_followup(s, escape_byte);
2129                        // dockerfile escape
2130                        buf.push_str(&start[word_start..word_end]);
2131                        word_start = start.len() - s.len();
2132                        continue;
2133                    }
2134                    if b == b'\\' {
2135                        // JSON escape
2136                        let word_end = start.len() - s.len();
2137                        buf.push_str(&start[word_start..word_end]);
2138                        *s = s_next;
2139                        let (new, new_start) = match *s.first().ok_or(array_start)? {
2140                            b @ (b'"' | b'\\' | b'/') => (b as char, 1),
2141                            b'b' => ('\x08', 1),
2142                            b'f' => ('\x0c', 1),
2143                            b'n' => ('\n', 1),
2144                            b'r' => ('\r', 1),
2145                            b't' => ('\t', 1),
2146                            b'u' => (parse_json_hex_escape(s, array_start)?, 5),
2147                            _ => return Err(array_start), // invalid escape
2148                        };
2149                        buf.push(new);
2150                        *s = &s[new_start..];
2151                        word_start = start.len() - s.len();
2152                        continue;
2153                    }
2154                    *s = s_next;
2155                }
2156                let word_end = start.len() - s.len();
2157                let value = if buf.is_empty() {
2158                    // no escape
2159                    Cow::Borrowed(&start[word_start..word_end])
2160                } else {
2161                    buf.push_str(&start[word_start..word_end]);
2162                    Cow::Owned(buf)
2163                };
2164                res.push(UnescapedString { span: full_word_start..word_end, value });
2165                *s = &s[1..]; // drop "
2166                skip_spaces(s, escape_byte);
2167                let (&b, s_next) = s.split_first().ok_or(array_start)?;
2168                match b {
2169                    b',' => {
2170                        *s = s_next;
2171                        skip_spaces(s, escape_byte);
2172                        let (&b, s_next) = s.split_first().ok_or(array_start)?;
2173                        if b == b'"' {
2174                            *s = s_next;
2175                            continue;
2176                        }
2177                        return Err(array_start);
2178                    }
2179                    b']' => {
2180                        *s = s_next;
2181                        break;
2182                    }
2183                    _ => return Err(array_start),
2184                }
2185            }
2186        }
2187        b']' => *s = s_next,
2188        _ => return Err(array_start),
2189    }
2190    let array_end = start.len() - s.len();
2191    skip_spaces(s, escape_byte);
2192    if !is_line_end(s.first()) {
2193        return Err(array_start);
2194    }
2195    Ok((res, array_start..array_end))
2196}
2197// Adapted from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/src/read.rs
2198#[cold]
2199fn parse_json_hex_escape(s: &mut &[u8], array_start: usize) -> Result<char, usize> {
2200    fn decode_hex_escape(s: &mut &[u8], array_start: usize) -> Result<u16, usize> {
2201        if s.len() < 4 {
2202            return Err(array_start); // EofWhileParsingString
2203        }
2204
2205        let mut n = 0;
2206        for _ in 0..4 {
2207            let ch = decode_hex_val(s[0]);
2208            *s = &s[1..];
2209            match ch {
2210                None => return Err(array_start), // InvalidEscape
2211                Some(val) => {
2212                    n = (n << 4) + val;
2213                }
2214            }
2215        }
2216        Ok(n)
2217    }
2218
2219    fn decode_hex_val(val: u8) -> Option<u16> {
2220        let n = HEX_DECODE_TABLE[val as usize] as u16;
2221        if n == u8::MAX as u16 { None } else { Some(n) }
2222    }
2223
2224    let c = match decode_hex_escape(s, array_start)? {
2225        _n @ 0xDC00..=0xDFFF => return Err(array_start), // ErrorCode::LoneLeadingSurrogateInHexEscape)
2226
2227        // Non-BMP characters are encoded as a sequence of two hex
2228        // escapes, representing UTF-16 surrogates. If deserializing a
2229        // utf-8 string the surrogates are required to be paired,
2230        // whereas deserializing a byte string accepts lone surrogates.
2231        n1 @ 0xD800..=0xDBFF => {
2232            if s.first() == Some(&b'\\') {
2233                *s = &s[1..];
2234            } else {
2235                return Err(array_start); // UnexpectedEndOfHexEscape
2236            }
2237
2238            if s.first() == Some(&b'u') {
2239                *s = &s[1..];
2240            } else {
2241                return Err(array_start); // UnexpectedEndOfHexEscape
2242            }
2243
2244            let n2 = decode_hex_escape(s, array_start)?;
2245
2246            if n2 < 0xDC00 || n2 > 0xDFFF {
2247                return Err(array_start); // LoneLeadingSurrogateInHexEscape
2248            }
2249
2250            let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
2251
2252            match char::from_u32(n) {
2253                Some(c) => c,
2254                None => return Err(array_start), // InvalidUnicodeCodePoint
2255            }
2256        }
2257
2258        // Every u16 outside of the surrogate ranges above is guaranteed
2259        // to be a legal char.
2260        n => char::from_u32(n as u32).unwrap(),
2261    };
2262    Ok(c)
2263}
#[allow(clippy::needless_raw_string_hashes)]
#[test]
fn test_parse_json_array() {
    // empty array
    let text = r#"[]"#;
    let mut rest = text.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut rest, text, b'\\').unwrap().0, &[]);
    assert_eq!(rest, b"");
    // empty array with inner space
    let text = r#"[ ]"#;
    let mut rest = text.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut rest, text, b'\\').unwrap().0, &[]);
    assert_eq!(rest, b"");
    // single element
    let text = r#"["abc"]"#;
    let mut rest = text.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut rest, text, b'\\').unwrap().0, &[
        UnescapedString { span: 2..5, value: "abc".into() },
    ]);
    assert_eq!(rest, b"");
    // multiple elements with assorted spacing
    let text = "[\"ab\",\"c\" ,  \"de\" ] \n";
    let mut rest = text.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut rest, text, b'\\').unwrap().0, &[
        UnescapedString { span: 2..4, value: "ab".into() },
        UnescapedString { span: 7..8, value: "c".into() },
        UnescapedString { span: 14..16, value: "de".into() },
    ]);
    assert_eq!(rest, b"\n");
    // JSON escape sequences
    // TODO: \uXXXX
    let text = "[\"a\\\"\\\\\\/\\b\\f\\n\\r\\tbc\"]";
    let mut rest = text.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut rest, text, b'\\').unwrap().0, &[
        UnescapedString { span: 2..21, value: "a\"\\/\x08\x0c\n\r\tbc".into() },
    ]);
    assert_eq!(rest, b"");

    // error: single-quoted strings are not JSON
    let text = r#"['abc']"#;
    let mut rest = text.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut rest, text, b'\\'), Err(0));
    assert_eq!(rest, br#"'abc']"#);
    // error: trailing comma
    let text = r#"["abc",]"#;
    let mut rest = text.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut rest, text, b'\\'), Err(0));
    assert_eq!(rest, br#"]"#);
    // error: extra character after the array
    let text = r#"["abc"] c"#;
    let mut rest = text.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut rest, text, b'\\'), Err(0));
    assert_eq!(rest, br#"c"#);
    // error: invalid escape sequence
    let text = "[\"ab\\c\"]";
    let mut rest = text.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut rest, text, b'\\'), Err(0));
    assert_eq!(rest, b"c\"]");
    // TODO: more from https://github.com/serde-rs/json/blob/3f1c6de4af28b1f6c5100da323f2bffaf7c2083f/tests/test.rs#L1060
}
2325
2326/// Skips spaces and tabs, and returns `true` if one or more spaces or tabs ware
2327/// consumed. (not consumes non-spaces/tabs characters.
2328#[inline]
2329fn skip_spaces_no_escape(s: &mut &[u8]) -> bool {
2330    let start = *s;
2331    while let Some((&b, s_next)) = s.split_first() {
2332        if TABLE[b as usize] & SPACE != 0 {
2333            *s = s_next;
2334            continue;
2335        }
2336        break;
2337    }
2338    start.len() != s.len()
2339}
2340/// Skips spaces and tabs, and returns `true` if one or more spaces or tabs ware
2341/// consumed. (not consumes non-space/tab characters.
2342#[inline]
2343fn skip_spaces(s: &mut &[u8], escape_byte: u8) -> bool {
2344    let mut has_space = false;
2345    while let Some((&b, s_next)) = s.split_first() {
2346        let t = TABLE[b as usize];
2347        if t & (SPACE | POSSIBLE_ESCAPE) != 0 {
2348            if t & SPACE != 0 {
2349                *s = s_next;
2350                has_space = true;
2351                continue;
2352            }
2353            if skip_line_escape(s, b, s_next, escape_byte) {
2354                skip_line_escape_followup(s, escape_byte);
2355                continue;
2356            }
2357        }
2358        break;
2359    }
2360    has_space
2361}
2362/// Consumes spaces and tabs, and returns `true` if one or more spaces or tabs ware
2363/// consumed, or reached line end. (not consumes non-space/tab characters.
2364#[inline]
2365fn spaces_or_line_end(s: &mut &[u8], escape_byte: u8) -> bool {
2366    let mut has_space = false;
2367    loop {
2368        let Some((&b, s_next)) = s.split_first() else { return true };
2369        {
2370            let t = TABLE[b as usize];
2371            if t & (WHITESPACE | POSSIBLE_ESCAPE) != 0 {
2372                if t & SPACE != 0 {
2373                    *s = s_next;
2374                    has_space = true;
2375                    continue;
2376                }
2377                if t & LINE != 0 {
2378                    return true;
2379                }
2380                if skip_line_escape(s, b, s_next, escape_byte) {
2381                    skip_line_escape_followup(s, escape_byte);
2382                    continue;
2383                }
2384            }
2385            break;
2386        }
2387    }
2388    has_space
2389}
2390
2391#[inline]
2392fn skip_comments_and_whitespaces(s: &mut &[u8], escape_byte: u8) {
2393    while let Some((&b, s_next)) = s.split_first() {
2394        let t = TABLE[b as usize];
2395        if t & (WHITESPACE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2396            if t & WHITESPACE != 0 {
2397                *s = s_next;
2398                continue;
2399            }
2400            if t & COMMENT != 0 {
2401                *s = s_next;
2402                skip_this_line_no_escape(s);
2403                continue;
2404            }
2405            if skip_line_escape(s, b, s_next, escape_byte) {
2406                skip_line_escape_followup(s, escape_byte);
2407                continue;
2408            }
2409        }
2410        break;
2411    }
2412}
2413
/// Returns `true` at a line terminator (`\n` or `\r`) or at end of input
/// (`None`).
#[inline]
fn is_line_end(b: Option<&u8>) -> bool {
    match b {
        None => true,
        Some(&b) => b == b'\n' || b == b'\r',
    }
}
/// Quick check for a possible JSON (exec-form) argument: the input starts
/// with `[` but not `[[`.
///
/// ADD/COPY: checking [[ to handle escape of [ https://docs.docker.com/reference/dockerfile/#add
/// Others: TODO: checking [[ to handle [[ -e .. ], but not enough to check [ -e .. ]
#[inline]
fn is_maybe_json(s: &[u8]) -> bool {
    s.starts_with(b"[") && !s.starts_with(b"[[")
}
2424
2425#[inline]
2426fn collect_here_doc_no_strip_tab<'a>(
2427    s: &mut &[u8],
2428    start: &'a str,
2429    _escape_byte: u8,
2430    delim: &[u8],
2431) -> Result<(&'a str, Span), ErrorKind> {
2432    let here_doc_start = start.len() - s.len();
2433    loop {
2434        if s.len() < delim.len() {
2435            return Err(error::expected_here_doc_end(delim, start.len() - s.len()));
2436        }
2437        if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
2438            break;
2439        }
2440        skip_this_line_no_escape(s);
2441    }
2442    let end = start.len() - s.len();
2443    *s = &s[delim.len()..];
2444    if !s.is_empty() {
2445        *s = &s[1..];
2446    }
2447    let span = here_doc_start..end;
2448    Ok((&start[span.clone()], span))
2449}
/// Collects the body of a `<<-` here-document: like
/// `collect_here_doc_no_strip_tab`, but leading tabs on each line are
/// stripped from the returned value. Returns a borrowed slice when no tab was
/// stripped, otherwise an owned string with the tabs removed; the span covers
/// the body as it appears in the source.
#[inline]
fn collect_here_doc_strip_tab<'a>(
    s: &mut &[u8],
    start: &'a str,
    _escape_byte: u8,
    delim: &[u8],
) -> Result<(Cow<'a, str>, Span), ErrorKind> {
    let here_doc_start = start.len() - s.len();
    // `current_start`..cursor is the pending run that has not yet been copied
    // into `res`; it only diverges from `here_doc_start` once a tab is
    // stripped.
    let mut current_start = here_doc_start;
    let mut res = String::new();
    loop {
        if s.len() < delim.len() {
            return Err(error::expected_here_doc_end(delim, start.len() - s.len()));
        }
        // Strip the run of leading tabs at the start of this line.
        if let Some((&b'\t', s_next)) = s.split_first() {
            let end = start.len() - s.len();
            res.push_str(&start[current_start..end]);
            *s = s_next;
            while let Some((&b'\t', s_next)) = s.split_first() {
                *s = s_next;
            }
            current_start = start.len() - s.len();
        }
        // A line consisting solely of the delimiter terminates the body.
        if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
            break;
        }
        skip_this_line_no_escape(s);
    }
    let end = start.len() - s.len();
    // Consume the delimiter and, if present, its line terminator.
    *s = &s[delim.len()..];
    if !s.is_empty() {
        *s = &s[1..];
    }
    let span = here_doc_start..end;
    if here_doc_start == current_start {
        // No tabs were stripped; the whole body can be borrowed.
        Ok((Cow::Borrowed(&start[span.clone()]), span))
    } else {
        res.push_str(&start[current_start..end]);
        Ok((Cow::Owned(res), span))
    }
}
2491// TODO: escaped/quoted space?
2492#[inline]
2493fn collect_space_separated_unescaped_consume_line<'a, S: Store<UnescapedString<'a>>>(
2494    s: &mut &[u8],
2495    start: &'a str,
2496    escape_byte: u8,
2497) -> S {
2498    let mut res = S::new();
2499    loop {
2500        let val = collect_non_whitespace_unescaped(s, start, escape_byte);
2501        if !val.value.is_empty() {
2502            res.push(val);
2503            if skip_spaces(s, escape_byte) {
2504                continue;
2505            }
2506        }
2507        debug_assert!(is_line_end(s.first()));
2508        if !s.is_empty() {
2509            *s = &s[1..];
2510        }
2511        break;
2512    }
2513    res
2514}
/// Collects bytes up to (but not consuming) the next whitespace byte
/// (space, tab, CR, or LF), resolving line-continuation escapes.
#[inline]
fn collect_non_whitespace_unescaped<'a>(
    s: &mut &[u8],
    start: &'a str,
    escape_byte: u8,
) -> UnescapedString<'a> {
    collect_until_unescaped::<WHITESPACE>(s, start, escape_byte)
}
2523#[inline]
2524fn collect_non_line_unescaped_consume_line<'a>(
2525    s: &mut &[u8],
2526    start: &'a str,
2527    escape_byte: u8,
2528) -> UnescapedString<'a> {
2529    let mut val = collect_until_unescaped::<LINE>(s, start, escape_byte);
2530    debug_assert!(is_line_end(s.first()));
2531    if !s.is_empty() {
2532        *s = &s[1..];
2533    }
2534    // trim trailing spaces
2535    match &mut val.value {
2536        Cow::Borrowed(v) => {
2537            while let Some(b' ' | b'\t') = v.as_bytes().last() {
2538                *v = &v[..v.len() - 1];
2539                val.span.end -= 1;
2540            }
2541        }
2542        Cow::Owned(v) => {
2543            while let Some(b' ' | b'\t') = v.as_bytes().last() {
2544                v.pop();
2545                val.span.end -= 1;
2546            }
2547        }
2548    }
2549    val
2550}
/// Collects bytes until one whose `TABLE` classification matches
/// `UNTIL_MASK` (the terminator itself is left unconsumed), splicing out
/// line-continuation escapes (and the comment/empty continuation lines that
/// may follow them) from the produced value.
///
/// Returns a borrowed slice of `start` when no escape was encountered,
/// otherwise an owned string with the escapes removed. The span always
/// covers the raw region, escapes included.
#[inline]
fn collect_until_unescaped<'a, const UNTIL_MASK: u8>(
    s: &mut &[u8],
    start: &'a str,
    escape_byte: u8,
) -> UnescapedString<'a> {
    // Offset in `start` where the collected region begins.
    let full_word_start = start.len() - s.len();
    // Start of the current segment not yet copied into `buf`.
    let mut word_start = full_word_start;
    // Accumulates segments between escapes; stays empty when none occur.
    let mut buf = String::new();
    while let Some((&b, s_next)) = s.split_first() {
        let t = TABLE[b as usize];
        // Fast path: most bytes are neither terminators nor escape bytes.
        if t & (UNTIL_MASK | POSSIBLE_ESCAPE) != 0 {
            if t & UNTIL_MASK != 0 {
                break;
            }
            let word_end = start.len() - s.len();
            if skip_line_escape(s, b, s_next, escape_byte) {
                skip_line_escape_followup(s, escape_byte);
                // Flush the text before the escape, then restart the segment
                // after the consumed escape sequence.
                buf.push_str(&start[word_start..word_end]);
                word_start = start.len() - s.len();
                continue;
            }
        }
        *s = s_next;
    }
    let word_end = start.len() - s.len();
    let value = if buf.is_empty() {
        // no escape
        Cow::Borrowed(&start[word_start..word_end])
    } else {
        buf.push_str(&start[word_start..word_end]);
        Cow::Owned(buf)
    };
    UnescapedString { span: full_word_start..word_end, value }
}
2586
2587/// Skips non-whitespace (non-`[ \r\n\t]`) characters, and returns `true`
2588/// if one or more non-whitespace characters are present. (not consumes whitespace character).
2589#[inline]
2590fn skip_non_whitespace_no_escape(s: &mut &[u8]) -> bool {
2591    let start = *s;
2592    while let Some((&b, s_next)) = s.split_first() {
2593        if TABLE[b as usize] & WHITESPACE != 0 {
2594            break;
2595        }
2596        *s = s_next;
2597    }
2598    start.len() != s.len()
2599}
2600// #[inline]
2601// fn skip_non_whitespace(s: &mut &[u8], escape_byte: u8) -> bool {
2602//     let mut has_non_whitespace = false;
2603//     while let Some((&b, s_next)) = s.split_first() {
2604//         if TABLE[b as usize] & WHITESPACE != 0 {
2605//             break;
2606//         }
2607//         if is_line_escape(b, s_next, escape_byte) {
2608//             skip_line_escape(s, b, s_next, escape_byte);
2609//             continue;
2610//         }
2611//         *s = s_next;
2612//         has_non_whitespace = true;
2613//         continue;
2614//     }
2615//     has_non_whitespace
2616// }
2617
/// If `b` (the byte at the head of `s`, with `s_next` the bytes after it)
/// starts a line-continuation escape — the escape byte followed by LF, CRLF,
/// or a lone CR, optionally with `SPACE`-class bytes between the escape byte
/// and the line end — consumes the whole sequence from `s` and returns
/// `true`. Otherwise leaves `s` untouched and returns `false`.
#[inline]
fn skip_line_escape<'a>(s: &mut &'a [u8], b: u8, s_next: &'a [u8], escape_byte: u8) -> bool {
    if b == escape_byte {
        if let Some((&b, mut s_next)) = s_next.split_first() {
            if b == b'\n' {
                *s = s_next;
                return true;
            }
            if b == b'\r' {
                // Accept both CRLF and a lone CR.
                if s_next.first() == Some(&b'\n') {
                    *s = &s_next[1..];
                } else {
                    *s = s_next;
                }
                return true;
            }
            // It seems that "\\ \n" is also accepted.
            // https://github.com/moby/buildkit/blob/6d143f5602a61acef286f39ee75f1cb33c367d44/frontend/dockerfile/cmd/dockerfile-frontend/Dockerfile#L19C23-L19C24
            if TABLE[b as usize] & SPACE != 0 {
                // Tentatively skip the spaces on a local copy; `s` is only
                // updated if a line end actually follows them.
                skip_spaces_no_escape(&mut s_next);
                if let Some((&b, s_next)) = s_next.split_first() {
                    if b == b'\n' {
                        *s = s_next;
                        return true;
                    }
                    if b == b'\r' {
                        // Accept both CRLF and a lone CR.
                        if s_next.first() == Some(&b'\n') {
                            *s = &s_next[1..];
                        } else {
                            *s = s_next;
                        }
                        return true;
                    }
                }
            }
        }
    }
    false
}
/// After a line-continuation escape has been consumed, skips any following
/// comment lines and empty continuation lines so that parsing resumes at
/// the next byte of actual content.
#[inline]
fn skip_line_escape_followup(s: &mut &[u8], _escape_byte: u8) {
    while let Some((&b, mut s_next)) = s.split_first() {
        let t = TABLE[b as usize];
        if t & (WHITESPACE | COMMENT) != 0 {
            if t & SPACE != 0 {
                // TODO: escape after spaces is handled in skip_spaces_no_escape
                // Tentatively skip spaces on a local copy; `s` only advances
                // when the spaces lead into a comment or a line end.
                skip_spaces_no_escape(&mut s_next);
                if let Some((&b, s_next)) = s_next.split_first() {
                    let t = TABLE[b as usize];
                    if t & (COMMENT | LINE) != 0 {
                        // comment or empty continuation line
                        *s = s_next;
                        if t & COMMENT != 0 {
                            skip_this_line_no_escape(s);
                        }
                        continue;
                    }
                }
            } else {
                // comment or empty continuation line
                *s = s_next;
                if t & COMMENT != 0 {
                    skip_this_line_no_escape(s);
                }
                continue;
            }
        }
        // Real content (or spaces followed by content): stop here.
        break;
    }
}
2688
2689#[inline]
2690fn skip_this_line_no_escape(s: &mut &[u8]) {
2691    while let Some((&b, s_next)) = s.split_first() {
2692        *s = s_next;
2693        if TABLE[b as usize] & LINE != 0 {
2694            break;
2695        }
2696    }
2697}
/// Skips non-line (non-`[\r\n]`) characters. (consumes line character).
///
/// Line-continuation escapes are honored, so the skipped "line" may span
/// several physical input lines. A `#` starts a comment line (skipped
/// without ending the logical line) only when everything since the last
/// line escape was `SPACE`-class bytes.
#[inline]
fn skip_this_line(s: &mut &[u8], escape_byte: u8) {
    // Bit-trick flag: non-zero (== SPACE) while only SPACE-class bytes have
    // been seen since the last line escape. Starts at 0 because this is
    // called mid-line, where '#' is ordinary content, not a comment.
    let mut has_space_only = 0;
    while let Some((&b, s_next)) = s.split_first() {
        let t = TABLE[b as usize];
        if t & (LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
            if t & LINE != 0 {
                // Consume the line terminator and stop.
                *s = s_next;
                break;
            }
            if has_space_only != 0 && t & COMMENT != 0 {
                // Comment line inside a continuation: skip it entirely.
                *s = s_next;
                skip_this_line_no_escape(s);
                continue;
            }
            if skip_line_escape(s, b, s_next, escape_byte) {
                skip_line_escape_followup(s, escape_byte);
                has_space_only = SPACE;
                continue;
            }
        }
        // The flag stays set only while bytes remain SPACE-class.
        has_space_only &= t;
        *s = s_next;
    }
}
2724
2725#[inline(always)]
2726fn token(s: &mut &[u8], token: &'static [u8]) -> bool {
2727    let matched = starts_with_ignore_ascii_case(s, token);
2728    if matched {
2729        *s = &s[token.len()..];
2730        true
2731    } else {
2732        false
2733    }
2734}
/// Slow-path, ASCII case-insensitive match of `token` at the head of `s`
/// that tolerates line-continuation escapes between the token's characters.
/// On a full match, advances `s` past the matched text (including any
/// embedded escapes) and returns `true`; otherwise leaves `s` unchanged
/// and returns `false`.
///
/// `token` must be non-empty and already in `TO_UPPER8`-masked form.
#[cold]
fn token_slow(s: &mut &[u8], mut token: &'static [u8], escape_byte: u8) -> bool {
    debug_assert!(!token.is_empty() && token.iter().all(|&n| n & TO_UPPER8 == n));
    if s.len() < token.len() {
        // Too short to contain the token even without escapes.
        return false;
    }
    // Work on a copy; commit to `*s` only on a complete match.
    let mut tmp = *s;
    while let Some((&b, tmp_next)) = tmp.split_first() {
        if b & TO_UPPER8 == token[0] {
            // Matched one character; advance both cursors.
            tmp = tmp_next;
            token = &token[1..];
            if token.is_empty() {
                *s = tmp;
                return true;
            }
            continue;
        }
        if skip_line_escape(&mut tmp, b, tmp_next, escape_byte) {
            // A line escape may appear between token characters; skip it.
            skip_line_escape_followup(&mut tmp, escape_byte);
            continue;
        }
        break;
    }
    false
}
2760
// Clearing this bit maps ASCII lowercase letters onto their uppercase forms.
const TO_UPPER8: u8 = 0xDF;
// The same mask replicated across all 8 bytes of a u64.
const TO_UPPER64: u64 = 0xDFDFDFDFDFDFDFDF;

/// Returns `true` if `s` starts with `needle`, comparing ASCII
/// case-insensitively by masking each haystack byte with `TO_UPPER8`.
/// `needle` must be non-empty and already in masked (uppercase) form.
#[inline(always)] // Ensure the code getting the length of the needle is inlined.
fn starts_with_ignore_ascii_case(mut s: &[u8], mut needle: &'static [u8]) -> bool {
    debug_assert!(!needle.is_empty() && needle.iter().all(|&n| n & TO_UPPER8 == n));
    if s.len() < needle.len() {
        return false;
    }
    if needle.len() == 1 {
        return s[0] & TO_UPPER8 == needle[0];
    }
    // Compare whole 8-byte words while the needle still has them.
    while needle.len() >= 8 {
        let hay = u64::from_ne_bytes(s[..8].try_into().unwrap()) & TO_UPPER64;
        let pat = u64::from_ne_bytes(needle[..8].try_into().unwrap());
        if hay != pat {
            return false;
        }
        s = &s[8..];
        needle = &needle[8..];
    }
    if needle.is_empty() {
        return true;
    }
    // 2..=7 bytes remain: compare them as zero-padded u64 words.
    let mut hay = [0u8; 8];
    hay[..needle.len()].copy_from_slice(&s[..needle.len()]);
    let mut pat = [0u8; 8];
    pat[..needle.len()].copy_from_slice(needle);
    u64::from_ne_bytes(hay) & TO_UPPER64 == u64::from_ne_bytes(pat)
}
#[test]
fn test_starts_with_ignore_ascii_case() {
    // (haystack, needle, expected) — covers the 1-byte, padded-word
    // (2..=7 bytes), exact-8, and multi-word paths.
    static CASES: &[(&[u8], &[u8], bool)] = &[
        (b"ABC", b"ABC", true),
        (b"abc", b"ABC", true),
        (b"AbC", b"ABC", true),
        (b"ABB", b"ABC", false),
        (b"ABCDEFGH", b"ABCDEFGH", true),
        (b"abcdefgh", b"ABCDEFGH", true),
        (b"AbCdEfGh", b"ABCDEFGH", true),
        (b"ABCDEFGc", b"ABCDEFGH", false),
        (b"ABCDEFGHIJKLMNOPQRSTUVWXYZ", b"ABCDEFGHIJKLMNOPQRSTUVWXYZ", true),
        (b"abcdefghijklmnopqrstuvwxyz", b"ABCDEFGHIJKLMNOPQRSTUVWXYZ", true),
        (b"aBcDeFgHiJkLmNoPqRsTuVwXyZ", b"ABCDEFGHIJKLMNOPQRSTUVWXYZ", true),
        (b"aBcDeFgHiJkLmNoPqRsTuVwXyc", b"ABCDEFGHIJKLMNOPQRSTUVWXYZ", false),
    ];
    for &(haystack, needle, expected) in CASES {
        assert_eq!(
            starts_with_ignore_ascii_case(haystack, needle),
            expected,
            "haystack={:?} needle={:?}",
            haystack,
            needle,
        );
    }
}
2829
// Lookup table for ascii to hex decoding.
// Maps an ASCII byte to its hexadecimal digit value: b'0'-b'9' => 0-9,
// b'A'-b'F' / b'a'-b'f' => 10-15, and u8::MAX for every other byte.
#[rustfmt::skip]
static HEX_DECODE_TABLE: [u8; 256] = {
    const __: u8 = u8::MAX;
    [
        //  _0  _1  _2  _3  _4  _5  _6  _7  _8  _9  _A  _B  _C  _D  _E  _F
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2_
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, __, __, __, __, __, __, // 3_
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5_
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E_
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F_
    ]
};