1#![doc(test(
77 no_crate_inject,
78 attr(
79 deny(warnings, rust_2018_idioms, single_use_lifetimes),
80 allow(dead_code, unused_variables)
81 )
82))]
83#![forbid(unsafe_code)]
84#![warn(
85 missing_debug_implementations,
87 missing_docs,
88 clippy::alloc_instead_of_core,
89 clippy::exhaustive_enums,
90 clippy::exhaustive_structs,
91 clippy::impl_trait_in_params,
92 clippy::std_instead_of_core,
95)]
96#![allow(clippy::inline_always)]
97
98#[cfg(test)]
99#[path = "gen/tests/assert_impl.rs"]
100mod assert_impl;
101#[cfg(test)]
102#[path = "gen/tests/track_size.rs"]
103mod track_size;
104
105mod error;
106
107use core::{mem, ops::Range, str};
108use std::{borrow::Cow, collections::HashMap};
109
110use smallvec::SmallVec;
111
112pub use self::error::Error;
113use self::error::{ErrorKind, Result};
114
115#[allow(clippy::missing_panics_doc)]
117pub fn parse(text: &str) -> Result<Dockerfile<'_>> {
118 let mut p = ParseIter::new(text)?;
119 let mut s = p.s;
120
121 let mut instructions = Vec::with_capacity(p.text.len() / 60);
122 let mut stages = Vec::with_capacity(1);
123 let mut named_stages = 0;
124 let mut current_stage = None;
125 while let Some((&b, s_next)) = s.split_first() {
126 let instruction =
127 parse_instruction(&mut p, &mut s, b, s_next).map_err(|e| e.into_error(&p))?;
128 match instruction {
129 Instruction::From(from) => {
130 named_stages += from.as_.is_some() as usize;
131 let new_stage = instructions.len();
132 if let Some(prev_stage) = current_stage.replace(new_stage) {
133 stages.push(prev_stage..new_stage);
134 }
135 instructions.push(Instruction::From(from));
136 }
137 arg @ Instruction::Arg(..) => instructions.push(arg),
138 instruction => {
139 if current_stage.is_none() {
140 return Err(ErrorKind::Expected("FROM", instruction.instruction_span().start)
141 .into_error(&p));
142 }
143 instructions.push(instruction);
144 }
145 }
146 skip_comments_and_whitespaces(&mut s, p.escape_byte);
147 }
148 if let Some(current_stage) = current_stage {
149 stages.push(current_stage..instructions.len());
150 }
151
152 if stages.is_empty() {
153 return Err(ErrorKind::NoStages.into_error(&p));
155 }
156 let mut stages_by_name = HashMap::with_capacity(named_stages);
160 for (i, stage) in stages.iter().enumerate() {
161 let Instruction::From(from) = &instructions[stage.start] else { unreachable!() };
162 if let Some((_as, name)) = &from.as_ {
163 if let Some(first_occurrence) = stages_by_name.insert(name.value.clone(), i) {
164 let Instruction::From(from) = &instructions[stages[first_occurrence].start] else {
165 unreachable!()
166 };
167 let first = from.as_.as_ref().unwrap().1.span.clone();
168 let second = name.span.clone();
169 return Err(ErrorKind::DuplicateName { first, second }.into_error(&p));
170 }
171 }
172 }
173
174 Ok(Dockerfile { parser_directives: p.parser_directives, instructions, stages, stages_by_name })
175}
176
177pub fn parse_iter(text: &str) -> Result<ParseIter<'_>> {
182 ParseIter::new(text)
183}
184
185#[derive(Debug)]
187#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
188#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
189pub struct Dockerfile<'a> {
190 pub parser_directives: ParserDirectives<'a>,
192 pub instructions: Vec<Instruction<'a>>,
194 #[cfg_attr(feature = "serde", serde(skip))]
195 stages: Vec<Range<usize>>,
196 #[cfg_attr(feature = "serde", serde(skip))]
197 stages_by_name: HashMap<Cow<'a, str>, usize>,
198}
199impl<'a> Dockerfile<'a> {
200 #[allow(clippy::missing_panics_doc)] #[must_use]
203 pub fn global_args<'b>(&'b self) -> impl ExactSizeIterator<Item = &'b ArgInstruction<'a>> {
204 self.instructions[..self.stages.first().unwrap().start].iter().map(|arg| {
205 let Instruction::Arg(arg) = arg else { unreachable!() };
206 arg
207 })
208 }
209 #[must_use]
211 pub fn stage<'b>(&'b self, name: &str) -> Option<Stage<'a, 'b>> {
212 let i = *self.stages_by_name.get(name)?;
213 let stage = &self.stages[i];
214 let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
215 Some(Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] })
216 }
217 #[must_use]
219 pub fn stages<'b>(&'b self) -> impl ExactSizeIterator<Item = Stage<'a, 'b>> {
220 self.stages.iter().map(move |stage| {
221 let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
222 Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] }
223 })
224 }
225}
226#[derive(Debug)]
228#[non_exhaustive]
229pub struct Stage<'a, 'b> {
230 pub from: &'b FromInstruction<'a>,
232 pub instructions: &'b [Instruction<'a>],
234}
235
236#[derive(Debug)]
240#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
241#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
242#[non_exhaustive]
243pub struct ParserDirectives<'a> {
244 pub syntax: Option<ParserDirective<&'a str>>,
248 pub escape: Option<ParserDirective<char>>,
252 pub check: Option<ParserDirective<&'a str>>,
256}
257#[derive(Debug)]
259#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
260#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
261pub struct ParserDirective<T> {
262 start: usize,
267 pub value: Spanned<T>,
272}
273impl<T> ParserDirective<T> {
274 #[must_use]
279 pub fn span(&self) -> Span {
280 self.start..self.value.span.end
281 }
282}
283
284#[derive(Debug)]
286#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
287#[cfg_attr(feature = "serde", serde(tag = "kind"))]
288#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
289#[non_exhaustive]
290pub enum Instruction<'a> {
291 Add(AddInstruction<'a>),
293 Arg(ArgInstruction<'a>),
295 Cmd(CmdInstruction<'a>),
297 Copy(CopyInstruction<'a>),
299 Entrypoint(EntrypointInstruction<'a>),
301 Env(EnvInstruction<'a>),
303 Expose(ExposeInstruction<'a>),
305 From(FromInstruction<'a>),
307 Healthcheck(HealthcheckInstruction<'a>),
309 Label(LabelInstruction<'a>),
311 Maintainer(MaintainerInstruction<'a>),
313 Onbuild(OnbuildInstruction<'a>),
315 Run(RunInstruction<'a>),
317 Shell(ShellInstruction<'a>),
319 Stopsignal(StopsignalInstruction<'a>),
321 User(UserInstruction<'a>),
323 Volume(VolumeInstruction<'a>),
325 Workdir(WorkdirInstruction<'a>),
327}
328impl Instruction<'_> {
329 fn instruction_span(&self) -> Span {
330 match self {
331 Instruction::Add(instruction) => instruction.add.span.clone(),
332 Instruction::Arg(instruction) => instruction.arg.span.clone(),
333 Instruction::Cmd(instruction) => instruction.cmd.span.clone(),
334 Instruction::Copy(instruction) => instruction.copy.span.clone(),
335 Instruction::Entrypoint(instruction) => instruction.entrypoint.span.clone(),
336 Instruction::Env(instruction) => instruction.env.span.clone(),
337 Instruction::Expose(instruction) => instruction.expose.span.clone(),
338 Instruction::From(instruction) => instruction.from.span.clone(),
339 Instruction::Healthcheck(instruction) => instruction.healthcheck.span.clone(),
340 Instruction::Label(instruction) => instruction.label.span.clone(),
341 Instruction::Maintainer(instruction) => instruction.maintainer.span.clone(),
342 Instruction::Onbuild(instruction) => instruction.onbuild.span.clone(),
343 Instruction::Run(instruction) => instruction.run.span.clone(),
344 Instruction::Shell(instruction) => instruction.shell.span.clone(),
345 Instruction::Stopsignal(instruction) => instruction.stopsignal.span.clone(),
346 Instruction::User(instruction) => instruction.user.span.clone(),
347 Instruction::Volume(instruction) => instruction.volume.span.clone(),
348 Instruction::Workdir(instruction) => instruction.workdir.span.clone(),
349 }
350 }
351}
352#[derive(Debug)]
356#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
357#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
358#[non_exhaustive]
359pub struct AddInstruction<'a> {
360 pub add: Keyword,
365 pub options: SmallVec<[Flag<'a>; 1]>,
370 pub src: SmallVec<[Source<'a>; 1]>,
376 pub dest: UnescapedString<'a>,
381}
382#[derive(Debug)]
386#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
387#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
388#[non_exhaustive]
389pub struct ArgInstruction<'a> {
390 pub arg: Keyword,
395 pub arguments: UnescapedString<'a>,
401}
402#[derive(Debug)]
406#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
407#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
408#[non_exhaustive]
409pub struct CmdInstruction<'a> {
410 pub cmd: Keyword,
415 pub arguments: Command<'a>,
420}
421#[derive(Debug)]
425#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
426#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
427#[non_exhaustive]
428pub struct CopyInstruction<'a> {
429 pub copy: Keyword,
434 pub options: SmallVec<[Flag<'a>; 1]>,
439 pub src: SmallVec<[Source<'a>; 1]>,
445 pub dest: UnescapedString<'a>,
450}
451#[derive(Debug)]
454#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
455#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
456#[non_exhaustive]
457pub enum Source<'a> {
458 Path(UnescapedString<'a>),
460 HereDoc(HereDoc<'a>),
462}
463#[derive(Debug)]
467#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
468#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
469#[non_exhaustive]
470pub struct EntrypointInstruction<'a> {
471 pub entrypoint: Keyword,
476 pub arguments: Command<'a>,
481}
482#[derive(Debug)]
486#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
487#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
488#[non_exhaustive]
489pub struct EnvInstruction<'a> {
490 pub env: Keyword,
495 pub arguments: UnescapedString<'a>,
501}
502#[derive(Debug)]
506#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
507#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
508#[non_exhaustive]
509pub struct ExposeInstruction<'a> {
510 pub expose: Keyword,
515 pub arguments: SmallVec<[UnescapedString<'a>; 1]>,
520}
521#[derive(Debug)]
525#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
526#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
527#[non_exhaustive]
528pub struct FromInstruction<'a> {
529 pub from: Keyword,
534 pub options: Vec<Flag<'a>>,
539 pub image: UnescapedString<'a>,
544 pub as_: Option<(Keyword, UnescapedString<'a>)>,
549}
550#[derive(Debug)]
554#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
555#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
556#[non_exhaustive]
557pub struct HealthcheckInstruction<'a> {
558 pub healthcheck: Keyword,
563 pub options: Vec<Flag<'a>>,
568 pub arguments: HealthcheckArguments<'a>,
573}
574#[derive(Debug)]
576#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
577#[cfg_attr(feature = "serde", serde(tag = "kind"))]
578#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
579#[non_exhaustive]
580pub enum HealthcheckArguments<'a> {
581 #[non_exhaustive]
583 Cmd {
584 cmd: Keyword,
589 arguments: Command<'a>,
594 },
595 #[non_exhaustive]
597 None {
598 none: Keyword,
603 },
604}
605#[derive(Debug)]
609#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
610#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
611#[non_exhaustive]
612pub struct LabelInstruction<'a> {
613 pub label: Keyword,
618 pub arguments: UnescapedString<'a>,
624}
625#[derive(Debug)]
629#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
630#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
631#[non_exhaustive]
632pub struct MaintainerInstruction<'a> {
633 pub maintainer: Keyword,
638 pub name: UnescapedString<'a>,
643}
644#[derive(Debug)]
648#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
649#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
650#[non_exhaustive]
651pub struct OnbuildInstruction<'a> {
652 pub onbuild: Keyword,
657 pub instruction: Box<Instruction<'a>>,
662}
663#[derive(Debug)]
667#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
668#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
669#[non_exhaustive]
670pub struct RunInstruction<'a> {
671 pub run: Keyword,
676 pub options: SmallVec<[Flag<'a>; 1]>,
681 pub arguments: Command<'a>,
686 pub here_docs: Vec<HereDoc<'a>>,
694}
695#[derive(Debug)]
699#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
700#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
701#[non_exhaustive]
702pub struct ShellInstruction<'a> {
703 pub shell: Keyword,
708 pub arguments: SmallVec<[UnescapedString<'a>; 4]>,
715}
716#[derive(Debug)]
720#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
721#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
722#[non_exhaustive]
723pub struct StopsignalInstruction<'a> {
724 pub stopsignal: Keyword,
729 pub arguments: UnescapedString<'a>,
734}
735#[derive(Debug)]
739#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
740#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
741#[non_exhaustive]
742pub struct UserInstruction<'a> {
743 pub user: Keyword,
748 pub arguments: UnescapedString<'a>,
753}
754#[derive(Debug)]
758#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
759#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
760#[non_exhaustive]
761pub struct VolumeInstruction<'a> {
762 pub volume: Keyword,
767 pub arguments: JsonOrStringArray<'a, 1>,
772}
773#[derive(Debug)]
777#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
778#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
779#[non_exhaustive]
780pub struct WorkdirInstruction<'a> {
781 pub workdir: Keyword,
786 pub arguments: UnescapedString<'a>,
791}
792
793#[derive(Debug)]
795#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
796#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
797#[non_exhaustive]
798pub struct Keyword {
799 #[allow(missing_docs)]
800 pub span: Span,
801}
802
803#[derive(Debug)]
805#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
806#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
807pub struct Flag<'a> {
808 flag_start: usize,
813 pub name: UnescapedString<'a>,
818 pub value: Option<UnescapedString<'a>>,
823}
824impl Flag<'_> {
825 #[must_use]
830 pub fn flag_span(&self) -> Span {
831 self.flag_start..self.name.span.end
832 }
833 #[must_use]
838 pub fn span(&self) -> Span {
839 match &self.value {
840 Some(v) => self.flag_start..v.span.end,
841 None => self.flag_span(),
842 }
843 }
844}
845
846#[derive(Debug, PartialEq)]
848#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
849#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
850#[non_exhaustive]
851pub struct UnescapedString<'a> {
852 #[allow(missing_docs)]
853 pub span: Span,
854 #[allow(missing_docs)]
855 pub value: Cow<'a, str>,
856}
857
858#[derive(Debug)]
865#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
866#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
867#[non_exhaustive]
868pub enum Command<'a> {
869 Exec(Spanned<SmallVec<[UnescapedString<'a>; 1]>>),
872 Shell(Spanned<&'a str>),
874}
875
876#[derive(Debug)]
881#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
882#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
883#[allow(clippy::exhaustive_enums)]
884pub enum JsonOrStringArray<'a, const N: usize> {
885 Json(Spanned<SmallVec<[UnescapedString<'a>; N]>>),
887 String(SmallVec<[UnescapedString<'a>; N]>),
889}
890
891#[derive(Debug)]
893#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
894#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
895#[non_exhaustive]
896pub struct HereDoc<'a> {
897 #[allow(missing_docs)]
898 pub span: Span,
899 pub expand: bool,
901 #[allow(missing_docs)]
902 pub value: Cow<'a, str>,
903}
904
905#[derive(Debug)]
907#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
908#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
909#[allow(clippy::exhaustive_structs)]
910pub struct Spanned<T> {
911 #[allow(missing_docs)]
912 pub span: Span,
913 #[allow(missing_docs)]
914 pub value: T,
915}
916
// Half-open `start..end` range of byte offsets into the parsed text.
#[allow(missing_docs)]
pub type Span = core::ops::Range<usize>;
919
920#[allow(missing_debug_implementations)]
927#[must_use = "iterators are lazy and do nothing unless consumed"]
928pub struct ParseIter<'a> {
929 text: &'a str,
930 s: &'a [u8],
931 escape_byte: u8,
932 has_stage: bool,
933 in_onbuild: bool,
934 parser_directives: ParserDirectives<'a>,
935}
936impl<'a> ParseIter<'a> {
937 fn new(mut text: &'a str) -> Result<Self> {
938 if text.as_bytes().starts_with(UTF8_BOM) {
940 text = &text[UTF8_BOM.len()..];
941 }
942 let mut p = Self {
943 text,
944 s: text.as_bytes(),
945 escape_byte: DEFAULT_ESCAPE_BYTE,
946 has_stage: false,
947 in_onbuild: false,
948 parser_directives: ParserDirectives {
949 syntax: None,
951 escape: None,
952 check: None,
954 },
955 };
956
957 parse_parser_directives(&mut p).map_err(|e| e.into_error(&p))?;
958
959 skip_comments_and_whitespaces(&mut p.s, p.escape_byte);
963 Ok(p)
964 }
965}
966impl<'a> Iterator for ParseIter<'a> {
967 type Item = Result<Instruction<'a>>;
968 #[inline]
969 fn next(&mut self) -> Option<Self::Item> {
970 let p = self;
971 let mut s = p.s;
972 if let Some((&b, s_next)) = s.split_first() {
973 let instruction = match parse_instruction(p, &mut s, b, s_next) {
974 Ok(i) => i,
975 Err(e) => return Some(Err(e.into_error(p))),
976 };
977 match &instruction {
978 Instruction::From(..) => {
979 p.has_stage = true;
980 }
981 Instruction::Arg(..) => {}
982 instruction => {
983 if !p.has_stage {
984 return Some(Err(ErrorKind::Expected(
985 "FROM",
986 instruction.instruction_span().start,
987 )
988 .into_error(p)));
989 }
990 }
991 }
992 skip_comments_and_whitespaces(&mut s, p.escape_byte);
993 p.s = s;
994 return Some(Ok(instruction));
995 }
996 if !p.has_stage {
997 return Some(Err(ErrorKind::NoStages.into_error(p)));
999 }
1000 None
1001 }
1002}
1003
/// Escape character used unless the `escape` parser directive selects
/// another one: the backslash.
const DEFAULT_ESCAPE_BYTE: u8 = b'\\';
1005
1006fn parse_parser_directives(p: &mut ParseIter<'_>) -> Result<(), ErrorKind> {
1007 while let Some((&b'#', s_next)) = p.s.split_first() {
1008 p.s = s_next;
1009 skip_spaces_no_escape(&mut p.s);
1010 let directive_start = p.text.len() - p.s.len();
1011 if token(&mut p.s, b"SYNTAX") {
1012 skip_spaces_no_escape(&mut p.s);
1013 if let Some((&b'=', s_next)) = p.s.split_first() {
1014 p.s = s_next;
1015 if p.parser_directives.syntax.is_some() {
1016 p.parser_directives.syntax = None;
1018 p.parser_directives.escape = None;
1019 p.parser_directives.check = None;
1020 p.escape_byte = DEFAULT_ESCAPE_BYTE;
1021 skip_this_line_no_escape(&mut p.s);
1022 break;
1023 }
1024 skip_spaces_no_escape(&mut p.s);
1025 let value_start = p.text.len() - p.s.len();
1026 skip_non_whitespace_no_escape(&mut p.s);
1027 let end = p.text.len() - p.s.len();
1028 let value = p.text[value_start..end].trim_ascii_end();
1029 p.parser_directives.syntax = Some(ParserDirective {
1030 start: directive_start,
1031 value: Spanned { span: value_start..value_start + value.len(), value },
1032 });
1033 skip_this_line_no_escape(&mut p.s);
1034 continue;
1035 }
1036 } else if token(&mut p.s, b"CHECK") {
1037 skip_spaces_no_escape(&mut p.s);
1038 if let Some((&b'=', s_next)) = p.s.split_first() {
1039 p.s = s_next;
1040 if p.parser_directives.check.is_some() {
1041 p.parser_directives.syntax = None;
1043 p.parser_directives.escape = None;
1044 p.parser_directives.check = None;
1045 p.escape_byte = DEFAULT_ESCAPE_BYTE;
1046 skip_this_line_no_escape(&mut p.s);
1047 break;
1048 }
1049 skip_spaces_no_escape(&mut p.s);
1050 let value_start = p.text.len() - p.s.len();
1051 skip_non_whitespace_no_escape(&mut p.s);
1052 let end = p.text.len() - p.s.len();
1053 let value = p.text[value_start..end].trim_ascii_end();
1054 p.parser_directives.check = Some(ParserDirective {
1055 start: directive_start,
1056 value: Spanned { span: value_start..value_start + value.len(), value },
1057 });
1058 skip_this_line_no_escape(&mut p.s);
1059 continue;
1060 }
1061 } else if token(&mut p.s, b"ESCAPE") {
1062 skip_spaces_no_escape(&mut p.s);
1063 if let Some((&b'=', s_next)) = p.s.split_first() {
1064 p.s = s_next;
1065 if p.parser_directives.escape.is_some() {
1066 p.parser_directives.syntax = None;
1068 p.parser_directives.escape = None;
1069 p.parser_directives.check = None;
1070 p.escape_byte = DEFAULT_ESCAPE_BYTE;
1071 skip_this_line_no_escape(&mut p.s);
1072 break;
1073 }
1074 skip_spaces_no_escape(&mut p.s);
1075 let value_start = p.text.len() - p.s.len();
1076 skip_non_whitespace_no_escape(&mut p.s);
1077 let end = p.text.len() - p.s.len();
1078 let value = p.text[value_start..end].trim_ascii_end();
1079 match value {
1080 "`" => p.escape_byte = b'`',
1081 "\\" => {}
1082 _ => return Err(ErrorKind::InvalidEscape { escape_start: value_start }),
1083 }
1084 p.parser_directives.escape = Some(ParserDirective {
1085 start: directive_start,
1086 value: Spanned {
1087 span: value_start..value_start + value.len(),
1088 value: p.escape_byte as char,
1089 },
1090 });
1091 skip_this_line_no_escape(&mut p.s);
1092 continue;
1093 }
1094 }
1095 skip_this_line_no_escape(&mut p.s);
1096 break;
1097 }
1098 Ok(())
1099}
1100
1101#[inline]
1102fn parse_instruction<'a>(
1103 p: &mut ParseIter<'a>,
1104 s: &mut &'a [u8],
1105 b: u8,
1106 s_next: &'a [u8],
1107) -> Result<Instruction<'a>, ErrorKind> {
1108 let instruction_start = p.text.len() - s.len();
1109 *s = s_next;
1110 match b & TO_UPPER8 {
1112 b'A' => {
1113 if token(s, &b"ARG"[1..]) {
1114 let instruction_span = instruction_start..p.text.len() - s.len();
1115 if spaces_or_line_end(s, p.escape_byte) {
1116 return parse_arg(p, s, Keyword { span: instruction_span });
1117 }
1118 } else if token(s, &b"ADD"[1..]) {
1119 let instruction_span = instruction_start..p.text.len() - s.len();
1120 if spaces_or_line_end(s, p.escape_byte) {
1121 let add = Keyword { span: instruction_span };
1122 let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1123 return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1124 }
1125 } else if token_slow(s, &b"ARG"[1..], p.escape_byte) {
1126 let instruction_span = instruction_start..p.text.len() - s.len();
1127 if spaces_or_line_end(s, p.escape_byte) {
1128 return parse_arg(p, s, Keyword { span: instruction_span });
1129 }
1130 } else if token_slow(s, &b"ADD"[1..], p.escape_byte) {
1131 let instruction_span = instruction_start..p.text.len() - s.len();
1132 if spaces_or_line_end(s, p.escape_byte) {
1133 let add = Keyword { span: instruction_span };
1134 let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1135 return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1136 }
1137 }
1138 }
1139 b'C' => {
1140 if token(s, &b"COPY"[1..]) {
1141 let instruction_span = instruction_start..p.text.len() - s.len();
1142 if spaces_or_line_end(s, p.escape_byte) {
1143 let copy = Keyword { span: instruction_span };
1144 let (options, src, dest) = parse_add_or_copy(p, s, ©)?;
1145 return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1146 }
1147 } else if token(s, &b"CMD"[1..]) {
1148 let instruction_span = instruction_start..p.text.len() - s.len();
1149 if spaces_or_line_end(s, p.escape_byte) {
1150 return parse_cmd(p, s, Keyword { span: instruction_span });
1151 }
1152 } else if token_slow(s, &b"COPY"[1..], p.escape_byte) {
1153 let instruction_span = instruction_start..p.text.len() - s.len();
1154 if spaces_or_line_end(s, p.escape_byte) {
1155 let copy = Keyword { span: instruction_span };
1156 let (options, src, dest) = parse_add_or_copy(p, s, ©)?;
1157 return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1158 }
1159 } else if token_slow(s, &b"CMD"[1..], p.escape_byte) {
1160 let instruction_span = instruction_start..p.text.len() - s.len();
1161 if spaces_or_line_end(s, p.escape_byte) {
1162 return parse_cmd(p, s, Keyword { span: instruction_span });
1163 }
1164 }
1165 }
1166 b'E' => {
1167 if token(s, &b"ENV"[1..]) {
1168 let instruction_span = instruction_start..p.text.len() - s.len();
1169 if spaces_or_line_end(s, p.escape_byte) {
1170 return parse_env(p, s, Keyword { span: instruction_span });
1171 }
1172 } else if token(s, &b"EXPOSE"[1..]) {
1173 let instruction_span = instruction_start..p.text.len() - s.len();
1174 if spaces_or_line_end(s, p.escape_byte) {
1175 return parse_expose(p, s, Keyword { span: instruction_span });
1176 }
1177 } else if token(s, &b"ENTRYPOINT"[1..]) {
1178 let instruction_span = instruction_start..p.text.len() - s.len();
1179 if spaces_or_line_end(s, p.escape_byte) {
1180 return parse_entrypoint(p, s, Keyword { span: instruction_span });
1181 }
1182 } else if token_slow(s, &b"ENV"[1..], p.escape_byte) {
1183 let instruction_span = instruction_start..p.text.len() - s.len();
1184 if spaces_or_line_end(s, p.escape_byte) {
1185 return parse_env(p, s, Keyword { span: instruction_span });
1186 }
1187 } else if token_slow(s, &b"EXPOSE"[1..], p.escape_byte) {
1188 let instruction_span = instruction_start..p.text.len() - s.len();
1189 if spaces_or_line_end(s, p.escape_byte) {
1190 return parse_expose(p, s, Keyword { span: instruction_span });
1191 }
1192 } else if token_slow(s, &b"ENTRYPOINT"[1..], p.escape_byte) {
1193 let instruction_span = instruction_start..p.text.len() - s.len();
1194 if spaces_or_line_end(s, p.escape_byte) {
1195 return parse_entrypoint(p, s, Keyword { span: instruction_span });
1196 }
1197 }
1198 }
1199 b'F' => {
1200 if token(s, &b"FROM"[1..]) || token_slow(s, &b"FROM"[1..], p.escape_byte) {
1201 let instruction_span = instruction_start..p.text.len() - s.len();
1202 if spaces_or_line_end(s, p.escape_byte) {
1203 return parse_from(p, s, Keyword { span: instruction_span });
1204 }
1205 }
1206 }
1207 b'H' => {
1208 if token(s, &b"HEALTHCHECK"[1..]) || token_slow(s, &b"HEALTHCHECK"[1..], p.escape_byte)
1209 {
1210 let instruction_span = instruction_start..p.text.len() - s.len();
1211 if spaces_or_line_end(s, p.escape_byte) {
1212 return parse_healthcheck(p, s, Keyword { span: instruction_span });
1213 }
1214 }
1215 }
1216 b'L' => {
1217 if token(s, &b"LABEL"[1..]) || token_slow(s, &b"LABEL"[1..], p.escape_byte) {
1218 let instruction_span = instruction_start..p.text.len() - s.len();
1219 if spaces_or_line_end(s, p.escape_byte) {
1220 return parse_label(p, s, Keyword { span: instruction_span });
1221 }
1222 }
1223 }
1224 b'M' => {
1225 if token(s, &b"MAINTAINER"[1..]) || token_slow(s, &b"MAINTAINER"[1..], p.escape_byte) {
1226 let instruction_span = instruction_start..p.text.len() - s.len();
1227 if spaces_or_line_end(s, p.escape_byte) {
1228 return parse_maintainer(p, s, Keyword { span: instruction_span });
1229 }
1230 }
1231 }
1232 b'O' => {
1233 if token(s, &b"ONBUILD"[1..]) || token_slow(s, &b"ONBUILD"[1..], p.escape_byte) {
1234 let instruction_span = instruction_start..p.text.len() - s.len();
1235 if spaces_or_line_end(s, p.escape_byte) {
1236 return parse_onbuild(p, s, Keyword { span: instruction_span });
1237 }
1238 }
1239 }
1240 b'R' => {
1241 if token(s, &b"RUN"[1..]) || token_slow(s, &b"RUN"[1..], p.escape_byte) {
1242 let instruction_span = instruction_start..p.text.len() - s.len();
1243 if spaces_or_line_end(s, p.escape_byte) {
1244 return parse_run(p, s, Keyword { span: instruction_span });
1245 }
1246 }
1247 }
1248 b'S' => {
1249 if token(s, &b"SHELL"[1..]) {
1250 let instruction_span = instruction_start..p.text.len() - s.len();
1251 if spaces_or_line_end(s, p.escape_byte) {
1252 return parse_shell(p, s, Keyword { span: instruction_span });
1253 }
1254 } else if token(s, &b"STOPSIGNAL"[1..]) {
1255 let instruction_span = instruction_start..p.text.len() - s.len();
1256 if spaces_or_line_end(s, p.escape_byte) {
1257 return parse_stopsignal(p, s, Keyword { span: instruction_span });
1258 }
1259 } else if token_slow(s, &b"SHELL"[1..], p.escape_byte) {
1260 let instruction_span = instruction_start..p.text.len() - s.len();
1261 if spaces_or_line_end(s, p.escape_byte) {
1262 return parse_shell(p, s, Keyword { span: instruction_span });
1263 }
1264 } else if token_slow(s, &b"STOPSIGNAL"[1..], p.escape_byte) {
1265 let instruction_span = instruction_start..p.text.len() - s.len();
1266 if spaces_or_line_end(s, p.escape_byte) {
1267 return parse_stopsignal(p, s, Keyword { span: instruction_span });
1268 }
1269 }
1270 }
1271 b'U' => {
1272 if token(s, &b"USER"[1..]) || token_slow(s, &b"USER"[1..], p.escape_byte) {
1273 let instruction_span = instruction_start..p.text.len() - s.len();
1274 if spaces_or_line_end(s, p.escape_byte) {
1275 return parse_user(p, s, Keyword { span: instruction_span });
1276 }
1277 }
1278 }
1279 b'V' => {
1280 if token(s, &b"VOLUME"[1..]) || token_slow(s, &b"VOLUME"[1..], p.escape_byte) {
1281 let instruction_span = instruction_start..p.text.len() - s.len();
1282 if spaces_or_line_end(s, p.escape_byte) {
1283 return parse_volume(p, s, Keyword { span: instruction_span });
1284 }
1285 }
1286 }
1287 b'W' => {
1288 if token(s, &b"WORKDIR"[1..]) || token_slow(s, &b"WORKDIR"[1..], p.escape_byte) {
1289 let instruction_span = instruction_start..p.text.len() - s.len();
1290 if spaces_or_line_end(s, p.escape_byte) {
1291 return parse_workdir(p, s, Keyword { span: instruction_span });
1292 }
1293 }
1294 }
1295 _ => {}
1296 }
1297 Err(ErrorKind::UnknownInstruction { instruction_start })
1298}
1299
1300#[inline]
1301fn parse_arg<'a>(
1302 p: &mut ParseIter<'a>,
1303 s: &mut &'a [u8],
1304 instruction: Keyword,
1305) -> Result<Instruction<'a>, ErrorKind> {
1306 debug_assert!(token_slow(
1307 &mut p.text[instruction.span.clone()].as_bytes(),
1308 b"ARG",
1309 p.escape_byte,
1310 ));
1311 let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1312 if arguments.value.is_empty() {
1313 return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1314 }
1315 Ok(Instruction::Arg(ArgInstruction { arg: instruction, arguments }))
1316}
1317
1318#[inline]
1319fn parse_add_or_copy<'a>(
1320 p: &mut ParseIter<'a>,
1321 s: &mut &'a [u8],
1322 instruction: &Keyword,
1323) -> Result<(SmallVec<[Flag<'a>; 1]>, SmallVec<[Source<'a>; 1]>, UnescapedString<'a>), ErrorKind> {
1324 debug_assert!(
1325 token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"ADD", p.escape_byte,)
1326 || token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"COPY", p.escape_byte,)
1327 );
1328 let options = parse_options(s, p.text, p.escape_byte);
1329 if is_maybe_json(s) {
1330 let mut tmp = *s;
1331 if let Ok(((src, dest), _array_span)) = parse_json_array::<(
1332 SmallVec<[Source<'_>; 1]>,
1333 Option<_>,
1334 )>(&mut tmp, p.text, p.escape_byte)
1335 {
1336 debug_assert!(is_line_end(tmp.first()));
1337 if tmp.is_empty() {
1338 *s = &[];
1339 } else {
1340 *s = &tmp[1..];
1341 }
1342 if src.is_empty() {
1343 return Err(ErrorKind::AtLeastTwoArguments {
1344 instruction_start: instruction.span.start,
1345 });
1346 }
1347 return Ok((options, src, dest.unwrap()));
1348 }
1349 }
1350 let (mut src, dest) = collect_space_separated_unescaped_consume_line::<(
1351 SmallVec<[Source<'_>; 1]>,
1352 Option<_>,
1353 )>(s, p.text, p.escape_byte);
1354 if src.is_empty() {
1355 return Err(ErrorKind::AtLeastTwoArguments { instruction_start: instruction.span.start });
1356 }
1357 for src in &mut src {
1358 let Source::Path(path) = src else { unreachable!() };
1359 let Some(mut delim) = path.value.as_bytes().strip_prefix(b"<<") else { continue };
1360 if delim.is_empty() {
1361 continue;
1362 }
1363 let mut strip_tab = false;
1364 let mut quote = None;
1365 if let Some((&b'-', delim_next)) = delim.split_first() {
1366 strip_tab = true;
1367 delim = delim_next;
1368 }
1369 if let Some((&b, delim_next)) = delim.split_first() {
1370 if matches!(b, b'"' | b'\'') {
1371 quote = Some(b);
1372 delim = delim_next;
1373 if delim.last() != Some(&b) {
1374 return Err(ErrorKind::ExpectedOwned(
1375 format!(
1376 "quote ({}), but found '{}'",
1377 b as char,
1378 *delim.last().unwrap_or(&0) as char
1379 ),
1380 p.text.len() - s.len(),
1381 ));
1382 }
1383 delim = &delim[..delim.len() - 1];
1384 }
1385 }
1386 if strip_tab {
1387 let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
1388 *src = Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc });
1389 } else {
1390 let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
1391 *src =
1392 Source::HereDoc(HereDoc { span, expand: quote.is_none(), value: here_doc.into() });
1393 }
1394 }
1395 Ok((options, src, dest.unwrap()))
1396}
1397
1398#[allow(clippy::unnecessary_wraps)]
1399#[inline]
1400fn parse_cmd<'a>(
1401 p: &mut ParseIter<'a>,
1402 s: &mut &'a [u8],
1403 instruction: Keyword,
1404) -> Result<Instruction<'a>, ErrorKind> {
1405 debug_assert!(token_slow(
1406 &mut p.text[instruction.span.clone()].as_bytes(),
1407 b"CMD",
1408 p.escape_byte,
1409 ));
1410 if is_maybe_json(s) {
1411 let mut tmp = *s;
1412 if let Ok((arguments, array_span)) =
1413 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1414 {
1415 debug_assert!(is_line_end(tmp.first()));
1416 if tmp.is_empty() {
1417 *s = &[];
1418 } else {
1419 *s = &tmp[1..];
1420 }
1421 return Ok(Instruction::Cmd(CmdInstruction {
1424 cmd: instruction,
1425 arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1426 }));
1427 }
1428 }
1429 let arguments_start = p.text.len() - s.len();
1430 skip_this_line(s, p.escape_byte);
1431 let end = p.text.len() - s.len();
1432 let arguments = p.text[arguments_start..end].trim_ascii_end();
1433 Ok(Instruction::Cmd(CmdInstruction {
1434 cmd: instruction,
1435 arguments: Command::Shell(Spanned {
1436 span: arguments_start..arguments_start + arguments.len(),
1437 value: arguments,
1438 }),
1439 }))
1440}
1441
1442#[inline]
1443fn parse_env<'a>(
1444 p: &mut ParseIter<'a>,
1445 s: &mut &'a [u8],
1446 instruction: Keyword,
1447) -> Result<Instruction<'a>, ErrorKind> {
1448 debug_assert!(token_slow(
1449 &mut p.text[instruction.span.clone()].as_bytes(),
1450 b"ENV",
1451 p.escape_byte,
1452 ));
1453 let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1454 if arguments.value.is_empty() {
1455 return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1456 }
1457 Ok(Instruction::Env(EnvInstruction { env: instruction, arguments }))
1458}
1459
1460#[inline]
1461fn parse_expose<'a>(
1462 p: &mut ParseIter<'a>,
1463 s: &mut &'a [u8],
1464 instruction: Keyword,
1465) -> Result<Instruction<'a>, ErrorKind> {
1466 debug_assert!(token_slow(
1467 &mut p.text[instruction.span.clone()].as_bytes(),
1468 b"EXPOSE",
1469 p.escape_byte,
1470 ));
1471 let arguments: SmallVec<[_; 1]> =
1472 collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1473 if arguments.is_empty() {
1474 return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1475 }
1476 Ok(Instruction::Expose(ExposeInstruction { expose: instruction, arguments }))
1477}
1478
1479#[inline]
1480fn parse_entrypoint<'a>(
1481 p: &mut ParseIter<'a>,
1482 s: &mut &'a [u8],
1483 instruction: Keyword,
1484) -> Result<Instruction<'a>, ErrorKind> {
1485 debug_assert!(token_slow(
1486 &mut p.text[instruction.span.clone()].as_bytes(),
1487 b"ENTRYPOINT",
1488 p.escape_byte,
1489 ));
1490 if is_maybe_json(s) {
1491 let mut tmp = *s;
1492 if let Ok((arguments, array_span)) =
1493 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1494 {
1495 debug_assert!(is_line_end(tmp.first()));
1496 if tmp.is_empty() {
1497 *s = &[];
1498 } else {
1499 *s = &tmp[1..];
1500 }
1501 if arguments.is_empty() {
1502 return Err(ErrorKind::AtLeastOneArgument {
1503 instruction_start: instruction.span.start,
1504 });
1505 }
1506 return Ok(Instruction::Entrypoint(EntrypointInstruction {
1507 entrypoint: instruction,
1508 arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1509 }));
1510 }
1511 }
1512 let arguments_start = p.text.len() - s.len();
1513 skip_this_line(s, p.escape_byte);
1514 let end = p.text.len() - s.len();
1515 let arguments = p.text[arguments_start..end].trim_ascii_end();
1516 if arguments.is_empty() {
1517 return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1518 }
1519 Ok(Instruction::Entrypoint(EntrypointInstruction {
1520 entrypoint: instruction,
1521 arguments: Command::Shell(Spanned {
1522 span: arguments_start..arguments_start + arguments.len(),
1523 value: arguments,
1524 }),
1525 }))
1526}
1527
1528#[inline]
1529fn parse_from<'a>(
1530 p: &mut ParseIter<'a>,
1531 s: &mut &'a [u8],
1532 instruction: Keyword,
1533) -> Result<Instruction<'a>, ErrorKind> {
1534 debug_assert!(token_slow(
1535 &mut p.text[instruction.span.clone()].as_bytes(),
1536 b"FROM",
1537 p.escape_byte,
1538 ));
1539 let options = parse_options(s, p.text, p.escape_byte);
1540 let image = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
1543 if image.value.is_empty() {
1544 return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1545 }
1546 let mut as_ = None;
1547 if skip_spaces(s, p.escape_byte) {
1548 let as_start = p.text.len() - s.len();
1549 if token(s, b"AS") || token_slow(s, b"AS", p.escape_byte) {
1550 let as_span = as_start..p.text.len() - s.len();
1551 if !skip_spaces(s, p.escape_byte) {
1552 return Err(ErrorKind::Expected("AS", as_start));
1553 }
1554 let name = collect_non_whitespace_unescaped(s, p.text, p.escape_byte);
1555 skip_spaces(s, p.escape_byte);
1556 if !is_line_end(s.first()) {
1557 return Err(ErrorKind::Expected("newline or eof", p.text.len() - s.len()));
1558 }
1559 as_ = Some((Keyword { span: as_span }, name));
1560 } else if !is_line_end(s.first()) {
1561 return Err(ErrorKind::Expected("AS", as_start));
1562 }
1563 }
1564 Ok(Instruction::From(FromInstruction { from: instruction, options, image, as_ }))
1565}
1566
1567#[inline]
1568fn parse_healthcheck<'a>(
1569 p: &mut ParseIter<'a>,
1570 s: &mut &'a [u8],
1571 instruction: Keyword,
1572) -> Result<Instruction<'a>, ErrorKind> {
1573 debug_assert!(token_slow(
1574 &mut p.text[instruction.span.clone()].as_bytes(),
1575 b"HEALTHCHECK",
1576 p.escape_byte,
1577 ));
1578 let options = parse_options(s, p.text, p.escape_byte);
1579 let Some((&b, s_next)) = s.split_first() else {
1580 return Err(ErrorKind::Expected("CMD or NONE", p.text.len() - s.len()));
1581 };
1582 let cmd_or_none_start = p.text.len() - s.len();
1583 match b & TO_UPPER8 {
1584 b'C' => {
1585 *s = s_next;
1586 if token(s, &b"CMD"[1..]) || token_slow(s, &b"CMD"[1..], p.escape_byte) {
1587 let cmd_span = cmd_or_none_start..p.text.len() - s.len();
1588 let cmd_keyword = Keyword { span: cmd_span };
1589 if spaces_or_line_end(s, p.escape_byte) {
1590 if is_maybe_json(s) {
1591 let mut tmp = *s;
1592 if let Ok((arguments, array_span)) =
1593 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1594 {
1595 debug_assert!(is_line_end(tmp.first()));
1596 if tmp.is_empty() {
1597 *s = &[];
1598 } else {
1599 *s = &tmp[1..];
1600 }
1601 if arguments.is_empty() {
1602 return Err(ErrorKind::Expected(
1603 "at least 1 arguments",
1604 array_span.start,
1605 ));
1606 }
1607 return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1608 healthcheck: instruction,
1609 options,
1610 arguments: HealthcheckArguments::Cmd {
1611 cmd: cmd_keyword,
1612 arguments: Command::Exec(Spanned {
1613 span: array_span,
1614 value: arguments,
1615 }),
1616 },
1617 }));
1618 }
1619 }
1620 let arguments_start = p.text.len() - s.len();
1621 skip_this_line(s, p.escape_byte);
1622 let end = p.text.len() - s.len();
1623 let arguments = p.text[arguments_start..end].trim_ascii_end();
1624 return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1625 healthcheck: instruction,
1626 options,
1627 arguments: HealthcheckArguments::Cmd {
1628 cmd: cmd_keyword,
1629 arguments: Command::Shell(Spanned {
1630 span: arguments_start..arguments_start + arguments.len(),
1631 value: arguments,
1632 }),
1633 },
1634 }));
1635 }
1636 }
1637 }
1638 b'N' => {
1639 *s = s_next;
1640 if token(s, &b"NONE"[1..]) || token_slow(s, &b"NONE"[1..], p.escape_byte) {
1641 let none_span = cmd_or_none_start..p.text.len() - s.len();
1642 skip_spaces(s, p.escape_byte);
1643 if !is_line_end(s.first()) {
1644 return Err(ErrorKind::Expected(
1646 "HEALTHCHECK NONE takes no arguments",
1647 p.text.len() - s.len(),
1648 ));
1649 }
1650 let none_keyword = Keyword { span: none_span };
1652 return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1653 healthcheck: instruction,
1654 options,
1655 arguments: HealthcheckArguments::None { none: none_keyword },
1656 }));
1657 }
1658 }
1659 _ => {}
1660 }
1661 Err(ErrorKind::Expected("CMD or NONE", p.text.len() - s.len()))
1662}
1663
1664#[inline]
1665fn parse_label<'a>(
1666 p: &mut ParseIter<'a>,
1667 s: &mut &'a [u8],
1668 instruction: Keyword,
1669) -> Result<Instruction<'a>, ErrorKind> {
1670 debug_assert!(token_slow(
1671 &mut p.text[instruction.span.clone()].as_bytes(),
1672 b"LABEL",
1673 p.escape_byte,
1674 ));
1675 let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1676 if arguments.value.is_empty() {
1677 return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1678 }
1679 Ok(Instruction::Label(LabelInstruction { label: instruction, arguments }))
1680}
1681
1682#[cold]
1683fn parse_maintainer<'a>(
1684 p: &mut ParseIter<'a>,
1685 s: &mut &'a [u8],
1686 instruction: Keyword,
1687) -> Result<Instruction<'a>, ErrorKind> {
1688 debug_assert!(token_slow(
1689 &mut p.text[instruction.span.clone()].as_bytes(),
1690 b"MAINTAINER",
1691 p.escape_byte,
1692 ));
1693 let name = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1694 if name.value.is_empty() {
1695 return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1696 }
1697 Ok(Instruction::Maintainer(MaintainerInstruction { maintainer: instruction, name }))
1698}
1699
1700#[inline]
1701fn parse_onbuild<'a>(
1702 p: &mut ParseIter<'a>,
1703 s: &mut &'a [u8],
1704 instruction: Keyword,
1705) -> Result<Instruction<'a>, ErrorKind> {
1706 debug_assert!(token_slow(
1707 &mut p.text[instruction.span.clone()].as_bytes(),
1708 b"ONBUILD",
1709 p.escape_byte,
1710 ));
1711 if mem::replace(&mut p.in_onbuild, true) {
1713 return Err(ErrorKind::Expected("ONBUILD ONBUILD is not allowed", instruction.span.start));
1715 }
1716 let Some((&b, s_next)) = s.split_first() else {
1717 return Err(ErrorKind::Expected("instruction after ONBUILD", instruction.span.start));
1718 };
1719 let inner_instruction = parse_instruction(p, s, b, s_next)?;
1744 p.in_onbuild = false;
1745 Ok(Instruction::Onbuild(OnbuildInstruction {
1746 onbuild: instruction,
1747 instruction: Box::new(inner_instruction),
1748 }))
1749}
1750
1751#[inline]
1752fn parse_run<'a>(
1753 p: &mut ParseIter<'a>,
1754 s: &mut &'a [u8],
1755 instruction: Keyword,
1756) -> Result<Instruction<'a>, ErrorKind> {
1757 debug_assert!(token_slow(
1758 &mut p.text[instruction.span.clone()].as_bytes(),
1759 b"RUN",
1760 p.escape_byte,
1761 ));
1762 let options = parse_options(s, p.text, p.escape_byte);
1763 if is_maybe_json(s) {
1764 let mut tmp = *s;
1765 if let Ok((arguments, array_span)) =
1766 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1767 {
1768 debug_assert!(is_line_end(tmp.first()));
1769 if tmp.is_empty() {
1770 *s = &[];
1771 } else {
1772 *s = &tmp[1..];
1773 }
1774 if arguments.is_empty() {
1775 return Err(ErrorKind::AtLeastOneArgument {
1776 instruction_start: instruction.span.start,
1777 });
1778 }
1779 return Ok(Instruction::Run(RunInstruction {
1780 run: instruction,
1781 options,
1782 arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1783 here_docs: vec![],
1785 }));
1786 }
1787 }
1788
1789 let mut strip_tab = false;
1791 let mut quote = None;
1792 let mut pos = 2;
1793 if s.len() >= 5 && s.starts_with(b"<<") && {
1795 if s[pos] == b'-' {
1796 strip_tab = true;
1797 pos += 1;
1798 }
1799 if matches!(s[pos], b'"' | b'\'') {
1800 quote = Some(s[pos]);
1801 pos += 1;
1802 }
1803 s[pos].is_ascii_alphanumeric()
1805 } {
1806 *s = &s[pos..];
1807 let delim_start = p.text.len() - s.len();
1808 while let Some((&b, s_next)) = s.split_first() {
1810 if b.is_ascii_alphanumeric() {
1811 *s = s_next;
1812 continue;
1813 }
1814 break;
1815 }
1816 let delim = &p.text.as_bytes()[delim_start..p.text.len() - s.len()];
1817 if let Some(quote) = quote {
1818 if let Some((&b, s_next)) = s.split_first() {
1819 if b != quote {
1820 return Err(ErrorKind::ExpectedOwned(
1821 format!("quote ({}), but found '{}'", quote as char, b as char),
1822 p.text.len() - s.len(),
1823 ));
1824 }
1825 *s = s_next;
1826 } else {
1827 return Err(ErrorKind::ExpectedOwned(
1828 format!("quote ({}), but reached eof", quote as char),
1829 p.text.len() - s.len(),
1830 ));
1831 }
1832 }
1833 let arguments_start = p.text.len() - s.len();
1835 skip_this_line(s, p.escape_byte);
1836 let end = p.text.len() - s.len();
1837 let arguments = p.text[arguments_start..end].trim_ascii_end();
1838 let here_doc = if strip_tab {
1839 let (here_doc, span) = collect_here_doc_strip_tab(s, p.text, p.escape_byte, delim)?;
1840 HereDoc { span, expand: quote.is_none(), value: here_doc }
1841 } else {
1842 let (here_doc, span) = collect_here_doc_no_strip_tab(s, p.text, p.escape_byte, delim)?;
1843 HereDoc { span, expand: quote.is_none(), value: here_doc.into() }
1844 };
1845 return Ok(Instruction::Run(RunInstruction {
1846 run: instruction,
1847 options,
1848 arguments: Command::Shell(Spanned {
1849 span: arguments_start..arguments_start + arguments.len(),
1850 value: arguments,
1851 }),
1852 here_docs: vec![here_doc],
1854 }));
1855 }
1856
1857 let arguments_start = p.text.len() - s.len();
1858 skip_this_line(s, p.escape_byte);
1859 let end = p.text.len() - s.len();
1860 let arguments = p.text[arguments_start..end].trim_ascii_end();
1861 Ok(Instruction::Run(RunInstruction {
1862 run: instruction,
1863 options,
1864 arguments: Command::Shell(Spanned {
1865 span: arguments_start..arguments_start + arguments.len(),
1866 value: arguments,
1867 }),
1868 here_docs: vec![],
1869 }))
1870}
1871
1872#[inline]
1873fn parse_shell<'a>(
1874 p: &mut ParseIter<'a>,
1875 s: &mut &'a [u8],
1876 instruction: Keyword,
1877) -> Result<Instruction<'a>, ErrorKind> {
1878 debug_assert!(token_slow(
1879 &mut p.text[instruction.span.clone()].as_bytes(),
1880 b"SHELL",
1881 p.escape_byte,
1882 ));
1883 if !is_maybe_json(s) {
1884 return Err(ErrorKind::Expected("JSON array", p.text.len() - s.len()));
1885 }
1886 match parse_json_array::<SmallVec<[_; 4]>>(s, p.text, p.escape_byte) {
1887 Ok((arguments, _array_span)) => {
1888 if !s.is_empty() {
1889 *s = &s[1..];
1890 }
1891 if arguments.is_empty() {
1892 return Err(ErrorKind::AtLeastOneArgument {
1893 instruction_start: instruction.span.start,
1894 });
1895 }
1896 Ok(Instruction::Shell(ShellInstruction { shell: instruction, arguments }))
1897 }
1898 Err(array_start) => Err(ErrorKind::Json { arguments_start: array_start }),
1899 }
1900}
1901
1902#[inline]
1903fn parse_stopsignal<'a>(
1904 p: &mut ParseIter<'a>,
1905 s: &mut &'a [u8],
1906 instruction: Keyword,
1907) -> Result<Instruction<'a>, ErrorKind> {
1908 debug_assert!(token_slow(
1909 &mut p.text[instruction.span.clone()].as_bytes(),
1910 b"STOPSIGNAL",
1911 p.escape_byte,
1912 ));
1913 let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1915 if arguments.value.is_empty() {
1916 return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1917 }
1918 Ok(Instruction::Stopsignal(StopsignalInstruction { stopsignal: instruction, arguments }))
1919}
1920
1921#[inline]
1922fn parse_user<'a>(
1923 p: &mut ParseIter<'a>,
1924 s: &mut &'a [u8],
1925 instruction: Keyword,
1926) -> Result<Instruction<'a>, ErrorKind> {
1927 debug_assert!(token_slow(
1928 &mut p.text[instruction.span.clone()].as_bytes(),
1929 b"USER",
1930 p.escape_byte,
1931 ));
1932 let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1934 if arguments.value.is_empty() {
1935 return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1936 }
1937 Ok(Instruction::User(UserInstruction { user: instruction, arguments }))
1938}
1939
1940#[inline]
1941fn parse_volume<'a>(
1942 p: &mut ParseIter<'a>,
1943 s: &mut &'a [u8],
1944 instruction: Keyword,
1945) -> Result<Instruction<'a>, ErrorKind> {
1946 debug_assert!(token_slow(
1947 &mut p.text[instruction.span.clone()].as_bytes(),
1948 b"VOLUME",
1949 p.escape_byte,
1950 ));
1951 if is_maybe_json(s) {
1952 let mut tmp = *s;
1953 if let Ok((arguments, array_span)) = parse_json_array(&mut tmp, p.text, p.escape_byte) {
1954 debug_assert!(is_line_end(tmp.first()));
1955 if tmp.is_empty() {
1956 *s = &[];
1957 } else {
1958 *s = &tmp[1..];
1959 }
1960 return Ok(Instruction::Volume(VolumeInstruction {
1962 volume: instruction,
1963 arguments: JsonOrStringArray::Json(Spanned { span: array_span, value: arguments }),
1964 }));
1965 }
1966 }
1967 let arguments: SmallVec<[_; 1]> =
1968 collect_space_separated_unescaped_consume_line(s, p.text, p.escape_byte);
1969 if arguments.is_empty() {
1970 return Err(ErrorKind::AtLeastOneArgument { instruction_start: instruction.span.start });
1972 }
1973 Ok(Instruction::Volume(VolumeInstruction {
1974 volume: instruction,
1975 arguments: JsonOrStringArray::String(arguments),
1976 }))
1977}
1978
1979#[inline]
1980fn parse_workdir<'a>(
1981 p: &mut ParseIter<'a>,
1982 s: &mut &'a [u8],
1983 instruction: Keyword,
1984) -> Result<Instruction<'a>, ErrorKind> {
1985 debug_assert!(token_slow(
1986 &mut p.text[instruction.span.clone()].as_bytes(),
1987 b"WORKDIR",
1988 p.escape_byte,
1989 ));
1990 let arguments = collect_non_line_unescaped_consume_line(s, p.text, p.escape_byte);
1992 if arguments.value.is_empty() {
1993 return Err(ErrorKind::ExactlyOneArgument { instruction_start: instruction.span.start });
1994 }
1995 Ok(Instruction::Workdir(WorkdirInstruction { workdir: instruction, arguments }))
1996}
1997
// Byte-class bits stored in `TABLE`. One byte may carry several of them.

/// Line terminator: `\n` or `\r`.
const LINE: u8 = 1 << 0;
/// Horizontal space: ` ` or `\t`.
const SPACE: u8 = 1 << 1;
/// Any byte that is either `SPACE` or `LINE`.
const WHITESPACE: u8 = 1 << 2;
/// Comment introducer: `#`.
const COMMENT: u8 = 1 << 3;
/// Double quote: `"`.
const DOUBLE_QUOTE: u8 = 1 << 4;
/// A byte that can be the escape character: `\` or `` ` ``.
const POSSIBLE_ESCAPE: u8 = 1 << 5;
/// Equals sign: `=`.
const EQ: u8 = 1 << 6;

/// Lookup table mapping every byte value to its class bits (0 for ordinary bytes).
static TABLE: [u8; 256] = {
    let mut table = [0; 256];
    let mut byte = 0usize;
    while byte < 256 {
        table[byte] = match byte as u8 {
            b' ' | b'\t' => WHITESPACE | SPACE,
            b'\n' | b'\r' => WHITESPACE | LINE,
            b'#' => COMMENT,
            b'"' => DOUBLE_QUOTE,
            b'\\' | b'`' => POSSIBLE_ESCAPE,
            b'=' => EQ,
            _ => 0,
        };
        byte += 1;
    }
    table
};
2036
/// The three bytes `EF BB BF`, i.e. the UTF-8 encoding of the byte order mark.
const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";
2038
/// Minimal "create empty, then append" interface that lets the collecting
/// helpers be generic over the container they fill.
trait Store<T>: Sized {
    /// Creates an empty store.
    fn new() -> Self;
    /// Appends `val` to the store.
    fn push(&mut self, val: T);
}
impl<T> Store<T> for Vec<T> {
    #[inline]
    fn new() -> Self {
        // Named explicitly so it is clear this is the inherent constructor.
        Vec::new()
    }
    #[inline]
    fn push(&mut self, val: T) {
        Vec::push(self, val);
    }
}
2053impl<T, const N: usize> Store<T> for SmallVec<[T; N]> {
2054 #[inline]
2055 fn new() -> Self {
2056 Self::new()
2057 }
2058 #[inline]
2059 fn push(&mut self, val: T) {
2060 self.push(val);
2061 }
2062}
2063impl<'a, const N: usize> Store<UnescapedString<'a>>
2064 for (SmallVec<[Source<'a>; N]>, Option<UnescapedString<'a>>)
2065{
2066 #[inline]
2067 fn new() -> Self {
2068 (SmallVec::new(), None)
2069 }
2070 #[inline]
2071 fn push(&mut self, val: UnescapedString<'a>) {
2072 if let Some(val) = self.1.replace(val) {
2073 self.0.push(Source::Path(val));
2074 }
2075 }
2076}
2077
2078#[inline]
2079fn parse_options<'a, S: Store<Flag<'a>>>(s: &mut &[u8], start: &'a str, escape_byte: u8) -> S {
2080 let mut options = S::new();
2081 'outer: loop {
2082 let Some((&b'-', mut s_next)) = s.split_first() else {
2083 break;
2084 };
2085 loop {
2086 let Some((&b, s_next_next)) = s_next.split_first() else {
2087 break 'outer;
2088 };
2089 if b == b'-' {
2090 s_next = s_next_next;
2091 break;
2092 }
2093 if skip_line_escape(&mut s_next, b, s_next_next, escape_byte) {
2094 skip_line_escape_followup(&mut s_next, escape_byte);
2095 continue;
2096 }
2097 break 'outer;
2098 }
2099 let flag_start = start.len() - s.len();
2100 *s = s_next;
2101 let name = collect_until_unescaped::<{ WHITESPACE | EQ }>(s, start, escape_byte);
2102 let Some((&b'=', s_next)) = s.split_first() else {
2103 options.push(Flag { flag_start, name, value: None });
2104 skip_spaces(s, escape_byte);
2105 continue;
2106 };
2107 *s = s_next;
2108 let value = collect_non_whitespace_unescaped(s, start, escape_byte);
2109 options.push(Flag { flag_start, name, value: Some(value) });
2110 skip_spaces(s, escape_byte);
2111 }
2112 options
2113}
2114
2115fn parse_json_array<'a, S: Store<UnescapedString<'a>>>(
2116 s: &mut &[u8],
2117 start: &'a str,
2118 escape_byte: u8,
2119) -> Result<(S, Span), usize> {
2120 debug_assert_eq!(s.first(), Some(&b'['));
2121 debug_assert_ne!(s.get(1), Some(&b'['));
2122 let mut res = S::new();
2123 let array_start = start.len() - s.len();
2124 *s = &s[1..];
2125 skip_spaces(s, escape_byte);
2126 let (&b, s_next) = s.split_first().ok_or(array_start)?;
2127 match b {
2128 b'"' => {
2129 *s = s_next;
2130 loop {
2131 let full_word_start = start.len() - s.len();
2132 let mut word_start = full_word_start;
2133 let mut buf = String::new();
2134 loop {
2135 let (&b, s_next) = s.split_first().ok_or(array_start)?;
2136 if TABLE[b as usize] & (LINE | DOUBLE_QUOTE | POSSIBLE_ESCAPE) == 0 {
2137 *s = s_next;
2138 continue;
2139 }
2140 match b {
2141 b'"' => break,
2142 b'\n' | b'\r' => return Err(array_start),
2143 _ => {}
2144 }
2145 let word_end = start.len() - s.len();
2146 if skip_line_escape(s, b, s_next, escape_byte) {
2147 skip_line_escape_followup(s, escape_byte);
2148 buf.push_str(&start[word_start..word_end]);
2150 word_start = start.len() - s.len();
2151 continue;
2152 }
2153 if b == b'\\' {
2154 let word_end = start.len() - s.len();
2156 buf.push_str(&start[word_start..word_end]);
2157 *s = s_next;
2158 let (new, new_start) = match *s.first().ok_or(array_start)? {
2159 b @ (b'"' | b'\\' | b'/') => (b as char, 1),
2160 b'b' => ('\x08', 1),
2161 b'f' => ('\x0c', 1),
2162 b'n' => ('\n', 1),
2163 b'r' => ('\r', 1),
2164 b't' => ('\t', 1),
2165 b'u' => (parse_json_hex_escape(s, array_start)?, 5),
2166 _ => return Err(array_start), };
2168 buf.push(new);
2169 *s = &s[new_start..];
2170 word_start = start.len() - s.len();
2171 continue;
2172 }
2173 *s = s_next;
2174 }
2175 let word_end = start.len() - s.len();
2176 let value = if buf.is_empty() {
2177 Cow::Borrowed(&start[word_start..word_end])
2179 } else {
2180 buf.push_str(&start[word_start..word_end]);
2181 Cow::Owned(buf)
2182 };
2183 res.push(UnescapedString { span: full_word_start..word_end, value });
2184 *s = &s[1..]; skip_spaces(s, escape_byte);
2186 let (&b, s_next) = s.split_first().ok_or(array_start)?;
2187 match b {
2188 b',' => {
2189 *s = s_next;
2190 skip_spaces(s, escape_byte);
2191 let (&b, s_next) = s.split_first().ok_or(array_start)?;
2192 if b == b'"' {
2193 *s = s_next;
2194 continue;
2195 }
2196 return Err(array_start);
2197 }
2198 b']' => {
2199 *s = s_next;
2200 break;
2201 }
2202 _ => return Err(array_start),
2203 }
2204 }
2205 }
2206 b']' => *s = s_next,
2207 _ => return Err(array_start),
2208 }
2209 let array_end = start.len() - s.len();
2210 skip_spaces(s, escape_byte);
2211 if !is_line_end(s.first()) {
2212 return Err(array_start);
2213 }
2214 Ok((res, array_start..array_end))
2215}
2216#[cold]
2218fn parse_json_hex_escape(s: &mut &[u8], array_start: usize) -> Result<char, usize> {
2219 fn decode_hex_escape(s: &mut &[u8], array_start: usize) -> Result<u16, usize> {
2220 if s.len() < 4 {
2221 return Err(array_start); }
2223
2224 let mut n = 0;
2225 for _ in 0..4 {
2226 let ch = decode_hex_val(s[0]);
2227 *s = &s[1..];
2228 match ch {
2229 None => return Err(array_start), Some(val) => {
2231 n = (n << 4) + val;
2232 }
2233 }
2234 }
2235 Ok(n)
2236 }
2237
2238 fn decode_hex_val(val: u8) -> Option<u16> {
2239 let n = HEX_DECODE_TABLE[val as usize] as u16;
2240 if n == u8::MAX as u16 { None } else { Some(n) }
2241 }
2242
2243 let c = match decode_hex_escape(s, array_start)? {
2244 _n @ 0xDC00..=0xDFFF => return Err(array_start), n1 @ 0xD800..=0xDBFF => {
2251 if s.first() == Some(&b'\\') {
2252 *s = &s[1..];
2253 } else {
2254 return Err(array_start); }
2256
2257 if s.first() == Some(&b'u') {
2258 *s = &s[1..];
2259 } else {
2260 return Err(array_start); }
2262
2263 let n2 = decode_hex_escape(s, array_start)?;
2264
2265 if n2 < 0xDC00 || n2 > 0xDFFF {
2266 return Err(array_start); }
2268
2269 let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
2270
2271 match char::from_u32(n) {
2272 Some(c) => c,
2273 None => return Err(array_start), }
2275 }
2276
2277 n => char::from_u32(n as u32).unwrap(),
2280 };
2281 Ok(c)
2282}
/// Exercises `parse_json_array` on well-formed and malformed inputs, checking
/// both the result and where the cursor `s` is left.
#[allow(clippy::needless_raw_string_hashes)]
#[test]
fn test_parse_json_array() {
    // Empty array.
    let t = r#"[]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
    assert_eq!(s, b"");
    // Empty array with inner space.
    let t = r#"[ ]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
    assert_eq!(s, b"");
    // Single element.
    let t = r#"["abc"]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
        span: 2..5,
        value: "abc".into()
    }]);
    assert_eq!(s, b"");
    // Several elements with spaces around the separators. The literal must
    // have one space before and two after the second comma for `de` to sit at
    // 14..16. The cursor stops at the newline.
    let t = "[\"ab\",\"c\" ,  \"de\" ] \n";
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[
        UnescapedString { span: 2..4, value: "ab".into() },
        UnescapedString { span: 7..8, value: "c".into() },
        UnescapedString { span: 14..16, value: "de".into() },
    ]);
    assert_eq!(s, b"\n");
    // Every single-character JSON escape.
    let t = "[\"a\\\"\\\\\\/\\b\\f\\n\\r\\tbc\"]";
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
        span: 2..21,
        value: "a\"\\/\x08\x0c\n\r\tbc".into()
    }]);
    assert_eq!(s, b"");

    // Single-quoted strings are not JSON.
    let t = r#"['abc']"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"'abc']"#);
    // Trailing comma.
    let t = r#"["abc",]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"]"#);
    // Content after the closing bracket.
    let t = r#"["abc"] c"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"c"#);
    // Invalid escape sequence.
    let t = "[\"ab\\c\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"c\"]");
}
2344
2345#[inline]
2348fn skip_spaces_no_escape(s: &mut &[u8]) -> bool {
2349 let start = *s;
2350 while let Some((&b, s_next)) = s.split_first() {
2351 if TABLE[b as usize] & SPACE != 0 {
2352 *s = s_next;
2353 continue;
2354 }
2355 break;
2356 }
2357 start.len() != s.len()
2358}
2359#[inline]
2362fn skip_spaces(s: &mut &[u8], escape_byte: u8) -> bool {
2363 let mut has_space = false;
2364 while let Some((&b, s_next)) = s.split_first() {
2365 let t = TABLE[b as usize];
2366 if t & (SPACE | POSSIBLE_ESCAPE) != 0 {
2367 if t & SPACE != 0 {
2368 *s = s_next;
2369 has_space = true;
2370 continue;
2371 }
2372 if skip_line_escape(s, b, s_next, escape_byte) {
2373 skip_line_escape_followup(s, escape_byte);
2374 continue;
2375 }
2376 }
2377 break;
2378 }
2379 has_space
2380}
2381#[inline]
2384fn spaces_or_line_end(s: &mut &[u8], escape_byte: u8) -> bool {
2385 let mut has_space = false;
2386 loop {
2387 let Some((&b, s_next)) = s.split_first() else { return true };
2388 {
2389 let t = TABLE[b as usize];
2390 if t & (WHITESPACE | POSSIBLE_ESCAPE) != 0 {
2391 if t & SPACE != 0 {
2392 *s = s_next;
2393 has_space = true;
2394 continue;
2395 }
2396 if t & LINE != 0 {
2397 return true;
2398 }
2399 if skip_line_escape(s, b, s_next, escape_byte) {
2400 skip_line_escape_followup(s, escape_byte);
2401 continue;
2402 }
2403 }
2404 break;
2405 }
2406 }
2407 has_space
2408}
2409
2410#[inline]
2411fn skip_comments_and_whitespaces(s: &mut &[u8], escape_byte: u8) {
2412 while let Some((&b, s_next)) = s.split_first() {
2413 let t = TABLE[b as usize];
2414 if t & (WHITESPACE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2415 if t & WHITESPACE != 0 {
2416 *s = s_next;
2417 continue;
2418 }
2419 if t & COMMENT != 0 {
2420 *s = s_next;
2421 skip_this_line_no_escape(s);
2422 continue;
2423 }
2424 if skip_line_escape(s, b, s_next, escape_byte) {
2425 skip_line_escape_followup(s, escape_byte);
2426 continue;
2427 }
2428 }
2429 break;
2430 }
2431}
2432
/// Returns `true` if `b` is `\n`, `\r`, or absent (end of input).
#[inline]
fn is_line_end(b: Option<&u8>) -> bool {
    b.map_or(true, |&byte| byte == b'\n' || byte == b'\r')
}
/// Returns `true` if `s` starts with a single `[`, i.e. it might be a JSON
/// array. Input starting with `[[` is rejected.
#[inline]
fn is_maybe_json(s: &[u8]) -> bool {
    match s {
        [b'[', b'[', ..] => false,
        [b'[', ..] => true,
        _ => false,
    }
}
2443
2444#[inline]
2445fn collect_here_doc_no_strip_tab<'a>(
2446 s: &mut &[u8],
2447 start: &'a str,
2448 _escape_byte: u8,
2449 delim: &[u8],
2450) -> Result<(&'a str, Span), ErrorKind> {
2451 let here_doc_start = start.len() - s.len();
2452 loop {
2453 if s.len() < delim.len() {
2454 return Err(ErrorKind::ExpectedOwned(
2455 str::from_utf8(delim).unwrap().to_owned(),
2456 start.len() - s.len(),
2457 ));
2458 }
2459 if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
2460 break;
2461 }
2462 skip_this_line_no_escape(s);
2463 }
2464 let end = start.len() - s.len();
2465 *s = &s[delim.len()..];
2466 if !s.is_empty() {
2467 *s = &s[1..];
2468 }
2469 let span = here_doc_start..end;
2470 Ok((&start[span.clone()], span))
2471}
2472#[inline]
2473fn collect_here_doc_strip_tab<'a>(
2474 s: &mut &[u8],
2475 start: &'a str,
2476 _escape_byte: u8,
2477 delim: &[u8],
2478) -> Result<(Cow<'a, str>, Span), ErrorKind> {
2479 let here_doc_start = start.len() - s.len();
2480 let mut current_start = here_doc_start;
2481 let mut res = String::new();
2482 loop {
2483 if s.len() < delim.len() {
2484 return Err(ErrorKind::ExpectedOwned(
2485 str::from_utf8(delim).unwrap().to_owned(),
2486 start.len() - s.len(),
2487 ));
2488 }
2489 if let Some((&b'\t', s_next)) = s.split_first() {
2490 let end = start.len() - s.len();
2491 res.push_str(&start[current_start..end]);
2492 *s = s_next;
2493 while let Some((&b'\t', s_next)) = s.split_first() {
2494 *s = s_next;
2495 }
2496 current_start = start.len() - s.len();
2497 }
2498 if s.starts_with(delim) && is_line_end(s.get(delim.len())) {
2499 break;
2500 }
2501 skip_this_line_no_escape(s);
2502 }
2503 let end = start.len() - s.len();
2504 *s = &s[delim.len()..];
2505 if !s.is_empty() {
2506 *s = &s[1..];
2507 }
2508 let span = here_doc_start..end;
2509 if here_doc_start == current_start {
2510 Ok((Cow::Borrowed(&start[span.clone()]), span))
2511 } else {
2512 res.push_str(&start[current_start..end]);
2513 Ok((Cow::Owned(res), span))
2514 }
2515}
2516#[inline]
2518fn collect_space_separated_unescaped_consume_line<'a, S: Store<UnescapedString<'a>>>(
2519 s: &mut &[u8],
2520 start: &'a str,
2521 escape_byte: u8,
2522) -> S {
2523 let mut res = S::new();
2524 loop {
2525 let val = collect_non_whitespace_unescaped(s, start, escape_byte);
2526 if !val.value.is_empty() {
2527 res.push(val);
2528 if skip_spaces(s, escape_byte) {
2529 continue;
2530 }
2531 }
2532 debug_assert!(is_line_end(s.first()));
2533 if !s.is_empty() {
2534 *s = &s[1..];
2535 }
2536 break;
2537 }
2538 res
2539}
/// Collects one word: everything up to the next byte classified as
/// `WHITESPACE` (or EOF), with line continuations removed.
///
/// This is `collect_until_unescaped` with `WHITESPACE` as the stop mask.
#[inline]
fn collect_non_whitespace_unescaped<'a>(
    s: &mut &[u8],
    start: &'a str,
    escape_byte: u8,
) -> UnescapedString<'a> {
    collect_until_unescaped::<WHITESPACE>(s, start, escape_byte)
}
2548#[inline]
2549fn collect_non_line_unescaped_consume_line<'a>(
2550 s: &mut &[u8],
2551 start: &'a str,
2552 escape_byte: u8,
2553) -> UnescapedString<'a> {
2554 let mut val = collect_until_unescaped::<LINE>(s, start, escape_byte);
2555 debug_assert!(is_line_end(s.first()));
2556 if !s.is_empty() {
2557 *s = &s[1..];
2558 }
2559 match &mut val.value {
2561 Cow::Borrowed(v) => {
2562 while let Some(b' ' | b'\t') = v.as_bytes().last() {
2563 *v = &v[..v.len() - 1];
2564 val.span.end -= 1;
2565 }
2566 }
2567 Cow::Owned(v) => {
2568 while let Some(b' ' | b'\t') = v.as_bytes().last() {
2569 v.pop();
2570 val.span.end -= 1;
2571 }
2572 }
2573 }
2574 val
2575}
2576#[inline]
2577fn collect_until_unescaped<'a, const UNTIL_MASK: u8>(
2578 s: &mut &[u8],
2579 start: &'a str,
2580 escape_byte: u8,
2581) -> UnescapedString<'a> {
2582 let full_word_start = start.len() - s.len();
2583 let mut word_start = full_word_start;
2584 let mut buf = String::new();
2585 while let Some((&b, s_next)) = s.split_first() {
2586 let t = TABLE[b as usize];
2587 if t & (UNTIL_MASK | POSSIBLE_ESCAPE) != 0 {
2588 if t & UNTIL_MASK != 0 {
2589 break;
2590 }
2591 let word_end = start.len() - s.len();
2592 if skip_line_escape(s, b, s_next, escape_byte) {
2593 skip_line_escape_followup(s, escape_byte);
2594 buf.push_str(&start[word_start..word_end]);
2595 word_start = start.len() - s.len();
2596 continue;
2597 }
2598 }
2599 *s = s_next;
2600 }
2601 let word_end = start.len() - s.len();
2602 let value = if buf.is_empty() {
2603 Cow::Borrowed(&start[word_start..word_end])
2605 } else {
2606 buf.push_str(&start[word_start..word_end]);
2607 Cow::Owned(buf)
2608 };
2609 UnescapedString { span: full_word_start..word_end, value }
2610}
2611
2612#[inline]
2615fn skip_non_whitespace_no_escape(s: &mut &[u8]) -> bool {
2616 let start = *s;
2617 while let Some((&b, s_next)) = s.split_first() {
2618 if TABLE[b as usize] & WHITESPACE != 0 {
2619 break;
2620 }
2621 *s = s_next;
2622 }
2623 start.len() != s.len()
2624}
2625#[inline]
2644fn skip_line_escape<'a>(s: &mut &'a [u8], b: u8, s_next: &'a [u8], escape_byte: u8) -> bool {
2645 if b == escape_byte {
2646 if let Some((&b, mut s_next)) = s_next.split_first() {
2647 if b == b'\n' {
2648 *s = s_next;
2649 return true;
2650 }
2651 if b == b'\r' {
2652 if s_next.first() == Some(&b'\n') {
2653 *s = &s_next[1..];
2654 } else {
2655 *s = s_next;
2656 }
2657 return true;
2658 }
2659 if TABLE[b as usize] & SPACE != 0 {
2662 skip_spaces_no_escape(&mut s_next);
2663 if let Some((&b, s_next)) = s_next.split_first() {
2664 if b == b'\n' {
2665 *s = s_next;
2666 return true;
2667 }
2668 if b == b'\r' {
2669 if s_next.first() == Some(&b'\n') {
2670 *s = &s_next[1..];
2671 } else {
2672 *s = s_next;
2673 }
2674 return true;
2675 }
2676 }
2677 }
2678 }
2679 }
2680 false
2681}
2682#[inline]
2683fn skip_line_escape_followup(s: &mut &[u8], _escape_byte: u8) {
2684 while let Some((&b, mut s_next)) = s.split_first() {
2685 let t = TABLE[b as usize];
2686 if t & (WHITESPACE | COMMENT) != 0 {
2687 if t & SPACE != 0 {
2688 skip_spaces_no_escape(&mut s_next);
2690 if let Some((&b, s_next)) = s_next.split_first() {
2691 let t = TABLE[b as usize];
2692 if t & (COMMENT | LINE) != 0 {
2693 *s = s_next;
2695 if t & COMMENT != 0 {
2696 skip_this_line_no_escape(s);
2697 }
2698 continue;
2699 }
2700 }
2701 } else {
2702 *s = s_next;
2704 if t & COMMENT != 0 {
2705 skip_this_line_no_escape(s);
2706 }
2707 continue;
2708 }
2709 }
2710 break;
2711 }
2712}
2713
2714#[inline]
2715fn skip_this_line_no_escape(s: &mut &[u8]) {
2716 while let Some((&b, s_next)) = s.split_first() {
2717 *s = s_next;
2718 if TABLE[b as usize] & LINE != 0 {
2719 break;
2720 }
2721 }
2722}
2723#[inline]
2725fn skip_this_line(s: &mut &[u8], escape_byte: u8) {
2726 let mut has_space_only = 0;
2727 while let Some((&b, s_next)) = s.split_first() {
2728 let t = TABLE[b as usize];
2729 if t & (LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2730 if t & LINE != 0 {
2731 *s = s_next;
2732 break;
2733 }
2734 if has_space_only != 0 && t & COMMENT != 0 {
2735 *s = s_next;
2736 skip_this_line_no_escape(s);
2737 continue;
2738 }
2739 if skip_line_escape(s, b, s_next, escape_byte) {
2740 skip_line_escape_followup(s, escape_byte);
2741 has_space_only = SPACE;
2742 continue;
2743 }
2744 }
2745 has_space_only &= t;
2746 *s = s_next;
2747 }
2748}
2749
2750#[inline(always)]
2751fn token(s: &mut &[u8], token: &'static [u8]) -> bool {
2752 let matched = starts_with_ignore_ascii_case(s, token);
2753 if matched {
2754 *s = &s[token.len()..];
2755 true
2756 } else {
2757 false
2758 }
2759}
2760#[cold]
2761fn token_slow(s: &mut &[u8], mut token: &'static [u8], escape_byte: u8) -> bool {
2762 debug_assert!(!token.is_empty() && token.iter().all(|&n| n & TO_UPPER8 == n));
2763 if s.len() < token.len() {
2764 return false;
2765 }
2766 let mut tmp = *s;
2767 while let Some((&b, tmp_next)) = tmp.split_first() {
2768 if b & TO_UPPER8 == token[0] {
2769 tmp = tmp_next;
2770 token = &token[1..];
2771 if token.is_empty() {
2772 *s = tmp;
2773 return true;
2774 }
2775 continue;
2776 }
2777 if skip_line_escape(&mut tmp, b, tmp_next, escape_byte) {
2778 skip_line_escape_followup(&mut tmp, escape_byte);
2779 continue;
2780 }
2781 break;
2782 }
2783 false
2784}
2785
/// Mask that clears bit 5 of a byte, mapping ASCII lowercase letters to uppercase.
const TO_UPPER8: u8 = 0xDF;
/// `TO_UPPER8` replicated into each of the eight bytes of a `u64`.
const TO_UPPER64: u64 = u64::from_ne_bytes([TO_UPPER8; 8]);

/// Returns `true` if `s` starts with `needle`, comparing with bit 5 of every byte of `s`
/// cleared (which makes ASCII letters compare case-insensitively).
///
/// `needle` must be non-empty and unchanged by `TO_UPPER8` (asserted in debug builds).
/// The comparison is done eight bytes at a time; a final partial chunk is zero-padded on
/// both sides before being compared.
// NOTE(review): `inline(always)` is presumably there so that constant needles get folded
// at each call site - confirm before relaxing it.
#[inline(always)]
fn starts_with_ignore_ascii_case(mut s: &[u8], mut needle: &'static [u8]) -> bool {
    debug_assert!(!needle.is_empty() && needle.iter().all(|&n| n & TO_UPPER8 == n));
    if needle.len() > s.len() {
        return false;
    }
    if let [only] = needle {
        return *only == s[0] & TO_UPPER8;
    }
    // Compare all complete 8-byte words.
    while needle.len() >= 8 {
        let (needle_word, needle_rest) = needle.split_at(8);
        let (s_word, s_rest) = s.split_at(8);
        let expected = u64::from_ne_bytes(needle_word.try_into().unwrap());
        let actual = u64::from_ne_bytes(s_word.try_into().unwrap());
        if expected != actual & TO_UPPER64 {
            return false;
        }
        needle = needle_rest;
        s = s_rest;
    }
    if needle.is_empty() {
        return true;
    }
    // Compare the remaining 1..=7 bytes as zero-padded words.
    let tail_len = needle.len();
    let mut actual = [0; 8];
    let mut expected = [0; 8];
    actual[..tail_len].copy_from_slice(&s[..tail_len]);
    expected[..tail_len].copy_from_slice(needle);
    u64::from_ne_bytes(expected) == u64::from_ne_bytes(actual) & TO_UPPER64
}
/// Checks `starts_with_ignore_ascii_case` on needles shorter than, equal to, and longer
/// than one 8-byte word, in upper, lower and mixed case, plus one mismatch per length.
#[test]
fn test_starts_with_ignore_ascii_case() {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // (haystack, needle, expected result)
    let cases: &[(&[u8], &'static [u8], bool)] = &[
        (b"ABC", b"ABC", true),
        (b"abc", b"ABC", true),
        (b"AbC", b"ABC", true),
        (b"ABB", b"ABC", false),
        (b"ABCDEFGH", b"ABCDEFGH", true),
        (b"abcdefgh", b"ABCDEFGH", true),
        (b"AbCdEfGh", b"ABCDEFGH", true),
        (b"ABCDEFGc", b"ABCDEFGH", false),
        (b"ABCDEFGHIJKLMNOPQRSTUVWXYZ", ALPHABET, true),
        (b"abcdefghijklmnopqrstuvwxyz", ALPHABET, true),
        (b"aBcDeFgHiJkLmNoPqRsTuVwXyZ", ALPHABET, true),
        (b"aBcDeFgHiJkLmNoPqRsTuVwXyc", ALPHABET, false),
    ];
    for &(haystack, needle, expected) in cases {
        assert_eq!(
            starts_with_ignore_ascii_case(haystack, needle),
            expected,
            "haystack = {:?}, needle = {:?}",
            haystack,
            needle
        );
    }
}
2854
/// Lookup table from a byte to the value of the hexadecimal digit it encodes.
///
/// `b'0'..=b'9'` map to `0..=9`, and both `b'A'..=b'F'` and `b'a'..=b'f'` map to
/// `10..=15`. Every other byte maps to `u8::MAX`, which marks it as not a hex digit.
static HEX_DECODE_TABLE: [u8; 256] = {
    const INVALID: u8 = u8::MAX;
    let mut table = [INVALID; 256];
    // Decimal digits.
    let mut digit = 0_u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }
    // Letters, in both cases.
    let mut offset = 0_u8;
    while offset < 6 {
        table[(b'A' + offset) as usize] = 10 + offset;
        table[(b'a' + offset) as usize] = 10 + offset;
        offset += 1;
    }
    table
};