1#![no_std]
77#![doc(test(
78 no_crate_inject,
79 attr(allow(
80 dead_code,
81 unused_variables,
82 clippy::undocumented_unsafe_blocks,
83 clippy::unused_trait_names,
84 ))
85))]
86#![forbid(unsafe_code)]
87#![warn(
88 missing_debug_implementations,
90 missing_docs,
91 clippy::alloc_instead_of_core,
92 clippy::exhaustive_enums,
93 clippy::exhaustive_structs,
94 clippy::impl_trait_in_params,
95 clippy::std_instead_of_alloc,
96 clippy::std_instead_of_core,
97 )]
99#![allow(clippy::inline_always)]
100
101extern crate alloc;
102extern crate std;
103
104#[cfg(test)]
105#[path = "gen/tests/assert_impl.rs"]
106mod assert_impl;
107#[cfg(test)]
108#[path = "gen/tests/track_size.rs"]
109mod track_size;
110
111mod error;
112
113use alloc::{borrow::Cow, boxed::Box, string::String, vec, vec::Vec};
114use core::{ops::Range, str};
115use std::collections::HashMap;
116
117use smallvec::SmallVec;
118
119pub use self::error::Error;
120use self::error::{ErrorKind, Result};
121
122#[allow(clippy::missing_panics_doc)]
124pub fn parse(text: &str) -> Result<Dockerfile<'_>> {
125 let mut p = ParseIter::new(text)?;
126 let mut s = p.s;
127
128 let mut instructions = Vec::with_capacity((p.text.len() / 60).min(1024));
129 let mut stages = Vec::with_capacity(1);
130 let mut named_stages = 0;
131 let mut current_stage = None;
132 while let Some((&b, s_next)) = s.split_first() {
133 let instruction =
134 parse_instruction(&mut p, &mut s, b, s_next).map_err(|e| e.into_error(&p))?;
135 match instruction {
136 Instruction::From(from) => {
137 named_stages += from.as_.is_some() as usize;
138 let new_stage = instructions.len();
139 if let Some(prev_stage) = current_stage.replace(new_stage) {
140 stages.push(prev_stage..new_stage);
141 }
142 instructions.push(Instruction::From(from));
143 }
144 arg @ Instruction::Arg(..) => instructions.push(arg),
145 instruction => {
146 if current_stage.is_none() {
147 return Err(error::expected("FROM", instruction.instruction_span().start)
148 .into_error(&p));
149 }
150 instructions.push(instruction);
151 }
152 }
153 consume_comments_and_whitespaces(&mut s, p.escape_byte);
154 }
155 if let Some(current_stage) = current_stage {
156 stages.push(current_stage..instructions.len());
157 }
158
159 if stages.is_empty() {
160 return Err(error::no_stage().into_error(&p));
162 }
163 let mut stages_by_name = HashMap::with_capacity(named_stages);
167 for (i, stage) in stages.iter().enumerate() {
168 let Instruction::From(from) = &instructions[stage.start] else { unreachable!() };
169 if let Some((_as, name)) = &from.as_ {
170 if let Some(first_occurrence) = stages_by_name.insert(name.value.clone(), i) {
171 let Instruction::From(from) = &instructions[stages[first_occurrence].start] else {
172 unreachable!()
173 };
174 let first_start = from.as_.as_ref().unwrap().1.span.start;
175 let second_start = name.span.start;
176 return Err(error::duplicate_name(first_start, second_start).into_error(&p));
177 }
178 }
179 }
180
181 Ok(Dockerfile { parser_directives: p.parser_directives, instructions, stages, stages_by_name })
182}
183
/// Creates an iterator that parses the instructions of `text` one at a time.
///
/// Construction is delegated to `ParseIter::new`, which strips a leading UTF-8 BOM, parses the
/// parser directives, and skips leading comments and whitespace.
///
/// # Errors
///
/// Returns an error if the parser directives cannot be parsed.
pub fn parse_iter(text: &str) -> Result<ParseIter<'_>> {
    ParseIter::new(text)
}
196
/// A parsed Dockerfile, as produced by [`parse`].
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct Dockerfile<'a> {
    /// Parser directives found at the top of the file.
    pub parser_directives: ParserDirectives<'a>,
    /// All instructions, in the order they were parsed.
    pub instructions: Vec<Instruction<'a>>,
    /// Index ranges into `instructions`, one per stage; each range starts at a `FROM`.
    #[cfg_attr(feature = "serde", serde(skip))]
    stages: Vec<Range<usize>>,
    /// Maps a stage name to the index of that stage in `stages`.
    #[cfg_attr(feature = "serde", serde(skip))]
    stages_by_name: HashMap<Cow<'a, str>, usize>,
}
211impl<'a> Dockerfile<'a> {
212 #[allow(clippy::missing_panics_doc)] #[must_use]
215 pub fn global_args<'b>(&'b self) -> impl ExactSizeIterator<Item = &'b ArgInstruction<'a>> {
216 self.instructions[..self.stages.first().unwrap().start].iter().map(|arg| {
217 let Instruction::Arg(arg) = arg else { unreachable!() };
218 arg
219 })
220 }
221 #[must_use]
223 pub fn stage<'b>(&'b self, name: &str) -> Option<Stage<'a, 'b>> {
224 let i = *self.stages_by_name.get(name)?;
225 let stage = &self.stages[i];
226 let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
227 Some(Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] })
228 }
229 #[must_use]
231 pub fn stages<'b>(&'b self) -> impl ExactSizeIterator<Item = Stage<'a, 'b>> {
232 self.stages.iter().map(move |stage| {
233 let Instruction::From(from) = &self.instructions[stage.start] else { unreachable!() };
234 Stage { from, instructions: &self.instructions[stage.start + 1..stage.end] }
235 })
236 }
237}
/// A borrowed view of one stage of a [`Dockerfile`].
#[derive(Debug)]
#[non_exhaustive]
pub struct Stage<'a, 'b> {
    /// The `FROM` instruction that opens the stage.
    pub from: &'b FromInstruction<'a>,
    /// The instructions that follow `from`, up to the end of the stage.
    pub instructions: &'b [Instruction<'a>],
}
247
/// The parser directives recognised at the top of a Dockerfile.
///
/// Each field is `None` when the directive is absent; all of them are reset to `None` when a
/// directive is repeated.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ParserDirectives<'a> {
    /// The `syntax` directive.
    pub syntax: Option<ParserDirective<&'a str>>,
    /// The `escape` directive; the value is the escape character (`` ` `` or `\`).
    pub escape: Option<ParserDirective<char>>,
    /// The `check` directive.
    pub check: Option<ParserDirective<&'a str>>,
}
/// A single parser directive with a value of type `T`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct ParserDirective<T> {
    /// Offset at which the directive name starts (after the `#` and any whitespace).
    start: usize,
    /// The directive's value together with its span.
    pub value: Spanned<T>,
}
285impl<T> ParserDirective<T> {
286 #[must_use]
291 pub fn span(&self) -> Span {
292 self.start..self.value.span.end
293 }
294}
295
/// A single Dockerfile instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
#[non_exhaustive]
pub enum Instruction<'a> {
    /// An `ADD` instruction.
    Add(AddInstruction<'a>),
    /// An `ARG` instruction.
    Arg(ArgInstruction<'a>),
    /// A `CMD` instruction.
    Cmd(CmdInstruction<'a>),
    /// A `COPY` instruction.
    Copy(CopyInstruction<'a>),
    /// An `ENTRYPOINT` instruction.
    Entrypoint(EntrypointInstruction<'a>),
    /// An `ENV` instruction.
    Env(EnvInstruction<'a>),
    /// An `EXPOSE` instruction.
    Expose(ExposeInstruction<'a>),
    /// A `FROM` instruction.
    From(FromInstruction<'a>),
    /// A `HEALTHCHECK` instruction.
    Healthcheck(HealthcheckInstruction<'a>),
    /// A `LABEL` instruction.
    Label(LabelInstruction<'a>),
    /// A `MAINTAINER` instruction.
    Maintainer(MaintainerInstruction<'a>),
    /// An `ONBUILD` instruction.
    Onbuild(OnbuildInstruction<'a>),
    /// A `RUN` instruction.
    Run(RunInstruction<'a>),
    /// A `SHELL` instruction.
    Shell(ShellInstruction<'a>),
    /// A `STOPSIGNAL` instruction.
    Stopsignal(StopsignalInstruction<'a>),
    /// A `USER` instruction.
    User(UserInstruction<'a>),
    /// A `VOLUME` instruction.
    Volume(VolumeInstruction<'a>),
    /// A `WORKDIR` instruction.
    Workdir(WorkdirInstruction<'a>),
}
341impl Instruction<'_> {
342 fn instruction_span(&self) -> Span {
343 match self {
344 Instruction::Add(instruction) => instruction.add.span.clone(),
345 Instruction::Arg(instruction) => instruction.arg.span.clone(),
346 Instruction::Cmd(instruction) => instruction.cmd.span.clone(),
347 Instruction::Copy(instruction) => instruction.copy.span.clone(),
348 Instruction::Entrypoint(instruction) => instruction.entrypoint.span.clone(),
349 Instruction::Env(instruction) => instruction.env.span.clone(),
350 Instruction::Expose(instruction) => instruction.expose.span.clone(),
351 Instruction::From(instruction) => instruction.from.span.clone(),
352 Instruction::Healthcheck(instruction) => instruction.healthcheck.span.clone(),
353 Instruction::Label(instruction) => instruction.label.span.clone(),
354 Instruction::Maintainer(instruction) => instruction.maintainer.span.clone(),
355 Instruction::Onbuild(instruction) => instruction.onbuild.span.clone(),
356 Instruction::Run(instruction) => instruction.run.span.clone(),
357 Instruction::Shell(instruction) => instruction.shell.span.clone(),
358 Instruction::Stopsignal(instruction) => instruction.stopsignal.span.clone(),
359 Instruction::User(instruction) => instruction.user.span.clone(),
360 Instruction::Volume(instruction) => instruction.volume.span.clone(),
361 Instruction::Workdir(instruction) => instruction.workdir.span.clone(),
362 }
363 }
364}
/// An `ADD` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct AddInstruction<'a> {
    /// The `ADD` keyword.
    pub add: Keyword,
    /// Options (flags) given before the arguments.
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// The sources: every argument except the last one. Never empty.
    pub src: SmallVec<[Source<'a>; 1]>,
    /// The destination: the last argument.
    pub dest: UnescapedString<'a>,
}
/// An `ARG` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ArgInstruction<'a> {
    /// The `ARG` keyword.
    pub arg: Keyword,
    /// The rest of the instruction line with trailing whitespace trimmed. Never empty.
    pub arguments: UnescapedString<'a>,
}
/// A `CMD` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct CmdInstruction<'a> {
    /// The `CMD` keyword.
    pub cmd: Keyword,
    /// The command, in either of the two [`Command`] forms.
    pub arguments: Command<'a>,
}
/// A `COPY` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct CopyInstruction<'a> {
    /// The `COPY` keyword.
    pub copy: Keyword,
    /// Options (flags) given before the arguments.
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// The sources: every argument except the last one. Never empty.
    pub src: SmallVec<[Source<'a>; 1]>,
    /// The destination: the last argument.
    pub dest: UnescapedString<'a>,
}
/// A source argument of an `ADD` or `COPY` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum Source<'a> {
    /// A plain source argument.
    Path(UnescapedString<'a>),
    /// A here-document; produced when a source argument starts with `<<` and names a delimiter.
    HereDoc(HereDoc<'a>),
}
/// An `ENTRYPOINT` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct EntrypointInstruction<'a> {
    /// The `ENTRYPOINT` keyword.
    pub entrypoint: Keyword,
    /// The command, in either of the two [`Command`] forms.
    pub arguments: Command<'a>,
}
/// An `ENV` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct EnvInstruction<'a> {
    /// The `ENV` keyword.
    pub env: Keyword,
    /// The instruction's arguments, kept as a single string.
    pub arguments: UnescapedString<'a>,
}
/// An `EXPOSE` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ExposeInstruction<'a> {
    /// The `EXPOSE` keyword.
    pub expose: Keyword,
    /// The instruction's arguments, one string per argument.
    pub arguments: SmallVec<[UnescapedString<'a>; 1]>,
}
/// A `FROM` instruction; each one opens a new stage.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct FromInstruction<'a> {
    /// The `FROM` keyword.
    pub from: Keyword,
    /// Options (flags) of the instruction.
    pub options: Vec<Flag<'a>>,
    /// The image argument.
    pub image: UnescapedString<'a>,
    /// The optional stage name, paired with the keyword that introduces it.
    pub as_: Option<(Keyword, UnescapedString<'a>)>,
}
/// A `HEALTHCHECK` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct HealthcheckInstruction<'a> {
    /// The `HEALTHCHECK` keyword.
    pub healthcheck: Keyword,
    /// Options (flags) of the instruction.
    pub options: Vec<Flag<'a>>,
    /// The arguments, in one of the [`HealthcheckArguments`] forms.
    pub arguments: HealthcheckArguments<'a>,
}
/// The arguments of a `HEALTHCHECK` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "serde", serde(rename_all = "SCREAMING_SNAKE_CASE"))]
#[non_exhaustive]
pub enum HealthcheckArguments<'a> {
    /// The form that carries a command.
    #[non_exhaustive]
    Cmd {
        /// The keyword introducing the command.
        cmd: Keyword,
        /// The command, in either of the two [`Command`] forms.
        arguments: Command<'a>,
    },
    /// The form that carries only a keyword and no command.
    #[non_exhaustive]
    None {
        /// The keyword itself.
        none: Keyword,
    },
}
/// A `LABEL` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct LabelInstruction<'a> {
    /// The `LABEL` keyword.
    pub label: Keyword,
    /// The instruction's arguments, kept as a single string.
    pub arguments: UnescapedString<'a>,
}
/// A `MAINTAINER` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct MaintainerInstruction<'a> {
    /// The `MAINTAINER` keyword.
    pub maintainer: Keyword,
    /// The maintainer name given as the argument.
    pub name: UnescapedString<'a>,
}
/// An `ONBUILD` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct OnbuildInstruction<'a> {
    /// The `ONBUILD` keyword.
    pub onbuild: Keyword,
    /// The wrapped instruction (boxed because `Instruction` is recursive through this field).
    pub instruction: Box<Instruction<'a>>,
}
/// A `RUN` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct RunInstruction<'a> {
    /// The `RUN` keyword.
    pub run: Keyword,
    /// Options (flags) given before the command.
    pub options: SmallVec<[Flag<'a>; 1]>,
    /// The command, in either of the two [`Command`] forms.
    pub arguments: Command<'a>,
    /// Here-documents collected for this instruction.
    pub here_docs: Vec<HereDoc<'a>>,
}
/// A `SHELL` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct ShellInstruction<'a> {
    /// The `SHELL` keyword.
    pub shell: Keyword,
    /// The instruction's arguments, one string per argument.
    pub arguments: SmallVec<[UnescapedString<'a>; 4]>,
}
/// A `STOPSIGNAL` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct StopsignalInstruction<'a> {
    /// The `STOPSIGNAL` keyword.
    pub stopsignal: Keyword,
    /// The instruction's argument.
    pub arguments: UnescapedString<'a>,
}
/// A `USER` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct UserInstruction<'a> {
    /// The `USER` keyword.
    pub user: Keyword,
    /// The instruction's argument.
    pub arguments: UnescapedString<'a>,
}
/// A `VOLUME` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct VolumeInstruction<'a> {
    /// The `VOLUME` keyword.
    pub volume: Keyword,
    /// The instruction's arguments, given either as a JSON array or as plain strings.
    pub arguments: JsonOrStringArray<'a, 1>,
}
/// A `WORKDIR` instruction.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct WorkdirInstruction<'a> {
    /// The `WORKDIR` keyword.
    pub workdir: Keyword,
    /// The instruction's argument.
    pub arguments: UnescapedString<'a>,
}
805
/// A keyword in the source, such as an instruction name; only its location is stored.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct Keyword {
    // Location of the keyword in the parsed text.
    #[allow(missing_docs)]
    pub span: Span,
}
815
/// An option (flag) of an instruction, with an optional value.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub struct Flag<'a> {
    /// Offset at which the flag starts; used as the start of both `flag_span` and `span`.
    flag_start: usize,
    /// The flag's name.
    pub name: UnescapedString<'a>,
    /// The flag's value, if one was given.
    pub value: Option<UnescapedString<'a>>,
}
837impl Flag<'_> {
838 #[must_use]
843 pub fn flag_span(&self) -> Span {
844 self.flag_start..self.name.span.end
845 }
846 #[must_use]
851 pub fn span(&self) -> Span {
852 match &self.value {
853 Some(v) => self.flag_start..v.span.end,
854 None => self.flag_span(),
855 }
856 }
857}
858
/// A string taken from the source together with its location.
#[derive(Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct UnescapedString<'a> {
    // Location of the string in the parsed text.
    #[allow(missing_docs)]
    pub span: Span,
    // The string contents; presumably borrowed from the input when no unescaping was needed
    // and owned otherwise — TODO confirm against the collectors that build it.
    #[allow(missing_docs)]
    pub value: Cow<'a, str>,
}
870impl UnescapedString<'_> {
871 #[inline]
872 fn trim_end(&mut self) {
873 match &mut self.value {
875 Cow::Borrowed(v) => {
876 while let Some(&b) = v.as_bytes().last() {
877 if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) == 0 {
878 break;
879 }
880 *v = &v[..v.len() - 1];
881 self.span.end -= 1;
882 }
883 }
884 Cow::Owned(v) => {
885 while let Some(&b) = v.as_bytes().last() {
886 if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) == 0 {
887 break;
888 }
889 v.pop();
890 self.span.end -= 1;
891 }
892 }
893 }
894 }
895}
896
/// A command, as used by `CMD`, `ENTRYPOINT`, `RUN` and `HEALTHCHECK`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub enum Command<'a> {
    /// The command given as a list of strings, with the span of the whole list.
    Exec(Spanned<SmallVec<[UnescapedString<'a>; 1]>>),
    /// The command given as a single slice of the source text, with its span.
    Shell(Spanned<&'a str>),
}
914
/// A list of strings written either as a JSON array or as plain strings.
///
/// `N` is the inline capacity of the underlying `SmallVec`.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[allow(clippy::exhaustive_enums)]
pub enum JsonOrStringArray<'a, const N: usize> {
    /// The JSON array form, with the span of the whole array.
    Json(Spanned<SmallVec<[UnescapedString<'a>; N]>>),
    /// The plain string form; each element carries its own span.
    String(SmallVec<[UnescapedString<'a>; N]>),
}
929
/// A here-document.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[non_exhaustive]
pub struct HereDoc<'a> {
    // Location of the here-document in the parsed text.
    #[allow(missing_docs)]
    pub span: Span,
    /// Flag obtained while parsing the here-document delimiter; presumably whether expansion
    /// applies to the body — TODO confirm against `collect_here_doc_delim`.
    pub expand: bool,
    // The here-document's contents.
    #[allow(missing_docs)]
    pub value: Cow<'a, str>,
}
943
/// A value paired with its location in the parsed text.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde_derive::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[allow(clippy::exhaustive_structs)]
pub struct Spanned<T> {
    // Location of `value` in the parsed text.
    #[allow(missing_docs)]
    pub span: Span,
    // The value itself.
    #[allow(missing_docs)]
    pub value: T,
}
955
// A half-open range of byte offsets. Offsets are computed relative to the text held by the
// parser, i.e. after a leading UTF-8 BOM (if any) has been stripped.
#[allow(missing_docs)]
pub type Span = Range<usize>;
958
/// An iterator that parses a Dockerfile one instruction at a time.
///
/// Created by [`parse_iter`].
#[allow(missing_debug_implementations)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ParseIter<'a> {
    /// The text being parsed, with any leading UTF-8 BOM removed.
    text: &'a str,
    /// The not-yet-consumed tail of `text`, as bytes.
    s: &'a [u8],
    /// The escape byte in effect: `\` by default, or `` ` `` if set by the `escape` directive.
    escape_byte: u8,
    /// Whether a `FROM` instruction has been yielded by the iterator so far.
    has_stage: bool,
    /// NOTE(review): not read in the visible part of the file; presumably set while parsing
    /// the instruction nested in `ONBUILD` — confirm.
    in_onbuild: bool,
    /// The parser directives found at the top of the file.
    parser_directives: ParserDirectives<'a>,
}
975impl<'a> ParseIter<'a> {
976 fn new(mut text: &'a str) -> Result<Self> {
977 if text.as_bytes().starts_with(UTF8_BOM) {
979 text = &text[UTF8_BOM.len()..];
980 }
981 let mut p = Self {
982 text,
983 s: text.as_bytes(),
984 escape_byte: DEFAULT_ESCAPE_BYTE,
985 has_stage: false,
986 in_onbuild: false,
987 parser_directives: ParserDirectives {
988 syntax: None,
990 escape: None,
991 check: None,
992 },
993 };
994
995 parse_parser_directives(&mut p).map_err(|e| e.into_error(&p))?;
996
997 consume_comments_and_whitespaces(&mut p.s, p.escape_byte);
1001 Ok(p)
1002 }
1003}
1004impl<'a> Iterator for ParseIter<'a> {
1005 type Item = Result<Instruction<'a>>;
1006 #[inline]
1007 fn next(&mut self) -> Option<Self::Item> {
1008 let p = self;
1009 let mut s = p.s;
1010 if let Some((&b, s_next)) = s.split_first() {
1011 let instruction = match parse_instruction(p, &mut s, b, s_next) {
1012 Ok(i) => i,
1013 Err(e) => return Some(Err(e.into_error(p))),
1014 };
1015 match &instruction {
1016 Instruction::From(..) => {
1017 p.has_stage = true;
1018 }
1019 Instruction::Arg(..) => {}
1020 instruction => {
1021 if !p.has_stage {
1022 return Some(Err(error::expected(
1023 "FROM",
1024 instruction.instruction_span().start,
1025 )
1026 .into_error(p)));
1027 }
1028 }
1029 }
1030 consume_comments_and_whitespaces(&mut s, p.escape_byte);
1031 p.s = s;
1032 return Some(Ok(instruction));
1033 }
1034 if !p.has_stage {
1035 return Some(Err(error::no_stage().into_error(p)));
1037 }
1038 None
1039 }
1040}
1041
/// The escape byte used unless the `escape` parser directive selects `` ` ``.
const DEFAULT_ESCAPE_BYTE: u8 = b'\\';
1043
1044fn parse_parser_directives(p: &mut ParseIter<'_>) -> Result<(), ErrorKind> {
1045 while let Some((&b'#', s_next)) = p.s.split_first() {
1046 p.s = s_next;
1047 consume_whitespaces_no_line_continuation(&mut p.s);
1048 let directive_start = p.text.len() - p.s.len();
1049 if token(&mut p.s, b"SYNTAX") {
1050 consume_whitespaces_no_line_continuation(&mut p.s);
1051 if let Some((&b'=', s_next)) = p.s.split_first() {
1052 p.s = s_next;
1053 if p.parser_directives.syntax.is_some() {
1054 p.parser_directives.syntax = None;
1056 p.parser_directives.escape = None;
1057 p.parser_directives.check = None;
1058 p.escape_byte = DEFAULT_ESCAPE_BYTE;
1059 consume_current_line_no_line_continuation(&mut p.s);
1060 break;
1061 }
1062 consume_whitespaces_no_line_continuation(&mut p.s);
1063 let value_start = p.text.len() - p.s.len();
1064 consume_until_whitespaces_or_line_no_line_continuation(&mut p.s);
1065 let end = p.text.len() - p.s.len();
1066 let value = trim_end(p.text, value_start, end);
1067 p.parser_directives.syntax = Some(ParserDirective {
1068 start: directive_start,
1069 value: Spanned { span: value_start..value_start + value.len(), value },
1070 });
1071 consume_current_line_no_line_continuation(&mut p.s);
1072 continue;
1073 }
1074 } else if token(&mut p.s, b"CHECK") {
1075 consume_whitespaces_no_line_continuation(&mut p.s);
1076 if let Some((&b'=', s_next)) = p.s.split_first() {
1077 p.s = s_next;
1078 if p.parser_directives.check.is_some() {
1079 p.parser_directives.syntax = None;
1081 p.parser_directives.escape = None;
1082 p.parser_directives.check = None;
1083 p.escape_byte = DEFAULT_ESCAPE_BYTE;
1084 consume_current_line_no_line_continuation(&mut p.s);
1085 break;
1086 }
1087 consume_whitespaces_no_line_continuation(&mut p.s);
1088 let value_start = p.text.len() - p.s.len();
1089 consume_until_whitespaces_or_line_no_line_continuation(&mut p.s);
1090 let end = p.text.len() - p.s.len();
1091 let value = trim_end(p.text, value_start, end);
1092 p.parser_directives.check = Some(ParserDirective {
1093 start: directive_start,
1094 value: Spanned { span: value_start..value_start + value.len(), value },
1095 });
1096 consume_current_line_no_line_continuation(&mut p.s);
1097 continue;
1098 }
1099 } else if token(&mut p.s, b"ESCAPE") {
1100 consume_whitespaces_no_line_continuation(&mut p.s);
1101 if let Some((&b'=', s_next)) = p.s.split_first() {
1102 p.s = s_next;
1103 if p.parser_directives.escape.is_some() {
1104 p.parser_directives.syntax = None;
1106 p.parser_directives.escape = None;
1107 p.parser_directives.check = None;
1108 p.escape_byte = DEFAULT_ESCAPE_BYTE;
1109 consume_current_line_no_line_continuation(&mut p.s);
1110 break;
1111 }
1112 consume_whitespaces_no_line_continuation(&mut p.s);
1113 let value_start = p.text.len() - p.s.len();
1114 consume_until_whitespaces_or_line_no_line_continuation(&mut p.s);
1115 let end = p.text.len() - p.s.len();
1116 let value = trim_end(p.text, value_start, end);
1117 match value {
1118 "`" => p.escape_byte = b'`',
1119 "\\" => {}
1120 _ => return Err(error::invalid_escape(value_start)),
1121 }
1122 p.parser_directives.escape = Some(ParserDirective {
1123 start: directive_start,
1124 value: Spanned {
1125 span: value_start..value_start + value.len(),
1126 value: p.escape_byte as char,
1127 },
1128 });
1129 consume_current_line_no_line_continuation(&mut p.s);
1130 continue;
1131 }
1132 }
1133 consume_current_line_no_line_continuation(&mut p.s);
1134 break;
1135 }
1136 Ok(())
1137}
1138
1139#[inline]
1140fn parse_instruction<'a>(
1141 p: &mut ParseIter<'a>,
1142 s: &mut &'a [u8],
1143 b: u8,
1144 s_next: &'a [u8],
1145) -> Result<Instruction<'a>, ErrorKind> {
1146 let instruction_start = p.text.len() - s.len();
1147 *s = s_next;
1148 match b & TO_UPPER8 {
1150 b'A' => {
1151 if token(s, &b"ARG"[1..]) {
1152 let instruction_span = instruction_start..p.text.len() - s.len();
1153 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1154 return parse_arg(p, s, Keyword { span: instruction_span });
1155 }
1156 } else if token(s, &b"ADD"[1..]) {
1157 let instruction_span = instruction_start..p.text.len() - s.len();
1158 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1159 let add = Keyword { span: instruction_span };
1160 let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1161 return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1162 }
1163 } else if token_slow(s, &b"ARG"[1..], p.escape_byte) {
1164 let instruction_span = instruction_start..p.text.len() - s.len();
1165 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1166 return parse_arg(p, s, Keyword { span: instruction_span });
1167 }
1168 } else if token_slow(s, &b"ADD"[1..], p.escape_byte) {
1169 let instruction_span = instruction_start..p.text.len() - s.len();
1170 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1171 let add = Keyword { span: instruction_span };
1172 let (options, src, dest) = parse_add_or_copy(p, s, &add)?;
1173 return Ok(Instruction::Add(AddInstruction { add, options, src, dest }));
1174 }
1175 }
1176 }
1177 b'C' => {
1178 if token(s, &b"COPY"[1..]) {
1179 let instruction_span = instruction_start..p.text.len() - s.len();
1180 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1181 let copy = Keyword { span: instruction_span };
1182 let (options, src, dest) = parse_add_or_copy(p, s, ©)?;
1183 return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1184 }
1185 } else if token(s, &b"CMD"[1..]) {
1186 let instruction_span = instruction_start..p.text.len() - s.len();
1187 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1188 return parse_cmd(p, s, Keyword { span: instruction_span });
1189 }
1190 } else if token_slow(s, &b"COPY"[1..], p.escape_byte) {
1191 let instruction_span = instruction_start..p.text.len() - s.len();
1192 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1193 let copy = Keyword { span: instruction_span };
1194 let (options, src, dest) = parse_add_or_copy(p, s, ©)?;
1195 return Ok(Instruction::Copy(CopyInstruction { copy, options, src, dest }));
1196 }
1197 } else if token_slow(s, &b"CMD"[1..], p.escape_byte) {
1198 let instruction_span = instruction_start..p.text.len() - s.len();
1199 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1200 return parse_cmd(p, s, Keyword { span: instruction_span });
1201 }
1202 }
1203 }
1204 b'E' => {
1205 if token(s, &b"ENV"[1..]) {
1206 let instruction_span = instruction_start..p.text.len() - s.len();
1207 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1208 return parse_env(p, s, Keyword { span: instruction_span });
1209 }
1210 } else if token(s, &b"EXPOSE"[1..]) {
1211 let instruction_span = instruction_start..p.text.len() - s.len();
1212 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1213 return parse_expose(p, s, Keyword { span: instruction_span });
1214 }
1215 } else if token(s, &b"ENTRYPOINT"[1..]) {
1216 let instruction_span = instruction_start..p.text.len() - s.len();
1217 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1218 return parse_entrypoint(p, s, Keyword { span: instruction_span });
1219 }
1220 } else if token_slow(s, &b"ENV"[1..], p.escape_byte) {
1221 let instruction_span = instruction_start..p.text.len() - s.len();
1222 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1223 return parse_env(p, s, Keyword { span: instruction_span });
1224 }
1225 } else if token_slow(s, &b"EXPOSE"[1..], p.escape_byte) {
1226 let instruction_span = instruction_start..p.text.len() - s.len();
1227 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1228 return parse_expose(p, s, Keyword { span: instruction_span });
1229 }
1230 } else if token_slow(s, &b"ENTRYPOINT"[1..], p.escape_byte) {
1231 let instruction_span = instruction_start..p.text.len() - s.len();
1232 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1233 return parse_entrypoint(p, s, Keyword { span: instruction_span });
1234 }
1235 }
1236 }
1237 b'F' => {
1238 cold_path();
1239 if token(s, &b"FROM"[1..]) || token_slow(s, &b"FROM"[1..], p.escape_byte) {
1240 let instruction_span = instruction_start..p.text.len() - s.len();
1241 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1242 return parse_from(p, s, Keyword { span: instruction_span });
1243 }
1244 }
1245 }
1246 b'H' => {
1247 cold_path();
1248 if token(s, &b"HEALTHCHECK"[1..]) || token_slow(s, &b"HEALTHCHECK"[1..], p.escape_byte)
1249 {
1250 let instruction_span = instruction_start..p.text.len() - s.len();
1251 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1252 return parse_healthcheck(p, s, Keyword { span: instruction_span });
1253 }
1254 }
1255 }
1256 b'L' => {
1257 cold_path();
1258 if token(s, &b"LABEL"[1..]) || token_slow(s, &b"LABEL"[1..], p.escape_byte) {
1259 let instruction_span = instruction_start..p.text.len() - s.len();
1260 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1261 return parse_label(p, s, Keyword { span: instruction_span });
1262 }
1263 }
1264 }
1265 b'M' => {
1266 cold_path();
1267 if token(s, &b"MAINTAINER"[1..]) || token_slow(s, &b"MAINTAINER"[1..], p.escape_byte) {
1268 let instruction_span = instruction_start..p.text.len() - s.len();
1269 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1270 return parse_maintainer(p, s, Keyword { span: instruction_span });
1271 }
1272 }
1273 }
1274 b'O' => {
1275 cold_path();
1276 if token(s, &b"ONBUILD"[1..]) || token_slow(s, &b"ONBUILD"[1..], p.escape_byte) {
1277 let instruction_span = instruction_start..p.text.len() - s.len();
1278 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1279 return parse_onbuild(p, s, Keyword { span: instruction_span });
1280 }
1281 }
1282 }
1283 b'R' => {
1284 if token(s, &b"RUN"[1..]) || token_slow(s, &b"RUN"[1..], p.escape_byte) {
1285 let instruction_span = instruction_start..p.text.len() - s.len();
1286 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1287 return parse_run(p, s, Keyword { span: instruction_span });
1288 }
1289 }
1290 }
1291 b'S' => {
1292 cold_path();
1293 if token(s, &b"SHELL"[1..]) {
1294 let instruction_span = instruction_start..p.text.len() - s.len();
1295 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1296 return parse_shell(p, s, Keyword { span: instruction_span });
1297 }
1298 } else if token(s, &b"STOPSIGNAL"[1..]) {
1299 let instruction_span = instruction_start..p.text.len() - s.len();
1300 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1301 return parse_stopsignal(p, s, Keyword { span: instruction_span });
1302 }
1303 } else if token_slow(s, &b"SHELL"[1..], p.escape_byte) {
1304 let instruction_span = instruction_start..p.text.len() - s.len();
1305 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1306 return parse_shell(p, s, Keyword { span: instruction_span });
1307 }
1308 } else if token_slow(s, &b"STOPSIGNAL"[1..], p.escape_byte) {
1309 let instruction_span = instruction_start..p.text.len() - s.len();
1310 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1311 return parse_stopsignal(p, s, Keyword { span: instruction_span });
1312 }
1313 }
1314 }
1315 b'U' => {
1316 cold_path();
1317 if token(s, &b"USER"[1..]) || token_slow(s, &b"USER"[1..], p.escape_byte) {
1318 let instruction_span = instruction_start..p.text.len() - s.len();
1319 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1320 return parse_user(p, s, Keyword { span: instruction_span });
1321 }
1322 }
1323 }
1324 b'V' => {
1325 cold_path();
1326 if token(s, &b"VOLUME"[1..]) || token_slow(s, &b"VOLUME"[1..], p.escape_byte) {
1327 let instruction_span = instruction_start..p.text.len() - s.len();
1328 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1329 return parse_volume(p, s, Keyword { span: instruction_span });
1330 }
1331 }
1332 }
1333 b'W' => {
1334 cold_path();
1335 if token(s, &b"WORKDIR"[1..]) || token_slow(s, &b"WORKDIR"[1..], p.escape_byte) {
1336 let instruction_span = instruction_start..p.text.len() - s.len();
1337 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1338 return parse_workdir(p, s, Keyword { span: instruction_span });
1339 }
1340 }
1341 }
1342 _ => {}
1343 }
1344 Err(error::unknown_instruction(instruction_start))
1345}
1346
1347#[inline]
1348fn parse_arg<'a>(
1349 p: &mut ParseIter<'a>,
1350 s: &mut &'a [u8],
1351 instruction: Keyword,
1352) -> Result<Instruction<'a>, ErrorKind> {
1353 debug_assert!(token_slow(
1354 &mut p.text[instruction.span.clone()].as_bytes(),
1355 b"ARG",
1356 p.escape_byte,
1357 ));
1358 let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1359 arguments.trim_end();
1360 if arguments.value.is_empty() {
1361 return Err(error::at_least_one_argument(instruction.span.start));
1362 }
1363 Ok(Instruction::Arg(ArgInstruction { arg: instruction, arguments }))
1364}
1365
1366#[inline]
1367fn parse_add_or_copy<'a>(
1368 p: &mut ParseIter<'a>,
1369 s: &mut &'a [u8],
1370 instruction: &Keyword,
1371) -> Result<(SmallVec<[Flag<'a>; 1]>, SmallVec<[Source<'a>; 1]>, UnescapedString<'a>), ErrorKind> {
1372 debug_assert!(
1373 token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"ADD", p.escape_byte,)
1374 || token_slow(&mut p.text[instruction.span.clone()].as_bytes(), b"COPY", p.escape_byte,)
1375 );
1376 let options = parse_options(s, p.text, p.escape_byte);
1377 if is_maybe_json(s) {
1378 let mut tmp = *s;
1379 if let Ok(((src, dest), _array_span)) = parse_json_array::<(
1380 SmallVec<[Source<'_>; 1]>,
1381 Option<_>,
1382 )>(&mut tmp, p.text, p.escape_byte)
1383 {
1384 if let Some((&b, s_next)) = tmp.split_first() {
1385 let consumed = consume_newline(b, s, s_next);
1386 debug_assert!(consumed);
1387 } else {
1388 *s = &[];
1389 }
1390 if src.is_empty() {
1391 return Err(error::at_least_two_arguments(instruction.span.start));
1392 }
1393 return Ok((options, src, dest.unwrap()));
1394 }
1395 }
1396 let (mut src, dest) = collect_space_separated_consume_line::<(
1397 SmallVec<[Source<'_>; 1]>,
1398 Option<_>,
1399 )>(s, p.text, p.escape_byte);
1400 if src.is_empty() {
1401 return Err(error::at_least_two_arguments(instruction.span.start));
1402 }
1403 for src in &mut src {
1404 let Source::Path(path) = src else { unreachable!() };
1405 let mut val = path.value.as_bytes();
1406 let Some(val_next) = val.strip_prefix(b"<<") else { continue };
1407 let Some((delim, strip_tab, expand)) =
1408 collect_here_doc_delim(&mut val, val_next, &path.value)?
1409 else {
1410 continue;
1411 };
1412 debug_assert!(val.is_empty()); let (here_doc, span) = collect_here_doc(s, p.text, &delim, strip_tab)?;
1414 *src = Source::HereDoc(HereDoc { span, expand, value: here_doc });
1415 }
1416 Ok((options, src, dest.unwrap()))
1417}
1418
1419#[allow(clippy::unnecessary_wraps)]
1420#[inline]
1421fn parse_cmd<'a>(
1422 p: &mut ParseIter<'a>,
1423 s: &mut &'a [u8],
1424 instruction: Keyword,
1425) -> Result<Instruction<'a>, ErrorKind> {
1426 debug_assert!(token_slow(
1427 &mut p.text[instruction.span.clone()].as_bytes(),
1428 b"CMD",
1429 p.escape_byte,
1430 ));
1431 if is_maybe_json(s) {
1432 let mut tmp = *s;
1433 if let Ok((arguments, array_span)) =
1434 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1435 {
1436 if let Some((&b, s_next)) = tmp.split_first() {
1437 let consumed = consume_newline(b, s, s_next);
1438 debug_assert!(consumed);
1439 } else {
1440 *s = &[];
1441 }
1442 return Ok(Instruction::Cmd(CmdInstruction {
1445 cmd: instruction,
1446 arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1447 }));
1448 }
1449 }
1450 let arguments_start = p.text.len() - s.len();
1451 consume_current_line(s, p.escape_byte);
1452 let end = p.text.len() - s.len();
1453 let arguments = trim_end(p.text, arguments_start, end);
1454 Ok(Instruction::Cmd(CmdInstruction {
1455 cmd: instruction,
1456 arguments: Command::Shell(Spanned {
1457 span: arguments_start..arguments_start + arguments.len(),
1458 value: arguments,
1459 }),
1460 }))
1461}
1462
1463#[inline]
1464fn parse_env<'a>(
1465 p: &mut ParseIter<'a>,
1466 s: &mut &'a [u8],
1467 instruction: Keyword,
1468) -> Result<Instruction<'a>, ErrorKind> {
1469 debug_assert!(token_slow(
1470 &mut p.text[instruction.span.clone()].as_bytes(),
1471 b"ENV",
1472 p.escape_byte,
1473 ));
1474 let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1475 arguments.trim_end();
1476 if arguments.value.is_empty() {
1477 return Err(error::at_least_one_argument(instruction.span.start));
1478 }
1479 Ok(Instruction::Env(EnvInstruction { env: instruction, arguments }))
1480}
1481
1482#[inline]
1483fn parse_expose<'a>(
1484 p: &mut ParseIter<'a>,
1485 s: &mut &'a [u8],
1486 instruction: Keyword,
1487) -> Result<Instruction<'a>, ErrorKind> {
1488 debug_assert!(token_slow(
1489 &mut p.text[instruction.span.clone()].as_bytes(),
1490 b"EXPOSE",
1491 p.escape_byte,
1492 ));
1493 let arguments: SmallVec<[_; 1]> =
1494 collect_space_separated_consume_line(s, p.text, p.escape_byte);
1495 if arguments.is_empty() {
1496 return Err(error::at_least_one_argument(instruction.span.start));
1497 }
1498 Ok(Instruction::Expose(ExposeInstruction { expose: instruction, arguments }))
1499}
1500
1501#[inline]
1502fn parse_entrypoint<'a>(
1503 p: &mut ParseIter<'a>,
1504 s: &mut &'a [u8],
1505 instruction: Keyword,
1506) -> Result<Instruction<'a>, ErrorKind> {
1507 debug_assert!(token_slow(
1508 &mut p.text[instruction.span.clone()].as_bytes(),
1509 b"ENTRYPOINT",
1510 p.escape_byte,
1511 ));
1512 if is_maybe_json(s) {
1513 let mut tmp = *s;
1514 if let Ok((arguments, array_span)) =
1515 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1516 {
1517 if let Some((&b, s_next)) = tmp.split_first() {
1518 let consumed = consume_newline(b, s, s_next);
1519 debug_assert!(consumed);
1520 } else {
1521 *s = &[];
1522 }
1523 if arguments.is_empty() {
1524 return Err(error::at_least_one_argument(instruction.span.start));
1525 }
1526 return Ok(Instruction::Entrypoint(EntrypointInstruction {
1527 entrypoint: instruction,
1528 arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1529 }));
1530 }
1531 }
1532 let arguments_start = p.text.len() - s.len();
1533 consume_current_line(s, p.escape_byte);
1534 let end = p.text.len() - s.len();
1535 let arguments = trim_end(p.text, arguments_start, end);
1536 if arguments.is_empty() {
1537 return Err(error::at_least_one_argument(instruction.span.start));
1538 }
1539 Ok(Instruction::Entrypoint(EntrypointInstruction {
1540 entrypoint: instruction,
1541 arguments: Command::Shell(Spanned {
1542 span: arguments_start..arguments_start + arguments.len(),
1543 value: arguments,
1544 }),
1545 }))
1546}
1547
1548#[inline]
1549fn parse_from<'a>(
1550 p: &mut ParseIter<'a>,
1551 s: &mut &'a [u8],
1552 instruction: Keyword,
1553) -> Result<Instruction<'a>, ErrorKind> {
1554 debug_assert!(token_slow(
1555 &mut p.text[instruction.span.clone()].as_bytes(),
1556 b"FROM",
1557 p.escape_byte,
1558 ));
1559 let options = parse_options(s, p.text, p.escape_byte);
1560 let image = collect_non_whitespace(s, p.text, p.escape_byte);
1563 if image.value.is_empty() {
1564 return Err(error::at_least_one_argument(instruction.span.start));
1565 }
1566 let mut as_ = None;
1567 if consume_whitespaces(s, p.escape_byte) {
1568 let as_start = p.text.len() - s.len();
1569 if token(s, b"AS") || token_slow(s, b"AS", p.escape_byte) {
1570 let as_span = as_start..p.text.len() - s.len();
1571 if !consume_whitespaces(s, p.escape_byte) {
1572 return Err(error::expected("AS", as_start));
1573 }
1574 let name = collect_non_whitespace(s, p.text, p.escape_byte);
1575 consume_whitespaces(s, p.escape_byte);
1576 if !is_line_end(s.first()) {
1577 return Err(error::expected("newline or eof", p.text.len() - s.len()));
1578 }
1579 as_ = Some((Keyword { span: as_span }, name));
1580 } else if !is_line_end(s.first()) {
1581 return Err(error::expected("AS", as_start));
1582 }
1583 }
1584 Ok(Instruction::From(FromInstruction { from: instruction, options, image, as_ }))
1585}
1586
1587#[inline]
1588fn parse_healthcheck<'a>(
1589 p: &mut ParseIter<'a>,
1590 s: &mut &'a [u8],
1591 instruction: Keyword,
1592) -> Result<Instruction<'a>, ErrorKind> {
1593 debug_assert!(token_slow(
1594 &mut p.text[instruction.span.clone()].as_bytes(),
1595 b"HEALTHCHECK",
1596 p.escape_byte,
1597 ));
1598 let options = parse_options(s, p.text, p.escape_byte);
1599 let Some((&b, s_next)) = s.split_first() else {
1600 return Err(error::expected("CMD or NONE", p.text.len() - s.len()));
1601 };
1602 let cmd_or_none_start = p.text.len() - s.len();
1603 match b & TO_UPPER8 {
1604 b'C' => {
1605 *s = s_next;
1606 if token(s, &b"CMD"[1..]) || token_slow(s, &b"CMD"[1..], p.escape_byte) {
1607 let cmd_span = cmd_or_none_start..p.text.len() - s.len();
1608 let cmd_keyword = Keyword { span: cmd_span };
1609 if consume_whitespaces_or_is_empty_line(s, p.escape_byte) {
1610 if is_maybe_json(s) {
1611 let mut tmp = *s;
1612 if let Ok((arguments, array_span)) =
1613 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1614 {
1615 if let Some((&b, s_next)) = tmp.split_first() {
1616 let consumed = consume_newline(b, s, s_next);
1617 debug_assert!(consumed);
1618 } else {
1619 *s = &[];
1620 }
1621 if arguments.is_empty() {
1622 return Err(error::at_least_one_argument(instruction.span.start));
1623 }
1624 return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1625 healthcheck: instruction,
1626 options,
1627 arguments: HealthcheckArguments::Cmd {
1628 cmd: cmd_keyword,
1629 arguments: Command::Exec(Spanned {
1630 span: array_span,
1631 value: arguments,
1632 }),
1633 },
1634 }));
1635 }
1636 }
1637 let arguments_start = p.text.len() - s.len();
1638 consume_current_line(s, p.escape_byte);
1639 let end = p.text.len() - s.len();
1640 let arguments = trim_end(p.text, arguments_start, end);
1641 return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1642 healthcheck: instruction,
1643 options,
1644 arguments: HealthcheckArguments::Cmd {
1645 cmd: cmd_keyword,
1646 arguments: Command::Shell(Spanned {
1647 span: arguments_start..arguments_start + arguments.len(),
1648 value: arguments,
1649 }),
1650 },
1651 }));
1652 }
1653 }
1654 }
1655 b'N' => {
1656 *s = s_next;
1657 if token(s, &b"NONE"[1..]) || token_slow(s, &b"NONE"[1..], p.escape_byte) {
1658 let none_span = cmd_or_none_start..p.text.len() - s.len();
1659 consume_whitespaces(s, p.escape_byte);
1660 if !is_line_end(s.first()) {
1661 return Err(error::other(
1662 "HEALTHCHECK NONE does not accept arguments",
1663 p.text.len() - s.len(),
1664 ));
1665 }
1666 let none_keyword = Keyword { span: none_span };
1668 return Ok(Instruction::Healthcheck(HealthcheckInstruction {
1669 healthcheck: instruction,
1670 options,
1671 arguments: HealthcheckArguments::None { none: none_keyword },
1672 }));
1673 }
1674 }
1675 _ => {}
1676 }
1677 Err(error::expected("CMD or NONE", p.text.len() - s.len()))
1678}
1679
1680#[inline]
1681fn parse_label<'a>(
1682 p: &mut ParseIter<'a>,
1683 s: &mut &'a [u8],
1684 instruction: Keyword,
1685) -> Result<Instruction<'a>, ErrorKind> {
1686 debug_assert!(token_slow(
1687 &mut p.text[instruction.span.clone()].as_bytes(),
1688 b"LABEL",
1689 p.escape_byte,
1690 ));
1691 let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1692 arguments.trim_end();
1693 if arguments.value.is_empty() {
1694 return Err(error::at_least_one_argument(instruction.span.start));
1695 }
1696 Ok(Instruction::Label(LabelInstruction { label: instruction, arguments }))
1697}
1698
1699#[cold]
1700fn parse_maintainer<'a>(
1701 p: &mut ParseIter<'a>,
1702 s: &mut &'a [u8],
1703 instruction: Keyword,
1704) -> Result<Instruction<'a>, ErrorKind> {
1705 debug_assert!(token_slow(
1706 &mut p.text[instruction.span.clone()].as_bytes(),
1707 b"MAINTAINER",
1708 p.escape_byte,
1709 ));
1710 let mut name = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1711 name.trim_end();
1712 if name.value.is_empty() {
1713 return Err(error::exactly_one_argument(instruction.span.start));
1714 }
1715 Ok(Instruction::Maintainer(MaintainerInstruction { maintainer: instruction, name }))
1716}
1717
1718#[inline]
1719fn parse_onbuild<'a>(
1720 p: &mut ParseIter<'a>,
1721 s: &mut &'a [u8],
1722 instruction: Keyword,
1723) -> Result<Instruction<'a>, ErrorKind> {
1724 debug_assert!(token_slow(
1725 &mut p.text[instruction.span.clone()].as_bytes(),
1726 b"ONBUILD",
1727 p.escape_byte,
1728 ));
1729 if p.in_onbuild {
1731 return Err(error::other("ONBUILD ONBUILD is not allowed", instruction.span.start));
1732 }
1733 p.in_onbuild = true;
1734 let Some((&b, s_next)) = s.split_first() else {
1735 return Err(error::expected("instruction after ONBUILD", instruction.span.start));
1736 };
1737 let inner_instruction = parse_instruction(p, s, b, s_next)?;
1760 p.in_onbuild = false;
1761 Ok(Instruction::Onbuild(OnbuildInstruction {
1762 onbuild: instruction,
1763 instruction: Box::new(inner_instruction),
1764 }))
1765}
1766
1767#[inline]
1768fn parse_run<'a>(
1769 p: &mut ParseIter<'a>,
1770 s: &mut &'a [u8],
1771 instruction: Keyword,
1772) -> Result<Instruction<'a>, ErrorKind> {
1773 debug_assert!(token_slow(
1774 &mut p.text[instruction.span.clone()].as_bytes(),
1775 b"RUN",
1776 p.escape_byte,
1777 ));
1778 let options = parse_options(s, p.text, p.escape_byte);
1779 if is_maybe_json(s) {
1780 let mut tmp = *s;
1781 if let Ok((arguments, array_span)) =
1782 parse_json_array::<SmallVec<[_; 1]>>(&mut tmp, p.text, p.escape_byte)
1783 {
1784 if let Some((&b, s_next)) = tmp.split_first() {
1785 let consumed = consume_newline(b, s, s_next);
1786 debug_assert!(consumed);
1787 } else {
1788 *s = &[];
1789 }
1790 if arguments.is_empty() {
1791 return Err(error::at_least_one_argument(instruction.span.start));
1792 }
1793 return Ok(Instruction::Run(RunInstruction {
1794 run: instruction,
1795 options,
1796 arguments: Command::Exec(Spanned { span: array_span, value: arguments }),
1797 here_docs: vec![],
1799 }));
1800 }
1801 }
1802
1803 if s.len() >= 5 {
1806 if let Some(s_next) = s.strip_prefix(b"<<") {
1807 if let Some((delim, strip_tab, expand)) = collect_here_doc_delim(s, s_next, p.text)? {
1808 let arguments_start = p.text.len() - s.len();
1810 consume_current_line(s, p.escape_byte);
1811 let end = p.text.len() - s.len();
1812 let arguments = trim_end(p.text, arguments_start, end);
1813 let (here_doc, span) = collect_here_doc(s, p.text, &delim, strip_tab)?;
1814 let here_doc = HereDoc { span, expand, value: here_doc };
1815 return Ok(Instruction::Run(RunInstruction {
1816 run: instruction,
1817 options,
1818 arguments: Command::Shell(Spanned {
1819 span: arguments_start..arguments_start + arguments.len(),
1820 value: arguments,
1821 }),
1822 here_docs: vec![here_doc],
1824 }));
1825 }
1826 }
1827 }
1828
1829 let arguments_start = p.text.len() - s.len();
1830 consume_current_line(s, p.escape_byte);
1831 let end = p.text.len() - s.len();
1832 let arguments = trim_end(p.text, arguments_start, end);
1833 Ok(Instruction::Run(RunInstruction {
1834 run: instruction,
1835 options,
1836 arguments: Command::Shell(Spanned {
1837 span: arguments_start..arguments_start + arguments.len(),
1838 value: arguments,
1839 }),
1840 here_docs: vec![],
1841 }))
1842}
1843
1844#[inline]
1845fn parse_shell<'a>(
1846 p: &mut ParseIter<'a>,
1847 s: &mut &'a [u8],
1848 instruction: Keyword,
1849) -> Result<Instruction<'a>, ErrorKind> {
1850 debug_assert!(token_slow(
1851 &mut p.text[instruction.span.clone()].as_bytes(),
1852 b"SHELL",
1853 p.escape_byte,
1854 ));
1855 if !is_maybe_json(s) {
1856 return Err(error::expected("JSON array", p.text.len() - s.len()));
1857 }
1858 let (arguments, _array_span) =
1859 parse_json_array::<SmallVec<[_; 4]>>(s, p.text, p.escape_byte).map_err(error::json)?;
1860 if let Some((&b, s_next)) = s.split_first() {
1861 let consumed = consume_newline(b, s, s_next);
1862 debug_assert!(consumed);
1863 }
1864 if arguments.is_empty() {
1865 return Err(error::at_least_one_argument(instruction.span.start));
1866 }
1867 Ok(Instruction::Shell(ShellInstruction { shell: instruction, arguments }))
1868}
1869
1870#[inline]
1871fn parse_stopsignal<'a>(
1872 p: &mut ParseIter<'a>,
1873 s: &mut &'a [u8],
1874 instruction: Keyword,
1875) -> Result<Instruction<'a>, ErrorKind> {
1876 debug_assert!(token_slow(
1877 &mut p.text[instruction.span.clone()].as_bytes(),
1878 b"STOPSIGNAL",
1879 p.escape_byte,
1880 ));
1881 let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1883 arguments.trim_end();
1884 if arguments.value.is_empty() {
1885 return Err(error::exactly_one_argument(instruction.span.start));
1886 }
1887 Ok(Instruction::Stopsignal(StopsignalInstruction { stopsignal: instruction, arguments }))
1888}
1889
1890#[inline]
1891fn parse_user<'a>(
1892 p: &mut ParseIter<'a>,
1893 s: &mut &'a [u8],
1894 instruction: Keyword,
1895) -> Result<Instruction<'a>, ErrorKind> {
1896 debug_assert!(token_slow(
1897 &mut p.text[instruction.span.clone()].as_bytes(),
1898 b"USER",
1899 p.escape_byte,
1900 ));
1901 let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1903 arguments.trim_end();
1904 if arguments.value.is_empty() {
1905 return Err(error::exactly_one_argument(instruction.span.start));
1906 }
1907 Ok(Instruction::User(UserInstruction { user: instruction, arguments }))
1908}
1909
1910#[inline]
1911fn parse_volume<'a>(
1912 p: &mut ParseIter<'a>,
1913 s: &mut &'a [u8],
1914 instruction: Keyword,
1915) -> Result<Instruction<'a>, ErrorKind> {
1916 debug_assert!(token_slow(
1917 &mut p.text[instruction.span.clone()].as_bytes(),
1918 b"VOLUME",
1919 p.escape_byte,
1920 ));
1921 if is_maybe_json(s) {
1922 let mut tmp = *s;
1923 if let Ok((arguments, array_span)) = parse_json_array(&mut tmp, p.text, p.escape_byte) {
1924 if let Some((&b, s_next)) = tmp.split_first() {
1925 let consumed = consume_newline(b, s, s_next);
1926 debug_assert!(consumed);
1927 } else {
1928 *s = &[];
1929 }
1930 return Ok(Instruction::Volume(VolumeInstruction {
1932 volume: instruction,
1933 arguments: JsonOrStringArray::Json(Spanned { span: array_span, value: arguments }),
1934 }));
1935 }
1936 }
1937 let arguments: SmallVec<[_; 1]> =
1938 collect_space_separated_consume_line(s, p.text, p.escape_byte);
1939 if arguments.is_empty() {
1940 return Err(error::at_least_one_argument(instruction.span.start));
1942 }
1943 Ok(Instruction::Volume(VolumeInstruction {
1944 volume: instruction,
1945 arguments: JsonOrStringArray::String(arguments),
1946 }))
1947}
1948
1949#[inline]
1950fn parse_workdir<'a>(
1951 p: &mut ParseIter<'a>,
1952 s: &mut &'a [u8],
1953 instruction: Keyword,
1954) -> Result<Instruction<'a>, ErrorKind> {
1955 debug_assert!(token_slow(
1956 &mut p.text[instruction.span.clone()].as_bytes(),
1957 b"WORKDIR",
1958 p.escape_byte,
1959 ));
1960 let mut arguments = collect_until_line_consume_newline(s, p.text, p.escape_byte);
1962 arguments.trim_end();
1963 if arguments.value.is_empty() {
1964 return Err(error::exactly_one_argument(instruction.span.start));
1965 }
1966 Ok(Instruction::Workdir(WorkdirInstruction { workdir: instruction, arguments }))
1967}
1968
// Byte-class flags stored in `TABLE`. Each byte value maps to the OR of the
// classes it belongs to.

/// Set for `b'\n'` and `b'\r'`.
const POSSIBLE_LINE: u8 = 1 << 0;
/// Set for `b' '` and `b'\t'`.
const SPACE: u8 = 1 << 1;
/// Set for `b' '`, `b'\t'`, `b'\x0B'`, `b'\x0C'` and `b'\r'`.
const WHITESPACE: u8 = 1 << 2;
/// Set for `b'#'`.
const COMMENT: u8 = 1 << 3;
/// Set for `b'"'`.
const DOUBLE_QUOTE: u8 = 1 << 4;
/// Set for `b'\\'` and `` b'`' ``.
const POSSIBLE_ESCAPE: u8 = 1 << 5;
/// Set for `b'='`.
const EQ: u8 = 1 << 6;
/// Set for every byte below `0x20`.
const CONTROL: u8 = 1 << 7;

/// Lookup table from a byte value to its class flags, built at compile time.
static TABLE: [u8; 256] = {
    /// Computes the flag set for a single byte.
    const fn classify(byte: u8) -> u8 {
        let class = match byte {
            b' ' | b'\t' => WHITESPACE | SPACE,
            b'\x0B' | b'\x0C' => WHITESPACE,
            b'\r' => WHITESPACE | POSSIBLE_LINE,
            b'\n' => POSSIBLE_LINE,
            b'#' => COMMENT,
            b'"' => DOUBLE_QUOTE,
            b'\\' | b'`' => POSSIBLE_ESCAPE,
            b'=' => EQ,
            _ => 0,
        };
        if byte < 0x20 { class | CONTROL } else { class }
    }

    let mut table = [0; 256];
    let mut idx = 0;
    while idx < table.len() {
        // `idx` is always below 256 here, so the cast is lossless.
        table[idx] = classify(idx as u8);
        idx += 1;
    }
    table
};
2018
/// Maps an ASCII hexadecimal digit (`0-9`, `A-F`, `a-f`) to its value `0..=15`.
/// Every other byte maps to `u8::MAX`, which callers treat as "not a hex digit".
static HEX_DECODE_TABLE: [u8; 256] = {
    let mut table = [u8::MAX; 256];
    let mut digit = 0_u8;
    while digit < 16 {
        if digit < 10 {
            table[(b'0' + digit) as usize] = digit;
        } else {
            // Letters are accepted in both cases.
            table[(b'A' + digit - 10) as usize] = digit;
            table[(b'a' + digit - 10) as usize] = digit;
        }
        digit += 1;
    }
    table
};
2043
/// The three bytes `EF BB BF`, i.e. the UTF-8 encoding of the byte order mark (U+FEFF).
const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
2045
/// Minimal "append-only collection" abstraction used by the collecting
/// helpers so they can fill different container types.
trait Store<T>: Sized {
    /// Creates an empty store.
    fn new() -> Self;
    /// Appends `val` to the store.
    fn push(&mut self, val: T);
}

/// `Vec` stores every pushed value in order.
impl<T> Store<T> for Vec<T> {
    #[inline]
    fn new() -> Self {
        // Explicitly the inherent constructor, not the trait method.
        Vec::new()
    }
    #[inline]
    fn push(&mut self, val: T) {
        // Explicitly the inherent `Vec::push`, not the trait method.
        Vec::push(self, val);
    }
}
2060impl<T, const N: usize> Store<T> for SmallVec<[T; N]> {
2061 #[inline]
2062 fn new() -> Self {
2063 Self::new()
2064 }
2065 #[inline]
2066 fn push(&mut self, val: T) {
2067 self.push(val);
2068 }
2069}
2070impl<'a, const N: usize> Store<UnescapedString<'a>>
2071 for (SmallVec<[Source<'a>; N]>, Option<UnescapedString<'a>>)
2072{
2073 #[inline]
2074 fn new() -> Self {
2075 (SmallVec::new(), None)
2076 }
2077 #[inline]
2078 fn push(&mut self, val: UnescapedString<'a>) {
2079 if let Some(val) = self.1.replace(val) {
2080 self.0.push(Source::Path(val));
2081 }
2082 }
2083}
2084
/// Empty function marked `#[cold]`.
///
/// It is called as the first statement of some `match` arms in the
/// instruction dispatcher (e.g. the `USER`, `VOLUME` and `WORKDIR` arms).
/// NOTE(review): presumably a hint to the optimizer that those arms are
/// rarely taken; confirm against the codegen if this matters.
#[inline(always)]
#[cold]
fn cold_path() {}
2089
/// Returns `true` when `b` is the end of a line: either a `\n` byte or the
/// end of input (`None`).
#[inline]
fn is_line_end(b: Option<&u8>) -> bool {
    match b {
        None => true,
        Some(&byte) => byte == b'\n',
    }
}
2095
/// Parses a run of `--name` / `--name=value` flags at the current position
/// and returns them in a store of type `S`.
///
/// - `s`: cursor into the input; advanced past every flag that is consumed.
/// - `start`: the full input text, used to compute byte offsets
///   (`start.len() - s.len()`).
/// - `escape_byte`: forwarded to the line-continuation and collecting helpers.
///
/// Parsing stops, leaving `s` at the first unconsumed byte, as soon as the
/// input does not start with two dashes (a line continuation between the two
/// dashes is accepted).
#[inline]
fn parse_options<'a, S: Store<Flag<'a>>>(s: &mut &[u8], start: &'a str, escape_byte: u8) -> S {
    let mut options = S::new();
    'outer: while let Some((&b'-', mut s_next)) = s.split_first() {
        // Look for the second `-`, skipping line continuations in between.
        // `s` itself is not advanced until the second dash has been found.
        loop {
            let Some((&b, s_next_next)) = s_next.split_first() else {
                break 'outer;
            };
            if b == b'-' {
                s_next = s_next_next;
                break;
            }
            if consume_line_continuation(&mut s_next, b, s_next_next, escape_byte) {
                continue;
            }
            // Single dash followed by something else: not a flag.
            break 'outer;
        }
        // Offset of the first `-` of this flag.
        let flag_start = start.len() - s.len();
        *s = s_next;
        let name = collect_until::<{ WHITESPACE | POSSIBLE_LINE | EQ }>(s, start, escape_byte);
        let Some((&b'=', s_next)) = s.split_first() else {
            // Flag without `=value`.
            options.push(Flag { flag_start, name, value: None });
            consume_whitespaces(s, escape_byte);
            continue;
        };
        *s = s_next;
        let value = collect_non_whitespace(s, start, escape_byte);
        options.push(Flag { flag_start, name, value: Some(value) });
        consume_whitespaces(s, escape_byte);
    }
    options
}
2128
/// Cheap pre-check for a JSON array: `true` when `s` starts with `[` and the
/// second byte (if any) is not another `[`.
#[inline]
fn is_maybe_json(s: &[u8]) -> bool {
    match s {
        [b'[', b'[', ..] => false,
        [b'[', ..] => true,
        _ => false,
    }
}
/// Parses a JSON array of strings starting at `s` (which must satisfy
/// `is_maybe_json`) and pushes each unescaped element into a store of type `S`.
///
/// - `s`: cursor into the input; advanced as bytes are consumed, also on
///   failure (callers that want to retry pass a copy).
/// - `start`: the full input text, used for byte offsets and for borrowing
///   element text.
/// - `escape_byte`: forwarded to the whitespace and line-continuation helpers.
///
/// On success returns the store and the span from the opening `[` through
/// the closing `]`. After the `]`, only whitespace may precede the end of the
/// line; the line end itself is not consumed.
///
/// Elements are borrowed from `start` when they contain neither JSON escapes
/// nor line continuations, and are built in an owned buffer otherwise.
///
/// # Errors
///
/// Every failure returns `Err` carrying the byte offset of the opening `[`.
fn parse_json_array<'a, S: Store<UnescapedString<'a>>>(
    s: &mut &[u8],
    start: &'a str,
    escape_byte: u8,
) -> Result<(S, Span), usize> {
    debug_assert!(is_maybe_json(s));
    let mut res = S::new();
    let array_start = start.len() - s.len();
    // Skip the opening `[`.
    *s = &s[1..];
    consume_whitespaces(s, escape_byte);
    let (&b, s_next) = s.split_first().ok_or(array_start)?;
    match b {
        b'"' => {
            *s = s_next;
            // One iteration per string element; `s` is just past the opening quote.
            loop {
                let full_word_start = start.len() - s.len();
                // Start of the not-yet-copied tail of the current element.
                let mut word_start = full_word_start;
                let mut buf = String::new();
                loop {
                    let (&b, s_next) = s.split_first().ok_or(array_start)?;
                    // Fast path: ordinary bytes need no special handling.
                    if TABLE[b as usize] & (DOUBLE_QUOTE | POSSIBLE_ESCAPE | CONTROL) == 0 {
                        *s = s_next;
                        continue;
                    }
                    match b {
                        b'"' => break,
                        // Raw control characters are rejected inside strings.
                        _ if b < 0x20 => return Err(array_start),
                        _ => {}
                    }
                    let word_end = start.len() - s.len();
                    if consume_line_continuation(s, b, s_next, escape_byte) {
                        // Keep the text before the continuation, drop the continuation itself.
                        buf.push_str(&start[word_start..word_end]);
                        word_start = start.len() - s.len();
                        continue;
                    }
                    if b == b'\\' {
                        // JSON escape sequence: flush the pending text, then decode.
                        let word_end = start.len() - s.len();
                        buf.push_str(&start[word_start..word_end]);
                        *s = s_next;
                        // A line continuation may sit between `\` and the escape character.
                        if let Some((&b, s_next)) = s.split_first() {
                            consume_line_continuation(s, b, s_next, escape_byte);
                        }
                        let (&b, s_next) = s.split_first().ok_or(array_start)?;
                        *s = s_next;
                        let new = match b {
                            b'"' | b'\\' | b'/' => b as char,
                            b'b' => '\x08',
                            b'f' => '\x0c',
                            b'n' => '\n',
                            b'r' => '\r',
                            b't' => '\t',
                            b'u' => parse_json_hex_escape(s, escape_byte, array_start)?,
                            _ => return Err(array_start),
                        };
                        buf.push(new);
                        word_start = start.len() - s.len();
                        continue;
                    }
                    // A possible-escape byte that turned out to be ordinary text.
                    *s = s_next;
                }
                // `s` is at the closing quote.
                let word_end = start.len() - s.len();
                let value = if full_word_start == word_start {
                    // Nothing was rewritten: borrow directly from the input.
                    Cow::Borrowed(&start[word_start..word_end])
                } else {
                    buf.push_str(&start[word_start..word_end]);
                    Cow::Owned(buf)
                };
                res.push(UnescapedString { span: full_word_start..word_end, value });
                // Skip the closing quote.
                *s = &s[1..];
                consume_whitespaces(s, escape_byte);
                let (&b, s_next) = s.split_first().ok_or(array_start)?;
                match b {
                    b',' => {
                        *s = s_next;
                        consume_whitespaces(s, escape_byte);
                        // A comma must be followed by another string (no trailing comma).
                        let (&b, s_next) = s.split_first().ok_or(array_start)?;
                        if b == b'"' {
                            *s = s_next;
                            continue;
                        }
                        return Err(array_start);
                    }
                    b']' => {
                        *s = s_next;
                        break;
                    }
                    _ => return Err(array_start),
                }
            }
        }
        // Empty array.
        b']' => *s = s_next,
        _ => return Err(array_start),
    }
    let array_end = start.len() - s.len();
    consume_whitespaces(s, escape_byte);
    if !is_line_end(s.first()) {
        return Err(array_start);
    }
    Ok((res, array_start..array_end))
}
2238#[cold]
2240fn parse_json_hex_escape(
2241 s: &mut &[u8],
2242 escape_byte: u8,
2243 array_start: usize,
2244) -> Result<char, usize> {
2245 fn decode_hex_escape(s: &mut &[u8], escape_byte: u8, array_start: usize) -> Result<u16, usize> {
2246 if s.len() < 4 {
2247 return Err(array_start); }
2249
2250 let mut n = 0;
2251 for _ in 0..4 {
2252 if let Some((&b, s_next)) = s.split_first() {
2253 consume_line_continuation(s, b, s_next, escape_byte);
2254 }
2255 let (&b, s_next) = s.split_first().ok_or(array_start)?;
2256 *s = s_next;
2257 match decode_hex_val(b) {
2258 None => return Err(array_start), Some(val) => {
2260 n = (n << 4) + val;
2261 }
2262 }
2263 }
2264 Ok(n)
2265 }
2266
2267 fn decode_hex_val(val: u8) -> Option<u16> {
2268 let n = HEX_DECODE_TABLE[val as usize] as u16;
2269 if n == u8::MAX as u16 { None } else { Some(n) }
2270 }
2271
2272 let c = match decode_hex_escape(s, escape_byte, array_start)? {
2273 _n @ 0xDC00..=0xDFFF => return Err(array_start), n1 @ 0xD800..=0xDBFF => {
2280 if let Some((&b, s_next)) = s.split_first() {
2281 consume_line_continuation(s, b, s_next, escape_byte);
2282 }
2283 let Some((&b'\\', s_next)) = s.split_first() else {
2284 return Err(array_start); };
2286 *s = s_next;
2287
2288 if let Some((&b, s_next)) = s.split_first() {
2289 consume_line_continuation(s, b, s_next, escape_byte);
2290 }
2291 let Some((&b'u', s_next)) = s.split_first() else {
2292 return Err(array_start); };
2294 *s = s_next;
2295
2296 let n2 = decode_hex_escape(s, escape_byte, array_start)?;
2297
2298 if n2 < 0xDC00 || n2 > 0xDFFF {
2299 return Err(array_start); }
2301
2302 let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
2303
2304 match char::from_u32(n) {
2305 Some(c) => c,
2306 None => return Err(array_start), }
2308 }
2309
2310 n => char::from_u32(n as u32).unwrap(),
2313 };
2314 Ok(c)
2315}
/// Exercises `parse_json_array` on valid arrays (checking element spans,
/// unescaped values and the remaining input) and on malformed input (checking
/// the error offset and how far the cursor advanced).
#[allow(clippy::needless_raw_string_hashes)]
#[test]
fn test_parse_json_array() {
    // Empty arrays.
    let t = r#"[]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
    assert_eq!(s, b"");
    let t = r#"[ ]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[]);
    assert_eq!(s, b"");

    // Single element.
    let t = r#"["abc"]"#;
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
        span: 2..5,
        value: "abc".into()
    }]);
    assert_eq!(s, b"");

    // Multiple elements with whitespace around separators; the newline is left unconsumed.
    // Spans are byte offsets of the text between the quotes: `de` occupies bytes 13..15.
    let t = "[\"ab\",\"c\" , \"de\" ] \n";
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[
        UnescapedString { span: 2..4, value: "ab".into() },
        UnescapedString { span: 7..8, value: "c".into() },
        UnescapedString { span: 13..15, value: "de".into() },
    ]);
    assert_eq!(s, b"\n");

    // All escape kinds, including a surrogate pair split by line continuations.
    let t = "[\"a\\\"\\\\\\/\\b\\f\\n\\r\\tbc\\u12ab\\uAB12\\uD83C\\uDF95\\\n\\\\\nu\\\nD\\\n8\\\n3\\\nC\\\n\\\\\nu\\\nD\\\nF\\\n9\\\n5\\\n\"]";
    let mut s = t.as_bytes();
    assert_eq!(&*parse_json_array::<Vec<_>>(&mut s, t, b'\\').unwrap().0, &[UnescapedString {
        span: 2..83,
        value: "a\"\\/\x08\x0c\n\r\tbc\u{12ab}\u{AB12}\u{1F395}\u{1F395}".into()
    }]);
    assert_eq!(s, b"");

    // Unterminated string.
    let t = r#"["]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#""#);
    let t = r#"["a]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#""#);

    // Single quotes are not JSON strings.
    let t = r#"['abc']"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"'abc']"#);

    // Trailing comma.
    let t = r#"["abc",]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"]"#);

    // Garbage after an element.
    let t = r#"["abc"d]"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"d]"#);

    // Garbage after the array.
    let t = r#"["abc"] c"#;
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, br#"c"#);

    // Unknown escape.
    let t = "[\"ab\\c\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\"]");

    // High surrogate followed by a non-low-surrogate.
    let t = "[\"\\uD83C\\uFFFF\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\"]");

    // Raw control characters inside a string.
    let t = "[\"a\nb\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\nb\"]");
    let t = "[\"a\x1Fb\"]";
    let mut s = t.as_bytes();
    assert_eq!(parse_json_array::<Vec<_>>(&mut s, t, b'\\'), Err(0));
    assert_eq!(s, b"\x1Fb\"]");
}
2405
/// Parses a here-document delimiter that follows a `<<` marker.
///
/// - `s`: cursor to update; it is set to the position after the delimiter
///   only when a non-empty delimiter was found.
/// - `s_next`: the input right after the `<<`.
/// - `start`: the text `s`/`s_next` point into, used for byte offsets.
///
/// Returns `Ok(Some((delim, strip_tab, expand)))` where
/// - `delim` is the delimiter with quotes and backslashes removed (borrowed
///   from `start` when nothing had to be removed),
/// - `strip_tab` is `true` when the marker was `<<-`,
/// - `expand` is `false` when any part of the delimiter was quoted.
///
/// Returns `Ok(None)` (leaving `s` untouched) when the delimiter is empty.
///
/// # Errors
///
/// - an `other` error ("unterminated escape") when a backslash is the last byte;
/// - an `expected_quote` error when a quote is still open at the end.
#[inline]
fn collect_here_doc_delim<'a>(
    s: &mut &'a [u8],
    mut s_next: &'a [u8],
    start: &'a str,
) -> Result<Option<(Cow<'a, [u8]>, bool, bool)>, ErrorKind> {
    // A `-` right after `<<` requests tab stripping.
    let strip_tab = if let Some((&b'-', s_next_next)) = s_next.split_first() {
        s_next = s_next_next;
        true
    } else {
        false
    };
    let delim_start = start.len() - s_next.len();
    // Start of the not-yet-copied tail of the delimiter.
    let mut current_start = delim_start;
    let mut expand = true;
    // The currently open quote character, if any.
    let mut quote = None;
    let mut buf = vec![];
    while let Some((&b, s_next_next)) = s_next.split_first() {
        match b {
            b'"' | b'\'' => {
                if let Some(q) = quote {
                    // Only the matching quote closes; the other kind is literal text.
                    if b == q {
                        quote = None;
                        let end = start.len() - s_next.len();
                        buf.extend_from_slice(&start.as_bytes()[current_start..end]);
                        current_start = start.len() - s_next_next.len();
                    }
                } else {
                    quote = Some(b);
                    expand = false;
                    let end = start.len() - s_next.len();
                    buf.extend_from_slice(&start.as_bytes()[current_start..end]);
                    current_start = start.len() - s_next_next.len();
                }
            }
            b'\\' => {
                // Drop the backslash and keep the following byte verbatim.
                let end = start.len() - s_next.len();
                buf.extend_from_slice(&start.as_bytes()[current_start..end]);
                current_start = start.len() - s_next_next.len();
                let Some((_, s_next_next)) = s_next_next.split_first() else {
                    return Err(error::other("unterminated escape", start.len() - s_next.len()));
                };
                s_next = s_next_next;
                continue;
            }
            // Outside quotes, whitespace or a line break ends the delimiter.
            _ if quote.is_none() && TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) != 0 => break,
            _ => {}
        }
        s_next = s_next_next;
    }
    if let Some(quote) = quote {
        return Err(error::expected_quote(quote, None, start.len() - s_next.len()));
    }
    let end = start.len() - s_next.len();
    let delim = if delim_start == current_start {
        // No quote or backslash was removed: borrow directly.
        Cow::Borrowed(&start.as_bytes()[delim_start..end])
    } else {
        buf.extend_from_slice(&start.as_bytes()[current_start..end]);
        Cow::Owned(buf)
    };
    if delim.is_empty() {
        return Ok(None);
    }
    *s = s_next;
    Ok(Some((delim, strip_tab, expand)))
}
2473#[inline]
2474fn collect_here_doc<'a>(
2475 s: &mut &[u8],
2476 start: &'a str,
2477 delim: &[u8],
2478 strip_tab: bool,
2479) -> Result<(Cow<'a, str>, Span), ErrorKind> {
2480 let here_doc_start = start.len() - s.len();
2481 let mut current_start = here_doc_start;
2482 let mut buf = String::new();
2483 let mut end;
2484 loop {
2485 if strip_tab {
2486 if let Some((&b'\t', mut s_next)) = s.split_first() {
2488 let end = start.len() - s.len();
2489 buf.push_str(&start[current_start..end]);
2490 while let Some((&b'\t', s_next_next)) = s_next.split_first() {
2491 s_next = s_next_next;
2492 }
2493 *s = s_next;
2494 current_start = start.len() - s.len();
2495 }
2496 }
2497 if s.len() < delim.len() {
2498 return Err(error::expected_here_doc_end(delim, start.len() - s.len()));
2499 }
2500 if s.starts_with(delim) {
2501 let s_next = &s[delim.len()..];
2502 end = start.len() - s.len();
2503 if let Some((&b, s_next)) = s_next.split_first() {
2504 if consume_newline(b, s, s_next) {
2505 break;
2506 }
2507 } else {
2508 *s = s_next;
2509 break;
2510 }
2511 }
2512 consume_current_line_no_line_continuation(s);
2513 }
2514 let span = here_doc_start..end;
2515 if here_doc_start == current_start {
2516 Ok((Cow::Borrowed(&start[span.clone()]), span))
2517 } else {
2518 buf.push_str(&start[current_start..end]);
2519 Ok((Cow::Owned(buf), span))
2520 }
2521}
2522
2523#[inline]
2525fn collect_space_separated_consume_line<'a, S: Store<UnescapedString<'a>>>(
2526 s: &mut &[u8],
2527 start: &'a str,
2528 escape_byte: u8,
2529) -> S {
2530 let mut res = S::new();
2531 loop {
2532 let val = collect_non_whitespace(s, start, escape_byte);
2533 if !val.value.is_empty() {
2534 res.push(val);
2535 if consume_whitespaces(s, escape_byte) {
2536 continue;
2537 }
2538 }
2539 if let Some((&b, s_next)) = s.split_first() {
2540 let consumed = consume_newline(b, s, s_next);
2541 debug_assert!(consumed);
2542 }
2543 break;
2544 }
2545 res
2546}
2547#[inline]
2548fn collect_non_whitespace<'a>(
2549 s: &mut &[u8],
2550 start: &'a str,
2551 escape_byte: u8,
2552) -> UnescapedString<'a> {
2553 collect_until::<{ WHITESPACE | POSSIBLE_LINE }>(s, start, escape_byte)
2554}
2555#[inline]
2556fn collect_until<'a, const UNTIL_MASK: u8>(
2557 s: &mut &[u8],
2558 start: &'a str,
2559 escape_byte: u8,
2560) -> UnescapedString<'a> {
2561 let full_word_start = start.len() - s.len();
2562 let mut word_start = full_word_start;
2563 let mut buf = String::new();
2564 while let Some((&b, s_next)) = s.split_first() {
2565 let t = TABLE[b as usize];
2566 if t & (UNTIL_MASK | POSSIBLE_ESCAPE) != 0 {
2567 if t & UNTIL_MASK != 0 {
2568 break;
2569 }
2570 let word_end = start.len() - s.len();
2571 if consume_line_continuation(s, b, s_next, escape_byte) {
2572 buf.push_str(&start[word_start..word_end]);
2573 word_start = start.len() - s.len();
2574 continue;
2575 }
2576 }
2577 *s = s_next;
2578 }
2579 let word_end = start.len() - s.len();
2580 let value = if full_word_start == word_start {
2581 Cow::Borrowed(&start[word_start..word_end])
2583 } else {
2584 buf.push_str(&start[word_start..word_end]);
2585 Cow::Owned(buf)
2586 };
2587 UnescapedString { span: full_word_start..word_end, value }
2588}
2589#[inline]
2590fn collect_until_line_consume_newline<'a>(
2591 s: &mut &[u8],
2592 start: &'a str,
2593 escape_byte: u8,
2594) -> UnescapedString<'a> {
2595 let full_word_start = start.len() - s.len();
2596 let mut word_start = full_word_start;
2597 let mut buf = String::new();
2598 let word_end;
2599 loop {
2600 let Some((&b, s_next)) = s.split_first() else {
2601 word_end = start.len() - s.len();
2602 break;
2603 };
2604 let t = TABLE[b as usize];
2605 if t & (POSSIBLE_LINE | POSSIBLE_ESCAPE) != 0 {
2606 match b {
2607 b'\n' => {
2608 word_end = start.len() - s.len();
2609 *s = s_next;
2610 break;
2611 }
2612 b'\r' => {
2613 if s_next.first() == Some(&b'\n') {
2614 word_end = start.len() - s.len();
2615 *s = &s_next[1..];
2616 break;
2617 }
2618 }
2619 _ => {
2620 let word_end = start.len() - s.len();
2621 if consume_line_continuation(s, b, s_next, escape_byte) {
2622 buf.push_str(&start[word_start..word_end]);
2623 word_start = start.len() - s.len();
2624 continue;
2625 }
2626 }
2627 }
2628 }
2629 *s = s_next;
2630 }
2631 let value = if full_word_start == word_start {
2632 Cow::Borrowed(&start[word_start..word_end])
2634 } else {
2635 buf.push_str(&start[word_start..word_end]);
2636 Cow::Owned(buf)
2637 };
2638 UnescapedString { span: full_word_start..word_end, value }
2639}
2640
/// Consumes a newline starting at byte `b`, where `s_next` is the input right
/// after `b`.
///
/// On `\n`, or on `\r` immediately followed by `\n`, `s` is set to the input
/// after the newline and `true` is returned. In every other case `s` is left
/// untouched and `false` is returned.
#[inline(always)]
fn consume_newline<'a>(b: u8, s: &mut &'a [u8], s_next: &'a [u8]) -> bool {
    let after_newline = match (b, s_next.first()) {
        (b'\n', _) => s_next,
        (b'\r', Some(&b'\n')) => &s_next[1..],
        _ => return false,
    };
    *s = after_newline;
    true
}
2660
2661#[inline]
2663fn consume_line_continuation<'a>(
2664 s: &mut &'a [u8],
2665 b: u8,
2666 s_next: &'a [u8],
2667 escape_byte: u8,
2668) -> bool {
2669 #[inline]
2670 fn followup(s: &mut &[u8], _escape_byte: u8) {
2671 while let Some((&b, mut s_next)) = s.split_first() {
2672 let t = TABLE[b as usize];
2673 if t & (WHITESPACE | POSSIBLE_LINE | COMMENT) == 0 {
2674 break;
2675 }
2676 if t & WHITESPACE != 0 {
2677 consume_whitespaces_no_line_continuation(&mut s_next);
2679 let Some((&b, s_next_next)) = s_next.split_first() else { break };
2680 let t = TABLE[b as usize];
2681 if t & (COMMENT | POSSIBLE_LINE) == 0 {
2682 break;
2683 }
2684 s_next = s_next_next;
2685 }
2686 *s = s_next;
2687 if b != b'\n' {
2690 consume_current_line_no_line_continuation(s);
2691 }
2692 }
2693 }
2694
2695 if b == escape_byte {
2696 cold_path();
2697 if let Some((&b, mut s_next)) = s_next.split_first() {
2698 if consume_newline(b, s, s_next) {
2699 followup(s, escape_byte);
2700 return true;
2701 }
2702 if TABLE[b as usize] & SPACE != 0 {
2705 cold_path();
2706 consume_whitespaces_no_line_continuation(&mut s_next);
2707 if let Some((&b, s_next)) = s_next.split_first() {
2708 if consume_newline(b, s, s_next) {
2709 followup(s, escape_byte);
2710 return true;
2711 }
2712 }
2713 }
2714 }
2715 }
2716 false
2717}
2718
2719#[inline]
2723fn consume_until_whitespaces_or_line_no_line_continuation(s: &mut &[u8]) -> bool {
2724 let start = *s;
2725 while let Some((&b, s_next)) = s.split_first() {
2726 if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) != 0 {
2727 break;
2728 }
2729 *s = s_next;
2730 }
2731 start.len() != s.len()
2732}
2733
/// Consumes the rest of the current physical line, including its newline,
/// without interpreting line continuations.
///
/// The line ends at the first `\n` (a preceding `\r` is just part of what is
/// skipped). Without any `\n`, the whole remaining input is consumed.
#[inline]
fn consume_current_line_no_line_continuation(s: &mut &[u8]) {
    let line_len = match s.iter().position(|&b| b == b'\n') {
        Some(newline_at) => newline_at + 1,
        None => s.len(),
    };
    *s = &s[line_len..];
}
2745#[inline]
2748fn consume_current_line(s: &mut &[u8], escape_byte: u8) {
2749 let mut has_whitespace_only = 0;
2750 while let Some((&b, s_next)) = s.split_first() {
2751 let t = TABLE[b as usize];
2752 if t & (POSSIBLE_LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2753 if consume_newline(b, s, s_next) {
2754 break;
2755 }
2756 if has_whitespace_only != 0 && t & COMMENT != 0 {
2757 *s = s_next;
2758 consume_current_line_no_line_continuation(s);
2759 continue;
2760 }
2761 if consume_line_continuation(s, b, s_next, escape_byte) {
2762 has_whitespace_only = WHITESPACE;
2763 continue;
2764 }
2765 }
2766 has_whitespace_only &= t;
2767 *s = s_next;
2768 }
2769}
2770
2771#[inline]
2775fn consume_whitespaces_no_line_continuation(s: &mut &[u8]) -> bool {
2776 let start = *s;
2777 while let Some((&b, s_next)) = s.split_first() {
2778 if TABLE[b as usize] & WHITESPACE != 0 {
2779 *s = s_next;
2780 continue;
2781 }
2782 break;
2783 }
2784 start.len() != s.len()
2785}
2786#[inline]
2790fn consume_whitespaces(s: &mut &[u8], escape_byte: u8) -> bool {
2791 let mut has_space = false;
2792 while let Some((&b, s_next)) = s.split_first() {
2793 let t = TABLE[b as usize];
2794 if t & (WHITESPACE | POSSIBLE_ESCAPE) != 0 {
2795 if t & WHITESPACE != 0 {
2796 *s = s_next;
2797 has_space = true;
2798 continue;
2799 }
2800 if consume_line_continuation(s, b, s_next, escape_byte) {
2801 continue;
2802 }
2803 }
2804 break;
2805 }
2806 has_space
2807}
2808#[inline]
2812fn consume_whitespaces_or_is_empty_line(s: &mut &[u8], escape_byte: u8) -> bool {
2813 let mut has_space = false;
2814 loop {
2815 let Some((&b, s_next)) = s.split_first() else { return true };
2816 {
2817 let t = TABLE[b as usize];
2818 if t & (WHITESPACE | POSSIBLE_ESCAPE | POSSIBLE_LINE) != 0 {
2819 if t & WHITESPACE != 0 {
2820 *s = s_next;
2821 has_space = true;
2822 continue;
2823 }
2824 if b == b'\n' {
2825 return true;
2826 }
2827 if consume_line_continuation(s, b, s_next, escape_byte) {
2828 continue;
2829 }
2830 }
2831 break;
2832 }
2833 }
2834 has_space
2835}
2836#[inline]
2838fn consume_comments_and_whitespaces(s: &mut &[u8], escape_byte: u8) {
2839 while let Some((&b, s_next)) = s.split_first() {
2840 let t = TABLE[b as usize];
2841 if t & (WHITESPACE | POSSIBLE_LINE | COMMENT | POSSIBLE_ESCAPE) != 0 {
2842 if t & (WHITESPACE | POSSIBLE_LINE) != 0 {
2843 *s = s_next;
2844 continue;
2845 }
2846 if t & COMMENT != 0 {
2847 *s = s_next;
2848 consume_current_line_no_line_continuation(s);
2849 continue;
2850 }
2851 if consume_line_continuation(s, b, s_next, escape_byte) {
2852 continue;
2853 }
2854 }
2855 break;
2856 }
2857}
2858
2859#[inline]
2860#[track_caller]
2861fn trim_end(text: &str, start: usize, mut end: usize) -> &str {
2862 while start < end {
2863 let next_end = end - 1;
2864 if let Some(&b) = text.as_bytes().get(next_end) {
2865 if TABLE[b as usize] & (WHITESPACE | POSSIBLE_LINE) != 0 {
2866 end = next_end;
2867 continue;
2868 }
2869 }
2870 break;
2871 }
2872 &text[start..end]
2873}
2874
2875#[inline(always)]
2876fn token(s: &mut &[u8], token: &'static [u8]) -> bool {
2877 let matched = starts_with_ignore_ascii_case(s, token);
2878 if matched {
2879 *s = &s[token.len()..];
2880 true
2881 } else {
2882 false
2883 }
2884}
2885#[cold]
2886fn token_slow(s: &mut &[u8], mut token: &'static [u8], escape_byte: u8) -> bool {
2887 debug_assert!(!token.is_empty() && token.iter().all(|&n| n & TO_UPPER8 == n));
2888 if s.len() < token.len() {
2889 return false;
2890 }
2891 let mut tmp = *s;
2892 while let Some((&b, tmp_next)) = tmp.split_first() {
2893 if b & TO_UPPER8 == token[0] {
2894 tmp = tmp_next;
2895 token = &token[1..];
2896 if token.is_empty() {
2897 *s = tmp;
2898 return true;
2899 }
2900 continue;
2901 }
2902 if consume_line_continuation(&mut tmp, b, tmp_next, escape_byte) {
2903 continue;
2904 }
2905 break;
2906 }
2907 false
2908}
2909
/// Byte mask that clears bit `0x20`, mapping ASCII lowercase letters onto
/// their uppercase counterparts.
const TO_UPPER8: u8 = 0xDF;
/// [`TO_UPPER8`] replicated over the eight bytes of a `u64`.
const TO_UPPER64: u64 = 0xDFDF_DFDF_DFDF_DFDF;

/// Returns `true` if `s` starts with `needle`, comparing every byte of `s`
/// after masking it with `TO_UPPER8`.
///
/// `needle` must be non-empty and unchanged by the mask (checked in debug
/// builds only). The comparison proceeds eight bytes at a time, with the
/// final partial group zero-padded on both sides.
#[inline(always)]
fn starts_with_ignore_ascii_case(mut s: &[u8], mut needle: &'static [u8]) -> bool {
    /// Loads up to eight bytes into a `u64`, zero-padding the remainder.
    #[inline(always)]
    fn load_padded(bytes: &[u8]) -> u64 {
        let mut word = [0_u8; 8];
        word[..bytes.len()].copy_from_slice(bytes);
        u64::from_ne_bytes(word)
    }

    debug_assert!(!needle.is_empty() && needle.iter().all(|&n| n & TO_UPPER8 == n));
    if s.len() < needle.len() {
        return false;
    }
    if let [only] = needle {
        return *only == s[0] & TO_UPPER8;
    }
    // Full eight-byte groups.
    while needle.len() >= 8 {
        let (needle_word, needle_rest) = needle.split_at(8);
        let (s_word, s_rest) = s.split_at(8);
        if load_padded(needle_word) != load_padded(s_word) & TO_UPPER64 {
            return false;
        }
        needle = needle_rest;
        s = s_rest;
    }
    if needle.is_empty() {
        return true;
    }
    // Remaining one to seven bytes.
    load_padded(needle) == load_padded(&s[..needle.len()]) & TO_UPPER64
}
/// Exercises `starts_with_ignore_ascii_case` with needles shorter than,
/// equal to and longer than one eight-byte group.
#[test]
fn test_starts_with_ignore_ascii_case() {
    const SHORT: &[u8] = b"ABC";
    const WORD: &[u8] = b"ABCDEFGH";
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    // Needle shorter than eight bytes.
    for haystack in [&b"ABC"[..], &b"abc"[..], &b"AbC"[..]] {
        assert!(starts_with_ignore_ascii_case(haystack, SHORT));
    }
    assert!(!starts_with_ignore_ascii_case(b"ABB", SHORT));

    // Needle of exactly eight bytes.
    for haystack in [&b"ABCDEFGH"[..], &b"abcdefgh"[..], &b"AbCdEfGh"[..]] {
        assert!(starts_with_ignore_ascii_case(haystack, WORD));
    }
    assert!(!starts_with_ignore_ascii_case(b"ABCDEFGc", WORD));

    // Needle spanning several eight-byte groups plus a remainder.
    for haystack in [
        &b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[..],
        &b"abcdefghijklmnopqrstuvwxyz"[..],
        &b"aBcDeFgHiJkLmNoPqRsTuVwXyZ"[..],
    ] {
        assert!(starts_with_ignore_ascii_case(haystack, ALPHABET));
    }
    assert!(!starts_with_ignore_ascii_case(b"aBcDeFgHiJkLmNoPqRsTuVwXyc", ALPHABET));
}