#![doc = include_str!("readme.md")]
pub mod token_type;

use crate::{language::DockerfileLanguage, lexer::token_type::DockerfileTokenType};
use oak_core::{
    Lexer, LexerCache, LexerState, OakError, Source, TextEdit,
    lexer::{LexOutput, WhitespaceConfig},
};
use std::sync::LazyLock;

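/// Lexer state specialised to [`DockerfileLanguage`].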
pub(crate) type State<'a, S> = LexerState<'a, S, DockerfileLanguage>;

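/// Shared whitespace configuration; `unicode_whitespace: true` presumably makes the scanner accept Unicode whitespace in addition to ASCII.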
static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });

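/// A Dockerfile lexer that borrows its language configuration.
///
/// A minimal construction sketch (assumes `DockerfileLanguage` implements `Default`; adjust to the
/// real constructor in this crate):
///
/// ```ignore
/// let language = DockerfileLanguage::default();
/// let lexer = DockerfileLexer::new(&language);
/// ```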
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    config: &'config DockerfileLanguage,
}

impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
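    /// Performs a full lex of `text`; the `_edits` parameter is currently ignored, so every call re-lexes the whole source.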
    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DockerfileLanguage>) -> LexOutput<DockerfileLanguage> {
        let mut state = State::new(text);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, cache)
    }
}

impl<'config> DockerfileLexer<'config> {
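    /// Creates a lexer borrowing the given language configuration.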
    pub fn new(config: &'config DockerfileLanguage) -> Self {
        Self { config }
    }

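    /// Main loop: tries each token class in priority order and, if nothing matched, relies on
    /// `advance_if_dead_lock` to force progress past the saved `safe_point` rather than spinning forever.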
    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
        while state.not_at_end() {
            let safe_point = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.lex_newline(state) {
                continue;
            }

            if self.lex_comment(state) {
                continue;
            }

            if self.lex_identifier_or_instruction(state) {
                continue;
            }

            if self.lex_number(state) {
                continue;
            }

            if self.lex_string(state) {
                continue;
            }

            if self.lex_path(state) {
                continue;
            }

            if self.lex_operators_and_delimiters(state) {
                continue;
            }

            if self.lex_other(state) {
                continue;
            }

            state.advance_if_dead_lock(safe_point)
        }

        Ok(())
    }

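    /// Delegates to the shared whitespace configuration to scan a `Whitespace` token; returns `true` when whitespace was consumed.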
    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        DOCKERFILE_WHITESPACE.scan(state, DockerfileTokenType::Whitespace)
    }

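    /// Lexes a line terminator as a `Newline` token, treating `\n`, `\r\n`, and a bare `\r` each as one newline.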
    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch == '\n' {
                state.advance(1);
                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
                return true;
            }
            else if ch == '\r' {
                state.advance(1);
                if state.peek() == Some('\n') {
                    state.advance(1)
                }
                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
                return true;
            }
        }
        false
    }

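    /// Lexes a `#` comment running to the end of the line; the terminating newline is left for `lex_newline`.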
    fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if state.peek() == Some('#') {
            state.advance(1);
            while let Some(ch) = state.peek() {
                if ch == '\n' || ch == '\r' {
                    break;
                }
                state.advance(ch.len_utf8())
            }
            state.add_token(DockerfileTokenType::Comment, start, state.get_position());
            return true;
        }
        false
    }

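    /// Lexes an identifier (`[A-Za-z_][A-Za-z0-9_]*`) and promotes it to an instruction or keyword token
    /// (`FROM`, `RUN`, `COPY`, `AS`, ...) when it matches case-insensitively; otherwise it stays an `Identifier`.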
    fn lex_identifier_or_instruction<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch.is_ascii_alphabetic() || ch == '_' {
                state.advance(ch.len_utf8());

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
                }

                let end_pos = state.get_position();
                let text = state.get_source().get_text_in((start..end_pos).into());

                let kind = match text.to_uppercase().as_str() {
                    "FROM" => DockerfileTokenType::From,
                    "RUN" => DockerfileTokenType::Run,
                    "CMD" => DockerfileTokenType::Cmd,
                    "LABEL" => DockerfileTokenType::Label,
                    "EXPOSE" => DockerfileTokenType::Expose,
                    "ENV" => DockerfileTokenType::Env,
                    "ADD" => DockerfileTokenType::Add,
                    "COPY" => DockerfileTokenType::Copy,
                    "ENTRYPOINT" => DockerfileTokenType::Entrypoint,
                    "VOLUME" => DockerfileTokenType::Volume,
                    "USER" => DockerfileTokenType::User,
                    "WORKDIR" => DockerfileTokenType::Workdir,
                    "ARG" => DockerfileTokenType::Arg,
                    "ONBUILD" => DockerfileTokenType::Onbuild,
                    "STOPSIGNAL" => DockerfileTokenType::Stopsignal,
                    "HEALTHCHECK" => DockerfileTokenType::Healthcheck,
                    "SHELL" => DockerfileTokenType::Shell,
                    "MAINTAINER" => DockerfileTokenType::Maintainer,
                    "AS" => DockerfileTokenType::As,
                    "NONE" => DockerfileTokenType::None,
                    "INTERVAL" => DockerfileTokenType::Interval,
                    "TIMEOUT" => DockerfileTokenType::Timeout,
                    "START_PERIOD" => DockerfileTokenType::StartPeriod,
                    "RETRIES" => DockerfileTokenType::Retries,
                    _ => DockerfileTokenType::Identifier,
                };

                state.add_token(kind, start, end_pos);
                return true;
            }
        }
        false
    }

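    /// Lexes a run of ASCII digits and dots as a `Number` token, e.g. `80` or `1.2.3`.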
    fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch.is_ascii_digit() {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_digit() || ch == '.' { state.advance(1) } else { break }
                }

                state.add_token(DockerfileTokenType::Number, start, state.get_position());
                return true;
            }
        }
        false
    }

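    /// Lexes a single- or double-quoted string with backslash escapes; an unterminated string still yields a `String` token ending at end of input.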
    fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(quote) = state.peek() {
            if quote == '"' || quote == '\'' {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch == quote {
                        state.advance(1);
                        break;
                    }
                    else if ch == '\\' {
                        state.advance(1);
                        // Skip the escaped character by its full UTF-8 width, matching the non-escape branch.
                        if let Some(escaped) = state.peek() {
                            state.advance(escaped.len_utf8())
                        }
                    }
                    else {
                        state.advance(ch.len_utf8())
                    }
                }

                state.add_token(DockerfileTokenType::String, start, state.get_position());
                return true;
            }
        }
        false
    }

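    /// Lexes a path starting with `/` or `.` (e.g. `/usr/bin`, `./app`), continuing over alphanumerics, `/`, `.`, `-`, and `_`.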
    fn lex_path<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch == '/' || ch == '.' {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' { state.advance(1) } else { break }
                }

                state.add_token(DockerfileTokenType::Path, start, state.get_position());
                return true;
            }
        }
        false
    }

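    /// Lexes a single-character operator or delimiter: `=`, `:`, braces, brackets, parentheses, `,`, `;`, or `$`.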
    fn lex_operators_and_delimiters<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            let kind = match ch {
                '=' => DockerfileTokenType::Equal,
                ':' => DockerfileTokenType::Colon,
                '{' => DockerfileTokenType::LeftBrace,
                '}' => DockerfileTokenType::RightBrace,
                '[' => DockerfileTokenType::LeftBracket,
                ']' => DockerfileTokenType::RightBracket,
                '(' => DockerfileTokenType::LeftParen,
                ')' => DockerfileTokenType::RightParen,
                ',' => DockerfileTokenType::Comma,
                ';' => DockerfileTokenType::Semicolon,
                '$' => DockerfileTokenType::Dollar,
                _ => return false,
            };

            state.advance(1);
            state.add_token(kind, start, state.get_position());
            return true;
        }
        false
    }

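    /// Fallback for anything no other rule matched: consumes one character and records it as an `Error` token so lexing always makes progress.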
    fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            state.advance(ch.len_utf8());
            state.add_token(DockerfileTokenType::Error, start, state.get_position());
            return true;
        }
        false
    }
}