1use crate::{kind::DockerfileSyntaxKind, language::DockerfileLanguage};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
/// Shorthand for the generic lexer state specialized to the Dockerfile language.
type State<S> = LexerState<S, DockerfileLanguage>;
10
/// Shared, lazily-initialized whitespace scanner used by `skip_whitespace`.
/// NOTE(review): `unicode_whitespace: true` presumably enables Unicode (not
/// just ASCII) whitespace recognition — confirm against `WhitespaceConfig`.
static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12
/// Incremental lexer for Dockerfile sources.
///
/// Borrows the language configuration for the lifetime `'config`.
/// NOTE(review): `config` is stored but never read by the lexing routines in
/// this file — whitespace uses the `DOCKERFILE_WHITESPACE` static instead.
/// Confirm whether the field is still required by callers/future code.
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    // Language configuration supplied at construction time.
    config: &'config DockerfileLanguage,
}
17
18impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
19 fn lex_incremental(
20 &self,
21 source: impl Source,
22 changed: usize,
23 cache: IncrementalCache<DockerfileLanguage>,
24 ) -> LexOutput<DockerfileLanguage> {
25 let mut state = LexerState::new_with_cache(source, changed, cache);
26 let result = self.run(&mut state);
27 state.finish(result)
28 }
29}
30
31impl<'config> DockerfileLexer<'config> {
32 pub fn new(config: &'config DockerfileLanguage) -> Self {
33 Self { config }
34 }
35
36 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
37 while state.not_at_end() {
38 let safe_point = state.get_position();
39
40 if self.skip_whitespace(state) {
41 continue;
42 }
43
44 if self.lex_newline(state) {
45 continue;
46 }
47
48 if self.lex_comment(state) {
49 continue;
50 }
51
52 if self.lex_identifier_or_instruction(state) {
53 continue;
54 }
55
56 if self.lex_number(state) {
57 continue;
58 }
59
60 if self.lex_string(state) {
61 continue;
62 }
63
64 if self.lex_path(state) {
65 continue;
66 }
67
68 if self.lex_operators_and_delimiters(state) {
69 continue;
70 }
71
72 if self.lex_other(state) {
73 continue;
74 }
75
76 state.safe_check(safe_point);
77 }
78
79 let eof_pos = state.get_position();
81 state.add_token(DockerfileSyntaxKind::Eof, eof_pos, eof_pos);
82 Ok(())
83 }
84
85 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
87 match DOCKERFILE_WHITESPACE.scan(state.rest(), state.get_position(), DockerfileSyntaxKind::Whitespace) {
88 Some(token) => {
89 state.advance_with(token);
90 true
91 }
92 None => false,
93 }
94 }
95
96 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
98 let start = state.get_position();
99 if let Some(ch) = state.peek() {
100 if ch == '\n' {
101 state.advance(1);
102 state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
103 return true;
104 }
105 else if ch == '\r' {
106 state.advance(1);
107 if state.peek() == Some('\n') {
108 state.advance(1);
109 }
110 state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
111 return true;
112 }
113 }
114 false
115 }
116
117 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
119 let start = state.get_position();
120 if state.peek() == Some('#') {
121 state.advance(1);
122 while let Some(ch) = state.peek() {
123 if ch == '\n' || ch == '\r' {
124 break;
125 }
126 state.advance(ch.len_utf8());
127 }
128 state.add_token(DockerfileSyntaxKind::Comment, start, state.get_position());
129 return true;
130 }
131 false
132 }
133
134 fn lex_identifier_or_instruction<S: Source>(&self, state: &mut State<S>) -> bool {
136 let start = state.get_position();
137 if let Some(ch) = state.peek() {
138 if ch.is_ascii_alphabetic() || ch == '_' {
139 state.advance(ch.len_utf8());
140
141 while let Some(ch) = state.peek() {
142 if ch.is_ascii_alphanumeric() || ch == '_' {
143 state.advance(ch.len_utf8());
144 }
145 else {
146 break;
147 }
148 }
149
150 let end_pos = state.get_position();
151 let text = state.get_text_in((start..end_pos).into());
152
153 let kind = match text.to_uppercase().as_str() {
155 "FROM" => DockerfileSyntaxKind::From,
156 "RUN" => DockerfileSyntaxKind::Run,
157 "CMD" => DockerfileSyntaxKind::Cmd,
158 "LABEL" => DockerfileSyntaxKind::Label,
159 "EXPOSE" => DockerfileSyntaxKind::Expose,
160 "ENV" => DockerfileSyntaxKind::Env,
161 "ADD" => DockerfileSyntaxKind::Add,
162 "COPY" => DockerfileSyntaxKind::Copy,
163 "ENTRYPOINT" => DockerfileSyntaxKind::Entrypoint,
164 "VOLUME" => DockerfileSyntaxKind::Volume,
165 "USER" => DockerfileSyntaxKind::User,
166 "WORKDIR" => DockerfileSyntaxKind::Workdir,
167 "ARG" => DockerfileSyntaxKind::Arg,
168 "ONBUILD" => DockerfileSyntaxKind::Onbuild,
169 "STOPSIGNAL" => DockerfileSyntaxKind::Stopsignal,
170 "HEALTHCHECK" => DockerfileSyntaxKind::Healthcheck,
171 "SHELL" => DockerfileSyntaxKind::Shell,
172 "MAINTAINER" => DockerfileSyntaxKind::Maintainer,
173 "AS" => DockerfileSyntaxKind::As,
174 "NONE" => DockerfileSyntaxKind::None,
175 "INTERVAL" => DockerfileSyntaxKind::Interval,
176 "TIMEOUT" => DockerfileSyntaxKind::Timeout,
177 "START_PERIOD" => DockerfileSyntaxKind::StartPeriod,
178 "RETRIES" => DockerfileSyntaxKind::Retries,
179 _ => DockerfileSyntaxKind::Identifier,
180 };
181
182 state.add_token(kind, start, end_pos);
183 return true;
184 }
185 }
186 false
187 }
188
189 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
191 let start = state.get_position();
192 if let Some(ch) = state.peek() {
193 if ch.is_ascii_digit() {
194 state.advance(1);
195
196 while let Some(ch) = state.peek() {
197 if ch.is_ascii_digit() || ch == '.' {
198 state.advance(1);
199 }
200 else {
201 break;
202 }
203 }
204
205 state.add_token(DockerfileSyntaxKind::Number, start, state.get_position());
206 return true;
207 }
208 }
209 false
210 }
211
212 fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
214 let start = state.get_position();
215 if let Some(quote) = state.peek() {
216 if quote == '"' || quote == '\'' {
217 state.advance(1);
218
219 while let Some(ch) = state.peek() {
220 if ch == quote {
221 state.advance(1);
222 break;
223 }
224 else if ch == '\\' {
225 state.advance(1);
226 if state.peek().is_some() {
227 state.advance(1);
228 }
229 }
230 else {
231 state.advance(ch.len_utf8());
232 }
233 }
234
235 state.add_token(DockerfileSyntaxKind::String, start, state.get_position());
236 return true;
237 }
238 }
239 false
240 }
241
242 fn lex_path<S: Source>(&self, state: &mut State<S>) -> bool {
244 let start = state.get_position();
245 if let Some(ch) = state.peek() {
246 if ch == '/' || ch == '.' {
247 state.advance(1);
248
249 while let Some(ch) = state.peek() {
250 if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' {
251 state.advance(1);
252 }
253 else {
254 break;
255 }
256 }
257
258 state.add_token(DockerfileSyntaxKind::Path, start, state.get_position());
259 return true;
260 }
261 }
262 false
263 }
264
265 fn lex_operators_and_delimiters<S: Source>(&self, state: &mut State<S>) -> bool {
267 let start = state.get_position();
268 if let Some(ch) = state.peek() {
269 let kind = match ch {
270 '=' => DockerfileSyntaxKind::Equal,
271 ':' => DockerfileSyntaxKind::Colon,
272 '{' => DockerfileSyntaxKind::LeftBrace,
273 '}' => DockerfileSyntaxKind::RightBrace,
274 '[' => DockerfileSyntaxKind::LeftBracket,
275 ']' => DockerfileSyntaxKind::RightBracket,
276 '(' => DockerfileSyntaxKind::LeftParen,
277 ')' => DockerfileSyntaxKind::RightParen,
278 ',' => DockerfileSyntaxKind::Comma,
279 ';' => DockerfileSyntaxKind::Semicolon,
280 '$' => DockerfileSyntaxKind::Dollar,
281 _ => return false,
282 };
283
284 state.advance(1);
285 state.add_token(kind, start, state.get_position());
286 return true;
287 }
288 false
289 }
290
291 fn lex_other<S: Source>(&self, state: &mut State<S>) -> bool {
293 let start = state.get_position();
294 if let Some(ch) = state.peek() {
295 state.advance(ch.len_utf8());
296 state.add_token(DockerfileSyntaxKind::Error, start, state.get_position());
297 return true;
298 }
299 false
300 }
301}