1use crate::{kind::DockerfileSyntaxKind, language::DockerfileLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError, TextEdit,
4 lexer::{LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
/// Convenience alias: lexer state specialised to the Dockerfile language.
type State<'a, S> = LexerState<'a, S, DockerfileLanguage>;

/// Shared whitespace-scanning configuration; `unicode_whitespace: true` makes
/// the scanner treat Unicode whitespace (not only ASCII space/tab) as skippable.
static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12
/// Hand-written lexer for Dockerfiles.
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    // NOTE(review): not read anywhere in this file — apparently retained so the
    // constructor can accept a language config without breaking callers if
    // configurable options are added later.
    _config: &'config DockerfileLanguage,
}
17
18impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DockerfileLanguage>) -> LexOutput<DockerfileLanguage> {
20 let mut state = State::new(text);
21 let result = self.run(&mut state);
22 if result.is_ok() {
23 state.add_eof();
24 }
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl<'config> DockerfileLexer<'config> {
    /// Creates a lexer borrowing the given language configuration.
    pub fn new(config: &'config DockerfileLanguage) -> Self {
        Self { _config: config }
    }
33
34 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
35 while state.not_at_end() {
36 let safe_point = state.get_position();
37
38 if self.skip_whitespace(state) {
39 continue;
40 }
41
42 if self.lex_newline(state) {
43 continue;
44 }
45
46 if self.lex_comment(state) {
47 continue;
48 }
49
50 if self.lex_identifier_or_instruction(state) {
51 continue;
52 }
53
54 if self.lex_number(state) {
55 continue;
56 }
57
58 if self.lex_string(state) {
59 continue;
60 }
61
62 if self.lex_path(state) {
63 continue;
64 }
65
66 if self.lex_operators_and_delimiters(state) {
67 continue;
68 }
69
70 if self.lex_other(state) {
71 continue;
72 }
73
74 state.advance_if_dead_lock(safe_point);
75 }
76
77 Ok(())
78 }
79
    /// Consumes a run of whitespace into a single `Whitespace` token using the
    /// shared Unicode-aware configuration; returns whether anything was consumed.
    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        DOCKERFILE_WHITESPACE.scan(state, DockerfileSyntaxKind::Whitespace)
    }
84
85 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
87 let start = state.get_position();
88 if let Some(ch) = state.peek() {
89 if ch == '\n' {
90 state.advance(1);
91 state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
92 return true;
93 }
94 else if ch == '\r' {
95 state.advance(1);
96 if state.peek() == Some('\n') {
97 state.advance(1);
98 }
99 state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
100 return true;
101 }
102 }
103 false
104 }
105
106 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
108 let start = state.get_position();
109 if state.peek() == Some('#') {
110 state.advance(1);
111 while let Some(ch) = state.peek() {
112 if ch == '\n' || ch == '\r' {
113 break;
114 }
115 state.advance(ch.len_utf8());
116 }
117 state.add_token(DockerfileSyntaxKind::Comment, start, state.get_position());
118 return true;
119 }
120 false
121 }
122
123 fn lex_identifier_or_instruction<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
125 let start = state.get_position();
126 if let Some(ch) = state.peek() {
127 if ch.is_ascii_alphabetic() || ch == '_' {
128 state.advance(ch.len_utf8());
129
130 while let Some(ch) = state.peek() {
131 if ch.is_ascii_alphanumeric() || ch == '_' {
132 state.advance(ch.len_utf8());
133 }
134 else {
135 break;
136 }
137 }
138
139 let end_pos = state.get_position();
140 let text = state.get_text_in((start..end_pos).into());
141
142 let kind = match text.to_uppercase().as_str() {
144 "FROM" => DockerfileSyntaxKind::From,
145 "RUN" => DockerfileSyntaxKind::Run,
146 "CMD" => DockerfileSyntaxKind::Cmd,
147 "LABEL" => DockerfileSyntaxKind::Label,
148 "EXPOSE" => DockerfileSyntaxKind::Expose,
149 "ENV" => DockerfileSyntaxKind::Env,
150 "ADD" => DockerfileSyntaxKind::Add,
151 "COPY" => DockerfileSyntaxKind::Copy,
152 "ENTRYPOINT" => DockerfileSyntaxKind::Entrypoint,
153 "VOLUME" => DockerfileSyntaxKind::Volume,
154 "USER" => DockerfileSyntaxKind::User,
155 "WORKDIR" => DockerfileSyntaxKind::Workdir,
156 "ARG" => DockerfileSyntaxKind::Arg,
157 "ONBUILD" => DockerfileSyntaxKind::Onbuild,
158 "STOPSIGNAL" => DockerfileSyntaxKind::Stopsignal,
159 "HEALTHCHECK" => DockerfileSyntaxKind::Healthcheck,
160 "SHELL" => DockerfileSyntaxKind::Shell,
161 "MAINTAINER" => DockerfileSyntaxKind::Maintainer,
162 "AS" => DockerfileSyntaxKind::As,
163 "NONE" => DockerfileSyntaxKind::None,
164 "INTERVAL" => DockerfileSyntaxKind::Interval,
165 "TIMEOUT" => DockerfileSyntaxKind::Timeout,
166 "START_PERIOD" => DockerfileSyntaxKind::StartPeriod,
167 "RETRIES" => DockerfileSyntaxKind::Retries,
168 _ => DockerfileSyntaxKind::Identifier,
169 };
170
171 state.add_token(kind, start, end_pos);
172 return true;
173 }
174 }
175 false
176 }
177
178 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
180 let start = state.get_position();
181 if let Some(ch) = state.peek() {
182 if ch.is_ascii_digit() {
183 state.advance(1);
184
185 while let Some(ch) = state.peek() {
186 if ch.is_ascii_digit() || ch == '.' {
187 state.advance(1);
188 }
189 else {
190 break;
191 }
192 }
193
194 state.add_token(DockerfileSyntaxKind::Number, start, state.get_position());
195 return true;
196 }
197 }
198 false
199 }
200
201 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
203 let start = state.get_position();
204 if let Some(quote) = state.peek() {
205 if quote == '"' || quote == '\'' {
206 state.advance(1);
207
208 while let Some(ch) = state.peek() {
209 if ch == quote {
210 state.advance(1);
211 break;
212 }
213 else if ch == '\\' {
214 state.advance(1);
215 if state.peek().is_some() {
216 state.advance(1);
217 }
218 }
219 else {
220 state.advance(ch.len_utf8());
221 }
222 }
223
224 state.add_token(DockerfileSyntaxKind::String, start, state.get_position());
225 return true;
226 }
227 }
228 false
229 }
230
231 fn lex_path<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
233 let start = state.get_position();
234 if let Some(ch) = state.peek() {
235 if ch == '/' || ch == '.' {
236 state.advance(1);
237
238 while let Some(ch) = state.peek() {
239 if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' {
240 state.advance(1);
241 }
242 else {
243 break;
244 }
245 }
246
247 state.add_token(DockerfileSyntaxKind::Path, start, state.get_position());
248 return true;
249 }
250 }
251 false
252 }
253
254 fn lex_operators_and_delimiters<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
256 let start = state.get_position();
257 if let Some(ch) = state.peek() {
258 let kind = match ch {
259 '=' => DockerfileSyntaxKind::Equal,
260 ':' => DockerfileSyntaxKind::Colon,
261 '{' => DockerfileSyntaxKind::LeftBrace,
262 '}' => DockerfileSyntaxKind::RightBrace,
263 '[' => DockerfileSyntaxKind::LeftBracket,
264 ']' => DockerfileSyntaxKind::RightBracket,
265 '(' => DockerfileSyntaxKind::LeftParen,
266 ')' => DockerfileSyntaxKind::RightParen,
267 ',' => DockerfileSyntaxKind::Comma,
268 ';' => DockerfileSyntaxKind::Semicolon,
269 '$' => DockerfileSyntaxKind::Dollar,
270 _ => return false,
271 };
272
273 state.advance(1);
274 state.add_token(kind, start, state.get_position());
275 return true;
276 }
277 false
278 }
279
280 fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
282 let start = state.get_position();
283 if let Some(ch) = state.peek() {
284 state.advance(ch.len_utf8());
285 state.add_token(DockerfileSyntaxKind::Error, start, state.get_position());
286 return true;
287 }
288 false
289 }
290}