#![doc = include_str!("readme.md")]
pub mod token_type;

use crate::{language::DockerfileLanguage, lexer::token_type::DockerfileTokenType};
use oak_core::{
    Lexer, LexerCache, LexerState, OakError, Source, TextEdit,
    lexer::{LexOutput, WhitespaceConfig},
};
use std::sync::LazyLock;

type State<'a, S> = LexerState<'a, S, DockerfileLanguage>;

static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });

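/// A hand-written, single-pass lexer for Dockerfile source text.
///
/// Each call to [`Lexer::lex`] scans the input from the start, dispatching to
/// one scanning helper per token class (see `run`). The borrowed language
/// configuration is currently unused.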
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    _config: &'config DockerfileLanguage,
}

impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
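    /// Lexes `text` from scratch and finalizes the token stream through the
    /// provided cache. The edit list is currently ignored, so every call is a
    /// full (non-incremental) pass.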
    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DockerfileLanguage>) -> LexOutput<DockerfileLanguage> {
        let mut state = State::new(text);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, cache)
    }
}

impl<'config> DockerfileLexer<'config> {
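    /// Creates a lexer that borrows the given language configuration.
    ///
    /// A minimal usage sketch (marked `ignore` rather than compiled as a
    /// doc-test; it assumes `DockerfileLanguage` implements `Default`, that
    /// `str` implements `Source`, and some available `LexerCache`
    /// implementation):
    ///
    /// ```ignore
    /// let language = DockerfileLanguage::default(); // assumed constructor
    /// let lexer = DockerfileLexer::new(&language);
    /// let mut cache = /* any impl of LexerCache<DockerfileLanguage> */;
    /// let output = lexer.lex("FROM alpine:3.19\n", &[], &mut cache);
    /// ```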
    pub fn new(config: &'config DockerfileLanguage) -> Self {
        Self { _config: config }
    }

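    /// Main scanning loop: tries each token class in priority order. If no
    /// scanner matches, `advance_if_dead_lock` forces progress past the
    /// recorded safe point so the loop cannot spin on unexpected input.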
    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
        while state.not_at_end() {
            let safe_point = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.lex_newline(state) {
                continue;
            }

            if self.lex_comment(state) {
                continue;
            }

            if self.lex_identifier_or_instruction(state) {
                continue;
            }

            if self.lex_number(state) {
                continue;
            }

            if self.lex_string(state) {
                continue;
            }

            if self.lex_path(state) {
                continue;
            }

            if self.lex_operators_and_delimiters(state) {
                continue;
            }

            if self.lex_other(state) {
                continue;
            }

            state.advance_if_dead_lock(safe_point);
        }

        Ok(())
    }

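    /// Skips a run of whitespace by delegating to the shared
    /// `WhitespaceConfig` scanner; returns `true` if any input was consumed.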
    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        DOCKERFILE_WHITESPACE.scan(state, DockerfileTokenType::Whitespace)
    }

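    /// Emits a `Newline` token for `\n`, `\r`, or a `\r\n` pair.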
    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch == '\n' {
                state.advance(1);
                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
                return true;
            }
            else if ch == '\r' {
                state.advance(1);
                if state.peek() == Some('\n') {
                    state.advance(1);
                }
                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
                return true;
            }
        }
        false
    }

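    /// Emits a `Comment` token for a `#` comment running to the end of the
    /// line; the terminating newline is left for `lex_newline`.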
    fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if state.peek() == Some('#') {
            state.advance(1);
            while let Some(ch) = state.peek() {
                if ch == '\n' || ch == '\r' {
                    break;
                }
                state.advance(ch.len_utf8());
            }
            state.add_token(DockerfileTokenType::Comment, start, state.get_position());
            return true;
        }
        false
    }

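    /// Scans an `[A-Za-z_][A-Za-z0-9_]*` word and classifies it: Dockerfile
    /// instruction keywords are matched case-insensitively, and anything else
    /// becomes a plain `Identifier`.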
    fn lex_identifier_or_instruction<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch.is_ascii_alphabetic() || ch == '_' {
                state.advance(ch.len_utf8());

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
                }

                let end_pos = state.get_position();
                let text = state.get_text_in((start..end_pos).into());

                let kind = match text.to_uppercase().as_str() {
                    "FROM" => DockerfileTokenType::From,
                    "RUN" => DockerfileTokenType::Run,
                    "CMD" => DockerfileTokenType::Cmd,
                    "LABEL" => DockerfileTokenType::Label,
                    "EXPOSE" => DockerfileTokenType::Expose,
                    "ENV" => DockerfileTokenType::Env,
                    "ADD" => DockerfileTokenType::Add,
                    "COPY" => DockerfileTokenType::Copy,
                    "ENTRYPOINT" => DockerfileTokenType::Entrypoint,
                    "VOLUME" => DockerfileTokenType::Volume,
                    "USER" => DockerfileTokenType::User,
                    "WORKDIR" => DockerfileTokenType::Workdir,
                    "ARG" => DockerfileTokenType::Arg,
                    "ONBUILD" => DockerfileTokenType::Onbuild,
                    "STOPSIGNAL" => DockerfileTokenType::Stopsignal,
                    "HEALTHCHECK" => DockerfileTokenType::Healthcheck,
                    "SHELL" => DockerfileTokenType::Shell,
                    "MAINTAINER" => DockerfileTokenType::Maintainer,
                    "AS" => DockerfileTokenType::As,
                    "NONE" => DockerfileTokenType::None,
                    "INTERVAL" => DockerfileTokenType::Interval,
                    "TIMEOUT" => DockerfileTokenType::Timeout,
                    "START_PERIOD" => DockerfileTokenType::StartPeriod,
                    "RETRIES" => DockerfileTokenType::Retries,
                    _ => DockerfileTokenType::Identifier,
                };

                state.add_token(kind, start, end_pos);
                return true;
            }
        }
        false
    }

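    /// Scans a run of ASCII digits and dots as one `Number` token; the dot
    /// case also covers dotted version strings such as `3.19`.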
    fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch.is_ascii_digit() {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_digit() || ch == '.' { state.advance(1) } else { break }
                }

                state.add_token(DockerfileTokenType::Number, start, state.get_position());
                return true;
            }
        }
        false
    }

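    /// Scans a single- or double-quoted string with backslash escapes. An
    /// unterminated string still yields a `String` token running to the end
    /// of input.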
    fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(quote) = state.peek() {
            if quote == '"' || quote == '\'' {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch == quote {
                        state.advance(1);
                        break;
                    }
                    else if ch == '\\' {
                        state.advance(1);
                        // Skip the escaped character by its full UTF-8 width so a
                        // multi-byte character after `\` is not split.
                        if let Some(escaped) = state.peek() {
                            state.advance(escaped.len_utf8());
                        }
                    }
                    else {
                        state.advance(ch.len_utf8());
                    }
                }

                state.add_token(DockerfileTokenType::String, start, state.get_position());
                return true;
            }
        }
        false
    }

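    /// Scans a path-like token beginning with `/` or `.`, e.g.
    /// `/usr/local/bin` or `./app`.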
    fn lex_path<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch == '/' || ch == '.' {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' { state.advance(1) } else { break }
                }

                state.add_token(DockerfileTokenType::Path, start, state.get_position());
                return true;
            }
        }
        false
    }

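    /// Emits a single-character operator or delimiter token (`=`, `:`, `$`,
    /// commas, semicolons, and the bracket pairs).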
    fn lex_operators_and_delimiters<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            let kind = match ch {
                '=' => DockerfileTokenType::Equal,
                ':' => DockerfileTokenType::Colon,
                '{' => DockerfileTokenType::LeftBrace,
                '}' => DockerfileTokenType::RightBrace,
                '[' => DockerfileTokenType::LeftBracket,
                ']' => DockerfileTokenType::RightBracket,
                '(' => DockerfileTokenType::LeftParen,
                ')' => DockerfileTokenType::RightParen,
                ',' => DockerfileTokenType::Comma,
                ';' => DockerfileTokenType::Semicolon,
                '$' => DockerfileTokenType::Dollar,
                _ => return false,
            };

            state.advance(1);
            state.add_token(kind, start, state.get_position());
            return true;
        }
        false
    }

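    /// Fallback scanner: consumes one character of any width as an `Error`
    /// token, guaranteeing forward progress in the main loop.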
    fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            state.advance(ch.len_utf8());
            state.add_token(DockerfileTokenType::Error, start, state.get_position());
            return true;
        }
        false
    }
}