// oak_dockerfile/lexer/mod.rs
#![doc = include_str!("readme.md")]
pub mod token_type;

use crate::{language::DockerfileLanguage, lexer::token_type::DockerfileTokenType};
use oak_core::{
    Lexer, LexerCache, LexerState, OakError, Source, TextEdit,
    lexer::{LexOutput, WhitespaceConfig},
};
use std::sync::LazyLock;

/// Lexer state specialized for the Dockerfile language; `S` is the source-text implementation.
pub(crate) type State<'a, S> = LexerState<'a, S, DockerfileLanguage>;

/// Shared whitespace-scanner configuration; `unicode_whitespace: true` makes the
/// scanner also treat non-ASCII Unicode whitespace as skippable.
static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
14
/// Lexer for Dockerfile files.
///
/// Borrows the language configuration for `'config`; `config` is stored but not
/// read by any scanning method in this module (kept for future options).
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    config: &'config DockerfileLanguage,
}
20
21impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
22    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DockerfileLanguage>) -> LexOutput<DockerfileLanguage> {
23        let mut state = State::new(text);
24        let result = self.run(&mut state);
25        if result.is_ok() {
26            state.add_eof()
27        }
28        state.finish_with_cache(result, cache)
29    }
30}
31
32impl<'config> DockerfileLexer<'config> {
    /// Creates a new `DockerfileLexer` with the given configuration.
    ///
    /// `config` is the language definition this lexer borrows for its lifetime.
    pub fn new(config: &'config DockerfileLanguage) -> Self {
        Self { config }
    }
37
38    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
39        while state.not_at_end() {
40            let safe_point = state.get_position();
41
42            if self.skip_whitespace(state) {
43                continue;
44            }
45
46            if self.lex_newline(state) {
47                continue;
48            }
49
50            if self.lex_comment(state) {
51                continue;
52            }
53
54            if self.lex_identifier_or_instruction(state) {
55                continue;
56            }
57
58            if self.lex_number(state) {
59                continue;
60            }
61
62            if self.lex_string(state) {
63                continue;
64            }
65
66            if self.lex_path(state) {
67                continue;
68            }
69
70            if self.lex_operators_and_delimiters(state) {
71                continue;
72            }
73
74            if self.lex_other(state) {
75                continue;
76            }
77
78            state.advance_if_dead_lock(safe_point)
79        }
80
81        Ok(())
82    }
83
    /// Skips whitespace characters by delegating to the shared whitespace
    /// configuration, emitting a `Whitespace` token.
    ///
    /// NOTE(review): the returned bool is assumed to be "consumed something" —
    /// confirm against `WhitespaceConfig::scan` in oak_core.
    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        DOCKERFILE_WHITESPACE.scan(state, DockerfileTokenType::Whitespace)
    }
88
89    /// Handles newline characters.
90    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
91        let start = state.get_position();
92        if let Some(ch) = state.peek() {
93            if ch == '\n' {
94                state.advance(1);
95                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
96                return true;
97            }
98            else if ch == '\r' {
99                state.advance(1);
100                if state.peek() == Some('\n') {
101                    state.advance(1)
102                }
103                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
104                return true;
105            }
106        }
107        false
108    }
109
110    /// Handles comments
111    fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
112        let start = state.get_position();
113        if state.peek() == Some('#') {
114            state.advance(1);
115            while let Some(ch) = state.peek() {
116                if ch == '\n' || ch == '\r' {
117                    break;
118                }
119                state.advance(ch.len_utf8())
120            }
121            state.add_token(DockerfileTokenType::Comment, start, state.get_position());
122            return true;
123        }
124        false
125    }
126
127    /// Handles identifiers or instructions
128    fn lex_identifier_or_instruction<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
129        let start = state.get_position();
130        if let Some(ch) = state.peek() {
131            if ch.is_ascii_alphabetic() || ch == '_' {
132                state.advance(ch.len_utf8());
133
134                while let Some(ch) = state.peek() {
135                    if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
136                }
137
138                let end_pos = state.get_position();
139                let text = state.get_source().get_text_in((start..end_pos).into());
140
141                // Check if it's a Dockerfile instruction
142                let kind = match text.to_uppercase().as_str() {
143                    "FROM" => DockerfileTokenType::From,
144                    "RUN" => DockerfileTokenType::Run,
145                    "CMD" => DockerfileTokenType::Cmd,
146                    "LABEL" => DockerfileTokenType::Label,
147                    "EXPOSE" => DockerfileTokenType::Expose,
148                    "ENV" => DockerfileTokenType::Env,
149                    "ADD" => DockerfileTokenType::Add,
150                    "COPY" => DockerfileTokenType::Copy,
151                    "ENTRYPOINT" => DockerfileTokenType::Entrypoint,
152                    "VOLUME" => DockerfileTokenType::Volume,
153                    "USER" => DockerfileTokenType::User,
154                    "WORKDIR" => DockerfileTokenType::Workdir,
155                    "ARG" => DockerfileTokenType::Arg,
156                    "ONBUILD" => DockerfileTokenType::Onbuild,
157                    "STOPSIGNAL" => DockerfileTokenType::Stopsignal,
158                    "HEALTHCHECK" => DockerfileTokenType::Healthcheck,
159                    "SHELL" => DockerfileTokenType::Shell,
160                    "MAINTAINER" => DockerfileTokenType::Maintainer,
161                    "AS" => DockerfileTokenType::As,
162                    "NONE" => DockerfileTokenType::None,
163                    "INTERVAL" => DockerfileTokenType::Interval,
164                    "TIMEOUT" => DockerfileTokenType::Timeout,
165                    "START_PERIOD" => DockerfileTokenType::StartPeriod,
166                    "RETRIES" => DockerfileTokenType::Retries,
167                    _ => DockerfileTokenType::Identifier,
168                };
169
170                state.add_token(kind, start, end_pos);
171                return true;
172            }
173        }
174        false
175    }
176
177    /// Handles numbers
178    fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
179        let start = state.get_position();
180        if let Some(ch) = state.peek() {
181            if ch.is_ascii_digit() {
182                state.advance(1);
183
184                while let Some(ch) = state.peek() {
185                    if ch.is_ascii_digit() || ch == '.' { state.advance(1) } else { break }
186                }
187
188                state.add_token(DockerfileTokenType::Number, start, state.get_position());
189                return true;
190            }
191        }
192        false
193    }
194
195    /// Handles strings
196    fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
197        let start = state.get_position();
198        if let Some(quote) = state.peek() {
199            if quote == '"' || quote == '\'' {
200                state.advance(1);
201
202                while let Some(ch) = state.peek() {
203                    if ch == quote {
204                        state.advance(1);
205                        break;
206                    }
207                    else if ch == '\\' {
208                        state.advance(1);
209                        if state.peek().is_some() {
210                            state.advance(1)
211                        }
212                    }
213                    else {
214                        state.advance(ch.len_utf8())
215                    }
216                }
217
218                state.add_token(DockerfileTokenType::String, start, state.get_position());
219                return true;
220            }
221        }
222        false
223    }
224
225    /// Lexes paths.
226    fn lex_path<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
227        let start = state.get_position();
228        if let Some(ch) = state.peek() {
229            if ch == '/' || ch == '.' {
230                state.advance(1);
231
232                while let Some(ch) = state.peek() {
233                    if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' { state.advance(1) } else { break }
234                }
235
236                state.add_token(DockerfileTokenType::Path, start, state.get_position());
237                return true;
238            }
239        }
240        false
241    }
242
243    /// Lexes operators and delimiters.
244    fn lex_operators_and_delimiters<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
245        let start = state.get_position();
246        if let Some(ch) = state.peek() {
247            let kind = match ch {
248                '=' => DockerfileTokenType::Equal,
249                ':' => DockerfileTokenType::Colon,
250                '{' => DockerfileTokenType::LeftBrace,
251                '}' => DockerfileTokenType::RightBrace,
252                '[' => DockerfileTokenType::LeftBracket,
253                ']' => DockerfileTokenType::RightBracket,
254                '(' => DockerfileTokenType::LeftParen,
255                ')' => DockerfileTokenType::RightParen,
256                ',' => DockerfileTokenType::Comma,
257                ';' => DockerfileTokenType::Semicolon,
258                '$' => DockerfileTokenType::Dollar,
259                _ => return false,
260            };
261
262            state.advance(1);
263            state.add_token(kind, start, state.get_position());
264            return true;
265        }
266        false
267    }
268
269    /// Lexes other characters.
270    fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
271        let start = state.get_position();
272        if let Some(ch) = state.peek() {
273            state.advance(ch.len_utf8());
274            state.add_token(DockerfileTokenType::Error, start, state.get_position());
275            return true;
276        }
277        false
278    }
279}