Skip to main content

oak_dockerfile/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::DockerfileLanguage, lexer::token_type::DockerfileTokenType};
5use oak_core::{
6    Lexer, LexerCache, LexerState, OakError, Source, TextEdit,
7    lexer::{LexOutput, WhitespaceConfig},
8};
9use std::sync::LazyLock;
10
/// Shorthand for the generic lexer state specialised to the Dockerfile language.
type State<'a, S> = LexerState<'a, S, DockerfileLanguage>;

/// Shared whitespace-scanning configuration, built lazily on first use.
/// Configured with `unicode_whitespace: true`, so the scanner is not limited
/// to ASCII space/tab characters.
static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
14
/// Hand-written lexer for Dockerfile sources.
///
/// Borrows the language configuration for its `'config` lifetime. The field is
/// stored but not read by any method in this file (hence the leading
/// underscore); presumably it is kept so the lexer's lifetime is tied to the
/// configuration — confirm against other oak_* lexers.
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    // Unused by the current implementation; see struct-level note.
    _config: &'config DockerfileLanguage,
}
19
20impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
21    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DockerfileLanguage>) -> LexOutput<DockerfileLanguage> {
22        let mut state = State::new(text);
23        let result = self.run(&mut state);
24        if result.is_ok() {
25            state.add_eof()
26        }
27        state.finish_with_cache(result, cache)
28    }
29}
30
31impl<'config> DockerfileLexer<'config> {
32    pub fn new(config: &'config DockerfileLanguage) -> Self {
33        Self { _config: config }
34    }
35
36    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
37        while state.not_at_end() {
38            let safe_point = state.get_position();
39
40            if self.skip_whitespace(state) {
41                continue;
42            }
43
44            if self.lex_newline(state) {
45                continue;
46            }
47
48            if self.lex_comment(state) {
49                continue;
50            }
51
52            if self.lex_identifier_or_instruction(state) {
53                continue;
54            }
55
56            if self.lex_number(state) {
57                continue;
58            }
59
60            if self.lex_string(state) {
61                continue;
62            }
63
64            if self.lex_path(state) {
65                continue;
66            }
67
68            if self.lex_operators_and_delimiters(state) {
69                continue;
70            }
71
72            if self.lex_other(state) {
73                continue;
74            }
75
76            state.advance_if_dead_lock(safe_point)
77        }
78
79        Ok(())
80    }
81
82    /// 跳过空白字符
83    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
84        DOCKERFILE_WHITESPACE.scan(state, DockerfileTokenType::Whitespace)
85    }
86
87    /// 处理换行符
88    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
89        let start = state.get_position();
90        if let Some(ch) = state.peek() {
91            if ch == '\n' {
92                state.advance(1);
93                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
94                return true;
95            }
96            else if ch == '\r' {
97                state.advance(1);
98                if state.peek() == Some('\n') {
99                    state.advance(1)
100                }
101                state.add_token(DockerfileTokenType::Newline, start, state.get_position());
102                return true;
103            }
104        }
105        false
106    }
107
108    /// 处理注释
109    fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
110        let start = state.get_position();
111        if state.peek() == Some('#') {
112            state.advance(1);
113            while let Some(ch) = state.peek() {
114                if ch == '\n' || ch == '\r' {
115                    break;
116                }
117                state.advance(ch.len_utf8())
118            }
119            state.add_token(DockerfileTokenType::Comment, start, state.get_position());
120            return true;
121        }
122        false
123    }
124
125    /// 处理标识符或指令
126    fn lex_identifier_or_instruction<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
127        let start = state.get_position();
128        if let Some(ch) = state.peek() {
129            if ch.is_ascii_alphabetic() || ch == '_' {
130                state.advance(ch.len_utf8());
131
132                while let Some(ch) = state.peek() {
133                    if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
134                }
135
136                let end_pos = state.get_position();
137                let text = state.get_text_in((start..end_pos).into());
138
139                // 检查是否是 Dockerfile 指令
140                let kind = match text.to_uppercase().as_str() {
141                    "FROM" => DockerfileTokenType::From,
142                    "RUN" => DockerfileTokenType::Run,
143                    "CMD" => DockerfileTokenType::Cmd,
144                    "LABEL" => DockerfileTokenType::Label,
145                    "EXPOSE" => DockerfileTokenType::Expose,
146                    "ENV" => DockerfileTokenType::Env,
147                    "ADD" => DockerfileTokenType::Add,
148                    "COPY" => DockerfileTokenType::Copy,
149                    "ENTRYPOINT" => DockerfileTokenType::Entrypoint,
150                    "VOLUME" => DockerfileTokenType::Volume,
151                    "USER" => DockerfileTokenType::User,
152                    "WORKDIR" => DockerfileTokenType::Workdir,
153                    "ARG" => DockerfileTokenType::Arg,
154                    "ONBUILD" => DockerfileTokenType::Onbuild,
155                    "STOPSIGNAL" => DockerfileTokenType::Stopsignal,
156                    "HEALTHCHECK" => DockerfileTokenType::Healthcheck,
157                    "SHELL" => DockerfileTokenType::Shell,
158                    "MAINTAINER" => DockerfileTokenType::Maintainer,
159                    "AS" => DockerfileTokenType::As,
160                    "NONE" => DockerfileTokenType::None,
161                    "INTERVAL" => DockerfileTokenType::Interval,
162                    "TIMEOUT" => DockerfileTokenType::Timeout,
163                    "START_PERIOD" => DockerfileTokenType::StartPeriod,
164                    "RETRIES" => DockerfileTokenType::Retries,
165                    _ => DockerfileTokenType::Identifier,
166                };
167
168                state.add_token(kind, start, end_pos);
169                return true;
170            }
171        }
172        false
173    }
174
175    /// 处理数字
176    fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
177        let start = state.get_position();
178        if let Some(ch) = state.peek() {
179            if ch.is_ascii_digit() {
180                state.advance(1);
181
182                while let Some(ch) = state.peek() {
183                    if ch.is_ascii_digit() || ch == '.' { state.advance(1) } else { break }
184                }
185
186                state.add_token(DockerfileTokenType::Number, start, state.get_position());
187                return true;
188            }
189        }
190        false
191    }
192
193    /// 处理字符串
194    fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
195        let start = state.get_position();
196        if let Some(quote) = state.peek() {
197            if quote == '"' || quote == '\'' {
198                state.advance(1);
199
200                while let Some(ch) = state.peek() {
201                    if ch == quote {
202                        state.advance(1);
203                        break;
204                    }
205                    else if ch == '\\' {
206                        state.advance(1);
207                        if state.peek().is_some() {
208                            state.advance(1)
209                        }
210                    }
211                    else {
212                        state.advance(ch.len_utf8())
213                    }
214                }
215
216                state.add_token(DockerfileTokenType::String, start, state.get_position());
217                return true;
218            }
219        }
220        false
221    }
222
223    /// 处理路径
224    fn lex_path<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
225        let start = state.get_position();
226        if let Some(ch) = state.peek() {
227            if ch == '/' || ch == '.' {
228                state.advance(1);
229
230                while let Some(ch) = state.peek() {
231                    if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' { state.advance(1) } else { break }
232                }
233
234                state.add_token(DockerfileTokenType::Path, start, state.get_position());
235                return true;
236            }
237        }
238        false
239    }
240
241    /// 处理运算符和分隔符
242    fn lex_operators_and_delimiters<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
243        let start = state.get_position();
244        if let Some(ch) = state.peek() {
245            let kind = match ch {
246                '=' => DockerfileTokenType::Equal,
247                ':' => DockerfileTokenType::Colon,
248                '{' => DockerfileTokenType::LeftBrace,
249                '}' => DockerfileTokenType::RightBrace,
250                '[' => DockerfileTokenType::LeftBracket,
251                ']' => DockerfileTokenType::RightBracket,
252                '(' => DockerfileTokenType::LeftParen,
253                ')' => DockerfileTokenType::RightParen,
254                ',' => DockerfileTokenType::Comma,
255                ';' => DockerfileTokenType::Semicolon,
256                '$' => DockerfileTokenType::Dollar,
257                _ => return false,
258            };
259
260            state.advance(1);
261            state.add_token(kind, start, state.get_position());
262            return true;
263        }
264        false
265    }
266
267    /// 处理其他字符
268    fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
269        let start = state.get_position();
270        if let Some(ch) = state.peek() {
271            state.advance(ch.len_utf8());
272            state.add_token(DockerfileTokenType::Error, start, state.get_position());
273            return true;
274        }
275        false
276    }
277}