Skip to main content

oak_dockerfile/lexer/
mod.rs

1use crate::{kind::DockerfileSyntaxKind, language::DockerfileLanguage};
2use oak_core::{
3    Lexer, LexerCache, LexerState, OakError, TextEdit,
4    lexer::{LexOutput, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
/// Shorthand for the generic `LexerState` fixed to `DockerfileLanguage`.
///
/// `'a` is the lifetime of the borrowed source and `S` is the source type.
type State<'a, S> = LexerState<'a, S, DockerfileLanguage>;
10
/// Whitespace-scanning configuration used by `skip_whitespace`, built lazily on first access.
///
/// `unicode_whitespace` is enabled; presumably this makes the scanner accept non-ASCII
/// whitespace as well — confirm against `WhitespaceConfig`.
static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12
/// Lexer that turns Dockerfile source text into `DockerfileSyntaxKind` tokens.
///
/// The lexer only borrows its language configuration, so cloning it is cheap.
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    /// Language configuration this lexer was created with. None of the lexing routines
    /// visible in this file read it, hence the leading underscore.
    _config: &'config DockerfileLanguage,
}
17
18impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
19    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<DockerfileLanguage>) -> LexOutput<DockerfileLanguage> {
20        let mut state = State::new(text);
21        let result = self.run(&mut state);
22        if result.is_ok() {
23            state.add_eof();
24        }
25        state.finish_with_cache(result, cache)
26    }
27}
28
29impl<'config> DockerfileLexer<'config> {
30    pub fn new(config: &'config DockerfileLanguage) -> Self {
31        Self { _config: config }
32    }
33
34    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
35        while state.not_at_end() {
36            let safe_point = state.get_position();
37
38            if self.skip_whitespace(state) {
39                continue;
40            }
41
42            if self.lex_newline(state) {
43                continue;
44            }
45
46            if self.lex_comment(state) {
47                continue;
48            }
49
50            if self.lex_identifier_or_instruction(state) {
51                continue;
52            }
53
54            if self.lex_number(state) {
55                continue;
56            }
57
58            if self.lex_string(state) {
59                continue;
60            }
61
62            if self.lex_path(state) {
63                continue;
64            }
65
66            if self.lex_operators_and_delimiters(state) {
67                continue;
68            }
69
70            if self.lex_other(state) {
71                continue;
72            }
73
74            state.advance_if_dead_lock(safe_point);
75        }
76
77        Ok(())
78    }
79
    /// Skips whitespace at the cursor.
    ///
    /// Delegates to the shared `DOCKERFILE_WHITESPACE` scanner, passing
    /// `DockerfileSyntaxKind::Whitespace` as the token kind, and returns the scanner's
    /// result (presumably `true` when whitespace was consumed — confirm against
    /// `WhitespaceConfig::scan`).
    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        DOCKERFILE_WHITESPACE.scan(state, DockerfileSyntaxKind::Whitespace)
    }
84
85    /// 处理换行符
86    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
87        let start = state.get_position();
88        if let Some(ch) = state.peek() {
89            if ch == '\n' {
90                state.advance(1);
91                state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
92                return true;
93            }
94            else if ch == '\r' {
95                state.advance(1);
96                if state.peek() == Some('\n') {
97                    state.advance(1);
98                }
99                state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
100                return true;
101            }
102        }
103        false
104    }
105
106    /// 处理注释
107    fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
108        let start = state.get_position();
109        if state.peek() == Some('#') {
110            state.advance(1);
111            while let Some(ch) = state.peek() {
112                if ch == '\n' || ch == '\r' {
113                    break;
114                }
115                state.advance(ch.len_utf8());
116            }
117            state.add_token(DockerfileSyntaxKind::Comment, start, state.get_position());
118            return true;
119        }
120        false
121    }
122
123    /// 处理标识符或指令
124    fn lex_identifier_or_instruction<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
125        let start = state.get_position();
126        if let Some(ch) = state.peek() {
127            if ch.is_ascii_alphabetic() || ch == '_' {
128                state.advance(ch.len_utf8());
129
130                while let Some(ch) = state.peek() {
131                    if ch.is_ascii_alphanumeric() || ch == '_' {
132                        state.advance(ch.len_utf8());
133                    }
134                    else {
135                        break;
136                    }
137                }
138
139                let end_pos = state.get_position();
140                let text = state.get_text_in((start..end_pos).into());
141
142                // 检查是否是 Dockerfile 指令
143                let kind = match text.to_uppercase().as_str() {
144                    "FROM" => DockerfileSyntaxKind::From,
145                    "RUN" => DockerfileSyntaxKind::Run,
146                    "CMD" => DockerfileSyntaxKind::Cmd,
147                    "LABEL" => DockerfileSyntaxKind::Label,
148                    "EXPOSE" => DockerfileSyntaxKind::Expose,
149                    "ENV" => DockerfileSyntaxKind::Env,
150                    "ADD" => DockerfileSyntaxKind::Add,
151                    "COPY" => DockerfileSyntaxKind::Copy,
152                    "ENTRYPOINT" => DockerfileSyntaxKind::Entrypoint,
153                    "VOLUME" => DockerfileSyntaxKind::Volume,
154                    "USER" => DockerfileSyntaxKind::User,
155                    "WORKDIR" => DockerfileSyntaxKind::Workdir,
156                    "ARG" => DockerfileSyntaxKind::Arg,
157                    "ONBUILD" => DockerfileSyntaxKind::Onbuild,
158                    "STOPSIGNAL" => DockerfileSyntaxKind::Stopsignal,
159                    "HEALTHCHECK" => DockerfileSyntaxKind::Healthcheck,
160                    "SHELL" => DockerfileSyntaxKind::Shell,
161                    "MAINTAINER" => DockerfileSyntaxKind::Maintainer,
162                    "AS" => DockerfileSyntaxKind::As,
163                    "NONE" => DockerfileSyntaxKind::None,
164                    "INTERVAL" => DockerfileSyntaxKind::Interval,
165                    "TIMEOUT" => DockerfileSyntaxKind::Timeout,
166                    "START_PERIOD" => DockerfileSyntaxKind::StartPeriod,
167                    "RETRIES" => DockerfileSyntaxKind::Retries,
168                    _ => DockerfileSyntaxKind::Identifier,
169                };
170
171                state.add_token(kind, start, end_pos);
172                return true;
173            }
174        }
175        false
176    }
177
178    /// 处理数字
179    fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
180        let start = state.get_position();
181        if let Some(ch) = state.peek() {
182            if ch.is_ascii_digit() {
183                state.advance(1);
184
185                while let Some(ch) = state.peek() {
186                    if ch.is_ascii_digit() || ch == '.' {
187                        state.advance(1);
188                    }
189                    else {
190                        break;
191                    }
192                }
193
194                state.add_token(DockerfileSyntaxKind::Number, start, state.get_position());
195                return true;
196            }
197        }
198        false
199    }
200
201    /// 处理字符串
202    fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
203        let start = state.get_position();
204        if let Some(quote) = state.peek() {
205            if quote == '"' || quote == '\'' {
206                state.advance(1);
207
208                while let Some(ch) = state.peek() {
209                    if ch == quote {
210                        state.advance(1);
211                        break;
212                    }
213                    else if ch == '\\' {
214                        state.advance(1);
215                        if state.peek().is_some() {
216                            state.advance(1);
217                        }
218                    }
219                    else {
220                        state.advance(ch.len_utf8());
221                    }
222                }
223
224                state.add_token(DockerfileSyntaxKind::String, start, state.get_position());
225                return true;
226            }
227        }
228        false
229    }
230
231    /// 处理路径
232    fn lex_path<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
233        let start = state.get_position();
234        if let Some(ch) = state.peek() {
235            if ch == '/' || ch == '.' {
236                state.advance(1);
237
238                while let Some(ch) = state.peek() {
239                    if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' {
240                        state.advance(1);
241                    }
242                    else {
243                        break;
244                    }
245                }
246
247                state.add_token(DockerfileSyntaxKind::Path, start, state.get_position());
248                return true;
249            }
250        }
251        false
252    }
253
254    /// 处理运算符和分隔符
255    fn lex_operators_and_delimiters<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
256        let start = state.get_position();
257        if let Some(ch) = state.peek() {
258            let kind = match ch {
259                '=' => DockerfileSyntaxKind::Equal,
260                ':' => DockerfileSyntaxKind::Colon,
261                '{' => DockerfileSyntaxKind::LeftBrace,
262                '}' => DockerfileSyntaxKind::RightBrace,
263                '[' => DockerfileSyntaxKind::LeftBracket,
264                ']' => DockerfileSyntaxKind::RightBracket,
265                '(' => DockerfileSyntaxKind::LeftParen,
266                ')' => DockerfileSyntaxKind::RightParen,
267                ',' => DockerfileSyntaxKind::Comma,
268                ';' => DockerfileSyntaxKind::Semicolon,
269                '$' => DockerfileSyntaxKind::Dollar,
270                _ => return false,
271            };
272
273            state.advance(1);
274            state.add_token(kind, start, state.get_position());
275            return true;
276        }
277        false
278    }
279
280    /// 处理其他字符
281    fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
282        let start = state.get_position();
283        if let Some(ch) = state.peek() {
284            state.advance(ch.len_utf8());
285            state.add_token(DockerfileSyntaxKind::Error, start, state.get_position());
286            return true;
287        }
288        false
289    }
290}