oak_dockerfile/lexer/mod.rs

use crate::{kind::DockerfileSyntaxKind, language::DockerfileLanguage};
use oak_core::{
    IncrementalCache, Lexer, LexerState, OakError,
    lexer::{LexOutput, WhitespaceConfig},
    source::Source,
};
use std::sync::LazyLock;

type State<S> = LexerState<S, DockerfileLanguage>;

static DOCKERFILE_WHITESPACE: LazyLock<WhitespaceConfig> =
    LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });

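/// Lexes Dockerfile source text into a flat stream of `DockerfileSyntaxKind`
/// tokens: instructions, identifiers, numbers, strings, paths, operators,
/// comments, and newlines.
///
/// Rough usage sketch (an `ignore`d doc example, because how `Source` values
/// and `IncrementalCache`s are constructed, and whether the `Default` impls
/// assumed below exist, is defined by `oak_core` rather than this module):
///
/// ```ignore
/// let config = DockerfileLanguage::default();
/// let lexer = DockerfileLexer::new(&config);
/// let output = lexer.lex_incremental(source, 0, IncrementalCache::default());
/// ```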
#[derive(Clone)]
pub struct DockerfileLexer<'config> {
    config: &'config DockerfileLanguage,
}

impl<'config> Lexer<DockerfileLanguage> for DockerfileLexer<'config> {
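    /// Entry point for (re-)lexing: seeds a `LexerState` with the incremental
    /// `cache` and the edited offset `changed`, runs the token loop, and hands
    /// the result back through `state.finish`.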
    fn lex_incremental(
        &self,
        source: impl Source,
        changed: usize,
        cache: IncrementalCache<DockerfileLanguage>,
    ) -> LexOutput<DockerfileLanguage> {
        let mut state = LexerState::new_with_cache(source, changed, cache);
        let result = self.run(&mut state);
        state.finish(result)
    }
}

impl<'config> DockerfileLexer<'config> {
    pub fn new(config: &'config DockerfileLanguage) -> Self {
        Self { config }
    }

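    /// Main token loop: tries each rule in priority order, restarting from the
    /// top after every match, then appends a zero-width `Eof` token.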
    fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
        while state.not_at_end() {
            let safe_point = state.get_position();

            if self.skip_whitespace(state) {
                continue;
            }

            if self.lex_newline(state) {
                continue;
            }

            if self.lex_comment(state) {
                continue;
            }

            if self.lex_identifier_or_instruction(state) {
                continue;
            }

            if self.lex_number(state) {
                continue;
            }

            if self.lex_string(state) {
                continue;
            }

            if self.lex_path(state) {
                continue;
            }

            if self.lex_operators_and_delimiters(state) {
                continue;
            }

            if self.lex_other(state) {
                continue;
            }

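            // All rules failed (normally unreachable, since lex_other accepts
            // any character); safe_check guards against a stalled loop here.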
            state.safe_check(safe_point);
        }

        // Append the zero-width EOF token at the end of input.
        let eof_pos = state.get_position();
        state.add_token(DockerfileSyntaxKind::Eof, eof_pos, eof_pos);
        Ok(())
    }

    /// Skips whitespace characters, emitting a single `Whitespace` token.
    fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
        match DOCKERFILE_WHITESPACE.scan(state.rest(), state.get_position(), DockerfileSyntaxKind::Whitespace) {
            Some(token) => {
                state.advance_with(token);
                true
            }
            None => false,
        }
    }

    /// Lexes a newline, treating `\r\n` as a single `Newline` token.
    fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch == '\n' {
                state.advance(1);
                state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
                return true;
            }
            else if ch == '\r' {
                state.advance(1);
                if state.peek() == Some('\n') {
                    state.advance(1);
                }
                state.add_token(DockerfileSyntaxKind::Newline, start, state.get_position());
                return true;
            }
        }
        false
    }

    /// Lexes a `#` comment running to the end of the line.
    fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if state.peek() == Some('#') {
            state.advance(1);
            while let Some(ch) = state.peek() {
                if ch == '\n' || ch == '\r' {
                    break;
                }
                state.advance(ch.len_utf8());
            }
            state.add_token(DockerfileSyntaxKind::Comment, start, state.get_position());
            return true;
        }
        false
    }

    /// Lexes an identifier and classifies known Dockerfile instruction keywords.
    fn lex_identifier_or_instruction<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch.is_ascii_alphabetic() || ch == '_' {
                state.advance(ch.len_utf8());

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '_' {
                        state.advance(ch.len_utf8());
                    }
                    else {
                        break;
                    }
                }

                let end_pos = state.get_position();
                let text = state.get_text_in((start..end_pos).into());

                // Instruction keywords are matched case-insensitively; anything
                // else falls through to a plain identifier.
                let kind = match text.to_uppercase().as_str() {
                    "FROM" => DockerfileSyntaxKind::From,
                    "RUN" => DockerfileSyntaxKind::Run,
                    "CMD" => DockerfileSyntaxKind::Cmd,
                    "LABEL" => DockerfileSyntaxKind::Label,
                    "EXPOSE" => DockerfileSyntaxKind::Expose,
                    "ENV" => DockerfileSyntaxKind::Env,
                    "ADD" => DockerfileSyntaxKind::Add,
                    "COPY" => DockerfileSyntaxKind::Copy,
                    "ENTRYPOINT" => DockerfileSyntaxKind::Entrypoint,
                    "VOLUME" => DockerfileSyntaxKind::Volume,
                    "USER" => DockerfileSyntaxKind::User,
                    "WORKDIR" => DockerfileSyntaxKind::Workdir,
                    "ARG" => DockerfileSyntaxKind::Arg,
                    "ONBUILD" => DockerfileSyntaxKind::Onbuild,
                    "STOPSIGNAL" => DockerfileSyntaxKind::Stopsignal,
                    "HEALTHCHECK" => DockerfileSyntaxKind::Healthcheck,
                    "SHELL" => DockerfileSyntaxKind::Shell,
                    "MAINTAINER" => DockerfileSyntaxKind::Maintainer,
                    "AS" => DockerfileSyntaxKind::As,
                    "NONE" => DockerfileSyntaxKind::None,
                    "INTERVAL" => DockerfileSyntaxKind::Interval,
                    "TIMEOUT" => DockerfileSyntaxKind::Timeout,
                    "START_PERIOD" => DockerfileSyntaxKind::StartPeriod,
                    "RETRIES" => DockerfileSyntaxKind::Retries,
                    _ => DockerfileSyntaxKind::Identifier,
                };

                state.add_token(kind, start, end_pos);
                return true;
            }
        }
        false
    }

    /// Lexes a number: a leading digit followed by digits or `.`.
    fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch.is_ascii_digit() {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_digit() || ch == '.' {
                        state.advance(1);
                    }
                    else {
                        break;
                    }
                }

                state.add_token(DockerfileSyntaxKind::Number, start, state.get_position());
                return true;
            }
        }
        false
    }

    /// Lexes a single- or double-quoted string, honoring backslash escapes.
    fn lex_string<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if let Some(quote) = state.peek() {
            if quote == '"' || quote == '\'' {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch == quote {
                        state.advance(1);
                        break;
                    }
                    else if ch == '\\' {
                        state.advance(1);
                        // Consume the escaped character by its full UTF-8 width
                        // so multi-byte characters are not split.
                        if let Some(escaped) = state.peek() {
                            state.advance(escaped.len_utf8());
                        }
                    }
                    else {
                        state.advance(ch.len_utf8());
                    }
                }

                state.add_token(DockerfileSyntaxKind::String, start, state.get_position());
                return true;
            }
        }
        false
    }

    /// Lexes a path starting with `/` or `.`, continuing over `[A-Za-z0-9/._-]`.
    fn lex_path<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            if ch == '/' || ch == '.' {
                state.advance(1);

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '/' || ch == '.' || ch == '-' || ch == '_' {
                        state.advance(1);
                    }
                    else {
                        break;
                    }
                }

                state.add_token(DockerfileSyntaxKind::Path, start, state.get_position());
                return true;
            }
        }
        false
    }

    /// Lexes single-character operators and delimiters.
    fn lex_operators_and_delimiters<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            let kind = match ch {
                '=' => DockerfileSyntaxKind::Equal,
                ':' => DockerfileSyntaxKind::Colon,
                '{' => DockerfileSyntaxKind::LeftBrace,
                '}' => DockerfileSyntaxKind::RightBrace,
                '[' => DockerfileSyntaxKind::LeftBracket,
                ']' => DockerfileSyntaxKind::RightBracket,
                '(' => DockerfileSyntaxKind::LeftParen,
                ')' => DockerfileSyntaxKind::RightParen,
                ',' => DockerfileSyntaxKind::Comma,
                ';' => DockerfileSyntaxKind::Semicolon,
                '$' => DockerfileSyntaxKind::Dollar,
                _ => return false,
            };

            state.advance(1);
            state.add_token(kind, start, state.get_position());
            return true;
        }
        false
    }

    /// Fallback rule: consumes any other character as an `Error` token.
    fn lex_other<S: Source>(&self, state: &mut State<S>) -> bool {
        let start = state.get_position();
        if let Some(ch) = state.peek() {
            state.advance(ch.len_utf8());
            state.add_token(DockerfileSyntaxKind::Error, start, state.get_position());
            return true;
        }
        false
    }
}