Skip to main content

oak_llvm_ir/lexer/
mod.rs

1use crate::{kind::LLvmSyntaxKind, language::LLvmLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, TextEdit, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, LLvmLanguage>;
5
6#[derive(Clone, Debug)]
7pub struct LlvmLexer<'config> {
8    _config: &'config LLvmLanguage,
9}
10
11impl<'config> Lexer<LLvmLanguage> for LlvmLexer<'config> {
12    fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LLvmLanguage>) -> LexOutput<LLvmLanguage> {
13        let mut state = State::new(text);
14        let result = self.run(&mut state);
15        if result.is_ok() {
16            state.add_eof();
17        }
18        state.finish_with_cache(result, cache)
19    }
20}
21
22impl<'config> LlvmLexer<'config> {
23    pub fn new(config: &'config LLvmLanguage) -> Self {
24        Self { _config: config }
25    }
26    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
27        while state.not_at_end() {
28            let start = state.get_position();
29            let safe_point = start;
30
31            if let Some(ch) = state.current() {
32                match ch {
33                    ' ' | '\t' => {
34                        state.advance(1);
35                        state.add_token(LLvmSyntaxKind::Whitespace, start, state.get_position());
36                    }
37                    '\n' | '\r' => {
38                        state.advance(1);
39                        state.add_token(LLvmSyntaxKind::Newline, start, state.get_position());
40                    }
41                    ';' => {
42                        state.advance(1);
43                        while let Some(ch) = state.current() {
44                            if ch == '\n' || ch == '\r' {
45                                break;
46                            }
47                            state.advance(ch.len_utf8());
48                        }
49                        state.add_token(LLvmSyntaxKind::Comment, start, state.get_position());
50                    }
51                    '%' => {
52                        state.advance(1);
53                        while let Some(ch) = state.current() {
54                            if !ch.is_alphanumeric() && ch != '.' && ch != '_' && ch != '-' {
55                                break;
56                            }
57                            state.advance(ch.len_utf8());
58                        }
59                        state.add_token(LLvmSyntaxKind::LocalVar, start, state.get_position());
60                    }
61                    '@' => {
62                        state.advance(1);
63                        while let Some(ch) = state.current() {
64                            if !ch.is_alphanumeric() && ch != '.' && ch != '_' && ch != '-' {
65                                break;
66                            }
67                            state.advance(ch.len_utf8());
68                        }
69                        state.add_token(LLvmSyntaxKind::GlobalVar, start, state.get_position());
70                    }
71                    '!' => {
72                        state.advance(1);
73                        while let Some(ch) = state.current() {
74                            if !ch.is_alphanumeric() && ch != '.' && ch != '_' && ch != '-' {
75                                break;
76                            }
77                            state.advance(ch.len_utf8());
78                        }
79                        state.add_token(LLvmSyntaxKind::Metadata, start, state.get_position());
80                    }
81                    '=' => {
82                        state.advance(1);
83                        state.add_token(LLvmSyntaxKind::Equal, start, state.get_position());
84                    }
85                    ',' => {
86                        state.advance(1);
87                        state.add_token(LLvmSyntaxKind::Comma, start, state.get_position());
88                    }
89                    '(' => {
90                        state.advance(1);
91                        state.add_token(LLvmSyntaxKind::LParen, start, state.get_position());
92                    }
93                    ')' => {
94                        state.advance(1);
95                        state.add_token(LLvmSyntaxKind::RParen, start, state.get_position());
96                    }
97                    '[' => {
98                        state.advance(1);
99                        state.add_token(LLvmSyntaxKind::LBracket, start, state.get_position());
100                    }
101                    ']' => {
102                        state.advance(1);
103                        state.add_token(LLvmSyntaxKind::RBracket, start, state.get_position());
104                    }
105                    '{' => {
106                        state.advance(1);
107                        state.add_token(LLvmSyntaxKind::LBrace, start, state.get_position());
108                    }
109                    '}' => {
110                        state.advance(1);
111                        state.add_token(LLvmSyntaxKind::RBrace, start, state.get_position());
112                    }
113                    '*' => {
114                        state.advance(1);
115                        state.add_token(LLvmSyntaxKind::Star, start, state.get_position());
116                    }
117                    ':' => {
118                        state.advance(1);
119                        state.add_token(LLvmSyntaxKind::Colon, start, state.get_position());
120                    }
121                    '0'..='9' | '-' => {
122                        state.advance(1);
123                        while let Some(ch) = state.current() {
124                            if !ch.is_ascii_digit() && ch != '.' {
125                                break;
126                            }
127                            state.advance(ch.len_utf8());
128                        }
129                        state.add_token(LLvmSyntaxKind::Number, start, state.get_position());
130                    }
131                    '"' => {
132                        state.advance(1);
133                        while let Some(ch) = state.current() {
134                            if ch == '"' {
135                                state.advance(1);
136                                break;
137                            }
138                            if ch == '\\' {
139                                state.advance(1);
140                            }
141                            let len = state.current().map(|c| c.len_utf8()).unwrap_or(0);
142                            state.advance(len);
143                        }
144                        state.add_token(LLvmSyntaxKind::String, start, state.get_position());
145                    }
146                    _ if ch.is_alphabetic() || ch == '_' || ch == '.' => {
147                        state.advance(1);
148                        while let Some(ch) = state.current() {
149                            if !ch.is_alphanumeric() && ch != '_' && ch != '.' && ch != '-' {
150                                break;
151                            }
152                            state.advance(ch.len_utf8());
153                        }
154                        state.add_token(LLvmSyntaxKind::Keyword, start, state.get_position());
155                    }
156                    _ => {
157                        state.advance(ch.len_utf8());
158                        state.add_token(LLvmSyntaxKind::Error, start, state.get_position());
159                    }
160                }
161            }
162            else {
163                break;
164            }
165
166            state.advance_if_dead_lock(safe_point);
167        }
168
169        Ok(())
170    }
171}