1use crate::{kind::LLvmSyntaxKind, language::LLvmLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, TextEdit, lexer::LexOutput, source::Source};
3
/// Shorthand for the `oak_core` lexer state specialised to [`LLvmLanguage`],
/// parameterised over the borrowed source type `S`.
type State<'a, S> = LexerState<'a, S, LLvmLanguage>;
5
/// Lexer for LLVM IR text.
///
/// Holds a borrowed [`LLvmLanguage`] configuration for the lifetime `'config`.
#[derive(Clone, Debug)]
pub struct LlvmLexer<'config> {
    /// Language configuration passed to [`LlvmLexer::new`]; the underscore
    /// prefix reflects that the lexing code visible in this file never reads it.
    _config: &'config LLvmLanguage,
}
10
11impl<'config> Lexer<LLvmLanguage> for LlvmLexer<'config> {
12 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<LLvmLanguage>) -> LexOutput<LLvmLanguage> {
13 let mut state = State::new(text);
14 let result = self.run(&mut state);
15 if result.is_ok() {
16 state.add_eof();
17 }
18 state.finish_with_cache(result, cache)
19 }
20}
21
22impl<'config> LlvmLexer<'config> {
23 pub fn new(config: &'config LLvmLanguage) -> Self {
24 Self { _config: config }
25 }
26 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), oak_core::OakError> {
27 while state.not_at_end() {
28 let start = state.get_position();
29 let safe_point = start;
30
31 if let Some(ch) = state.current() {
32 match ch {
33 ' ' | '\t' => {
34 state.advance(1);
35 state.add_token(LLvmSyntaxKind::Whitespace, start, state.get_position());
36 }
37 '\n' | '\r' => {
38 state.advance(1);
39 state.add_token(LLvmSyntaxKind::Newline, start, state.get_position());
40 }
41 ';' => {
42 state.advance(1);
43 while let Some(ch) = state.current() {
44 if ch == '\n' || ch == '\r' {
45 break;
46 }
47 state.advance(ch.len_utf8());
48 }
49 state.add_token(LLvmSyntaxKind::Comment, start, state.get_position());
50 }
51 '%' => {
52 state.advance(1);
53 while let Some(ch) = state.current() {
54 if !ch.is_alphanumeric() && ch != '.' && ch != '_' && ch != '-' {
55 break;
56 }
57 state.advance(ch.len_utf8());
58 }
59 state.add_token(LLvmSyntaxKind::LocalVar, start, state.get_position());
60 }
61 '@' => {
62 state.advance(1);
63 while let Some(ch) = state.current() {
64 if !ch.is_alphanumeric() && ch != '.' && ch != '_' && ch != '-' {
65 break;
66 }
67 state.advance(ch.len_utf8());
68 }
69 state.add_token(LLvmSyntaxKind::GlobalVar, start, state.get_position());
70 }
71 '!' => {
72 state.advance(1);
73 while let Some(ch) = state.current() {
74 if !ch.is_alphanumeric() && ch != '.' && ch != '_' && ch != '-' {
75 break;
76 }
77 state.advance(ch.len_utf8());
78 }
79 state.add_token(LLvmSyntaxKind::Metadata, start, state.get_position());
80 }
81 '=' => {
82 state.advance(1);
83 state.add_token(LLvmSyntaxKind::Equal, start, state.get_position());
84 }
85 ',' => {
86 state.advance(1);
87 state.add_token(LLvmSyntaxKind::Comma, start, state.get_position());
88 }
89 '(' => {
90 state.advance(1);
91 state.add_token(LLvmSyntaxKind::LParen, start, state.get_position());
92 }
93 ')' => {
94 state.advance(1);
95 state.add_token(LLvmSyntaxKind::RParen, start, state.get_position());
96 }
97 '[' => {
98 state.advance(1);
99 state.add_token(LLvmSyntaxKind::LBracket, start, state.get_position());
100 }
101 ']' => {
102 state.advance(1);
103 state.add_token(LLvmSyntaxKind::RBracket, start, state.get_position());
104 }
105 '{' => {
106 state.advance(1);
107 state.add_token(LLvmSyntaxKind::LBrace, start, state.get_position());
108 }
109 '}' => {
110 state.advance(1);
111 state.add_token(LLvmSyntaxKind::RBrace, start, state.get_position());
112 }
113 '*' => {
114 state.advance(1);
115 state.add_token(LLvmSyntaxKind::Star, start, state.get_position());
116 }
117 ':' => {
118 state.advance(1);
119 state.add_token(LLvmSyntaxKind::Colon, start, state.get_position());
120 }
121 '0'..='9' | '-' => {
122 state.advance(1);
123 while let Some(ch) = state.current() {
124 if !ch.is_ascii_digit() && ch != '.' {
125 break;
126 }
127 state.advance(ch.len_utf8());
128 }
129 state.add_token(LLvmSyntaxKind::Number, start, state.get_position());
130 }
131 '"' => {
132 state.advance(1);
133 while let Some(ch) = state.current() {
134 if ch == '"' {
135 state.advance(1);
136 break;
137 }
138 if ch == '\\' {
139 state.advance(1);
140 }
141 let len = state.current().map(|c| c.len_utf8()).unwrap_or(0);
142 state.advance(len);
143 }
144 state.add_token(LLvmSyntaxKind::String, start, state.get_position());
145 }
146 _ if ch.is_alphabetic() || ch == '_' || ch == '.' => {
147 state.advance(1);
148 while let Some(ch) = state.current() {
149 if !ch.is_alphanumeric() && ch != '_' && ch != '.' && ch != '-' {
150 break;
151 }
152 state.advance(ch.len_utf8());
153 }
154 state.add_token(LLvmSyntaxKind::Keyword, start, state.get_position());
155 }
156 _ => {
157 state.advance(ch.len_utf8());
158 state.add_token(LLvmSyntaxKind::Error, start, state.get_position());
159 }
160 }
161 }
162 else {
163 break;
164 }
165
166 state.advance_if_dead_lock(safe_point);
167 }
168
169 Ok(())
170 }
171}