1use crate::{ClojureLanguage, ClojureSyntaxKind};
2use oak_core::{
3 lexer::{LexOutput, Lexer, LexerState},
4 source::Source,
5 tree::IncrementalCache,
6};
7
/// Lexer for Clojure source text.
///
/// This is a field-less unit struct: all scanning state lives in the
/// `LexerState` created per call, so a single value can be reused freely.
/// The `Lexer<ClojureLanguage>` implementation in this file is the entry point.
pub struct ClojureLexer;
9
10impl Lexer<ClojureLanguage> for ClojureLexer {
11 fn lex_incremental(
12 &self,
13 source: impl Source,
14 _changed: usize,
15 _cache: IncrementalCache<ClojureLanguage>,
16 ) -> LexOutput<ClojureLanguage> {
17 let mut state = LexerState::new_with_cache(source, _changed, _cache);
18 while state.not_at_end() {
19 let start = state.get_position();
20
21 match state.current() {
22 Some(c) if c.is_whitespace() => {
23 self.lex_whitespace(&mut state);
24 }
25 Some(';') => {
26 self.lex_comment(&mut state);
27 }
28 Some('"') => {
29 self.lex_string(&mut state);
30 }
31 Some('\\') => {
32 self.lex_character(&mut state);
33 }
34 Some(c) if c.is_ascii_digit() => {
35 self.lex_number(&mut state);
36 }
37 Some(':') => {
38 self.lex_keyword(&mut state);
39 }
40 Some('#') => {
41 self.lex_dispatch(&mut state);
42 }
43 Some('(') => {
44 state.advance(1);
45 state.add_token(ClojureSyntaxKind::ListStart, start, state.get_position());
46 }
47 Some(')') => {
48 state.advance(1);
49 state.add_token(ClojureSyntaxKind::ListEnd, start, state.get_position());
50 }
51 Some('[') => {
52 state.advance(1);
53 state.add_token(ClojureSyntaxKind::VectorStart, start, state.get_position());
54 }
55 Some(']') => {
56 state.advance(1);
57 state.add_token(ClojureSyntaxKind::VectorEnd, start, state.get_position());
58 }
59 Some('{') => {
60 state.advance(1);
61 state.add_token(ClojureSyntaxKind::MapStart, start, state.get_position());
62 }
63 Some('}') => {
64 state.advance(1);
65 state.add_token(ClojureSyntaxKind::MapEnd, start, state.get_position());
66 }
67 Some('\'') => {
68 state.advance(1);
69 state.add_token(ClojureSyntaxKind::Quote, start, state.get_position());
70 }
71 Some('`') => {
72 state.advance(1);
73 state.add_token(ClojureSyntaxKind::Quote, start, state.get_position());
74 }
75 Some('~') => {
76 if state.peek() == Some('@') {
77 state.advance(2);
78 state.add_token(ClojureSyntaxKind::UnquoteSplice, start, state.get_position());
79 }
80 else {
81 state.advance(1);
82 state.add_token(ClojureSyntaxKind::Unquote, start, state.get_position());
83 }
84 }
85 Some('^') => {
86 state.advance(1);
87 state.add_token(ClojureSyntaxKind::Meta, start, state.get_position());
88 }
89 Some(_) => {
90 self.lex_symbol(&mut state);
91 }
92 None => break,
93 }
94 }
95
96 state.finish(Ok(()))
97 }
98}
99
100impl ClojureLexer {
101 fn lex_whitespace<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
102 let start = state.get_position();
103 while let Some(c) = state.current() {
104 if c.is_whitespace() {
105 state.advance(1);
106 }
107 else {
108 break;
109 }
110 }
111 state.add_token(ClojureSyntaxKind::Whitespace, start, state.get_position());
112 }
113
114 fn lex_comment<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
115 let start = state.get_position();
116 state.advance(1); while let Some(c) = state.current() {
119 if c == '\n' {
120 break;
121 }
122 state.advance(1);
123 }
124
125 state.add_token(ClojureSyntaxKind::Comment, start, state.get_position());
126 }
127
128 fn lex_string<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
129 let start = state.get_position();
130 state.advance(1); while let Some(c) = state.current() {
133 if c == '"' {
134 state.advance(1);
135 break;
136 }
137 else if c == '\\' {
138 state.advance(1); if state.current().is_some() {
140 state.advance(1); }
142 }
143 else {
144 state.advance(1);
145 }
146 }
147
148 state.add_token(ClojureSyntaxKind::StringLiteral, start, state.get_position());
149 }
150
151 fn lex_character<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
152 let start = state.get_position();
153 state.advance(1); if let Some(_) = state.current() {
156 state.advance(1);
157 }
158
159 state.add_token(ClojureSyntaxKind::CharacterLiteral, start, state.get_position());
160 }
161
162 fn lex_number<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
163 let start = state.get_position();
164
165 while let Some(c) = state.current() {
166 if c.is_ascii_digit() || c == '.' {
167 state.advance(1);
168 }
169 else {
170 break;
171 }
172 }
173
174 state.add_token(ClojureSyntaxKind::NumberLiteral, start, state.get_position());
175 }
176
177 fn lex_keyword<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
178 let start = state.get_position();
179 state.advance(1); while let Some(c) = state.current() {
182 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' {
183 state.advance(1);
184 }
185 else {
186 break;
187 }
188 }
189
190 state.add_token(ClojureSyntaxKind::KeywordLiteral, start, state.get_position());
191 }
192
193 fn lex_dispatch<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
194 let start = state.get_position();
195 state.advance(1); match state.current() {
198 Some('{') => {
199 state.advance(1);
200 state.add_token(ClojureSyntaxKind::SetStart, start, state.get_position());
201 }
202 Some('(') => {
203 state.advance(1);
204 state.add_token(ClojureSyntaxKind::AnonFnStart, start, state.get_position());
205 }
206 Some('"') => {
207 self.lex_regex(state, start);
208 }
209 _ => {
210 state.add_token(ClojureSyntaxKind::Dispatch, start, state.get_position());
211 }
212 }
213 }
214
215 fn lex_regex<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>, start: usize) {
216 state.advance(1); while let Some(c) = state.current() {
219 if c == '"' {
220 state.advance(1);
221 break;
222 }
223 else if c == '\\' {
224 state.advance(1); if state.current().is_some() {
226 state.advance(1); }
228 }
229 else {
230 state.advance(1);
231 }
232 }
233
234 state.add_token(ClojureSyntaxKind::RegexLiteral, start, state.get_position());
235 }
236
237 fn lex_symbol<S: Source>(&self, state: &mut LexerState<S, ClojureLanguage>) {
238 let start = state.get_position();
239
240 while let Some(c) = state.current() {
241 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' || c == '*' || c == '+' || c == '/' {
242 state.advance(1);
243 }
244 else {
245 break;
246 }
247 }
248
249 state.add_token(ClojureSyntaxKind::Symbol, start, state.get_position());
250 }
251}