1pub mod token_type;
2pub use token_type::ClojureTokenType;
3
4use crate::ClojureLanguage;
5use oak_core::{
6 Lexer, LexerCache, LexerState, OakError,
7 lexer::LexOutput,
8 source::{Source, TextEdit},
9};
10
/// Hand-written lexer for Clojure source code.
///
/// The struct is a stateless unit type: all per-run state lives in a
/// [`LexerState`], so one `ClojureLexer` value can be reused across inputs.
pub struct ClojureLexer;

/// Shorthand for the lexer state specialized to [`ClojureLanguage`].
type State<'a, S> = LexerState<'a, S, ClojureLanguage>;
14
15impl Lexer<ClojureLanguage> for ClojureLexer {
16 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ClojureLanguage>) -> LexOutput<ClojureLanguage> {
17 let mut state = State::new(text);
18 let result = self.run(&mut state);
19 if result.is_ok() {
20 state.add_eof();
21 }
22 state.finish_with_cache(result, cache)
23 }
24}
25
26impl ClojureLexer {
27 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
28 while state.not_at_end() {
29 let start = state.get_position();
30 let safe_point = start;
31
32 match state.peek() {
33 Some(c) if c.is_whitespace() => {
34 self.lex_whitespace(state);
35 }
36 Some(';') => {
37 self.lex_comment(state);
38 }
39 Some('"') => {
40 self.lex_string(state);
41 }
42 Some('\\') => {
43 self.lex_character(state);
44 }
45 Some(c) if c.is_ascii_digit() => {
46 self.lex_number(state);
47 }
48 Some(':') => {
49 self.lex_keyword(state);
50 }
51 Some('#') => {
52 self.lex_dispatch(state);
53 }
54 Some('(') => {
55 state.advance(1);
56 state.add_token(ClojureTokenType::ListStart, start, state.get_position());
57 }
58 Some(')') => {
59 state.advance(1);
60 state.add_token(ClojureTokenType::ListEnd, start, state.get_position());
61 }
62 Some('[') => {
63 state.advance(1);
64 state.add_token(ClojureTokenType::VectorStart, start, state.get_position());
65 }
66 Some(']') => {
67 state.advance(1);
68 state.add_token(ClojureTokenType::VectorEnd, start, state.get_position());
69 }
70 Some('{') => {
71 state.advance(1);
72 state.add_token(ClojureTokenType::MapStart, start, state.get_position());
73 }
74 Some('}') => {
75 state.advance(1);
76 state.add_token(ClojureTokenType::MapEnd, start, state.get_position());
77 }
78 Some('\'') | Some('`') => {
79 state.advance(1);
80 state.add_token(ClojureTokenType::Quote, start, state.get_position());
81 }
82 Some('~') => {
83 state.advance(1);
84 if state.peek() == Some('@') {
85 state.advance(1);
86 state.add_token(ClojureTokenType::UnquoteSplice, start, state.get_position());
87 }
88 else {
89 state.add_token(ClojureTokenType::Unquote, start, state.get_position());
90 }
91 }
92 Some('^') => {
93 state.advance(1);
94 state.add_token(ClojureTokenType::Meta, start, state.get_position());
95 }
96 Some(_) => {
97 self.lex_symbol(state);
98 }
99 None => break,
100 }
101
102 state.advance_if_dead_lock(safe_point);
103 }
104 Ok(())
105 }
106}
107
108impl ClojureLexer {
109 fn lex_whitespace<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
110 let start = state.get_position();
111 while let Some(c) = state.peek() {
112 if c.is_whitespace() {
113 state.advance(c.len_utf8());
114 }
115 else {
116 break;
117 }
118 }
119 state.add_token(ClojureTokenType::Whitespace, start, state.get_position());
120 }
121
122 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
123 let start = state.get_position();
124 state.advance(1); while let Some(c) = state.peek() {
127 if c == '\n' {
128 break;
129 }
130 state.advance(c.len_utf8());
131 }
132
133 state.add_token(ClojureTokenType::Comment, start, state.get_position());
134 }
135
136 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
137 let start = state.get_position();
138 state.advance(1); while let Some(c) = state.peek() {
141 if c == '"' {
142 state.advance(1);
143 break;
144 }
145 else if c == '\\' {
146 state.advance(1); if let Some(escaped) = state.peek() {
148 state.advance(escaped.len_utf8()); }
150 }
151 else {
152 state.advance(c.len_utf8());
153 }
154 }
155
156 state.add_token(ClojureTokenType::StringLiteral, start, state.get_position());
157 }
158
159 fn lex_character<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
160 let start = state.get_position();
161 state.advance(1); if let Some(c) = state.peek() {
164 state.advance(c.len_utf8());
165 }
166
167 state.add_token(ClojureTokenType::CharacterLiteral, start, state.get_position());
168 }
169
170 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
171 let start = state.get_position();
172
173 while let Some(c) = state.peek() {
174 if c.is_ascii_digit() || c == '.' {
175 state.advance(1);
176 }
177 else {
178 break;
179 }
180 }
181
182 state.add_token(ClojureTokenType::NumberLiteral, start, state.get_position());
183 }
184
185 fn lex_keyword<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
186 let start = state.get_position();
187 state.advance(1); while let Some(c) = state.peek() {
190 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' {
191 state.advance(c.len_utf8());
192 }
193 else {
194 break;
195 }
196 }
197
198 state.add_token(ClojureTokenType::KeywordLiteral, start, state.get_position());
199 }
200
201 fn lex_dispatch<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
202 let start = state.get_position();
203 state.advance(1); match state.peek() {
206 Some('{') => {
207 state.advance(1);
208 state.add_token(ClojureTokenType::SetStart, start, state.get_position());
209 }
210 Some('(') => {
211 state.advance(1);
212 state.add_token(ClojureTokenType::AnonFnStart, start, state.get_position());
213 }
214 Some('"') => {
215 self.lex_regex(state, start);
216 }
217 _ => {
218 state.add_token(ClojureTokenType::Dispatch, start, state.get_position());
219 }
220 }
221 }
222
223 fn lex_regex<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>, start: usize) {
224 state.advance(1); while let Some(c) = state.peek() {
227 if c == '"' {
228 state.advance(1);
229 break;
230 }
231 else if c == '\\' {
232 state.advance(1); if let Some(escaped) = state.peek() {
234 state.advance(escaped.len_utf8()); }
236 }
237 else {
238 state.advance(c.len_utf8());
239 }
240 }
241
242 state.add_token(ClojureTokenType::RegexLiteral, start, state.get_position());
243 }
244
245 fn lex_symbol<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
246 let start = state.get_position();
247
248 while let Some(c) = state.peek() {
249 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' || c == '*' || c == '+' || c == '/' {
250 state.advance(c.len_utf8());
251 }
252 else {
253 break;
254 }
255 }
256
257 state.add_token(ClojureTokenType::Symbol, start, state.get_position());
258 }
259}