1pub mod token_type;
2pub use token_type::ClojureTokenType;
3
4use crate::ClojureLanguage;
5use oak_core::{
6 Lexer, LexerCache, LexerState, OakError,
7 lexer::LexOutput,
8 source::{Source, TextEdit},
9};
10
/// Hand-written, non-incremental lexer for Clojure source text.
///
/// Borrows the [`ClojureLanguage`] configuration for its lifetime; the
/// configuration is currently unread (hence the underscore prefix) but is
/// retained so lexing options can be threaded through later.
#[derive(Clone, Debug)]
pub struct ClojureLexer<'config> {
    // Currently unused language configuration, kept for future options.
    _config: &'config ClojureLanguage,
}
15
/// Shorthand for the framework lexer state specialised to Clojure.
type State<'a, S> = LexerState<'a, S, ClojureLanguage>;
17
18impl<'config> Lexer<ClojureLanguage> for ClojureLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ClojureLanguage>) -> LexOutput<ClojureLanguage> {
20 let mut state = State::new(text);
21 let result = self.run(&mut state);
22 if result.is_ok() {
23 state.add_eof();
24 }
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl<'config> ClojureLexer<'config> {
30 pub fn new(config: &'config ClojureLanguage) -> Self {
31 Self { _config: config }
32 }
33 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
34 while state.not_at_end() {
35 let start = state.get_position();
36 let safe_point = start;
37
38 match state.peek() {
39 Some(c) if c.is_whitespace() => {
40 self.lex_whitespace(state);
41 }
42 Some(';') => {
43 self.lex_comment(state);
44 }
45 Some('"') => {
46 self.lex_string(state);
47 }
48 Some('\\') => {
49 self.lex_character(state);
50 }
51 Some(c) if c.is_ascii_digit() => {
52 self.lex_number(state);
53 }
54 Some(':') => {
55 self.lex_keyword(state);
56 }
57 Some('#') => {
58 self.lex_dispatch(state);
59 }
60 Some('(') => {
61 state.advance(1);
62 state.add_token(ClojureTokenType::ListStart, start, state.get_position());
63 }
64 Some(')') => {
65 state.advance(1);
66 state.add_token(ClojureTokenType::ListEnd, start, state.get_position());
67 }
68 Some('[') => {
69 state.advance(1);
70 state.add_token(ClojureTokenType::VectorStart, start, state.get_position());
71 }
72 Some(']') => {
73 state.advance(1);
74 state.add_token(ClojureTokenType::VectorEnd, start, state.get_position());
75 }
76 Some('{') => {
77 state.advance(1);
78 state.add_token(ClojureTokenType::MapStart, start, state.get_position());
79 }
80 Some('}') => {
81 state.advance(1);
82 state.add_token(ClojureTokenType::MapEnd, start, state.get_position());
83 }
84 Some('\'') | Some('`') => {
85 state.advance(1);
86 state.add_token(ClojureTokenType::Quote, start, state.get_position());
87 }
88 Some('~') => {
89 state.advance(1);
90 if state.peek() == Some('@') {
91 state.advance(1);
92 state.add_token(ClojureTokenType::UnquoteSplice, start, state.get_position());
93 }
94 else {
95 state.add_token(ClojureTokenType::Unquote, start, state.get_position());
96 }
97 }
98 Some('^') => {
99 state.advance(1);
100 state.add_token(ClojureTokenType::Meta, start, state.get_position());
101 }
102 Some(_) => {
103 self.lex_symbol(state);
104 }
105 None => break,
106 }
107
108 state.advance_if_dead_lock(safe_point);
109 }
110 Ok(())
111 }
112}
113
114impl<'config> ClojureLexer<'config> {
115 fn lex_whitespace<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
116 let start = state.get_position();
117 while let Some(c) = state.peek() {
118 if c.is_whitespace() {
119 state.advance(c.len_utf8());
120 }
121 else {
122 break;
123 }
124 }
125 state.add_token(ClojureTokenType::Whitespace, start, state.get_position());
126 }
127
128 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
129 let start = state.get_position();
130 state.advance(1); while let Some(c) = state.peek() {
133 if c == '\n' {
134 break;
135 }
136 state.advance(c.len_utf8());
137 }
138
139 state.add_token(ClojureTokenType::Comment, start, state.get_position());
140 }
141
142 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
143 let start = state.get_position();
144 state.advance(1); while let Some(c) = state.peek() {
147 if c == '"' {
148 state.advance(1);
149 break;
150 }
151 else if c == '\\' {
152 state.advance(1); if let Some(escaped) = state.peek() {
154 state.advance(escaped.len_utf8()); }
156 }
157 else {
158 state.advance(c.len_utf8());
159 }
160 }
161
162 state.add_token(ClojureTokenType::StringLiteral, start, state.get_position());
163 }
164
165 fn lex_character<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
166 let start = state.get_position();
167 state.advance(1); if let Some(c) = state.peek() {
170 state.advance(c.len_utf8());
171 }
172
173 state.add_token(ClojureTokenType::CharacterLiteral, start, state.get_position());
174 }
175
176 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
177 let start = state.get_position();
178
179 while let Some(c) = state.peek() {
180 if c.is_ascii_digit() || c == '.' {
181 state.advance(1);
182 }
183 else {
184 break;
185 }
186 }
187
188 state.add_token(ClojureTokenType::NumberLiteral, start, state.get_position());
189 }
190
191 fn lex_keyword<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
192 let start = state.get_position();
193 state.advance(1); while let Some(c) = state.peek() {
196 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' {
197 state.advance(c.len_utf8());
198 }
199 else {
200 break;
201 }
202 }
203
204 state.add_token(ClojureTokenType::KeywordLiteral, start, state.get_position());
205 }
206
207 fn lex_dispatch<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
208 let start = state.get_position();
209 state.advance(1); match state.peek() {
212 Some('{') => {
213 state.advance(1);
214 state.add_token(ClojureTokenType::SetStart, start, state.get_position());
215 }
216 Some('(') => {
217 state.advance(1);
218 state.add_token(ClojureTokenType::AnonFnStart, start, state.get_position());
219 }
220 Some('"') => {
221 self.lex_regex(state, start);
222 }
223 _ => {
224 state.add_token(ClojureTokenType::Dispatch, start, state.get_position());
225 }
226 }
227 }
228
229 fn lex_regex<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>, start: usize) {
230 state.advance(1); while let Some(c) = state.peek() {
233 if c == '"' {
234 state.advance(1);
235 break;
236 }
237 else if c == '\\' {
238 state.advance(1); if let Some(escaped) = state.peek() {
240 state.advance(escaped.len_utf8()); }
242 }
243 else {
244 state.advance(c.len_utf8());
245 }
246 }
247
248 state.add_token(ClojureTokenType::RegexLiteral, start, state.get_position());
249 }
250
251 fn lex_symbol<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
252 let start = state.get_position();
253
254 while let Some(c) = state.peek() {
255 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' || c == '*' || c == '+' || c == '/' {
256 state.advance(c.len_utf8());
257 }
258 else {
259 break;
260 }
261 }
262
263 state.add_token(ClojureTokenType::Symbol, start, state.get_position());
264 }
265}