#![doc = include_str!("readme.md")]
pub mod token_type;
pub use token_type::ClojureTokenType;

use crate::ClojureLanguage;
use oak_core::{
    Lexer, LexerCache, LexerState, OakError,
    lexer::LexOutput,
    source::{Source, TextEdit},
};

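/// Hand-written lexer for Clojure source text, parameterised by a shared
/// [`ClojureLanguage`] configuration (held but not read yet, hence the
/// underscore-prefixed field).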
#[derive(Clone, Debug)]
pub struct ClojureLexer<'config> {
    _config: &'config ClojureLanguage,
}

type State<'a, S> = LexerState<'a, S, ClojureLanguage>;

impl<'config> Lexer<ClojureLanguage> for ClojureLexer<'config> {
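    /// Performs a full (non-incremental) lex: the incoming `_edits` are
    /// currently ignored, so the whole source is re-tokenised and the result
    /// is finished through the supplied `cache`.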
    fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ClojureLanguage>) -> LexOutput<ClojureLanguage> {
        let mut state = State::new(text);
        let result = self.run(&mut state);
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, cache)
    }
}

impl<'config> ClojureLexer<'config> {
    pub fn new(config: &'config ClojureLanguage) -> Self {
        Self { _config: config }
    }
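    /// Main dispatch loop: peeks one character at a time and routes it to the
    /// matching sub-lexer, with `advance_if_dead_lock` as a safety net so a
    /// handler that consumes nothing cannot stall the loop.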
    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
        while state.not_at_end() {
            let start = state.get_position();
            let safe_point = start;

            match state.peek() {
                Some(c) if c.is_whitespace() => self.lex_whitespace(state),
                Some(';') => self.lex_comment(state),
                Some('"') => self.lex_string(state),
                Some('\\') => self.lex_character(state),
                Some(c) if c.is_ascii_digit() => self.lex_number(state),
                Some(':') => self.lex_keyword(state),
                Some('#') => self.lex_dispatch(state),
                Some('(') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::ListStart, start, state.get_position())
                }
                Some(')') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::ListEnd, start, state.get_position())
                }
                Some('[') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::VectorStart, start, state.get_position())
                }
                Some(']') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::VectorEnd, start, state.get_position())
                }
                Some('{') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::MapStart, start, state.get_position())
                }
                Some('}') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::MapEnd, start, state.get_position())
                }
                Some('\'') | Some('`') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::Quote, start, state.get_position())
                }
                Some('~') => {
                    state.advance(1);
                    if state.peek() == Some('@') {
                        state.advance(1);
                        state.add_token(ClojureTokenType::UnquoteSplice, start, state.get_position())
                    }
                    else {
                        state.add_token(ClojureTokenType::Unquote, start, state.get_position())
                    }
                }
                Some('^') => {
                    state.advance(1);
                    state.add_token(ClojureTokenType::Meta, start, state.get_position())
                }
                Some(_) => self.lex_symbol(state),
                None => break,
            }

            state.advance_if_dead_lock(safe_point)
        }
        Ok(())
    }
}

impl<'config> ClojureLexer<'config> {
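    /// Consumes a maximal run of whitespace characters into one `Whitespace` token.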
    fn lex_whitespace<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();
        while let Some(c) = state.peek() {
            if c.is_whitespace() { state.advance(c.len_utf8()) } else { break }
        }
        state.add_token(ClojureTokenType::Whitespace, start, state.get_position())
    }

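    /// Lexes a `;` line comment, consuming up to but not including the terminating newline.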
    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();
        state.advance(1);
        while let Some(c) = state.peek() {
            if c == '\n' {
                break;
            }
            state.advance(c.len_utf8())
        }

        state.add_token(ClojureTokenType::Comment, start, state.get_position())
    }

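    /// Lexes a double-quoted string literal, skipping backslash escapes so an
    /// escaped `"` does not end the token early.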
    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();
        state.advance(1);
        while let Some(c) = state.peek() {
            if c == '"' {
                state.advance(1);
                break;
            }
            else if c == '\\' {
                state.advance(1);
                if let Some(escaped) = state.peek() {
                    state.advance(escaped.len_utf8());
                }
            }
            else {
                state.advance(c.len_utf8())
            }
        }

        state.add_token(ClojureTokenType::StringLiteral, start, state.get_position())
    }

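    /// Lexes a character literal such as `\a`: the backslash plus exactly one
    /// following character (multi-character names like `\newline` are not
    /// treated specially).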
    fn lex_character<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();
        state.advance(1);
        if let Some(c) = state.peek() {
            state.advance(c.len_utf8())
        }

        state.add_token(ClojureTokenType::CharacterLiteral, start, state.get_position())
    }

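    /// Lexes an unsigned numeric literal as a run of ASCII digits and `.` characters.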
    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();

        while let Some(c) = state.peek() {
            if c.is_ascii_digit() || c == '.' { state.advance(1) } else { break }
        }

        state.add_token(ClojureTokenType::NumberLiteral, start, state.get_position())
    }

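    /// Lexes a `:keyword`: a leading `:` followed by alphanumerics and `-`, `_`, `?`, `!`.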
    fn lex_keyword<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();
        state.advance(1);
        while let Some(c) = state.peek() {
            if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' { state.advance(c.len_utf8()) } else { break }
        }

        state.add_token(ClojureTokenType::KeywordLiteral, start, state.get_position())
    }

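    /// Lexes a `#` dispatch form: `#{` starts a set, `#(` an anonymous function,
    /// `#"` a regex literal; any other follower yields a bare `Dispatch` token.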
    fn lex_dispatch<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();
        state.advance(1);
        match state.peek() {
            Some('{') => {
                state.advance(1);
                state.add_token(ClojureTokenType::SetStart, start, state.get_position())
            }
            Some('(') => {
                state.advance(1);
                state.add_token(ClojureTokenType::AnonFnStart, start, state.get_position())
            }
            Some('"') => self.lex_regex(state, start),
            _ => state.add_token(ClojureTokenType::Dispatch, start, state.get_position()),
        }
    }

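    /// Lexes the quoted body of a `#"..."` regex literal; `start` points at the
    /// `#` so the token spans the whole form, and escapes are skipped as in
    /// `lex_string`.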
    fn lex_regex<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>, start: usize) {
        state.advance(1);
        while let Some(c) = state.peek() {
            if c == '"' {
                state.advance(1);
                break;
            }
            else if c == '\\' {
                state.advance(1);
                if let Some(escaped) = state.peek() {
                    state.advance(escaped.len_utf8());
                }
            }
            else {
                state.advance(c.len_utf8())
            }
        }

        state.add_token(ClojureTokenType::RegexLiteral, start, state.get_position())
    }

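    /// Fallback handler: lexes a symbol from alphanumerics and `-`, `_`, `?`,
    /// `!`, `*`, `+`, `/`; characters outside this set are left for the
    /// dead-lock guard in `run` to skip.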
    fn lex_symbol<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
        let start = state.get_position();

        while let Some(c) = state.peek() {
            if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' || c == '*' || c == '+' || c == '/' { state.advance(c.len_utf8()) } else { break }
        }

        state.add_token(ClojureTokenType::Symbol, start, state.get_position())
    }
}