1#![doc = include_str!("readme.md")]
2pub mod token_type;
4pub use token_type::ClojureTokenType;
5
6use crate::ClojureLanguage;
7use oak_core::{
8 Lexer, LexerCache, LexerState, OakError,
9 lexer::LexOutput,
10 source::{Source, TextEdit},
11};
12
/// Tokenizer for Clojure source text.
///
/// Borrows the shared [`ClojureLanguage`] configuration for `'config`.
#[derive(Clone, Debug)]
pub struct ClojureLexer<'config> {
    // Language configuration; carried for the lexer's lifetime but no field
    // of it is read anywhere in this file — presumably used by other stages.
    pub(crate) config: &'config ClojureLanguage,
}
18
/// Shorthand for the generic lexer state specialized to [`ClojureLanguage`].
pub(crate) type State<'a, S> = LexerState<'a, S, ClojureLanguage>;
20
21impl<'config> Lexer<ClojureLanguage> for ClojureLexer<'config> {
22 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ClojureLanguage>) -> LexOutput<ClojureLanguage> {
23 let mut state = State::new(text);
24 let result = self.run(&mut state);
25 if result.is_ok() {
26 state.add_eof()
27 }
28 state.finish_with_cache(result, cache)
29 }
30}
31
32impl<'config> ClojureLexer<'config> {
33 pub fn new(config: &'config ClojureLanguage) -> Self {
35 Self { config }
36 }
37 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
38 while state.not_at_end() {
39 let start = state.get_position();
40 let safe_point = start;
41
42 match state.peek() {
43 Some(c) if c.is_whitespace() => self.lex_whitespace(state),
44 Some(';') => self.lex_comment(state),
45 Some('"') => self.lex_string(state),
46 Some('\\') => self.lex_character(state),
47 Some(c) if c.is_ascii_digit() => self.lex_number(state),
48 Some(':') => self.lex_keyword(state),
49 Some('#') => self.lex_dispatch(state),
50 Some('(') => {
51 state.advance(1);
52 state.add_token(ClojureTokenType::ListStart, start, state.get_position())
53 }
54 Some(')') => {
55 state.advance(1);
56 state.add_token(ClojureTokenType::ListEnd, start, state.get_position())
57 }
58 Some('[') => {
59 state.advance(1);
60 state.add_token(ClojureTokenType::VectorStart, start, state.get_position())
61 }
62 Some(']') => {
63 state.advance(1);
64 state.add_token(ClojureTokenType::VectorEnd, start, state.get_position())
65 }
66 Some('{') => {
67 state.advance(1);
68 state.add_token(ClojureTokenType::MapStart, start, state.get_position())
69 }
70 Some('}') => {
71 state.advance(1);
72 state.add_token(ClojureTokenType::MapEnd, start, state.get_position())
73 }
74 Some('\'') | Some('`') => {
75 state.advance(1);
76 state.add_token(ClojureTokenType::Quote, start, state.get_position())
77 }
78 Some('~') => {
79 state.advance(1);
80 if state.peek() == Some('@') {
81 state.advance(1);
82 state.add_token(ClojureTokenType::UnquoteSplice, start, state.get_position())
83 }
84 else {
85 state.add_token(ClojureTokenType::Unquote, start, state.get_position())
86 }
87 }
88 Some('^') => {
89 state.advance(1);
90 state.add_token(ClojureTokenType::Meta, start, state.get_position())
91 }
92 Some(_) => self.lex_symbol(state),
93 None => break,
94 }
95
96 state.advance_if_dead_lock(safe_point)
97 }
98 Ok(())
99 }
100}
101
102impl<'config> ClojureLexer<'config> {
103 fn lex_whitespace<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
104 let start = state.get_position();
105 while let Some(c) = state.peek() {
106 if c.is_whitespace() { state.advance(c.len_utf8()) } else { break }
107 }
108 state.add_token(ClojureTokenType::Whitespace, start, state.get_position())
109 }
110
111 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
112 let start = state.get_position();
113 state.advance(1); while let Some(c) = state.peek() {
116 if c == '\n' {
117 break;
118 }
119 state.advance(c.len_utf8())
120 }
121
122 state.add_token(ClojureTokenType::Comment, start, state.get_position())
123 }
124
125 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
126 let start = state.get_position();
127 state.advance(1); while let Some(c) = state.peek() {
130 if c == '"' {
131 state.advance(1);
132 break;
133 }
134 else if c == '\\' {
135 state.advance(1); if let Some(escaped) = state.peek() {
137 state.advance(escaped.len_utf8()); }
139 }
140 else {
141 state.advance(c.len_utf8())
142 }
143 }
144
145 state.add_token(ClojureTokenType::StringLiteral, start, state.get_position())
146 }
147
148 fn lex_character<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
149 let start = state.get_position();
150 state.advance(1); if let Some(c) = state.peek() {
153 state.advance(c.len_utf8())
154 }
155
156 state.add_token(ClojureTokenType::CharacterLiteral, start, state.get_position())
157 }
158
159 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
160 let start = state.get_position();
161
162 while let Some(c) = state.peek() {
163 if c.is_ascii_digit() || c == '.' { state.advance(1) } else { break }
164 }
165
166 state.add_token(ClojureTokenType::NumberLiteral, start, state.get_position())
167 }
168
169 fn lex_keyword<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
170 let start = state.get_position();
171 state.advance(1); while let Some(c) = state.peek() {
174 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' { state.advance(c.len_utf8()) } else { break }
175 }
176
177 state.add_token(ClojureTokenType::KeywordLiteral, start, state.get_position())
178 }
179
180 fn lex_dispatch<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
181 let start = state.get_position();
182 state.advance(1); match state.peek() {
185 Some('{') => {
186 state.advance(1);
187 state.add_token(ClojureTokenType::SetStart, start, state.get_position())
188 }
189 Some('(') => {
190 state.advance(1);
191 state.add_token(ClojureTokenType::AnonFnStart, start, state.get_position())
192 }
193 Some('"') => self.lex_regex(state, start),
194 _ => state.add_token(ClojureTokenType::Dispatch, start, state.get_position()),
195 }
196 }
197
198 fn lex_regex<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>, start: usize) {
199 state.advance(1); while let Some(c) = state.peek() {
202 if c == '"' {
203 state.advance(1);
204 break;
205 }
206 else if c == '\\' {
207 state.advance(1); if let Some(escaped) = state.peek() {
209 state.advance(escaped.len_utf8()); }
211 }
212 else {
213 state.advance(c.len_utf8())
214 }
215 }
216
217 state.add_token(ClojureTokenType::RegexLiteral, start, state.get_position())
218 }
219
220 fn lex_symbol<'a, S: Source + ?Sized>(&self, state: &mut LexerState<'a, S, ClojureLanguage>) {
221 let start = state.get_position();
222
223 while let Some(c) = state.peek() {
224 if c.is_alphanumeric() || c == '-' || c == '_' || c == '?' || c == '!' || c == '*' || c == '+' || c == '/' { state.advance(c.len_utf8()) } else { break }
225 }
226
227 state.add_token(ClojureTokenType::Symbol, start, state.get_position())
228 }
229}