1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5pub use token_type::JTokenType;
6
7use crate::language::JLanguage;
8use oak_core::{
9 Lexer, LexerCache, LexerState, OakError,
10 lexer::{LexOutput, WhitespaceConfig},
11 source::Source,
12};
13use std::sync::LazyLock;
14
/// Shorthand for the core lexer state specialized to the J language.
pub(crate) type State<'a, S> = LexerState<'a, S, JLanguage>;
16
/// Shared whitespace-scanner configuration. `unicode_whitespace: true` opts
/// into treating all Unicode whitespace (not just ASCII) as skippable.
static J_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
18
/// Lexer for the J language, borrowing the language configuration for `'config`.
#[derive(Clone, Debug)]
pub struct JLexer<'config> {
    // Language configuration supplied at construction.
    // NOTE(review): never read within this module — confirm whether it is
    // needed by future token classes or can be dropped.
    config: &'config JLanguage,
}
24
25impl<'config> Lexer<JLanguage> for JLexer<'config> {
26 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<JLanguage>) -> LexOutput<JLanguage> {
27 let mut state: State<'_, S> = LexerState::new_with_cache(source, 0, cache);
28 let result = self.run(&mut state);
29 if result.is_ok() {
30 state.add_eof();
31 }
32 state.finish_with_cache(result, cache)
33 }
34}
35
36impl<'config> JLexer<'config> {
37 pub fn new(config: &'config JLanguage) -> Self {
39 Self { config }
40 }
41
42 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
44 while state.not_at_end() {
45 let safe_point = state.get_position();
46
47 if self.skip_whitespace(state) {
48 continue;
49 }
50
51 if self.skip_comment(state) {
52 continue;
53 }
54
55 if self.lex_string_literal(state) {
56 continue;
57 }
58
59 if self.lex_number_literal(state) {
60 continue;
61 }
62
63 if self.lex_identifier(state) {
64 continue;
65 }
66
67 if self.lex_operators(state) {
68 continue;
69 }
70
71 if let Some(ch) = state.peek() {
73 state.advance(ch.len_utf8());
74 state.add_token(JTokenType::Error, safe_point, state.get_position());
75 }
76 }
77
78 Ok(())
79 }
80
81 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
83 J_WHITESPACE.scan(state, JTokenType::Whitespace)
84 }
85
86 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88 let start = state.get_position();
89 if state.consume_if_starts_with("NB.") {
90 while let Some(ch) = state.peek() {
91 if ch == '\n' || ch == '\r' {
92 break;
93 }
94 state.advance(ch.len_utf8());
95 }
96 state.add_token(JTokenType::Comment, start, state.get_position());
97 return true;
98 }
99 false
100 }
101
102 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
104 let start = state.get_position();
105 if state.consume_if_starts_with("'") {
106 while let Some(ch) = state.peek() {
107 if ch == '\'' {
108 state.advance(ch.len_utf8());
109 if state.consume_if_starts_with("'") {
111 continue;
112 }
113 state.add_token(JTokenType::StringLiteral, start, state.get_position());
114 return true;
115 }
116 state.advance(ch.len_utf8());
117 }
118 state.add_token(JTokenType::Error, start, state.get_position());
120 return true;
121 }
122 false
123 }
124
125 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
127 let start = state.get_position();
128 if let Some(ch) = state.peek() {
129 if ch.is_ascii_digit() || ch == '_' {
130 state.advance(ch.len_utf8());
132 while let Some(ch) = state.peek() {
133 if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == 'j' || ch == 'r' {
134 state.advance(ch.len_utf8());
135 }
136 else {
137 break;
138 }
139 }
140 state.add_token(JTokenType::NumberLiteral, start, state.get_position());
141 return true;
142 }
143 }
144 false
145 }
146
147 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
149 let start = state.get_position();
150 if let Some(ch) = state.peek() {
151 if ch.is_ascii_alphabetic() {
152 state.advance(ch.len_utf8());
153 while let Some(ch) = state.peek() {
154 if ch.is_ascii_alphanumeric() || ch == '_' {
155 state.advance(ch.len_utf8());
156 }
157 else {
158 break;
159 }
160 }
161 state.add_token(JTokenType::Identifier, start, state.get_position());
162 return true;
163 }
164 }
165 false
166 }
167
168 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
170 let start = state.get_position();
171
172 for (op, token) in [("=:", JTokenType::IsGlobal), ("=.", JTokenType::IsLocal)] {
174 if state.consume_if_starts_with(op) {
175 state.add_token(token, start, state.get_position());
176 return true;
177 }
178 }
179
180 if let Some(ch) = state.peek() {
182 let token = match ch {
183 '=' => Some(JTokenType::Equal),
184 '.' => Some(JTokenType::Dot),
185 ':' => Some(JTokenType::Colon),
186 '+' => Some(JTokenType::Plus),
187 '-' => Some(JTokenType::Minus),
188 '*' => Some(JTokenType::Star),
189 '%' => Some(JTokenType::Percent),
190 '$' => Some(JTokenType::Dollar),
191 ',' => Some(JTokenType::Comma),
192 '#' => Some(JTokenType::Hash),
193 '/' => Some(JTokenType::Slash),
194 '\\' => Some(JTokenType::Backslash),
195 '|' => Some(JTokenType::Pipe),
196 '&' => Some(JTokenType::Ampersand),
197 '^' => Some(JTokenType::Caret),
198 '~' => Some(JTokenType::Tilde),
199 '<' => Some(JTokenType::Less),
200 '>' => Some(JTokenType::Greater),
201 '(' => Some(JTokenType::LeftParen),
202 ')' => Some(JTokenType::RightParen),
203 '[' => Some(JTokenType::LeftBracket),
204 ']' => Some(JTokenType::RightBracket),
205 '{' => Some(JTokenType::LeftBrace),
206 '}' => Some(JTokenType::RightBrace),
207 _ => None,
208 };
209
210 if let Some(token) = token {
211 state.advance(ch.len_utf8());
212 state.add_token(token, start, state.get_position());
213 return true;
214 }
215 }
216
217 false
218 }
219}