1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4pub use token_type::JTokenType;
5
6use crate::language::JLanguage;
7use oak_core::{
8 Lexer, LexerCache, LexerState, OakError,
9 lexer::{LexOutput, WhitespaceConfig},
10 source::Source,
11};
12use std::sync::LazyLock;
13
/// Shorthand for the framework lexer state specialised to the J language.
type State<'a, S> = LexerState<'a, S, JLanguage>;

/// Shared whitespace scanner config; Unicode whitespace is treated as whitespace.
static J_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
17
/// Lexer for the J language, borrowing its language configuration.
#[derive(Clone, Debug)]
pub struct JLexer<'config> {
    // Borrowed language configuration. NOTE(review): not read by any of the
    // scanning routines in this file — confirm it is used elsewhere or planned.
    config: &'config JLanguage,
}
22
impl<'config> Lexer<JLanguage> for JLexer<'config> {
    /// Tokenizes `source` from offset 0 and returns the token stream.
    ///
    /// The cache is consulted on creation and updated on finish, but `_edits`
    /// is ignored — NOTE(review): incremental relexing appears unimplemented
    /// here; every call rescans from the start. Confirm this is intentional.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<JLanguage>) -> LexOutput<JLanguage> {
        let mut state: State<'_, S> = LexerState::new_with_cache(source, 0, cache);
        let result = self.run(&mut state);
        // Only append the EOF token when scanning completed without error.
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, cache)
    }
}
33
34impl<'config> JLexer<'config> {
    /// Creates a lexer that borrows the given language configuration.
    pub fn new(config: &'config JLanguage) -> Self {
        Self { config }
    }
38
39 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
41 while state.not_at_end() {
42 let safe_point = state.get_position();
43
44 if self.skip_whitespace(state) {
45 continue;
46 }
47
48 if self.skip_comment(state) {
49 continue;
50 }
51
52 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_number_literal(state) {
57 continue;
58 }
59
60 if self.lex_identifier(state) {
61 continue;
62 }
63
64 if self.lex_operators(state) {
65 continue;
66 }
67
68 if let Some(ch) = state.peek() {
70 state.advance(ch.len_utf8());
71 state.add_token(JTokenType::Error, safe_point, state.get_position());
72 }
73 }
74
75 Ok(())
76 }
77
    /// Consumes a run of whitespace via the shared config, emitting a
    /// `Whitespace` token; returns `true` if anything was consumed.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        J_WHITESPACE.scan(state, JTokenType::Whitespace)
    }
82
83 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85 let start = state.get_position();
86 if state.consume_if_starts_with("NB.") {
87 while let Some(ch) = state.peek() {
88 if ch == '\n' || ch == '\r' {
89 break;
90 }
91 state.advance(ch.len_utf8());
92 }
93 state.add_token(JTokenType::Comment, start, state.get_position());
94 return true;
95 }
96 false
97 }
98
99 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
101 let start = state.get_position();
102 if state.consume_if_starts_with("'") {
103 while let Some(ch) = state.peek() {
104 if ch == '\'' {
105 state.advance(ch.len_utf8());
106 if state.consume_if_starts_with("'") {
108 continue;
109 }
110 state.add_token(JTokenType::StringLiteral, start, state.get_position());
111 return true;
112 }
113 state.advance(ch.len_utf8());
114 }
115 state.add_token(JTokenType::Error, start, state.get_position());
117 return true;
118 }
119 false
120 }
121
122 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
124 let start = state.get_position();
125 if let Some(ch) = state.peek() {
126 if ch.is_ascii_digit() || ch == '_' {
127 state.advance(ch.len_utf8());
129 while let Some(ch) = state.peek() {
130 if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == 'j' || ch == 'r' {
131 state.advance(ch.len_utf8());
132 }
133 else {
134 break;
135 }
136 }
137 state.add_token(JTokenType::NumberLiteral, start, state.get_position());
138 return true;
139 }
140 }
141 false
142 }
143
144 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
146 let start = state.get_position();
147 if let Some(ch) = state.peek() {
148 if ch.is_ascii_alphabetic() {
149 state.advance(ch.len_utf8());
150 while let Some(ch) = state.peek() {
151 if ch.is_ascii_alphanumeric() || ch == '_' {
152 state.advance(ch.len_utf8());
153 }
154 else {
155 break;
156 }
157 }
158 state.add_token(JTokenType::Identifier, start, state.get_position());
159 return true;
160 }
161 }
162 false
163 }
164
165 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
167 let start = state.get_position();
168
169 for (op, token) in [("=:", JTokenType::IsGlobal), ("=.", JTokenType::IsLocal)] {
171 if state.consume_if_starts_with(op) {
172 state.add_token(token, start, state.get_position());
173 return true;
174 }
175 }
176
177 if let Some(ch) = state.peek() {
179 let token = match ch {
180 '=' => Some(JTokenType::Equal),
181 '.' => Some(JTokenType::Dot),
182 ':' => Some(JTokenType::Colon),
183 '+' => Some(JTokenType::Plus),
184 '-' => Some(JTokenType::Minus),
185 '*' => Some(JTokenType::Star),
186 '%' => Some(JTokenType::Percent),
187 '$' => Some(JTokenType::Dollar),
188 ',' => Some(JTokenType::Comma),
189 '#' => Some(JTokenType::Hash),
190 '/' => Some(JTokenType::Slash),
191 '\\' => Some(JTokenType::Backslash),
192 '|' => Some(JTokenType::Pipe),
193 '&' => Some(JTokenType::Ampersand),
194 '^' => Some(JTokenType::Caret),
195 '~' => Some(JTokenType::Tilde),
196 '<' => Some(JTokenType::Less),
197 '>' => Some(JTokenType::Greater),
198 '(' => Some(JTokenType::LeftParen),
199 ')' => Some(JTokenType::RightParen),
200 '[' => Some(JTokenType::LeftBracket),
201 ']' => Some(JTokenType::RightBracket),
202 '{' => Some(JTokenType::LeftBrace),
203 '}' => Some(JTokenType::RightBrace),
204 _ => None,
205 };
206
207 if let Some(token) = token {
208 state.advance(ch.len_utf8());
209 state.add_token(token, start, state.get_position());
210 return true;
211 }
212 }
213
214 false
215 }
216}