1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::OrgModeLanguage, lexer::token_type::OrgModeTokenType};
5use oak_core::{
6 TextEdit,
7 errors::OakError,
8 lexer::{CommentConfig, LexOutput, Lexer, LexerCache, LexerState, StringConfig, WhitespaceConfig},
9 source::Source,
10};
11use std::sync::LazyLock;
12
/// Shorthand for the shared lexer state specialised to the Org-mode language.
pub(crate) type State<'a, S> = LexerState<'a, S, OrgModeLanguage>;

// Scanner configurations shared by every lexer instance, built once on first use.
// ASCII-only whitespace: `unicode_whitespace` is explicitly disabled.
static ORG_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: false });
// Line comments start with `#`. Block markers are empty strings —
// NOTE(review): presumably `CommentConfig` treats "" as "no block comments"; confirm.
static ORG_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "#", block_start: "", block_end: "", nested_blocks: false });
// Double-quoted strings with backslash escapes.
static ORG_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18
/// Lexer for Org-mode documents.
///
/// Borrows its language configuration (TODO/DONE keyword lists, see
/// `lex_text_or_keyword`) from an [`OrgModeLanguage`] for the `'config` lifetime.
#[derive(Clone, Debug)]
pub struct OrgModeLexer<'config> {
    // Consulted when classifying alphabetic words as Todo/Done/Text tokens.
    config: &'config OrgModeLanguage,
}
24
25impl<'config> OrgModeLexer<'config> {
26 pub fn new(config: &'config OrgModeLanguage) -> Self {
29 Self { config }
30 }
31
    /// Consumes a run of whitespace (ASCII only, per `ORG_WHITESPACE`) and
    /// emits a `Whitespace` token. Returns `true` if anything was consumed.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        ORG_WHITESPACE.scan(state, OrgModeTokenType::Whitespace)
    }
35
    /// Consumes a `#`-prefixed line comment and emits a `Comment` token.
    /// Returns `true` if a comment was consumed.
    // NOTE(review): `scan` takes two token types (presumably line and block
    // flavours); both are `Comment` here, and `ORG_COMMENT` has empty block
    // markers — confirm against `CommentConfig::scan` that this disables blocks.
    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        ORG_COMMENT.scan(state, OrgModeTokenType::Comment, OrgModeTokenType::Comment)
    }
39
    /// Consumes a double-quoted string (backslash escapes allowed, per
    /// `ORG_STRING`) and emits it as a single `Text` token — Org-mode has no
    /// dedicated string token type here.
    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        ORG_STRING.scan(state, OrgModeTokenType::Text)
    }
43
44 fn lex_text_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
45 if let Some(ch) = state.peek() {
46 if ch.is_alphabetic() {
47 let start_pos = state.get_position();
48 while let Some(ch) = state.peek() {
50 if ch.is_alphanumeric() {
51 state.advance(ch.len_utf8());
52 }
53 else {
54 break;
55 }
56 }
57 let end_pos = state.get_position();
58 let text = state.source().get_text_in((start_pos..end_pos).into());
59 let kind = if self.config.todo_keywords.iter().any(|k| k == text.as_ref()) {
60 OrgModeTokenType::Todo
61 }
62 else if self.config.done_keywords.iter().any(|k| k == text.as_ref()) {
63 OrgModeTokenType::Done
64 }
65 else {
66 OrgModeTokenType::Text
67 };
68 state.add_token(kind, start_pos, end_pos);
69 return true;
70 }
71 }
72 false
73 }
74
75 fn lex_priority<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
76 if state.starts_with("[#") {
77 let start_pos = state.get_position();
78 state.advance(2);
79
80 if let Some(ch) = state.peek() {
81 if ch.is_alphabetic() {
82 state.advance(ch.len_utf8());
83 if let Some(']') = state.peek() {
84 state.advance(1);
85 state.add_token(OrgModeTokenType::Priority, start_pos, state.get_position());
86 return true;
87 }
88 }
89 }
90
91 state.set_position(start_pos);
92 false
93 }
94 else {
95 false
96 }
97 }
98
99 fn lex_number_or_date<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
100 if let Some(ch) = state.peek() {
101 if ch.is_ascii_digit() {
102 let start_pos = state.get_position();
103 let mut has_dash = false;
104
105 while let Some(ch) = state.peek() {
106 if ch.is_ascii_digit() {
107 state.advance(1);
108 }
109 else if ch == '-' {
110 state.advance(1);
111 has_dash = true;
112 }
113 else {
114 break;
115 }
116 }
117
118 let kind = if has_dash { OrgModeTokenType::Date } else { OrgModeTokenType::Number };
119
120 state.add_token(kind, start_pos, state.get_position());
121 return true;
122 }
123 }
124 false
125 }
126
127 fn lex_symbols<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
128 if let Some(ch) = state.peek() {
129 let start_pos = state.get_position();
130 state.advance(ch.len_utf8());
131
132 let kind = match ch {
133 '+' => OrgModeTokenType::Plus,
134 '-' => OrgModeTokenType::Minus,
135 '*' => OrgModeTokenType::Star,
136 '#' => OrgModeTokenType::Hash,
137 '|' => OrgModeTokenType::Pipe,
138 ':' => OrgModeTokenType::Colon,
139 '[' => OrgModeTokenType::LeftBracket,
140 ']' => OrgModeTokenType::RightBracket,
141 '(' => OrgModeTokenType::LeftParen,
142 ')' => OrgModeTokenType::RightParen,
143 '{' => OrgModeTokenType::LeftBrace,
144 '}' => OrgModeTokenType::RightBrace,
145 '<' => OrgModeTokenType::LessThan,
146 '>' => OrgModeTokenType::GreaterThan,
147 '=' => OrgModeTokenType::Equal,
148 '_' => OrgModeTokenType::Underscore,
149 '~' => OrgModeTokenType::Tilde,
150 '/' => OrgModeTokenType::Slash,
151 '\\' => OrgModeTokenType::Backslash,
152 '\n' => OrgModeTokenType::Newline,
153 _ => {
154 state.add_token(OrgModeTokenType::Text, start_pos, state.get_position());
156 return true;
157 }
158 };
159
160 state.add_token(kind, start_pos, state.get_position());
161 true
162 }
163 else {
164 false
165 }
166 }
167
    /// Main lexing loop: repeatedly tries each scanner in priority order
    /// until the source is exhausted. The ordering is load-bearing:
    /// newlines before whitespace (so they are not swallowed as whitespace),
    /// priority cookies before symbols (so `[#A]` is not split into
    /// bracket/hash tokens), and words before symbols.
    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
        while state.not_at_end() {
            // Snapshot for the end-of-iteration deadlock guard.
            let safe_point = state.get_position();

            // Newlines are structurally significant in Org-mode (headlines,
            // lists and tables are line-oriented), so emit them explicitly.
            if let Some('\n') = state.peek() {
                let start_pos = state.get_position();
                state.advance(1);
                state.add_token(OrgModeTokenType::Newline, start_pos, state.get_position());
                continue;
            }

            if self.skip_whitespace(state) {
                continue;
            }

            if self.skip_comment(state) {
                continue;
            }

            if self.lex_string(state) {
                continue;
            }

            if self.lex_priority(state) {
                continue;
            }

            if self.lex_number_or_date(state) {
                continue;
            }

            if self.lex_text_or_keyword(state) {
                continue;
            }

            if self.lex_symbols(state) {
                continue;
            }

            // Fallback: emit an Error token for a single character.
            // NOTE(review): `lex_symbols` returns true for any remaining
            // character, so this path appears unreachable; kept as a safety net.
            let start_pos = state.get_position();
            if let Some(ch) = state.peek() {
                state.advance(ch.len_utf8());
                state.add_token(OrgModeTokenType::Error, start_pos, state.get_position());
            }
            else {
                break;
            }

            // Force progress if no scanner advanced past the snapshot.
            state.advance_if_dead_lock(safe_point);
        }
        Ok(())
    }
229}
230
231impl<'config> Lexer<OrgModeLanguage> for OrgModeLexer<'config> {
232 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<OrgModeLanguage>) -> LexOutput<OrgModeLanguage> {
233 let mut state = State::new(source);
234 let result = self.run(&mut state);
235 if result.is_ok() {
236 state.add_eof()
237 }
238 state.finish_with_cache(result, cache)
239 }
240}