1use {
16 std::rc::Rc,
17 crate::{
18 char_ext::CharExt,
19 live_id::{LiveId,LIVE_ID_SEED},
20 full_token::{TokenWithLen, Delim, FullToken},
21 colorhex
22 },
23};
24
25#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
27pub enum State {
28 Initial(InitialState),
29 BlockCommentTail(BlockCommentTailState),
30 DoubleQuotedStringTail(DoubleQuotedStringTailState),
31 RawDoubleQuotedStringTail(RawDoubleQuotedStringTailState),
33}
34
35impl Default for State {
36 fn default() -> State {
37 State::Initial(InitialState)
38 }
39}
40
41impl State {
42 pub fn next(self, cursor: &mut Cursor) -> (State, Option<TokenWithLen>) {
70 if cursor.peek(0) == '\0' {
71 return (self, None);
72 }
73 let start = cursor.index;
74 let (next_state, token) = match self {
75 State::Initial(state) => state.next(cursor),
76 State::BlockCommentTail(state) => state.next(cursor),
77 State::DoubleQuotedStringTail(state) => state.next(cursor),
78 State::RawDoubleQuotedStringTail(state) => state.next(cursor),
79 };
81 let end = cursor.index;
82 assert!(start < end);
83 (
84 next_state,
85 Some(TokenWithLen {
86 len: end - start,
87 token,
88 }),
89 )
90 }
91}
92
93#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
95pub struct InitialState;
96
97impl InitialState {
98 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
99 match (cursor.peek(0), cursor.peek(1), cursor.peek(2)) {
100 ('r', '#', '"') | ('r', '#', '#') => self.raw_string(cursor),
101 ('b', 'r', '"') | ('b', 'r', '#') => self.raw_byte_string(cursor),
102 ('.', '.', '.') | ('.', '.', '=') | ('<', '<', '=') | ('>', '>', '=') => {
103 let id = cursor.id_from_3();
104 cursor.skip(3);
105 (
106 State::Initial(InitialState),
107 FullToken::Punct(id),
108 )
109 }
110 ('/', '/', _) => self.line_comment(cursor),
111 ('/', '*', _) => self.block_comment(cursor),
112 ('b', '"', _) => self.byte_string(cursor),
114 ('!', '=', _)
116 | ('%', '=', _)
117 | ('&', '&', _)
118 | ('&', '=', _)
119 | ('*', '=', _)
120 | ('+', '=', _)
121 | ('-', '=', _)
122 | ('-', '>', _)
123 | ('.', '.', _)
124 | ('/', '=', _)
125 | (':', ':', _)
126 | ('<', '<', _)
127 | ('<', '=', _)
128 | ('=', '=', _)
129 | ('=', '>', _)
130 | ('>', '=', _)
131 | ('>', '>', _)
132 | ('^', '=', _)
133 | ('|', '=', _)
134 | ('|', '|', _) => {
135 let id = cursor.id_from_2();
136 cursor.skip(2);
137 (
138 State::Initial(InitialState),
139 FullToken::Punct(id),
140 )
141 }
142 ('"', _, _) => self.string(cursor),
144 ('(', _, _) => {
145 cursor.skip(1);
146 (
147 State::Initial(InitialState),
148 FullToken::Open(Delim::Paren),
149 )
150 }
151 (')', _, _) => {
152 cursor.skip(1);
153 (
154 State::Initial(InitialState),
155 FullToken::Close(Delim::Paren),
156 )
157 }
158 ('[', _, _) => {
159 cursor.skip(1);
160 (
161 State::Initial(InitialState),
162 FullToken::Open(Delim::Bracket),
163 )
164 }
165 (']', _, _) => {
166 cursor.skip(1);
167 (
168 State::Initial(InitialState),
169 FullToken::Close(Delim::Bracket),
170 )
171 }
172 ('{', _, _) => {
173 cursor.skip(1);
174 (
175 State::Initial(InitialState),
176 FullToken::Open(Delim::Brace),
177 )
178 }
179 ('}', _, _) => {
180 cursor.skip(1);
181 (
182 State::Initial(InitialState),
183 FullToken::Close(Delim::Brace),
184 )
185 }
186 ('#', ch1, ch2) if ch1 == 'x' && ch2.is_ascii_hexdigit() || ch1.is_ascii_hexdigit() => self.color(cursor),
187 ('.', ch1, _) if ch1.is_ascii_digit() => self.number(cursor),
188 ('!', _, _)
189 | ('#', _, _)
190 | ('$', _, _)
191 | ('%', _, _)
192 | ('&', _, _)
193 | ('*', _, _)
194 | ('+', _, _)
195 | (',', _, _)
196 | ('-', _, _)
197 | ('.', _, _)
198 | ('/', _, _)
199 | (':', _, _)
200 | (';', _, _)
201 | ('<', _, _)
202 | ('=', _, _)
203 | ('>', _, _)
204 | ('?', _, _)
205 | ('@', _, _)
206 | ('^', _, _)
207 | ('|', _, _) => {
209 let id = cursor.id_from_1();
210 cursor.skip(1);
211 (
212 State::Initial(InitialState),
213 FullToken::Punct(id),
214 )
215 }
216 (ch, _, _) if ch.is_identifier_start() => self.identifier_or_bool(cursor),
217 (ch, _, _) if ch.is_ascii_digit() => self.number(cursor),
218 (ch, _, _) if ch.is_whitespace() => self.whitespace(cursor),
219 _ => {
220 cursor.skip(1);
221 (State::Initial(InitialState), FullToken::Unknown)
222 }
223 }
224 }
225
226 fn line_comment(self, cursor: &mut Cursor) -> (State, FullToken) {
227 debug_assert!(cursor.peek(0) == '/' && cursor.peek(1) == '/');
228 cursor.skip(2);
229 while cursor.skip_if( | ch | ch != '\0') {}
230 (State::Initial(InitialState), FullToken::Comment)
231 }
232
233 fn block_comment(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
234 debug_assert!(cursor.peek(0) == '/' && cursor.peek(1) == '*');
235 cursor.skip(2);
236 BlockCommentTailState {depth: 0}.next(cursor)
237 }
238
239 fn identifier_or_bool(self, cursor: &mut Cursor) -> (State, FullToken) {
240 debug_assert!(cursor.peek(0).is_identifier_start());
241 let start = cursor.index();
242 match cursor.peek(0) {
243 'f' => {
244 cursor.skip(1);
245 if "alse".chars().all( | expected | cursor.skip_if( | actual | actual == expected)) && !cursor.peek(0).is_identifier_continue() {
246 return (State::Initial(InitialState), FullToken::Bool(false));
247 }
248 self.identifier_tail(start, cursor)
249 }
250 't' => {
251 cursor.skip(1);
252 if "rue".chars().all( | expected | cursor.skip_if( | actual | actual == expected)) && !cursor.peek(0).is_identifier_continue() {
253 return (State::Initial(InitialState), FullToken::Bool(true));
254 }
255 self.identifier_tail(start, cursor)
256 },
257 _ => self.identifier_tail(start, cursor),
258 }
259 }
260
261 fn identifier_tail(self, start: usize, cursor: &mut Cursor) -> (State, FullToken) {
262 while cursor.skip_if( | ch | ch.is_identifier_continue()) {}
263 (State::Initial(InitialState), FullToken::Ident(
264 LiveId::from_str_with_lut(cursor.from_start_to_scratch(start)).unwrap()
265 ))
266 }
267
268 fn number(self, cursor: &mut Cursor) -> (State, FullToken) {
269 match (cursor.peek(0), cursor.peek(1)) {
270 ('0', 'b') => {
271 cursor.skip(2);
272 if !cursor.skip_digits(2) {
273 return (State::Initial(InitialState), FullToken::Unknown);
274 }
275 (State::Initial(InitialState), FullToken::OtherNumber)
276 }
277 ('0', 'o') => {
278 cursor.skip(2);
279 if !cursor.skip_digits(8) {
280 return (State::Initial(InitialState), FullToken::Unknown);
281 }
282 (State::Initial(InitialState), FullToken::OtherNumber)
283 }
284 ('0', 'x') => {
285 cursor.skip(2);
286 if !cursor.skip_digits(16) {
287 return (State::Initial(InitialState), FullToken::Unknown);
288 }
289 (State::Initial(InitialState), FullToken::OtherNumber)
290 }
291 _ => {
292 let start = cursor.index();
293 cursor.skip_digits(10);
295
296 match cursor.peek(0) {
297 '.' if cursor.peek(1) != '.' && !cursor.peek(0).is_identifier_start() => {
298 cursor.skip(1);
299 if cursor.skip_digits(10) && (cursor.peek(0) == 'E' || cursor.peek(0) == 'e') && !cursor.skip_exponent() {
300 return (State::Initial(InitialState), FullToken::Unknown);
301 }
302 if cursor.skip_suffix() {
303 return (State::Initial(InitialState), FullToken::OtherNumber)
304 }
305 if let Ok(value) = cursor.from_start_to_scratch(start).parse::<f64>() {
307 (State::Initial(InitialState), FullToken::Float(value))
308 }
309 else {
310 (State::Initial(InitialState), FullToken::Unknown)
311 }
312 }
313 'E' | 'e' => {
314 if !cursor.skip_exponent() {
315 return (State::Initial(InitialState), FullToken::Unknown);
316 }
317 if cursor.skip_suffix() {
318 return (State::Initial(InitialState), FullToken::OtherNumber)
319 }
320 if let Ok(value) = cursor.from_start_to_scratch(start).parse::<f64>() {
322 (State::Initial(InitialState), FullToken::Float(value))
323 }
324 else {
325 (State::Initial(InitialState), FullToken::Unknown)
326 }
327 }
328 _ => {
329 if cursor.skip_suffix() {
330 return (State::Initial(InitialState), FullToken::OtherNumber)
331 }
332 if let Ok(value) = cursor.from_start_to_scratch(start).parse::<i64>() {
334 (State::Initial(InitialState), FullToken::Int(value))
335 }
336 else {
337 (State::Initial(InitialState), FullToken::Unknown)
338 }
339 }
340 }
341 }
342 }
343 }
344
345 fn color(self, cursor: &mut Cursor) -> (State, FullToken) {
346 let start = match (cursor.peek(0), cursor.peek(1)) {
347 ('#', 'x') => {
348 cursor.skip(2);
349 let start = cursor.index();
350 if !cursor.skip_digits(16) {
351 return (State::Initial(InitialState), FullToken::Unknown);
352 }
353 start
354 }
355 _ => {
356 cursor.skip(1);
357 let start = cursor.index();
358 if !cursor.skip_digits(16) {
359 return (State::Initial(InitialState), FullToken::Unknown);
360 }
361 start
362 }
363 };
364 if let Ok(col) = colorhex::hex_bytes_to_u32(cursor.from_start_to_scratch(start).as_bytes()) {
365 (State::Initial(InitialState), FullToken::Color(col))
366 }
367 else {
368 (State::Initial(InitialState), FullToken::Unknown)
369 }
370 }
371 fn string(self, cursor: &mut Cursor) -> (State, FullToken) {
396 self.double_quoted_string(cursor)
397 }
398
399 fn byte_string(self, cursor: &mut Cursor) -> (State, FullToken) {
407 debug_assert!(cursor.peek(0) == 'b');
408 cursor.skip(1);
409 self.double_quoted_string(cursor)
410 }
411
412 fn raw_string(self, cursor: &mut Cursor) -> (State, FullToken) {
413 debug_assert!(cursor.peek(0) == 'r');
414 cursor.skip(1);
415 self.raw_double_quoted_string(cursor)
416 }
417
418 fn raw_byte_string(self, cursor: &mut Cursor) -> (State, FullToken) {
419 debug_assert!(cursor.peek(0) == 'b' && cursor.peek(1) == 'r');
420 cursor.skip(2);
421 self.raw_double_quoted_string(cursor)
422 }
423 fn double_quoted_string(self, cursor: &mut Cursor) -> (State, FullToken) {
443 debug_assert!(cursor.peek(0) == '"');
444 cursor.skip(1);
445 DoubleQuotedStringTailState.next(cursor)
446 }
447fn raw_double_quoted_string(self, cursor: &mut Cursor) -> (State, FullToken) {
455 let mut start_hash_count = 0;
456 while cursor.skip_if( | ch | ch == '#') {
457 start_hash_count += 1;
458 }
459 RawDoubleQuotedStringTailState {start_hash_count}.next(cursor)
460 }
461
462 fn whitespace(self, cursor: &mut Cursor) -> (State, FullToken) {
463 debug_assert!(cursor.peek(0).is_whitespace());
464 cursor.skip(1);
465 while cursor.skip_if( | ch | ch.is_whitespace()) {}
466 (State::Initial(InitialState), FullToken::Whitespace)
467 }
468}
469
470#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
471pub struct BlockCommentTailState {
472 depth: usize,
473}
474
475impl BlockCommentTailState {
476 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
477 let mut state = self;
478 loop {
479 match (cursor.peek(0), cursor.peek(1)) {
480 ('/', '*') => {
481 cursor.skip(2);
482 state.depth += 1;
483 }
484 ('*', '/') => {
485 cursor.skip(2);
486 if state.depth == 0 {
487 break (State::Initial(InitialState), FullToken::Comment);
488 }
489 state.depth -= 1;
490 }
491 ('\0', _) => {
492 break (State::BlockCommentTail(state), FullToken::Comment);
493 }
494 _ => cursor.skip(1),
495 }
496 }
497 }
498}
499
500#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
502pub struct DoubleQuotedStringTailState;
503
504impl DoubleQuotedStringTailState {
505 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
506 let mut s = String::new();
507 loop {
508 match (cursor.peek(0), cursor.peek(1)) {
509 ('"', _) => {
510 cursor.skip(1);
511 cursor.skip_suffix();
512 break (State::Initial(InitialState), FullToken::String(Rc::new(s)));
513 }
514 ('\0', _) => {
515 break (
516 State::DoubleQuotedStringTail(DoubleQuotedStringTailState),
517 FullToken::String(Rc::new(s)),
518 );
519 }
520 ('\\', '\\') => {
521 s.push('\\');
522 cursor.skip(2);
523 },
524 ('\\', '"') => {
525 s.push('"');
526 cursor.skip(2);
527 },
528 ('\\', 'n') => {
529 s.push('\n');
530 cursor.skip(2);
531 },
532 (x,_) => {
533 s.push(x);
534 cursor.skip(1);
535 }
536 }
537 }
538 }
539}
540#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
569pub struct RawDoubleQuotedStringTailState {
570 start_hash_count: usize,
571}
572
573impl RawDoubleQuotedStringTailState {
574 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
575 let mut s = String::new();
576 loop {
577 match cursor.peek(0) {
578 '"' => {
579 cursor.skip(1);
580 let mut end_hash_count = 0;
581 while end_hash_count < self.start_hash_count && cursor.skip_if( | ch | ch == '#') {
582 end_hash_count += 1;
583 }
584 if end_hash_count == self.start_hash_count {
585 cursor.skip_suffix();
586 break (State::Initial(InitialState), FullToken::String(Rc::new(s)));
587 }
588 }
589 '\0' => {
590 break (State::RawDoubleQuotedStringTail(self), FullToken::String(Rc::new(s)));
591 }
592 x => {
593 s.push(x);
594 cursor.skip(1);
595 }
596 }
597 }
598 }
599}
600
601#[derive(Debug)]
603pub struct Cursor<'a> {
604 chars: &'a [char],
605 scratch: &'a mut String,
606 index: usize,
607}
608
609impl<'a> Cursor<'a> {
610 pub fn new(chars: &'a [char], scratch: &'a mut String) -> Cursor<'a> {
622 Cursor {chars, scratch, index: 0 }
623 }
624
625 pub fn index(&self) -> usize {
626 self.index
627 }
628
629 fn from_start_to_scratch(&mut self, start: usize) -> &str {
630 self.scratch.clear();
631 for i in start..self.index {
632 self.scratch.push(self.chars[i]);
633 }
634 self.scratch
635 }
636
637
638 fn peek(&self, index: usize) -> char {
639 self.chars.get(self.index + index).cloned().unwrap_or('\0')
640 }
641
642 fn id_from_1(&self) -> LiveId {
643 LiveId::from_bytes(LIVE_ID_SEED, &[
644 self.chars[self.index] as u8,
645 ], 0, 1)
646 }
647
648 fn id_from_2(&self) -> LiveId {
649 LiveId::from_bytes(LIVE_ID_SEED, &[
650 self.chars[self.index] as u8,
651 self.chars[self.index + 1] as u8,
652 ], 0, 2)
653 }
654
655 fn id_from_3(&self) -> LiveId {
656 LiveId::from_bytes(LIVE_ID_SEED, &[
657 self.chars[self.index] as u8,
658 self.chars[self.index + 1] as u8,
659 self.chars[self.index + 2] as u8,
660 ], 0, 3)
661 }
662
663 fn skip(&mut self, count: usize) {
664 self.index += count;
665 }
666
667 fn skip_if<P>(&mut self, predicate: P) -> bool
668 where
669 P: FnOnce(char) -> bool,
670 {
671 if predicate(self.peek(0)) {
672 self.skip(1);
673 true
674 } else {
675 false
676 }
677 }
678
679 fn skip_exponent(&mut self) -> bool {
680 debug_assert!(self.peek(0) == 'E' || self.peek(0) == 'e');
681 self.skip(1);
682 if self.peek(0) == '+' || self.peek(0) == '-' {
683 self.skip(1);
684 }
685 self.skip_digits(10)
686 }
687
688 fn skip_digits(&mut self, radix: u32) -> bool {
689 let mut has_skip_digits = false;
690 loop {
691 match self.peek(0) {
692 '_' => {
693 self.skip(1);
694 }
695 ch if ch.is_digit(radix) => {
696 self.skip(1);
697 has_skip_digits = true;
698 }
699 _ => break,
700 }
701 }
702 has_skip_digits
703 }
704
705 fn skip_suffix(&mut self) -> bool {
706 if self.peek(0).is_identifier_start() {
707 self.skip(1);
708 while self.skip_if( | ch | ch.is_identifier_continue()) {}
709 return true
710 }
711 false
712 }
713}
714
715
/// A position identified by a line and an index on that line.
///
/// Positions are ordered by `line` first and `index` second. The derived
/// `PartialOrd`/`Ord` compare fields in declaration order, so `line` must stay
/// declared before `index`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct TokenPos {
    pub line: usize,
    pub index: usize,
}

/// A half-open span of positions: `start` is included, `end` is not.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct TokenRange {
    pub start: TokenPos,
    pub end: TokenPos,
}

impl TokenRange {
    /// Returns `true` when `pos` lies at or after `start` and strictly before `end`.
    ///
    /// The range may span several lines; positions are compared by line
    /// first, then by index. A range whose `end` is not after its `start`
    /// is empty and contains no position.
    pub fn is_in_range(&self, pos: TokenPos) -> bool {
        // Lexicographic (line, index) comparison covers the single-line and
        // multi-line cases alike and rejects inverted ranges.
        self.start <= pos && pos < self.end
    }
}