1use {
16 std::sync::Arc,
17 crate::{
18 char_ext::CharExt,
19 live_id::{LiveId,LIVE_ID_SEED},
20 full_token::{TokenWithLen, Delim, FullToken},
21 colorhex
22 },
23};
24
25#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
27pub enum State {
28 Initial(InitialState),
29 BlockCommentTail(BlockCommentTailState),
30 DoubleQuotedStringTail(DoubleQuotedStringTailState),
31 RawDoubleQuotedStringTail(RawDoubleQuotedStringTailState),
33}
34
35impl Default for State {
36 fn default() -> State {
37 State::Initial(InitialState)
38 }
39}
40
41impl State {
42 pub fn next(self, cursor: &mut Cursor) -> (State, Option<TokenWithLen>) {
70 if cursor.peek(0) == '\0' {
71 return (self, None);
72 }
73 let start = cursor.index;
74 let (next_state, token) = match self {
75 State::Initial(state) => state.next(cursor),
76 State::BlockCommentTail(state) => state.next(cursor),
77 State::DoubleQuotedStringTail(state) => state.next(cursor),
78 State::RawDoubleQuotedStringTail(state) => state.next(cursor),
79 };
81 let end = cursor.index;
82 assert!(start < end);
83 (
84 next_state,
85 Some(TokenWithLen {
86 len: end - start,
87 token,
88 }),
89 )
90 }
91}
92
93#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
95pub struct InitialState;
96
97impl InitialState {
98 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
99 match (cursor.peek(0), cursor.peek(1), cursor.peek(2)) {
100 ('r', '#', '"') | ('r', '#', '#') => self.raw_string(cursor),
101 ('b', 'r', '"') | ('b', 'r', '#') => self.raw_byte_string(cursor),
102 ('.', '.', '.') | ('.', '.', '=') | ('<', '<', '=') | ('>', '>', '=') => {
103 let id = cursor.id_from_3();
104 cursor.skip(3);
105 (
106 State::Initial(InitialState),
107 FullToken::Punct(id),
108 )
109 }
110 ('/', '/', _) => self.line_comment(cursor),
111 ('/', '*', _) => self.block_comment(cursor),
112 ('b', '"', _) => self.byte_string(cursor),
114 ('!', '=', _)
116 | ('%', '=', _)
117 | ('&', '&', _)
118 | ('&', '=', _)
119 | ('*', '=', _)
120 | ('+', '=', _)
121 | ('-', '=', _)
122 | ('-', '>', _)
123 | ('.', '.', _)
124 | ('/', '=', _)
125 | (':', ':', _)
126 | ('<', '<', _)
127 | ('<', '=', _)
128 | ('=', '=', _)
129 | ('=', '>', _)
130 | ('>', '=', _)
131 | ('>', '>', _)
132 | ('^', '=', _)
133 | ('|', '=', _)
134 | ('|', '|', _) => {
135 let id = cursor.id_from_2();
136 cursor.skip(2);
137 (
138 State::Initial(InitialState),
139 FullToken::Punct(id),
140 )
141 }
142 ('"', _, _) => self.string(cursor),
144 ('(', _, _) => {
145 cursor.skip(1);
146 (
147 State::Initial(InitialState),
148 FullToken::Open(Delim::Paren),
149 )
150 }
151 (')', _, _) => {
152 cursor.skip(1);
153 (
154 State::Initial(InitialState),
155 FullToken::Close(Delim::Paren),
156 )
157 }
158 ('[', _, _) => {
159 cursor.skip(1);
160 (
161 State::Initial(InitialState),
162 FullToken::Open(Delim::Bracket),
163 )
164 }
165 (']', _, _) => {
166 cursor.skip(1);
167 (
168 State::Initial(InitialState),
169 FullToken::Close(Delim::Bracket),
170 )
171 }
172 ('{', _, _) => {
173 cursor.skip(1);
174 (
175 State::Initial(InitialState),
176 FullToken::Open(Delim::Brace),
177 )
178 }
179 ('}', _, _) => {
180 cursor.skip(1);
181 (
182 State::Initial(InitialState),
183 FullToken::Close(Delim::Brace),
184 )
185 }
186 ('#', ch1, ch2) if ch1 == 'x' && ch2.is_ascii_hexdigit() || ch1.is_ascii_hexdigit() => self.color(cursor),
187 ('.', ch1, _) if ch1.is_ascii_digit() => self.number(cursor),
188 ('!', _, _)
191 | ('#', _, _)
192 | ('$', _, _)
193 | ('%', _, _)
194 | ('&', _, _)
195 | ('*', _, _)
196 | ('+', _, _)
197 | (',', _, _)
198 | ('-', _, _)
199 | ('.', _, _)
200 | ('/', _, _)
201 | (':', _, _)
202 | (';', _, _)
203 | ('<', _, _)
204 | ('=', _, _)
205 | ('>', _, _)
206 | ('?', _, _)
207 | ('@', _, _)
208 | ('^', _, _)
209 | ('|', _, _) => {
211 let id = cursor.id_from_1();
212 cursor.skip(1);
213 (
214 State::Initial(InitialState),
215 FullToken::Punct(id),
216 )
217 }
218 (ch, _, _) if ch.is_identifier_start() => self.identifier_or_bool(cursor),
219 (ch, _, _) if ch.is_ascii_digit() => self.number(cursor),
220 (ch, _, _) if ch.is_whitespace() => self.whitespace(cursor),
221 _ => {
222 cursor.skip(1);
223 (State::Initial(InitialState), FullToken::Unknown)
224 }
225 }
226 }
227
228 fn line_comment(self, cursor: &mut Cursor) -> (State, FullToken) {
229 debug_assert!(cursor.peek(0) == '/' && cursor.peek(1) == '/');
230 cursor.skip(2);
231 while cursor.skip_if( | ch | ch != '\n' && ch != '\0') {}
232 (State::Initial(InitialState), FullToken::Comment)
233 }
234
235 fn block_comment(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
236 debug_assert!(cursor.peek(0) == '/' && cursor.peek(1) == '*');
237 cursor.skip(2);
238 BlockCommentTailState {depth: 0}.next(cursor)
239 }
240
241 fn identifier_or_bool(self, cursor: &mut Cursor) -> (State, FullToken) {
242 debug_assert!(cursor.peek(0).is_identifier_start());
243 let start = cursor.index();
244 match cursor.peek(0) {
245 'f' => {
246 cursor.skip(1);
247 if "alse".chars().all( | expected | cursor.skip_if( | actual | actual == expected)) && !cursor.peek(0).is_identifier_continue() {
248 return (State::Initial(InitialState), FullToken::Bool(false));
249 }
250 self.identifier_tail(start, cursor)
251 }
252 't' => {
253 cursor.skip(1);
254 if "rue".chars().all( | expected | cursor.skip_if( | actual | actual == expected)) && !cursor.peek(0).is_identifier_continue() {
255 return (State::Initial(InitialState), FullToken::Bool(true));
256 }
257 self.identifier_tail(start, cursor)
258 },
259 _ => self.identifier_tail(start, cursor),
260 }
261 }
262
263 fn identifier_tail(self, start: usize, cursor: &mut Cursor) -> (State, FullToken) {
264 while cursor.skip_if( | ch | ch.is_identifier_continue()) {}
265 (State::Initial(InitialState), FullToken::Ident(
266 LiveId::from_str_with_lut(cursor.from_start_to_scratch(start)).unwrap()
267 ))
268 }
269
270 fn number(self, cursor: &mut Cursor) -> (State, FullToken) {
271 match (cursor.peek(0), cursor.peek(1)) {
272 ('0', 'b') => {
273 cursor.skip(2);
274 if !cursor.skip_digits(2) {
275 return (State::Initial(InitialState), FullToken::Unknown);
276 }
277 (State::Initial(InitialState), FullToken::OtherNumber)
278 }
279 ('0', 'o') => {
280 cursor.skip(2);
281 if !cursor.skip_digits(8) {
282 return (State::Initial(InitialState), FullToken::Unknown);
283 }
284 (State::Initial(InitialState), FullToken::OtherNumber)
285 }
286 ('0', 'x') => {
287 cursor.skip(2);
288 if !cursor.skip_digits(16) {
289 return (State::Initial(InitialState), FullToken::Unknown);
290 }
291 (State::Initial(InitialState), FullToken::OtherNumber)
292 }
293 _ => {
294 let start = cursor.index();
295 cursor.skip_digits(10);
300
301 match cursor.peek(0) {
302 '.' if cursor.peek(1) != '.' && !cursor.peek(0).is_identifier_start() => {
303 cursor.skip(1);
304 if cursor.skip_digits(10) && (cursor.peek(0) == 'E' || cursor.peek(0) == 'e') && !cursor.skip_exponent() {
305 return (State::Initial(InitialState), FullToken::Unknown);
306 }
307 if cursor.skip_suffix() {
308 return (State::Initial(InitialState), FullToken::OtherNumber)
309 }
310 if let Ok(value) = cursor.from_start_to_scratch(start).parse::<f64>() {
312 (State::Initial(InitialState), FullToken::Float(value))
313 }
314 else {
315 (State::Initial(InitialState), FullToken::Unknown)
316 }
317 }
318 'E' | 'e' => {
319 if !cursor.skip_exponent() {
320 return (State::Initial(InitialState), FullToken::Unknown);
321 }
322 if cursor.skip_suffix() {
323 return (State::Initial(InitialState), FullToken::OtherNumber)
324 }
325 if let Ok(value) = cursor.from_start_to_scratch(start).parse::<f64>() {
327 (State::Initial(InitialState), FullToken::Float(value))
328 }
329 else {
330 (State::Initial(InitialState), FullToken::Unknown)
331 }
332 }
333 _ => {
334 if cursor.skip_suffix() {
335 return (State::Initial(InitialState), FullToken::OtherNumber)
336 }
337 if let Ok(value) = cursor.from_start_to_scratch(start).parse::<i64>() {
339 (State::Initial(InitialState), FullToken::Int(value))
340 }
341 else {
342 (State::Initial(InitialState), FullToken::Unknown)
343 }
344 }
345 }
346 }
347 }
348 }
349
350 fn color(self, cursor: &mut Cursor) -> (State, FullToken) {
351 let start = match (cursor.peek(0), cursor.peek(1)) {
352 ('#', 'x') => {
353 cursor.skip(2);
354 let start = cursor.index();
355 if !cursor.skip_digits(16) {
356 return (State::Initial(InitialState), FullToken::Unknown);
357 }
358 start
359 }
360 _ => {
361 cursor.skip(1);
362 let start = cursor.index();
363 if !cursor.skip_digits(16) {
364 return (State::Initial(InitialState), FullToken::Unknown);
365 }
366 start
367 }
368 };
369 if let Ok(col) = colorhex::hex_bytes_to_u32(cursor.from_start_to_scratch(start).as_bytes()) {
370 (State::Initial(InitialState), FullToken::Color(col))
371 }
372 else {
373 (State::Initial(InitialState), FullToken::Unknown)
374 }
375 }
376 fn string(self, cursor: &mut Cursor) -> (State, FullToken) {
401 self.double_quoted_string(cursor)
402 }
403
404 fn byte_string(self, cursor: &mut Cursor) -> (State, FullToken) {
412 debug_assert!(cursor.peek(0) == 'b');
413 cursor.skip(1);
414 self.double_quoted_string(cursor)
415 }
416
417 fn raw_string(self, cursor: &mut Cursor) -> (State, FullToken) {
418 debug_assert!(cursor.peek(0) == 'r');
419 cursor.skip(1);
420 self.raw_double_quoted_string(cursor)
421 }
422
423 fn raw_byte_string(self, cursor: &mut Cursor) -> (State, FullToken) {
424 debug_assert!(cursor.peek(0) == 'b' && cursor.peek(1) == 'r');
425 cursor.skip(2);
426 self.raw_double_quoted_string(cursor)
427 }
428 fn double_quoted_string(self, cursor: &mut Cursor) -> (State, FullToken) {
448 debug_assert!(cursor.peek(0) == '"');
449 cursor.skip(1);
450 DoubleQuotedStringTailState.next(cursor)
451 }
452fn raw_double_quoted_string(self, cursor: &mut Cursor) -> (State, FullToken) {
460 let mut start_hash_count = 0;
461 while cursor.skip_if( | ch | ch == '#') {
462 start_hash_count += 1;
463 }
464 RawDoubleQuotedStringTailState {start_hash_count}.next(cursor)
465 }
466
467 fn whitespace(self, cursor: &mut Cursor) -> (State, FullToken) {
468 debug_assert!(cursor.peek(0).is_whitespace());
469 cursor.skip(1);
470 while cursor.skip_if( | ch | ch.is_whitespace()) {}
471 (State::Initial(InitialState), FullToken::Whitespace)
472 }
473}
474
475#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
476pub struct BlockCommentTailState {
477 depth: usize,
478}
479
480impl BlockCommentTailState {
481 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
482 let mut state = self;
483 loop {
484 match (cursor.peek(0), cursor.peek(1)) {
485 ('/', '*') => {
486 cursor.skip(2);
487 state.depth += 1;
488 }
489 ('*', '/') => {
490 cursor.skip(2);
491 if state.depth == 0 {
492 break (State::Initial(InitialState), FullToken::Comment);
493 }
494 state.depth -= 1;
495 }
496 ('\0', _) => {
497 break (State::BlockCommentTail(state), FullToken::Comment);
498 }
499 _ => cursor.skip(1),
500 }
501 }
502 }
503}
504
505#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
507pub struct DoubleQuotedStringTailState;
508
509impl DoubleQuotedStringTailState {
510 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
511 let mut s = String::new();
512 enum Skip{
513 Scanning(bool, usize, usize),
514 Found(usize)
515 }
516 let mut skip = Skip::Scanning(true, 0,0);
517 loop {
518 match (cursor.peek(0), cursor.peek(1)) {
519 ('"', _) => {
520 cursor.skip(1);
521 cursor.skip_suffix();
522 break (State::Initial(InitialState), FullToken::String(Arc::new(s)));
523 }
524 ('\0', _) => {
525 break (
526 State::DoubleQuotedStringTail(DoubleQuotedStringTailState),
527 FullToken::String(Arc::new(s)),
528 );
529 }
530 ('\\', '\\') => {
531 if let Skip::Scanning(_,_,len) = skip{
532 skip = Skip::Found(len);
533 }
534 s.push('\\');
535 cursor.skip(2);
536 },
537 ('\\', '"') => {
538 if let Skip::Scanning(_,_,len) = skip{
539 skip = Skip::Found(len);
540 }
541 s.push('"');
542 cursor.skip(2);
543 },
544 ('\\', 'n') => {
545 if let Skip::Scanning(_,_,len) = skip{
546 skip = Skip::Found(len);
547 }
548 s.push('\n');
549 cursor.skip(2);
550 },
551 ('\n',_)=>{ s.push('\n');
553 if let Skip::Scanning(first,_,len) = skip{
554 skip = Skip::Scanning(first, 0, len);
555 }
556 else if let Skip::Found(len) = skip{
557 skip = Skip::Scanning(false, 0, len);
558 }
559 cursor.skip(1);
560 }
561 (' ', _)=>{
562 if let Skip::Scanning(first, count, len) = &mut skip{
563 if *first{
564 *len += 1;
565 }
566 else{
567 if *count>=*len{
568 skip = Skip::Found(*len);
569 s.push(' ');
570 }
571 else{
572 *count += 1;
573 }
574 }
575 }
576 else{
577 s.push(' ');
578 }
579 cursor.skip(1);
580 }
581 (x,_) => {
582 if let Skip::Scanning(_,_,len) = skip{
583 skip = Skip::Found(len);
584 }
585 s.push(x);
586 cursor.skip(1);
587 }
588 }
589 }
590 }
591}
592#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
621pub struct RawDoubleQuotedStringTailState {
622 start_hash_count: usize,
623}
624
625impl RawDoubleQuotedStringTailState {
626 fn next(self, cursor: &mut Cursor<'_>) -> (State, FullToken) {
627 let mut s = String::new();
628 loop {
629 match cursor.peek(0) {
630 '"' => {
631 cursor.skip(1);
632 let mut end_hash_count = 0;
633 while end_hash_count < self.start_hash_count && cursor.skip_if( | ch | ch == '#') {
634 end_hash_count += 1;
635 }
636 if end_hash_count == self.start_hash_count {
637 cursor.skip_suffix();
638 break (State::Initial(InitialState), FullToken::String(Arc::new(s)));
639 }
640 }
641 '\0' => {
642 break (State::RawDoubleQuotedStringTail(self), FullToken::String(Arc::new(s)));
643 }
644 x => {
645 s.push(x);
646 cursor.skip(1);
647 }
648 }
649 }
650 }
651}
652
653#[derive(Debug)]
655pub struct Cursor<'a> {
656 chars: &'a [char],
657 scratch: &'a mut String,
658 index: usize,
659}
660
661impl<'a> Cursor<'a> {
662 pub fn new(chars: &'a [char], scratch: &'a mut String) -> Cursor<'a> {
674 Cursor {chars, scratch, index: 0 }
675 }
676
677 pub fn index(&self) -> usize {
678 self.index
679 }
680
681 fn from_start_to_scratch(&mut self, start: usize) -> &str {
682 self.scratch.clear();
683 for i in start..self.index {
684 self.scratch.push(self.chars[i]);
685 }
686 self.scratch
687 }
688
689
690 fn peek(&self, index: usize) -> char {
691 self.chars.get(self.index + index).cloned().unwrap_or('\0')
692 }
693
694 fn id_from_1(&self) -> LiveId {
695 LiveId::from_bytes(LIVE_ID_SEED, &[
696 self.chars[self.index] as u8,
697 ], 0, 1)
698 }
699
700 fn id_from_2(&self) -> LiveId {
701 LiveId::from_bytes(LIVE_ID_SEED, &[
702 self.chars[self.index] as u8,
703 self.chars[self.index + 1] as u8,
704 ], 0, 2)
705 }
706
707 fn id_from_3(&self) -> LiveId {
708 LiveId::from_bytes(LIVE_ID_SEED, &[
709 self.chars[self.index] as u8,
710 self.chars[self.index + 1] as u8,
711 self.chars[self.index + 2] as u8,
712 ], 0, 3)
713 }
714
715 fn skip(&mut self, count: usize) {
716 self.index += count;
717 }
718
719 fn skip_if<P>(&mut self, predicate: P) -> bool
720 where
721 P: FnOnce(char) -> bool,
722 {
723 if predicate(self.peek(0)) {
724 self.skip(1);
725 true
726 } else {
727 false
728 }
729 }
730
731 fn skip_exponent(&mut self) -> bool {
732 debug_assert!(self.peek(0) == 'E' || self.peek(0) == 'e');
733 self.skip(1);
734 if self.peek(0) == '+' || self.peek(0) == '-' {
735 self.skip(1);
736 }
737 self.skip_digits(10)
738 }
739
740 fn skip_digits(&mut self, radix: u32) -> bool {
741 let mut has_skip_digits = false;
742 loop {
743 match self.peek(0) {
744 '_' => {
745 self.skip(1);
746 }
747 ch if ch.is_digit(radix) => {
748 self.skip(1);
749 has_skip_digits = true;
750 }
751 _ => break,
752 }
753 }
754 has_skip_digits
755 }
756
757 fn skip_suffix(&mut self) -> bool {
758 if self.peek(0).is_identifier_start() {
759 self.skip(1);
760 while self.skip_if( | ch | ch.is_identifier_continue()) {}
761 return true
762 }
763 false
764 }
765}
766
767
/// Position of a token, identified by line and index.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct TokenPos {
    pub line: usize,
    pub index: usize,
}

/// Range of token positions; `start` is inclusive and `end` is exclusive.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct TokenRange {
    pub start: TokenPos,
    pub end: TokenPos,
}

impl TokenRange {
    /// Returns whether `pos` lies inside this range.
    ///
    /// On the start line the index must be at or after `start.index`; on the
    /// end line it must be before `end.index`; every index on a line strictly
    /// between the two is inside.
    pub fn is_in_range(&self, pos: TokenPos) -> bool {
        let TokenRange { start, end } = *self;
        let at_or_after_start = pos.index >= start.index;
        let before_end = pos.index < end.index;

        if start.line == end.line {
            return pos.line == start.line && at_or_after_start && before_end;
        }
        if pos.line == start.line {
            at_or_after_start
        } else if pos.line == end.line {
            before_end
        } else {
            start.line < pos.line && pos.line < end.line
        }
    }
}