liu/
parser.rs

1use crate::util::*;
2use crate::*;
3use std::collections::hash_map::HashMap;
4
/// Reserved words of the language.
///
/// The discriminants are the ids `StringTable::new` expects to receive when it
/// interns the keyword spellings in this same order, so a `Word` token whose
/// `data` equals `Key::X as u32` is the keyword `X`.
#[repr(u32)]
#[derive(Clone, Copy, PartialEq)]
pub enum Key {
    // Declarations and scoping.
    Let = 0,
    Proc,
    Type,
    Defer,
    Context,

    // Branching.
    If,
    Else,
    Match,

    // Looping.
    Continue,
    Break,
    For,

    // Concurrency.
    Spawn,
    Wait,

    // Interned up front as well, but not below `Key::COUNT`, so the parser's
    // `< Key::COUNT` checks do not reject them as names.
    Underscore,
    Print,
}
28
impl Key {
    /// Exclusive upper bound used by the parser (`tok.data < Key::COUNT as u32`)
    /// to decide whether a word is reserved. It aliases `Underscore`, so every
    /// variant declared before `Underscore` is treated as reserved.
    const COUNT: Self = Self::Underscore;
}
32
/// Kind of a lexed token.
///
/// Single-character tokens use their ASCII byte as the discriminant; the
/// remaining kinds are numbered from 129 upwards. The discriminant is also used
/// as an index into the 256-entry `OPERATORS` table.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TokenKind {
    // Brackets.
    LParen = b'(',
    RParen = b')',
    LBracket = b'[',
    RBracket = b']',
    LBrace = b'{',
    RBrace = b'}',

    // Separators.
    Dot = b'.',
    Comma = b',',
    Colon = b':',
    Semicolon = b';',

    // Single-character operators.
    Bang = b'!',
    Tilde = b'~',
    Amp = b'&',
    Caret = b'^',
    Mod = b'%',
    Star = b'*',
    Div = b'/',
    Plus = b'+',
    Dash = b'-',
    Equal = b'=',
    Lt = b'<',
    Gt = b'>',

    Equal2 = 129, // ==
    NotEqual,     // !=
    LtEq,         // <=
    GtEq,         // >=

    // NOTE(review): `lex` in this file never produces `And`, `Or` or
    // `Directive` — confirm whether they are emitted elsewhere.
    And, // &&
    Or,  // ||

    // Tokens whose `Token::data` is an id into `StringTable::names`.
    Directive,
    Word,
    String,
    Char,
    Number,

    // Trivia; `Token::data` holds the byte length. `lex` emits `Skip` for
    // line comments and newline-free whitespace, and `NewlineSkip` for a
    // whitespace run that contains at least one `\n`.
    Skip,
    NewlineSkip,
}
78
/// A single lexed token.
#[derive(Debug, Clone, Copy)]
pub struct Token {
    /// What sort of token this is.
    pub kind: TokenKind,
    /// Payload whose meaning depends on `kind`: a `StringTable` id for
    /// `Word`/`Number`/`String`/`Char`, the byte length for
    /// `Skip`/`NewlineSkip`, and `0` for punctuation as produced by `lex`.
    pub data: u32,
}
84
85impl Token {
86    pub fn len(&self, table: &StringTable) -> usize {
87        match self.kind {
88            TokenKind::Skip => return self.data as usize,
89            TokenKind::NewlineSkip => return self.data as usize,
90
91            TokenKind::Word => return table.names[self.data].len(),
92            TokenKind::Directive => return table.names[self.data].len() + 1,
93            TokenKind::String => return table.names[self.data].len() + 2,
94            TokenKind::Char => return table.names[self.data].len() + 2,
95            TokenKind::Number => return table.names[self.data].len(),
96
97            TokenKind::Equal2 => return 2,
98            TokenKind::LtEq => return 2,
99            TokenKind::GtEq => return 2,
100            TokenKind::And => return 2,
101            TokenKind::Or => return 2,
102
103            _ => return 1,
104        }
105    }
106}
107
108pub fn parse(table: &StringTable, file: u32, data: Pod<Token>) -> Result<Ast, Error> {
109    use TokenKind::*;
110
111    let allocator = AstAlloc::new(file);
112
113    let mut parser = Parser {
114        allocator,
115        table,
116        file,
117        data,
118        index: 0,
119        text_cursor: 0,
120    };
121
122    let mut loc = CodeLoc {
123        start: parser.text_cursor,
124        end: parser.text_cursor,
125        file,
126    };
127
128    let mut stmts = Pod::new();
129
130    parser.pop_kinds_loop(&[Skip, NewlineSkip, Semicolon]);
131
132    while parser.index < parser.data.len() {
133        let stmt = parser.parse_expr()?;
134        stmts.push(stmt);
135
136        parser.pop_kind(Skip);
137
138        let before_eat = parser.index;
139
140        parser.pop_kinds_loop(&[NewlineSkip, Semicolon]);
141
142        if parser.index == before_eat {
143            loc.end = parser.text_cursor;
144
145            return Err(Error::expected("a newline or semicolon", loc));
146        }
147
148        parser.pop_kinds_loop(&[Skip, NewlineSkip, Semicolon]);
149    }
150
151    let stmts = parser.allocator.add_slice(&stmts);
152
153    let block = Block { stmts };
154
155    return Ok(Ast { block });
156}
157
/// Cursor over a token list plus the state needed to build AST nodes.
struct Parser<'a> {
    /// Storage that parsed expressions and expression slices are added to.
    allocator: AstAlloc,
    /// String table used to measure tokens and read literal text.
    table: &'a StringTable,
    /// The tokens being parsed.
    data: Pod<Token>,
    /// File id copied into every `CodeLoc` the parser creates.
    file: u32,
    /// Index into `data` of the next token to consume.
    index: usize,
    /// Sum of `Token::len` over all consumed tokens, i.e. the byte offset of
    /// the next token in the source text.
    text_cursor: usize,
}
166
167impl<'a> Parser<'a> {
168    fn peek(&self) -> Option<Token> {
169        let tok = self.data.get(self.index)?;
170
171        return Some(*tok);
172    }
173
174    #[inline]
175    fn adv(&mut self) {
176        if let Some(tok) = self.peek() {
177            self.text_cursor += tok.len(self.table);
178            self.index += 1;
179        }
180    }
181
182    fn pop(&mut self) -> Option<Token> {
183        let tok = self.peek()?;
184
185        self.text_cursor += tok.len(self.table);
186        self.index += 1;
187
188        return Some(tok);
189    }
190
191    fn pop_kind(&mut self, kind: TokenKind) -> Option<Token> {
192        let tok = self.peek()?;
193
194        if tok.kind != kind {
195            return None;
196        }
197
198        self.text_cursor += tok.len(self.table);
199        self.index += 1;
200
201        return Some(tok);
202    }
203
204    fn pop_tok(&mut self, kind: TokenKind, data: u32) -> bool {
205        let tok = match self.peek() {
206            None => return false,
207            Some(tok) => tok,
208        };
209
210        if tok.kind != kind || tok.data != data {
211            return false;
212        }
213
214        self.text_cursor += tok.len(self.table);
215        self.index += 1;
216
217        return true;
218    }
219
220    fn pop_kinds_loop(&mut self, kinds: &[TokenKind]) -> CopyRange {
221        let start = self.text_cursor;
222
223        'outer: while let Some(tok) = self.peek() {
224            for &kind in kinds {
225                if tok.kind == kind {
226                    self.text_cursor += tok.len(self.table);
227                    self.index += 1;
228                    continue 'outer;
229                }
230            }
231
232            break;
233        }
234
235        return r(start, self.text_cursor);
236    }
237
238    pub fn parse_expr(&mut self) -> Result<Expr, Error> {
239        return self.parse_decl();
240    }
241
242    pub fn parse_decl(&mut self) -> Result<Expr, Error> {
243        if let Some(expr) = self.parse_proc()? {
244            return Ok(expr);
245        }
246
247        if let Some(expr) = self.parse_let()? {
248            return Ok(expr);
249        }
250
251        if let Some(expr) = self.parse_assign()? {
252            return Ok(expr);
253        }
254
255        if let Some(expr) = self.parse_control()? {
256            return Ok(expr);
257        }
258
259        return self.parse_binary_op();
260    }
261
262    pub fn parse_proc(&mut self) -> Result<Option<Expr>, Error> {
263        use TokenKind::*;
264
265        let mut loc = CodeLoc {
266            start: self.text_cursor,
267            end: self.text_cursor,
268            file: self.file,
269        };
270
271        if !self.pop_tok(Word, Key::Proc as u32) {
272            return Ok(None);
273        };
274
275        self.pop_kinds_loop(&[Skip]);
276
277        let symbol = match self.pop_kind(Word) {
278            Some(tok) => {
279                if tok.data < Key::COUNT as u32 {
280                    loc.end = self.text_cursor;
281
282                    return Err(Error::expected("a procedure name", loc));
283                }
284
285                tok.data
286            }
287            None => {
288                loc.end = self.text_cursor;
289
290                return Err(Error::expected("a procedure name", loc));
291            }
292        };
293
294        if self.pop_kind(LParen).is_none() {
295            loc.end = self.text_cursor;
296
297            return Err(Error::expected("opening parenthesis", loc));
298        }
299
300        self.pop_kinds_loop(&[Skip, NewlineSkip]);
301
302        if self.pop_kind(RParen).is_none() {
303            loc.end = self.text_cursor;
304
305            return Err(Error::expected("opening closing parenthesis", loc));
306        }
307
308        self.pop_kinds_loop(&[Skip, NewlineSkip]);
309
310        let code = match self.parse_control()? {
311            Some(e) => e,
312            None => {
313                loc.end = self.text_cursor;
314
315                return Err(Error::expected("a block", loc));
316            }
317        };
318
319        let code = self.allocator.make(code);
320
321        let kind = ExprKind::Procedure(Proc { symbol, code });
322
323        return Ok(Some(Expr { kind, loc }));
324    }
325
326    pub fn parse_let(&mut self) -> Result<Option<Expr>, Error> {
327        use TokenKind::*;
328
329        let mut loc = CodeLoc {
330            start: self.text_cursor,
331            end: self.text_cursor,
332            file: self.file,
333        };
334
335        if !self.pop_tok(Word, Key::Let as u32) {
336            return Ok(None);
337        };
338
339        self.pop_kinds_loop(&[Skip, NewlineSkip]);
340
341        let ident = match self.pop_kind(Word) {
342            Some(tok) => tok,
343            None => {
344                loc.end = self.text_cursor;
345
346                return Err(Error::expected("an identifer", loc));
347            }
348        };
349
350        if ident.data < Key::COUNT as u32 {
351            loc.end = self.text_cursor;
352
353            return Err(Error::expected("an identifer", loc));
354        }
355
356        self.pop_kinds_loop(&[Skip, NewlineSkip]);
357
358        let equal_start = self.text_cursor;
359        match self.pop() {
360            Some(Token { kind: Equal, .. }) => {}
361
362            Some(_) | None => {
363                loc.start = equal_start;
364                loc.end = self.text_cursor;
365
366                return Err(Error::expected("an equal sign", loc));
367            }
368        }
369
370        self.pop_kinds_loop(&[Skip, NewlineSkip]);
371
372        let value = match self.parse_control()? {
373            Some(e) => e,
374            None => self.parse_binary_op()?,
375        };
376
377        let value = self.allocator.make(value);
378
379        loc.end = self.text_cursor;
380        let kind = ExprKind::Let {
381            symbol: ident.data,
382            value,
383        };
384
385        return Ok(Some(Expr { kind, loc }));
386    }
387
388    pub fn parse_assign(&mut self) -> Result<Option<Expr>, Error> {
389        return Ok(None);
390    }
391
392    pub fn parse_control(&mut self) -> Result<Option<Expr>, Error> {
393        use TokenKind::*;
394
395        let mut loc = CodeLoc {
396            start: self.text_cursor,
397            end: self.text_cursor,
398            file: self.file,
399        };
400
401        // if, else
402        if self.pop_tok(Word, Key::If as u32) {
403            self.pop_kinds_loop(&[Skip, NewlineSkip]);
404
405            let cond = self.parse_binary_op()?;
406            let cond = self.allocator.make(cond);
407
408            let control_start = self.text_cursor;
409            let if_true = match self.parse_control()? {
410                Some(e) => self.allocator.make(e),
411                None => {
412                    loc.start = control_start;
413                    loc.end = self.text_cursor;
414
415                    return Err(Error::expected("control flow or block", loc));
416                }
417            };
418
419            if !self.pop_tok(Word, Key::Else as u32) {
420                loc.end = self.text_cursor;
421                let kind = ExprKind::If { cond, if_true };
422
423                return Ok(Some(Expr { kind, loc }));
424            }
425
426            let control_start = self.text_cursor;
427            let if_false = match self.parse_control()? {
428                Some(e) => self.allocator.make(e),
429                None => {
430                    loc.start = control_start;
431                    loc.end = self.text_cursor;
432
433                    return Err(Error::expected("control flow or block", loc));
434                }
435            };
436
437            loc.end = self.text_cursor;
438            let kind = ExprKind::IfElse {
439                cond,
440                if_true,
441                if_false,
442            };
443
444            return Ok(Some(Expr { kind, loc }));
445        }
446
447        // case
448
449        // for
450
451        // block
452        if let Some(_) = self.pop_kind(LBrace) {
453            use TokenKind::*;
454
455            let mut stmts = Pod::new();
456
457            self.pop_kinds_loop(&[Skip, NewlineSkip, Semicolon]);
458
459            if self.pop_kind(RBrace).is_some() {
460                loc.end = self.text_cursor;
461
462                let block = Block {
463                    stmts: ExprRange::EMPTY,
464                };
465
466                let kind = ExprKind::Block(block);
467
468                return Ok(Some(Expr { kind, loc }));
469            }
470
471            // TODO track indentation of braces, for nice reporting
472            // of matching closing braces.
473            //                         - Albert Liu, Mar 04, 2022 Fri 01:14 EST
474            loop {
475                let stmt = self.parse_expr()?;
476                stmts.push(stmt);
477
478                self.pop_kind(Skip);
479
480                let before_eat = self.index;
481
482                self.pop_kinds_loop(&[NewlineSkip, Semicolon]);
483
484                if self.pop_kind(RBrace).is_some() {
485                    loc.end = self.text_cursor;
486
487                    break;
488                }
489
490                if self.index == before_eat {
491                    loc.end = self.text_cursor;
492
493                    return Err(Error::expected("a newline or semicolon", loc));
494                }
495
496                self.pop_kinds_loop(&[Skip, NewlineSkip, Semicolon]);
497            }
498
499            let stmts = self.allocator.add_slice(&stmts);
500
501            let block = Block { stmts };
502
503            let kind = ExprKind::Block(block);
504
505            return Ok(Some(Expr { kind, loc }));
506        }
507
508        return Ok(None);
509    }
510
511    pub fn parse_binary_op(&mut self) -> Result<Expr, Error> {
512        return self.parse_binary_precedence_op(0);
513    }
514
515    pub fn parse_binary_precedence_op(&mut self, min_level: u8) -> Result<Expr, Error> {
516        use TokenKind::*;
517
518        let mut loc = CodeLoc {
519            start: self.text_cursor,
520            end: self.text_cursor,
521            file: self.file,
522        };
523
524        let mut expr = self.parse_prefix()?;
525
526        self.pop_kinds_loop(&[Skip]);
527
528        // https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
529        // This algorithm is supposed to be efficient. No idea if that's actually true,
530        // but it is incredibly concise.
531        while let Some(tok) = self.peek() {
532            let info = OPERATORS[tok.kind as usize];
533            if info.precedence < min_level {
534                break;
535            }
536
537            let kind = match info.op_kind {
538                Some(kind) => kind,
539                None => break,
540            };
541
542            self.adv();
543
544            let mut next_min_level = info.precedence;
545            if info.is_left_to_right {
546                next_min_level += 1;
547            }
548
549            self.pop_kinds_loop(&[Skip, NewlineSkip]);
550
551            let right = self.parse_binary_precedence_op(next_min_level)?;
552
553            if let Some(check) = info.check_operands {
554                check(&expr, &right)?;
555            }
556
557            loc.end = right.loc.end;
558
559            let left = self.allocator.make(expr);
560            let right = self.allocator.make(right);
561
562            let kind = ExprKind::BinaryOp { kind, left, right };
563
564            expr = Expr { kind, loc };
565
566            self.pop_kinds_loop(&[Skip]);
567        }
568
569        return Ok(expr);
570    }
571
572    pub fn parse_prefix(&mut self) -> Result<Expr, Error> {
573        use TokenKind::*;
574
575        let mut loc = CodeLoc {
576            start: self.text_cursor,
577            end: self.text_cursor,
578            file: self.file,
579        };
580
581        return self.parse_postfix();
582    }
583
584    pub fn parse_postfix(&mut self) -> Result<Expr, Error> {
585        use TokenKind::*;
586
587        let mut loc = CodeLoc {
588            start: self.text_cursor,
589            end: self.text_cursor,
590            file: self.file,
591        };
592
593        let mut expr = self.parse_atom()?;
594
595        self.pop_kinds_loop(&[Skip]);
596
597        while let Some(tok) = self.peek() {
598            match tok.kind {
599                LParen => {
600                    self.adv();
601
602                    self.pop_kinds_loop(&[Skip, NewlineSkip]);
603
604                    if self.pop_kind(RParen).is_some() {
605                        loc.end = self.text_cursor;
606
607                        let callee = self.allocator.make(expr);
608                        let kind = ExprKind::Call {
609                            callee,
610                            args: ExprRange::EMPTY,
611                        };
612
613                        expr = Expr { kind, loc };
614                        continue;
615                    }
616
617                    let mut args = Pod::new();
618                    loop {
619                        let expr = self.parse_binary_op()?;
620                        args.push(expr);
621
622                        let before_comma = self.text_cursor;
623
624                        self.pop_kinds_loop(&[Skip, NewlineSkip]);
625
626                        let found_comma = self.pop_kind(Comma).is_some();
627
628                        self.pop_kinds_loop(&[Skip, NewlineSkip]);
629
630                        if self.pop_kind(RParen).is_some() {
631                            loc.end = self.text_cursor;
632
633                            break;
634                        }
635
636                        if !found_comma {
637                            loc.start = before_comma;
638                            loc.end = before_comma;
639
640                            return Err(Error::expected("a comma or closing paren", loc));
641                        }
642                    }
643
644                    let callee = self.allocator.make(expr);
645                    let args = self.allocator.add_slice(&args);
646
647                    loc.end = self.text_cursor;
648                    let kind = ExprKind::Call { callee, args };
649
650                    expr = Expr { kind, loc };
651                }
652
653                _ => break,
654            }
655        }
656
657        return Ok(expr);
658    }
659
660    pub fn parse_atom(&mut self) -> Result<Expr, Error> {
661        use TokenKind::*;
662
663        let mut loc = CodeLoc {
664            start: self.text_cursor,
665            end: self.text_cursor,
666            file: self.file,
667        };
668
669        let result = self.pop();
670        let tok = result.ok_or_else(|| {
671            return Error::expected("an expression", loc);
672        })?;
673
674        match tok.kind {
675            Word => {
676                if tok.data < Key::COUNT as u32 && tok.data != Key::Type as u32 {
677                    loc.end = self.text_cursor;
678
679                    return Err(Error::expected("an identifer", loc));
680                }
681
682                loc.end = self.text_cursor;
683                let kind = ExprKind::Ident { symbol: tok.data };
684
685                return Ok(Expr { kind, loc });
686            }
687
688            Number => {
689                let data = self.table.names[tok.data];
690
691                let mut index = 0;
692                let mut total: u64 = 0;
693
694                // NOTE: just assume its an integer for now
695                for &b in data.as_bytes() {
696                    if b < b'0' || b'9' < b {
697                        loc.start = loc.start + index;
698                        loc.end = loc.start + 1;
699
700                        return Err(Error::expected("a digit in a number", loc));
701                    }
702
703                    total *= 10;
704                    total += (b - b'0') as u64;
705
706                    index += 1;
707                }
708
709                loc.end = self.text_cursor;
710                let kind = ExprKind::Integer(total);
711
712                return Ok(Expr { kind, loc });
713            }
714
715            LParen => {
716                self.pop_kinds_loop(&[Skip, NewlineSkip]);
717
718                let expr = self.parse_expr()?;
719
720                self.pop_kinds_loop(&[Skip, NewlineSkip]);
721
722                match self.pop_kind(RParen) {
723                    Some(tok) => return Ok(expr),
724                    None => {
725                        loc.end = self.text_cursor;
726
727                        return Err(Error::expected("a closing parenthesis", loc));
728                    }
729                }
730            }
731
732            _ => {
733                loc.end = self.text_cursor;
734
735                return Err(Error::expected("an expression", loc));
736            }
737        }
738    }
739}
740
/// Per-token-kind description of a binary operator, stored in `OPERATORS`.
#[derive(Clone, Copy)]
struct OperatorInfo {
    /// The AST operator this token maps to, or `None` when the token is not a
    /// binary operator.
    op_kind: Option<BinaryExprKind>,
    /// Binding strength; the expression parser only accepts an operator whose
    /// precedence is at least its current minimum level.
    precedence: u8,
    /// When true, the right operand is parsed one level above `precedence`;
    /// otherwise at the same level.
    is_left_to_right: bool,

    // @TODO this should be something like make_expr : (left, right) -> Result(*Expr)
    // So that we can make assignment expressions a lil nicer right off the bat
    /// Optional validation run on both operands before the node is built; its
    /// error aborts the parse.
    check_operands: Option<fn(left: &Expr, right: &Expr) -> Result<(), Error>>,
}
751
752const OPERATORS: [OperatorInfo; 256] = {
753    let default_info = OperatorInfo {
754        op_kind: None,
755        precedence: 0,
756        is_left_to_right: true,
757        check_operands: None,
758    };
759
760    let mut info = [default_info; 256];
761    let mut idx;
762
763    idx = TokenKind::Equal2 as usize;
764    info[idx].op_kind = Some(BinaryExprKind::Equal);
765    info[idx].precedence = 10;
766
767    idx = TokenKind::Plus as usize;
768    info[idx].op_kind = Some(BinaryExprKind::Add);
769    info[idx].precedence = 50;
770
771    idx = TokenKind::Star as usize;
772    info[idx].op_kind = Some(BinaryExprKind::Multiply);
773    info[idx].precedence = 60;
774
775    info
776};
777
778pub fn lex(table: &mut StringTable, file: u32, s: &str) -> Result<Pod<Token>, Error> {
779    let mut tokens = Pod::new();
780    let bytes = s.as_bytes();
781
782    let mut index = 0;
783    'outer: while let Some(&b) = bytes.get(index) {
784        let start = index;
785        index += 1;
786
787        'simple: loop {
788            macro_rules! trailing_eq {
789                ($e1:expr, $e2:expr) => {{
790                    if let Some(b'=') = bytes.get(index) {
791                        index += 1;
792
793                        $e2
794                    } else {
795                        $e1
796                    }
797                }};
798            }
799
800            let kind = match b {
801                b'(' => TokenKind::LParen,
802                b')' => TokenKind::RParen,
803                b'[' => TokenKind::LBracket,
804                b']' => TokenKind::RBracket,
805                b'{' => TokenKind::LBrace,
806                b'}' => TokenKind::RBrace,
807                b'.' => TokenKind::Dot,
808                b',' => TokenKind::Comma,
809                b':' => TokenKind::Colon,
810                b';' => TokenKind::Semicolon,
811                b'~' => TokenKind::Tilde,
812                b'&' => TokenKind::Amp,
813                b'^' => TokenKind::Caret,
814
815                b'!' => trailing_eq!(TokenKind::Bang, TokenKind::NotEqual),
816                b'=' => trailing_eq!(TokenKind::Equal, TokenKind::Equal2),
817                b'<' => trailing_eq!(TokenKind::Lt, TokenKind::LtEq),
818                b'>' => trailing_eq!(TokenKind::Gt, TokenKind::GtEq),
819
820                // b'/' is handled separately because comments have more complex
821                // syntax checking
822                b'%' => TokenKind::Mod,
823                b'*' => TokenKind::Star,
824                b'+' => TokenKind::Plus,
825                b'-' => TokenKind::Dash,
826
827                _ => break 'simple,
828            };
829
830            tokens.push(Token { kind, data: 0 });
831            continue 'outer;
832        }
833
834        if b == b'"' {
835            let end = parse_string(file, bytes, index, b'"')?;
836            let s = unsafe { core::str::from_utf8_unchecked(&bytes[index..(end - 1)]) };
837            let data = table.add(s);
838
839            index = end;
840
841            let kind = TokenKind::String;
842            tokens.push(Token { kind, data });
843            continue 'outer;
844        }
845
846        if b == b'\'' {
847            let end = parse_string(file, bytes, index, b'\'')?;
848            let s = unsafe { core::str::from_utf8_unchecked(&bytes[index..(end - 1)]) };
849            let data = table.add(s);
850
851            index = end;
852
853            let kind = TokenKind::Char;
854            tokens.push(Token { kind, data });
855            continue 'outer;
856        }
857
858        if b == b'/' {
859            if let Some(b'/') = bytes.get(index) {
860                index += 1;
861
862                while let Some(&b) = bytes.get(index) {
863                    index += 1;
864
865                    if b == b'\n' {
866                        break;
867                    }
868                }
869
870                let kind = TokenKind::Skip;
871                let data: u32 = expect((index - start).try_into());
872                tokens.push(Token { kind, data });
873                continue 'outer;
874            }
875
876            let kind = TokenKind::Div;
877            tokens.push(Token { kind, data: 0 });
878            continue 'outer;
879        }
880
881        let is_alpha = (b >= b'a' && b <= b'z') || (b >= b'A' && b <= b'Z');
882        let is_num = b >= b'0' && b <= b'9';
883        if is_alpha || is_num || b == b'_' {
884            while let Some(&b) = bytes.get(index) {
885                let is_alpha = (b >= b'a' && b <= b'z') || (b >= b'A' && b <= b'Z');
886                let is_num = b >= b'0' && b <= b'9';
887
888                if is_alpha || is_num || b == b'_' {
889                    index += 1;
890                    continue;
891                }
892
893                break;
894            }
895
896            let kind = match is_num {
897                false => TokenKind::Word,
898
899                true => {
900                    if let Some(b'.') = bytes.get(index).map(|b| *b) {
901                        index += 1;
902
903                        while let Some(&b) = bytes.get(index) {
904                            let is_alpha = (b >= b'a' && b <= b'z') || (b >= b'A' && b <= b'Z');
905                            let is_num = b >= b'0' && b <= b'9';
906
907                            if is_alpha || is_num || b == b'_' {
908                                index += 1;
909                                continue;
910                            }
911
912                            break;
913                        }
914                    }
915
916                    TokenKind::Number
917                }
918            };
919
920            let s = unsafe { core::str::from_utf8_unchecked(&bytes[start..index]) };
921            let data = table.add(s);
922
923            tokens.push(Token { kind, data });
924            continue 'outer;
925        }
926
927        let is_newline = b == b'\n';
928        if b == b' ' || b == b'\t' || b == b'\r' || is_newline {
929            let mut has_newline = is_newline;
930
931            while let Some(&b) = bytes.get(index) {
932                let is_newline = b == b'\n';
933                if is_newline {
934                    has_newline = true;
935                    index += 1;
936
937                    continue;
938                }
939
940                if b == b' ' || b == b'\t' || b == b'\r' {
941                    index += 1;
942
943                    continue;
944                }
945
946                break;
947            }
948
949            let kind = match has_newline {
950                true => TokenKind::NewlineSkip,
951                false => TokenKind::Skip,
952            };
953
954            let data: u32 = expect((index - start).try_into());
955            tokens.push(Token { kind, data });
956            continue 'outer;
957        }
958
959        let loc = CodeLoc {
960            file,
961            start,
962            end: index,
963        };
964
965        let error = Error::new("unrecognized token", loc);
966        return Err(error);
967    }
968
969    return Ok(tokens);
970}
971
972fn parse_string(file: u32, bytes: &[u8], mut index: usize, terminator: u8) -> Result<usize, Error> {
973    let start = index;
974
975    let mut escaped = false;
976    while let Some(&b) = bytes.get(index) {
977        index += 1;
978
979        if b == b'\\' {
980            escaped = true;
981            continue;
982        }
983
984        if b == b'"' && !escaped {
985            return Ok(index);
986        }
987
988        escaped = false;
989    }
990
991    let loc = CodeLoc {
992        file,
993        start,
994        end: index,
995    };
996
997    return Err(Error::new("failed to parse char or string", loc));
998}
999
/// Interner mapping strings to dense `u32` ids and back.
pub struct StringTable {
    /// Backing storage that `add` copies each new string into.
    allocator: BucketList,
    /// Id → string; an id is the string's index in this list.
    pub names: Pod<&'static str>,
    /// String → id, used to deduplicate on insertion.
    pub translate: HashMap<&'static str, u32>,
}
1005
1006impl StringTable {
1007    pub fn new() -> Self {
1008        let mut table = Self {
1009            allocator: BucketList::new(),
1010            names: Pod::new(),
1011            translate: HashMap::new(),
1012        };
1013
1014        let mut success = true;
1015
1016        success = success && table.add("let") == Key::Let as u32;
1017        success = success && table.add("proc") == Key::Proc as u32;
1018        success = success && table.add("type") == Key::Type as u32;
1019        success = success && table.add("defer") == Key::Defer as u32;
1020        success = success && table.add("context") == Key::Context as u32;
1021
1022        success = success && table.add("if") == Key::If as u32;
1023        success = success && table.add("else") == Key::Else as u32;
1024        success = success && table.add("match") == Key::Match as u32;
1025
1026        success = success && table.add("continue") == Key::Continue as u32;
1027        success = success && table.add("break") == Key::Break as u32;
1028        success = success && table.add("for") == Key::For as u32;
1029
1030        success = success && table.add("spawn") == Key::Spawn as u32;
1031        success = success && table.add("wait") == Key::Wait as u32;
1032
1033        success = success && table.add("_") == Key::Underscore as u32;
1034        success = success && table.add("print") == Key::Print as u32;
1035
1036        if !success {
1037            panic!("Rippo");
1038        }
1039
1040        table
1041    }
1042
1043    pub fn add(&mut self, s: &str) -> u32 {
1044        if let Some(id) = self.translate.get(s) {
1045            return *id;
1046        }
1047
1048        let s = self.allocator.add_str(s);
1049        let id = self.names.len() as u32;
1050
1051        self.translate.insert(s, id);
1052        self.names.push(s);
1053
1054        return id;
1055    }
1056}