1use crate::source::describe_position;
2use std::borrow::Cow;
3use std::char;
4use std::fmt;
5use std::iter;
6use std::ops;
7use std::str;
8
/// Reason why the lexer rejected its input.
///
/// Stored inside [`LexError`] together with the byte offset and the source text.
#[cfg_attr(test, derive(Debug))]
#[derive(Clone)]
pub enum LexErrorKind<'source> {
    /// A `(;` block comment was opened but the input ended before its closing `;)`.
    UnterminatedBlockComment,
    /// The input ended before a string literal was closed.
    UnterminatedString,
    /// A run of identifier characters that is neither a number, a `$` identifier nor a
    /// keyword. Carries the offending run.
    ReservedName(&'source str),
    /// A character that cannot start any token.
    UnexpectedCharacter(char),
    /// An ASCII control character appeared unescaped inside a string literal.
    ControlCharInString,
    /// A `\` escape sequence inside a string literal was malformed.
    InvalidStringFormat,
}
19
/// Error produced by the lexer: what went wrong and where.
#[cfg_attr(test, derive(Debug))]
#[derive(Clone)]
pub struct LexError<'source> {
    /// What went wrong.
    kind: LexErrorKind<'source>,
    /// Byte offset into `source` at which the error is reported.
    offset: usize,
    /// The complete input that was being lexed.
    source: &'source str,
}
29
30impl<'s> LexError<'s> {
31 pub fn kind(&self) -> &LexErrorKind<'s> {
32 &self.kind
33 }
34
35 pub fn offset(&self) -> usize {
36 self.offset
37 }
38
39 pub fn source(&self) -> &'s str {
40 self.source
41 }
42}
43
44impl<'s> fmt::Display for LexError<'s> {
45 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46 use LexErrorKind::*;
47 match &self.kind {
48 UnterminatedBlockComment => write!(f, "block comment is not terminated")?,
49 UnterminatedString => write!(f, "string literal is not terminated",)?,
50 ReservedName(name) => write!(f, "name '{}' is unavailable since it's reserved name", name)?,
51 UnexpectedCharacter(c) => write!(f, "unexpected character '{}'", c)?,
52 ControlCharInString => write!(f, "control char in string")?,
53 InvalidStringFormat => write!(
54 f,
55 r#"escape must be one of \t, \n, \r, \", \', \\, \u{{hexnum}}, \MN where M and N are hex number"#
56 )?,
57 }
58 describe_position(f, self.source, self.offset)
59 }
60}
61
/// Result type used by the lexer; the error is a boxed [`LexError`].
type Result<'s, T> = ::std::result::Result<T, Box<LexError<'s>>>;
63
/// Sign of a numeric literal or of a float exponent.
#[cfg_attr(test, derive(Debug))]
#[derive(Clone, Copy, PartialEq)]
pub enum Sign {
    /// Written as `+`; also what the lexer uses when no sign is written.
    Plus,
    /// Written as `-`.
    Minus,
}
70
71impl Sign {
72 pub fn apply<N: ops::Neg<Output = N>>(self, n: N) -> N::Output {
73 match self {
74 Sign::Plus => n,
75 Sign::Minus => -n,
76 }
77 }
78}
79
80impl fmt::Display for Sign {
81 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82 match self {
83 Sign::Plus => f.write_str("+"),
84 Sign::Minus => f.write_str("-"),
85 }
86 }
87}
88
/// Base in which a numeric literal was written.
#[cfg_attr(test, derive(Debug))]
#[derive(PartialEq, Clone, Copy)]
pub enum NumBase {
    /// Hexadecimal, written with a `0x` prefix.
    Hex,
    /// Decimal, written without a prefix.
    Dec,
}
95
96impl NumBase {
97 pub fn prefix(self) -> &'static str {
98 match self {
99 NumBase::Hex => "0x",
100 NumBase::Dec => "",
101 }
102 }
103
104 pub fn radix(self) -> u32 {
105 match self {
106 NumBase::Hex => 16,
107 NumBase::Dec => 10,
108 }
109 }
110}
111
/// Unsigned part of a float literal; the sign is stored next to it in [`Token::Float`].
#[cfg_attr(test, derive(Debug, PartialEq))]
#[derive(Clone)]
pub enum Float<'source> {
    /// `nan`, or `nan:0x<payload>` in which case the payload digits (without the
    /// `nan:0x` prefix) are carried as written.
    Nan(Option<&'source str>),
    /// `inf`.
    Inf,
    /// An ordinary float value.
    Val {
        /// Base the literal was written in.
        base: NumBase,
        /// Text before the exponent marker, without sign or `0x` prefix; may contain
        /// `.` and `_`.
        frac: &'source str,
        /// Sign and digits of the exponent, if one was written.
        exp: Option<(Sign, &'source str)>,
    },
}
124
/// A single lexical token of the source text.
#[cfg_attr(test, derive(Debug, PartialEq))]
#[derive(Clone)]
pub enum Token<'source> {
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// A run of identifier characters starting with a lowercase ASCII letter.
    Keyword(&'source str),
    /// Integer literal: sign, base, and the digits without sign or `0x` prefix.
    Int(Sign, NumBase, &'source str),
    /// Float literal: sign and unsigned value.
    Float(Sign, Float<'source>),
    /// String literal: the decoded bytes, and the raw source text including both quotes.
    String(Cow<'source, [u8]>, &'source str),
    /// A `$`-prefixed identifier, stored with its leading `$`.
    Ident(&'source str),
}
137
138impl<'s> fmt::Display for Token<'s> {
139 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140 match self {
141 Token::LParen => f.write_str("paren '('"),
142 Token::RParen => f.write_str("paren ')'"),
143 Token::Keyword(kw) => write!(f, "keyword '{}'", kw),
144 Token::Int(sign, base, s) => write!(f, "integer '{}{}{}'", sign, base.prefix(), s),
145 Token::Float(sign, Float::Nan(Some(payload))) => {
146 write!(f, "float number '{}nan:0x{}'", sign, payload)
147 }
148 Token::Float(sign, Float::Nan(None)) => write!(f, "float number '{}nan'", sign),
149 Token::Float(sign, Float::Inf) => write!(f, "float number '{}inf'", sign),
150 Token::Float(
151 sign,
152 Float::Val {
153 base,
154 frac,
155 exp: Some((exp_sign, exp)),
156 },
157 ) => {
158 let exp_leader = if *base == NumBase::Hex { 'P' } else { 'E' };
159 write!(
160 f,
161 "float number '{sign}{prefix}{frac}{exp_leader}{exp_sign}{exp}",
162 sign = sign,
163 prefix = base.prefix(),
164 frac = frac,
165 exp_leader = exp_leader,
166 exp_sign = exp_sign,
167 exp = exp
168 )
169 }
170 Token::Float(sign, Float::Val { base, frac, exp: None }) => {
171 write!(f, "float number '{}{}{}", sign, base.prefix(), frac,)
172 }
173 Token::String(_, s) => write!(f, "string literal {}", s),
174 Token::Ident(ident) => write!(f, "identifier '{}'", ident),
175 }
176 }
177}
178
/// A lexed token paired with the byte offset at which it starts, or `None` when no token
/// was produced.
type Lexed<'s> = Option<(Token<'s>, usize)>;
/// Outcome of one lexing step: a [`Lexed`] value or a boxed [`LexError`].
type LexResult<'s> = Result<'s, Lexed<'s>>;
181
/// Tokenizer over a source string; each call to `lex` yields the next token together
/// with its byte offset.
#[derive(Clone)]
pub struct Lexer<'source> {
    /// Remaining characters paired with their byte offsets; peekable so the next
    /// character can be inspected without consuming it.
    chars: iter::Peekable<str::CharIndices<'source>>,
    /// The complete input, used to slice token text and attached to errors.
    source: &'source str,
}
187
188impl<'s> Lexer<'s> {
189 pub fn new(source: &str) -> Lexer<'_> {
190 Lexer {
191 source,
192 chars: source.char_indices().peekable(),
193 }
194 }
195
196 pub fn source(&self) -> &'s str {
197 self.source
198 }
199
200 pub fn lex(&mut self) -> LexResult<'s> {
201 while self.eat_whitespace()? {}
202
203 if let Some(lexed) = self.lex_paren() {
205 return Ok(Some(lexed));
206 }
207 if let Some(lexed) = self.lex_string()? {
208 return Ok(Some(lexed));
209 }
210 if let Some(lexed) = self.lex_idchars()? {
212 return Ok(Some(lexed));
213 }
214
215 if let Some(peeked) = self.chars.peek() {
216 let (offset, c) = *peeked; self.fail(LexErrorKind::UnexpectedCharacter(c), offset)
218 } else {
219 Ok(None)
220 }
221 }
222
223 fn lex_paren(&mut self) -> Lexed<'s> {
224 if let Some(offset) = self.eat_char('(') {
225 Some((Token::LParen, offset))
226 } else {
227 self.eat_char(')').map(|offset| (Token::RParen, offset))
228 }
229 }
230
231 fn lex_string(&mut self) -> LexResult<'s> {
232 let start = match self.eat_char('"') {
234 Some(offset) => offset,
235 None => return Ok(None),
236 };
237
238 let mut buf = vec![];
239 while let Some((i, c)) = self.chars.next() {
240 match c {
241 '"' => {
242 let content = if buf.is_empty() {
243 Cow::Borrowed(self.source[start + 1..i].as_bytes())
246 } else {
247 Cow::Owned(buf)
248 };
249 let token = Token::String(content, &self.source[start..i + 1]);
250 return Ok(Some((token, start)));
251 }
252 '\\' => {
253 if buf.is_empty() {
254 buf.extend_from_slice(self.source[start + 1..i].as_bytes());
258 }
259
260 match self.chars.next() {
261 Some((_, 't')) => buf.push(b'\t'),
262 Some((_, 'n')) => buf.push(b'\n'),
263 Some((_, 'r')) => buf.push(b'\r'),
264 Some((_, '"')) => buf.push(b'"'),
265 Some((_, '\'')) => buf.push(b'\''),
266 Some((_, '\\')) => buf.push(b'\\'),
267 Some((_, 'u')) => {
268 match self.chars.next() {
269 Some((i, '{')) => {
270 let brace_start = i + 1; let uend = loop {
272 match self.chars.next() {
273 Some((i, '}')) => break i,
274 Some(_) => continue,
275 None => return self.fail(LexErrorKind::UnterminatedString, start),
276 }
277 };
278 if let Some(c) = u32::from_str_radix(&self.source[brace_start..uend], 16)
279 .ok()
280 .and_then(char::from_u32)
281 {
282 let mut b = [0; 4];
283 buf.extend_from_slice(c.encode_utf8(&mut b).as_bytes());
284 } else {
285 return self.fail(LexErrorKind::InvalidStringFormat, start);
286 }
287 }
288 Some(_) => return self.fail(LexErrorKind::InvalidStringFormat, start),
289 None => return self.fail(LexErrorKind::UnterminatedString, start),
290 }
291 }
292 Some((_, c)) => {
293 let hi = c.to_digit(16);
294 let lo = self.chars.next().and_then(|(_, c)| c.to_digit(16));
295 match (hi, lo) {
296 (Some(hi), Some(lo)) => buf.push((hi * 16 + lo) as u8),
297 _ => return self.fail(LexErrorKind::InvalidStringFormat, start),
298 }
299 }
300 None => return self.fail(LexErrorKind::UnterminatedString, start),
301 }
302 }
303 _ if c.is_ascii_control() => return self.fail(LexErrorKind::ControlCharInString, start),
304 _ if !buf.is_empty() => {
305 let mut b = [0; 4];
306 buf.extend_from_slice(c.encode_utf8(&mut b).as_bytes());
307 }
308 _ => { }
309 }
310 }
311
312 self.fail(LexErrorKind::UnterminatedString, start)
313 }
314
315 fn lex_idchars(&mut self) -> LexResult<'s> {
316 fn is_idchar(c: char) -> bool {
317 matches!(c,
319 '0'..='9'
320 | 'a'..='z'
321 | 'A'..='Z'
322 | '!'
323 | '#'
324 | '$'
325 | '%'
326 | '&'
327 | '\''
328 | '*'
329 | '+'
330 | '-'
331 | '.'
332 | '/'
333 | ':'
334 | '<'
335 | '='
336 | '>'
337 | '?'
338 | '@'
339 | '\\'
340 | '^'
341 | '_'
342 | '`'
343 | '|'
344 | '~'
345 )
346 }
347
348 let start = self.offset();
349 let end = loop {
350 match self.chars.peek() {
351 Some((_, c)) if is_idchar(*c) => {
352 self.chars.next();
353 continue;
354 }
355 Some((offset, _)) => break *offset,
356 None => break self.source.len(),
357 }
358 };
359
360 if start == end {
361 return Ok(None);
362 }
363
364 let idchars = &self.source[start..end];
366 if let Some(lexed) = Self::lex_number_from_idchars(idchars, start) {
367 return Ok(Some(lexed));
368 }
369 if let Some(lexed) = Self::lex_ident_or_keyword_from_idchars(idchars, start) {
370 return Ok(Some(lexed));
371 }
372
373 self.fail(LexErrorKind::ReservedName(idchars), start)
375 }
376
/// Checks that `s` is a non-empty sequence of digits accepted by `pred`, optionally
/// separated by single underscores.
///
/// Leading, trailing and doubled underscores are rejected.
fn is_num<F: Fn(&char) -> bool>(s: &str, pred: F) -> bool {
    // Splitting on `_` yields an empty group exactly where an underscore is leading,
    // trailing or doubled, and for the empty string.
    s.split('_')
        .all(|group| !group.is_empty() && group.chars().all(|c| pred(&c)))
}
392
/// Classifies `idchars` (already stripped of any sign and `0x` prefix) as an integer or
/// float literal in the given base.
///
/// Returns `None` when the text is not a well-formed number: it must start with a digit,
/// contain at most one `.`, have `.` and `_` only directly after a digit, not end its
/// mantissa with `_`, and, if an exponent marker is present, be followed by a valid
/// decimal exponent.
fn lex_unsigned_number(idchars: &'s str, sign: Sign, base: NumBase) -> Option<Token<'s>> {
    fn is_hex_exp(c: char) -> bool {
        c == 'p' || c == 'P'
    }
    fn is_dec_exp(c: char) -> bool {
        c == 'e' || c == 'E'
    }

    #[allow(clippy::type_complexity)]
    let (is_digit, is_exp): (fn(&char) -> bool, fn(char) -> bool) = match base {
        NumBase::Hex => (char::is_ascii_hexdigit, is_hex_exp),
        NumBase::Dec => (char::is_ascii_digit, is_dec_exp),
    };
    let mut chars = idchars.char_indices();
    // The first character must be a digit; an empty input is rejected as well.
    if chars.next().map(|(_, c)| !is_digit(&c)).unwrap_or(true) {
        return None;
    }

    let mut exp_start = false;
    let mut saw_dot = false;
    {
        // Category of the previously scanned character, used to validate `.` and `_`.
        #[derive(PartialEq, Eq)]
        enum PrevChar {
            Dot,
            Underscore,
            Digit,
        }

        // Scan the mantissa. The loop ends either at the exponent marker (`exp_start`
        // set) or when the input is exhausted.
        let mut prev_char = PrevChar::Digit;
        for (_, c) in &mut chars {
            prev_char = match c {
                '.' if saw_dot || prev_char != PrevChar::Digit => return None,
                '.' => {
                    saw_dot = true;
                    PrevChar::Dot
                }
                '_' if prev_char != PrevChar::Digit => return None,
                '_' => PrevChar::Underscore,
                c if is_exp(c) => {
                    exp_start = true;
                    break;
                }
                c if is_digit(&c) => PrevChar::Digit,
                _ => return None,
            };
        }

        // The mantissa must not end with an underscore.
        if prev_char == PrevChar::Underscore {
            return None;
        }
    }

    match chars.next() {
        Some((i, c)) if exp_start => {
            // `c` is the first character after the exponent marker: an optional sign.
            let (exp_sign, start) = match c {
                '+' => (Sign::Plus, i + 1),
                '-' => (Sign::Minus, i + 1),
                _ => (Sign::Plus, i),
            };
            // The exponent marker is a single ASCII byte located at `i - 1`.
            let frac = &idchars[..i - 1];
            let exp = &idchars[start..];
            // The exponent digits are decimal for both bases.
            if Self::is_num(exp, char::is_ascii_digit) {
                let float = Float::Val {
                    base,
                    frac,
                    exp: Some((exp_sign, exp)),
                };
                Some(Token::Float(sign, float))
            } else {
                None
            }
        }
        // Without an exponent marker the loop above only ends once `chars` is exhausted.
        Some(_) => unreachable!(),
        // An exponent marker with nothing after it.
        None if exp_start => None,
        None if saw_dot => Some(Token::Float(
            sign,
            Float::Val {
                base,
                frac: idchars,
                exp: None,
            },
        )),
        None => Some(Token::Int(sign, base, idchars)),
    }
}
482
483 fn lex_number_from_idchars(idchars: &'s str, start: usize) -> Lexed<'s> {
484 let (sign, idchars) = match idchars.chars().next() {
485 Some('+') => (Sign::Plus, &idchars[1..]),
486 Some('-') => (Sign::Minus, &idchars[1..]),
487 _ => (Sign::Plus, idchars),
488 };
489
490 let token = match idchars {
492 "inf" => Some(Token::Float(sign, Float::Inf)),
493 "nan" => Some(Token::Float(sign, Float::Nan(None))),
494 idchars if idchars.starts_with("nan:0x") => {
495 let payload = &idchars[6..];
496 if Self::is_num(payload, char::is_ascii_hexdigit) {
497 Some(Token::Float(sign, Float::Nan(Some(payload))))
498 } else {
499 None
500 }
501 }
502 idchars if idchars.starts_with("0x") => Self::lex_unsigned_number(&idchars[2..], sign, NumBase::Hex),
503 idchars => Self::lex_unsigned_number(idchars, sign, NumBase::Dec),
504 };
505 token.map(|t| (t, start))
506 }
507
508 fn lex_ident_or_keyword_from_idchars(idchars: &'s str, start: usize) -> Lexed<'s> {
509 match idchars.chars().next() {
511 Some('$') if idchars.len() > 1 => Some((Token::Ident(idchars), start)), Some('a'..='z') => Some((Token::Keyword(idchars), start)), _ => None,
514 }
515 }
516
517 fn eat_whitespace(&mut self) -> Result<'s, bool> {
518 fn is_ws_char(c: char) -> bool {
520 matches!(c, ' ' | '\t' | '\n' | '\r')
521 }
522 Ok(self.eat_char_by(is_ws_char) || self.eat_line_comment() || self.eat_block_comment()?)
523 }
524
525 fn eat_line_comment(&mut self) -> bool {
526 if self.eat_str(";;").is_none() {
528 return false;
529 }
530
531 for (_, c) in &mut self.chars {
532 if c == '\n' {
533 break;
534 }
535 }
536
537 true
538 }
539
540 fn eat_block_comment(&mut self) -> Result<'s, bool> {
541 let start = if let Some(offset) = self.eat_str("(;") {
543 offset
544 } else {
545 return Ok(false);
546 };
547
548 loop {
550 if self.eat_block_comment()? {
551 continue;
552 }
553 if self.eat_str(";)").is_some() {
554 return Ok(true);
555 }
556 if self.chars.next().is_none() {
557 return self.fail(LexErrorKind::UnterminatedBlockComment, start);
558 }
559 }
560 }
561
562 fn eat_char(&mut self, want: char) -> Option<usize> {
563 match self.chars.peek() {
564 Some((offset, c)) if *c == want => {
565 let offset = *offset;
566 self.chars.next();
567 Some(offset)
568 }
569 _ => None,
570 }
571 }
572
573 fn eat_char_by<F: Fn(char) -> bool>(&mut self, pred: F) -> bool {
574 match self.chars.peek() {
575 Some((_, c)) if pred(*c) => {
576 self.chars.next();
577 true
578 }
579 _ => false,
580 }
581 }
582
583 fn eat_str(&mut self, s: &str) -> Option<usize> {
584 assert!(!s.is_empty());
585 let offset = self.offset();
586 if self.source[offset..].starts_with(s) {
587 self.chars.nth(s.len() - 1);
588 Some(offset)
589 } else {
590 None
591 }
592 }
593
594 fn offset(&mut self) -> usize {
595 match self.chars.peek() {
596 Some((offset, _)) => *offset,
597 None => self.source.len(),
598 }
599 }
600
601 fn fail<T>(&self, kind: LexErrorKind<'s>, offset: usize) -> Result<'s, T> {
602 Err(Box::new(LexError {
603 kind,
604 offset,
605 source: self.source,
606 }))
607 }
608}
609
610impl<'s> Iterator for Lexer<'s> {
611 type Item = Result<'s, (Token<'s>, usize)>;
612
613 fn next(&mut self) -> Option<Self::Item> {
614 self.lex().transpose()
615 }
616}
617
618#[cfg(test)]
619mod tests {
620 use super::*;
621
622 fn lex_all(s: &str) -> Result<'_, Vec<(Token<'_>, usize)>> {
623 Lexer::new(s).collect()
624 }
625
/// Asserts that `$input` lexes to exactly one token and that it matches the pattern
/// `$token`.
macro_rules! assert_lex_one {
    ($input:expr, $token:pat) => {
        let tokens = lex_all($input).unwrap();
        assert_eq!(tokens.len(), 1);
        let actual = &tokens[0].0;
        if !matches!(actual, $token) {
            panic!(
                "assertion failed: {:?} did not match to token {}",
                actual,
                stringify!($token)
            );
        }
    };
}
640
/// Asserts that lexing `$input` fails and that the error kind matches the pattern
/// `$errkind`.
macro_rules! assert_lex_error {
    ($input:expr, $errkind:pat) => {
        match lex_all($input).unwrap_err().kind() {
            $errkind => {}
            e => panic!(
                "assertion failed: {:?} did not match to error kind {}",
                e,
                // Name the expected pattern; `$token` is not a metavariable of this macro.
                stringify!($errkind)
            ),
        }
    };
}
653
/// Input consisting only of whitespace produces no tokens.
#[test]
fn spaces() {
    let blank_inputs = ["", " ", "\t", "\n", "\r", " \t\r\n \t\n\n\n\n "];
    for input in &blank_inputs {
        assert!(lex_all(input).unwrap().is_empty());
    }
}
663
/// Line and block comments (including nested ones) produce no tokens; unterminated block
/// comments are errors.
#[test]
fn comments() {
    let well_formed = [
        ";;",
        ";;foo",
        ";;foo\n;;bar\n ;; piyo",
        "(;;)",
        "(; hi! ;)",
        "(; hi!\n how are you?\n bye!\n ;)",
        "(;(;;);)",
        "(;\nhi!\n (;how are you?\n;) bye!\n;)",
    ];
    for input in &well_formed {
        assert!(lex_all(input).unwrap().is_empty());
    }

    let unterminated = ["(;", "(; hi! ", "(;(;;)"];
    for input in &unterminated {
        assert_lex_error!(*input, LexErrorKind::UnterminatedBlockComment);
    }
}
679
/// Each parenthesis lexes to its own token.
#[test]
fn parens() {
    let (open, close) = ("(", ")");
    assert_lex_one!(close, Token::RParen);
    assert_lex_one!(open, Token::LParen);
}
685
/// String literals decode their escapes; malformed literals produce the matching error.
#[test]
fn strings() {
    // Asserts that `$input` lexes to a single string token with the given decoded bytes
    // and whose raw source text is `$input` itself.
    macro_rules! assert_lex_string {
        ($input:expr, $bytes:expr) => {
            let tokens = lex_all($input).unwrap();
            assert_eq!(tokens.len(), 1);
            let lexed = &tokens[0].0;
            let matched = match lexed {
                Token::String(v, src) => *v == $bytes.to_vec() && *src == $input,
                _ => false,
            };
            if !matched {
                panic!(
                    "assertion failed: {:?} did not match to token {}",
                    lexed,
                    stringify!(Token::String($bytes, $input))
                );
            }
        };
    }

    assert_lex_string!(r#""""#, b"");
    assert_lex_string!(r#""hello""#, b"hello");
    // `\a9` is a raw byte that is not valid UTF-8 on its own, so it is appended separately.
    let mut v = "\t\n\r\"\'\\\u{1234}\x00".as_bytes().to_vec();
    v.push(b'\xa9');
    assert_lex_string!(r#""\t\n\r\"\'\\\u{1234}\00\a9""#, v);
    assert_lex_string!(r#""あいうえお""#, "あいうえお".as_bytes());

    let unterminated = [r#"""#, r#""foo\""#, r#""\u{41""#, r#""\u"#, r#""\u{""#];
    for input in &unterminated {
        assert_lex_error!(*input, LexErrorKind::UnterminatedString);
    }

    let bad_escapes = [
        r#""\x""#,
        r#""\0""#,
        r#""\0x""#,
        r#""\u""#,
        r#""\u{}""#,
        r#""\u{hello!}""#,
        r#""\u{d800}""#,
        r#""\u{dfff}""#,
        r#""\u{110000}""#,
    ];
    for input in &bad_escapes {
        assert_lex_error!(*input, LexErrorKind::InvalidStringFormat);
    }

    let control_chars = ["\"\x00\"", "\"\x1f\"", "\"\x7f\""];
    for input in &control_chars {
        assert_lex_error!(*input, LexErrorKind::ControlCharInString);
    }
}
729
/// `$`-prefixed names lex to identifiers that keep their full text.
#[test]
fn idents() {
    // The whole input is expected back as the identifier text.
    macro_rules! assert_ident {
        ($text:literal) => {
            assert_lex_one!($text, Token::Ident($text));
        };
    }

    assert_ident!("$x");
    assert_ident!("$foo0123FOO");
    assert_ident!("$0aB!#$%&'*+-./:<=>?@\\^_`|~");
}
739
/// Names starting with a lowercase letter lex to keywords, including malformed NaN
/// payloads that fail to lex as numbers.
#[test]
fn keywords() {
    // The whole input is expected back as the keyword text.
    macro_rules! assert_keyword {
        ($text:literal) => {
            assert_lex_one!($text, Token::Keyword($text));
        };
    }

    assert_keyword!("module");
    assert_keyword!("i32.const");
    assert_keyword!("nan:0x_1");
    assert_keyword!("nan:0x1_");
    assert_keyword!("nan:0x1__2");
}
748
/// Runs of identifier characters that are neither numbers, identifiers nor keywords are
/// rejected as reserved names.
#[test]
fn reserved() {
    macro_rules! assert_reserved {
        // The reserved name differs from the full input.
        ($input:literal => $name:literal) => {
            assert_lex_error!($input, LexErrorKind::ReservedName($name));
        };
        // The whole input is the reserved name.
        ($input:literal) => {
            assert_reserved!($input => $input);
        };
    }

    assert_reserved!("0$foo");
    assert_reserved!("$");
    assert_reserved!("$ ;;" => "$");
    assert_reserved!("123p3");
    assert_reserved!("0x123p1f");
    assert_reserved!("123e");
    assert_reserved!("123e+");
    assert_reserved!("0x");
    assert_reserved!("1_");
    assert_reserved!("1__2");
    assert_reserved!("1.2_");
    assert_reserved!("1._2");
    assert_reserved!("1.2__3");
    assert_reserved!("1.E2_");
    assert_reserved!("1.E_2");
    assert_reserved!("1.E2__3");
}
768
/// Decimal and hexadecimal integers lex with their sign, base and bare digits.
#[test]
fn integers() {
    // `$sign` and `$base` name variants of `Sign` and `NumBase`.
    macro_rules! assert_int {
        ($input:literal, $sign:ident, $base:ident, $digits:literal) => {
            assert_lex_one!($input, Token::Int(Sign::$sign, NumBase::$base, $digits));
        };
    }

    assert_int!("1", Plus, Dec, "1");
    assert_int!("123", Plus, Dec, "123");
    assert_int!("1_2_3", Plus, Dec, "1_2_3");
    assert_int!("+1", Plus, Dec, "1");
    assert_int!("+123", Plus, Dec, "123");
    assert_int!("-1", Minus, Dec, "1");
    assert_int!("-123", Minus, Dec, "123");
    assert_int!("0xd", Plus, Hex, "d");
    assert_int!("0xc0ffee", Plus, Hex, "c0ffee");
    assert_int!("+0xd", Plus, Hex, "d");
    assert_int!("+0xc0ffee", Plus, Hex, "c0ffee");
    assert_int!("-0xd", Minus, Hex, "d");
    assert_int!("-0xc0ffee", Minus, Hex, "c0ffee");
}
785
/// Float literals lex with their sign, base, fraction text and optional exponent; `inf`
/// and `nan` forms lex to their dedicated variants.
#[test]
fn floats() {
    // Asserts that `$input` lexes to a single `Float::Val` token. `$sign` and `$base`
    // name variants of `Sign` and `NumBase`; `$exp` is the pattern expected for the
    // exponent.
    macro_rules! assert_float_val {
        ($input:literal, $sign:ident, $base:ident, $frac:literal, $exp:pat) => {
            assert_lex_one!(
                $input,
                Token::Float(
                    Sign::$sign,
                    Float::Val {
                        base: NumBase::$base,
                        frac: $frac,
                        exp: $exp,
                    }
                )
            );
        };
    }

    // Decimal
    assert_float_val!("123.", Plus, Dec, "123.", None);
    assert_float_val!("123.456", Plus, Dec, "123.456", None);
    assert_float_val!("+123.", Plus, Dec, "123.", None);
    assert_float_val!("-123.", Minus, Dec, "123.", None);
    assert_float_val!("123.e10", Plus, Dec, "123.", Some((Sign::Plus, "10")));
    assert_float_val!("123.456e10", Plus, Dec, "123.456", Some((Sign::Plus, "10")));
    assert_float_val!("1_2_3.4_5_6e1_0", Plus, Dec, "1_2_3.4_5_6", Some((Sign::Plus, "1_0")));
    assert_float_val!("123.E10", Plus, Dec, "123.", Some((Sign::Plus, "10")));
    assert_float_val!("123.e+10", Plus, Dec, "123.", Some((Sign::Plus, "10")));
    assert_float_val!("123.e-10", Plus, Dec, "123.", Some((Sign::Minus, "10")));

    // Hexadecimal
    assert_float_val!("0xc0f.", Plus, Hex, "c0f.", None);
    assert_float_val!("0xc0f.fee", Plus, Hex, "c0f.fee", None);
    assert_float_val!("+0xc0f.", Plus, Hex, "c0f.", None);
    assert_float_val!("-0xc0f.", Minus, Hex, "c0f.", None);
    assert_float_val!("0xc0f.p10", Plus, Hex, "c0f.", Some((Sign::Plus, "10")));
    assert_float_val!("0xc0f.feep10", Plus, Hex, "c0f.fee", Some((Sign::Plus, "10")));
    assert_float_val!("0xc_0_f.f_e_ep1_0", Plus, Hex, "c_0_f.f_e_e", Some((Sign::Plus, "1_0")));
    assert_float_val!("0xc0f.feeP10", Plus, Hex, "c0f.fee", Some((Sign::Plus, "10")));
    assert_float_val!("0xc0f.p+10", Plus, Hex, "c0f.", Some((Sign::Plus, "10")));
    assert_float_val!("0xc0f.p-10", Plus, Hex, "c0f.", Some((Sign::Minus, "10")));

    // Infinity
    assert_lex_one!("inf", Token::Float(Sign::Plus, Float::Inf));
    assert_lex_one!("+inf", Token::Float(Sign::Plus, Float::Inf));
    assert_lex_one!("-inf", Token::Float(Sign::Minus, Float::Inf));

    // NaN, with and without payload
    assert_lex_one!("nan", Token::Float(Sign::Plus, Float::Nan(None)));
    assert_lex_one!("+nan", Token::Float(Sign::Plus, Float::Nan(None)));
    assert_lex_one!("-nan", Token::Float(Sign::Minus, Float::Nan(None)));
    assert_lex_one!("nan:0x1f", Token::Float(Sign::Plus, Float::Nan(Some("1f"))));
    assert_lex_one!("nan:0x1_f", Token::Float(Sign::Plus, Float::Nan(Some("1_f"))));
    assert_lex_one!("+nan:0x1f", Token::Float(Sign::Plus, Float::Nan(Some("1f"))));
    assert_lex_one!("-nan:0x1f", Token::Float(Sign::Minus, Float::Nan(Some("1f"))));
}
1021
/// A character that cannot start a token is reported, also after whitespace or comments.
#[test]
fn unexpected_characters() {
    let inputs = ["[", " [", "(;_;) [", ";;\n["];
    for input in &inputs {
        assert_lex_error!(*input, LexErrorKind::UnexpectedCharacter('['));
    }
}
1030
/// Lexes a complete small module and checks the full token sequence (offsets ignored).
#[test]
fn hello_world() {
    use super::Token::{Ident, Keyword, LParen, RParen};

    /// Unsigned decimal integer token.
    fn dec(digits: &'static str) -> Token<'static> {
        Token::Int(Sign::Plus, NumBase::Dec, digits)
    }

    /// String token with its decoded bytes and raw source text.
    fn string(bytes: &'static [u8], literal: &'static str) -> Token<'static> {
        Token::String(Cow::Borrowed(bytes), literal)
    }

    let input = r#"
(module
 (type $i32_=>_none (func (param i32)))
 (type $none_=>_i32 (func (result i32)))
 (import "env" "print" (func $print (param i32)))
 (memory $0 2)
 (data (i32.const 1024) "Hello, world\n\00")
 (table $0 1 1 funcref)
 (global $global$0 (mut i32) (i32.const 66576))
 (export "memory" (memory $0))
 (export "_start" (func $_start))
 (func $_start (; 1 ;) (result i32)
 (call $print
 (i32.const 1024)
 )
 (i32.const 0)
 )
 ;; custom section "producers", size 27
)
 "#;

    let actual: Vec<_> = lex_all(input)
        .unwrap()
        .into_iter()
        .map(|(token, _offset)| token)
        .collect();

    // One source line of the module per row below.
    #[rustfmt::skip]
    let expected = vec![
        LParen, Keyword("module"),
        LParen, Keyword("type"), Ident("$i32_=>_none"), LParen, Keyword("func"), LParen, Keyword("param"), Keyword("i32"), RParen, RParen, RParen,
        LParen, Keyword("type"), Ident("$none_=>_i32"), LParen, Keyword("func"), LParen, Keyword("result"), Keyword("i32"), RParen, RParen, RParen,
        LParen, Keyword("import"), string(b"env", r#""env""#), string(b"print", r#""print""#), LParen, Keyword("func"), Ident("$print"), LParen, Keyword("param"), Keyword("i32"), RParen, RParen, RParen,
        LParen, Keyword("memory"), Ident("$0"), dec("2"), RParen,
        LParen, Keyword("data"), LParen, Keyword("i32.const"), dec("1024"), RParen, string(b"Hello, world\n\x00", r#""Hello, world\n\00""#), RParen,
        LParen, Keyword("table"), Ident("$0"), dec("1"), dec("1"), Keyword("funcref"), RParen,
        LParen, Keyword("global"), Ident("$global$0"), LParen, Keyword("mut"), Keyword("i32"), RParen, LParen, Keyword("i32.const"), dec("66576"), RParen, RParen,
        LParen, Keyword("export"), string(b"memory", r#""memory""#), LParen, Keyword("memory"), Ident("$0"), RParen, RParen,
        LParen, Keyword("export"), string(b"_start", r#""_start""#), LParen, Keyword("func"), Ident("$_start"), RParen, RParen,
        LParen, Keyword("func"), Ident("$_start"), LParen, Keyword("result"), Keyword("i32"), RParen,
        LParen, Keyword("call"), Ident("$print"),
        LParen, Keyword("i32.const"), dec("1024"), RParen,
        RParen,
        LParen, Keyword("i32.const"), dec("0"), RParen,
        RParen,
        RParen,
    ];

    assert_eq!(actual, expected);
}
1167
/// `Sign::apply` keeps the value for `Plus` and negates it for `Minus`, for integers and
/// floats alike.
#[test]
fn apply_sign() {
    let int_cases = [
        (Sign::Plus, 42, 42),
        (Sign::Plus, -42, -42),
        (Sign::Minus, 42, -42),
        (Sign::Minus, -42, 42),
    ];
    for &(sign, value, expected) in &int_cases {
        assert_eq!(sign.apply(value), expected);
    }

    let float_cases = [
        (Sign::Plus, 1.0, 1.0),
        (Sign::Plus, -1.0, -1.0),
        (Sign::Minus, 1.0, -1.0),
        (Sign::Minus, -1.0, 1.0),
    ];
    for &(sign, value, expected) in &float_cases {
        assert_eq!(sign.apply(value), expected);
    }
}
1179}