1use std::num::IntErrorKind;
11
12use logos::{Lexer, Logos};
13
14#[derive(Debug, Logos, PartialEq, Eq)]
16#[logos(skip r"[ \t]+", error = LexErr)]
17pub enum Token {
18 #[regex(r"\d\w*", lex_unsigned_dec)]
26 #[regex(r"#\d?\w*", lex_unsigned_dec)]
27 #[regex(r"[Xx][\dA-Fa-f]\w*", lex_unsigned_hex)]
28 Unsigned(u16),
29
30 #[regex(r"-\w*", lex_signed_dec)]
32 #[regex(r"#-\w*", lex_signed_dec)]
33 #[regex(r"-#+\w*", lex_signed_dec)] #[regex(r"##+-?\w*", lex_signed_dec)] #[regex(r"[Xx]-\w*", lex_signed_hex)]
36 Signed(i16),
37
38 #[regex(r"[Rr]\d+", lex_reg)]
40 Reg(u8),
41
42 #[regex(r"[A-Za-z_]\w*", |lx| lx.slice().parse::<Ident>().expect("should be infallible"))]
50 Ident(Ident),
51
52 #[regex(r"\.\w*", |lx| lx.slice()[1..].to_string())]
54 Directive(String),
55
56 #[token(r#"""#, lex_str_literal)]
58 String(String),
59
60 #[token(":")]
62 Colon,
63
64 #[token(",")]
66 Comma,
67
68 #[regex(r";.*")]
70 Comment,
71
72 #[regex(r"\r?\n")]
74 NewLine
75}
76impl Token {
77 pub(crate) fn is_whitespace(&self) -> bool {
78 matches!(self, Token::NewLine)
79 }
80}
81
/// Generates the [`Ident`] enum from a list of reserved mnemonics.
///
/// For every mnemonic a unit variant of the same name is created, plus a
/// catch-all `Label(String)` variant. The macro also emits:
/// - a `FromStr` impl that recognizes mnemonics case-insensitively and turns
///   anything else into a label (so parsing never fails), and
/// - a `Display` impl that prints the mnemonic name or the label text.
macro_rules! ident_enum {
    ($($instr:ident),+) => {
        /// An identifier: one of the reserved mnemonics, or a label.
        #[derive(Debug, PartialEq, Eq, Clone)]
        pub enum Ident {
            $(
                // Mnemonic variants are named after the mnemonic itself.
                #[allow(missing_docs)]
                $instr
            ),+,
            /// Any identifier that is not a reserved mnemonic, stored verbatim.
            Label(String)
        }

        impl std::str::FromStr for Ident {
            type Err = std::convert::Infallible;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                // Compare ASCII-case-insensitively. Unicode upper-casing would
                // fold characters such as U+017F (long s) or U+0131 (dotless i)
                // into ASCII letters and misclassify labels as mnemonics; this
                // form also avoids allocating an upper-cased copy.
                $(
                    if s.eq_ignore_ascii_case(stringify!($instr)) {
                        return Ok(Self::$instr);
                    }
                )+
                Ok(Self::Label(s.to_string()))
            }
        }

        impl std::fmt::Display for Ident {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                match self {
                    $(Self::$instr => f.write_str(stringify!($instr))),*,
                    Self::Label(id) => f.write_str(id)
                }
            }
        }
    };
}
ident_enum! {
    ADD, AND, NOT, BR, BRP, BRZ, BRZP, BRN, BRNP, BRNZ, BRNZP,
    JMP, JSR, JSRR, LD, LDI, LDR, LEA, ST, STI, STR, TRAP, NOP,
    RET, RTI, GETC, OUT, PUTC, PUTS, IN, PUTSP, HALT
}
127
/// The kinds of error the lexer can report.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LexErr {
    /// A numeric literal is out of range for `u16`.
    DoesNotFitU16,
    /// A numeric literal is out of range for `i16`.
    DoesNotFitI16,
    /// A hex literal contains a character that is not a hex digit.
    InvalidHex,
    /// A decimal literal contains a character that is not a digit.
    InvalidNumeric,
    /// A hex literal has no digits.
    InvalidHexEmpty,
    /// A decimal literal has no digits.
    InvalidDecEmpty,
    /// Integer parsing failed for a reason not covered by the other variants.
    UnknownIntErr,
    /// A string literal is missing its closing quote.
    UnclosedStrLit,
    /// A string literal exceeds the size limit.
    StrLitTooBig,
    /// A register token does not name one of `R0`-`R7`.
    InvalidReg,
    /// The input matches no token rule; this is the default error.
    #[default]
    InvalidSymbol,
}
155impl std::fmt::Display for LexErr {
156 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
157 match self {
158 LexErr::DoesNotFitU16 => f.write_str("numeric token does not fit 16-bit unsigned integer"),
159 LexErr::DoesNotFitI16 => f.write_str("numeric token does not fit 16-bit signed integer"),
160 LexErr::InvalidHex => f.write_str("invalid hex literal"),
161 LexErr::InvalidNumeric => f.write_str("invalid decimal literal"),
162 LexErr::InvalidHexEmpty => f.write_str("invalid hex literal"),
163 LexErr::InvalidDecEmpty => f.write_str("invalid decimal literal"),
164 LexErr::UnknownIntErr => f.write_str("could not parse integer"),
165 LexErr::UnclosedStrLit => f.write_str("unclosed string literal"),
166 LexErr::StrLitTooBig => f.write_str("string literal is too large"),
167 LexErr::InvalidReg => f.write_str("invalid register"),
168 LexErr::InvalidSymbol => f.write_str("unrecognized symbol"),
169 }
170 }
171}
172impl std::error::Error for LexErr {}
173impl crate::err::Error for LexErr {
174 fn help(&self) -> Option<std::borrow::Cow<str>> {
175 match self {
176 LexErr::DoesNotFitU16 => Some(format!("the range for a 16-bit unsigned integer is [{}, {}]", u16::MIN, u16::MAX).into()),
177 LexErr::DoesNotFitI16 => Some(format!("the range for a 16-bit signed integer is [{}, {}]", i16::MIN, i16::MAX).into()),
178 LexErr::InvalidHex => Some("a hex literal starts with 'x' and consists of 0-9, A-F".into()),
179 LexErr::InvalidNumeric => Some("a decimal literal only consists of digits 0-9".into()),
180 LexErr::InvalidHexEmpty => Some("there should be hex digits (0-9, A-F) here".into()),
181 LexErr::InvalidDecEmpty => Some("there should be digits (0-9) here".into()),
182 LexErr::UnknownIntErr => None,
183 LexErr::UnclosedStrLit => Some("add a quote to the end of the string literal".into()),
184 LexErr::StrLitTooBig => Some(format!("string literals are limited to at most {} characters", u16::MAX - 1).into()),
185 LexErr::InvalidReg => Some("this must be R0-R7".into()),
186 LexErr::InvalidSymbol => Some("this char does not occur in any token in LC-3 assembly".into()),
187 }
188 }
189}
190fn convert_int_error(
192 e: &std::num::IntErrorKind,
193 invalid_digits_err: LexErr,
194 empty_err: LexErr,
195 overflow_err: LexErr,
196 src: &str
197) -> LexErr {
198 match e {
199 IntErrorKind::Empty => empty_err,
200 IntErrorKind::InvalidDigit if src == "-" => empty_err,
201 IntErrorKind::InvalidDigit => invalid_digits_err,
202 IntErrorKind::PosOverflow => overflow_err,
203 IntErrorKind::NegOverflow => overflow_err,
204 IntErrorKind::Zero => unreachable!("IntErrorKind::Zero should not be emitted in parsing u16"),
205 _ => LexErr::UnknownIntErr,
206 }
207}
208fn lex_unsigned_dec(lx: &Lexer<'_, Token>) -> Result<u16, LexErr> {
209 let mut string = lx.slice();
210 if lx.slice().starts_with('#') {
211 string = &string[1..];
212 }
213
214 string.parse::<u16>()
215 .map_err(|e| convert_int_error(e.kind(), LexErr::InvalidNumeric, LexErr::InvalidDecEmpty, LexErr::DoesNotFitU16, string))
216}
217
218fn lex_signed_dec(lx: &Lexer<'_, Token>) -> Result<i16, LexErr> {
219 let mut string = lx.slice();
220 if lx.slice().starts_with('#') {
221 string = &string[1..];
222 }
223
224 string.parse::<i16>()
225 .map_err(|e| convert_int_error(e.kind(), LexErr::InvalidNumeric, LexErr::InvalidDecEmpty, LexErr::DoesNotFitI16, string))
226}
227fn lex_unsigned_hex(lx: &Lexer<'_, Token>) -> Result<u16, LexErr> {
228 let Some(hex) = lx.slice().strip_prefix(['X', 'x']) else {
229 unreachable!("Lexer slice should have contained an X or x");
230 };
231
232 u16::from_str_radix(hex, 16)
233 .map_err(|e| convert_int_error(e.kind(), LexErr::InvalidHex, LexErr::InvalidHexEmpty, LexErr::DoesNotFitU16, hex))
234}
235fn lex_signed_hex(lx: &Lexer<'_, Token>) -> Result<i16, LexErr> {
236 let Some(hex) = lx.slice().strip_prefix(['X', 'x']) else {
237 unreachable!("Lexer slice should have contained an X or x");
238 };
239
240 i16::from_str_radix(hex, 16)
241 .map_err(|e| convert_int_error(e.kind(), LexErr::InvalidHex, LexErr::InvalidHexEmpty, LexErr::DoesNotFitI16, hex))
242}
243fn lex_reg(lx: &Lexer<'_, Token>) -> Result<u8, LexErr> {
244 lx.slice()[1..].parse::<u8>().ok()
245 .filter(|&r| r < 8)
246 .ok_or(LexErr::InvalidReg)
247}
248fn lex_str_literal(lx: &mut Lexer<'_, Token>) -> Result<String, LexErr> {
249 let rem = lx.remainder()
250 .lines()
251 .next()
252 .unwrap_or("");
253
254 let mut buf = String::new();
255 let mut done = false;
256 let mut remaining = rem;
257 while let Some(i) = remaining.find(['\\', '"']) {
258 let left = &remaining[..i];
259 let mid = &remaining[i..i+1];
260 let right = &remaining[i+1..];
261
262 buf.push_str(left);
263 match mid {
264 "\\" => {
265 let esc = right.as_bytes()
267 .first()
268 .unwrap_or_else(|| unreachable!("expected character after escape")); match esc {
270 b'n' => buf.push('\n'),
271 b'r' => buf.push('\r'),
272 b't' => buf.push('\t'),
273 b'\\' => buf.push('\\'),
274 b'0' => buf.push('\0'),
275 b'"' => buf.push('\"'),
276 &c => {
277 buf.push('\\');
278 buf.push(char::from(c));
279 }
280 }
281
282 remaining = &right[1..];
283 },
284 "\"" => {
285 remaining = right;
286 done = true;
287 break
288 },
289 _ => unreachable!(r#"find loop should've matched '\' or '"'"#)
290 }
291 }
292
293 if !done {
294 lx.bump(rem.len());
295 return Err(LexErr::UnclosedStrLit);
296 }
297
298 lx.bump(rem.len() - remaining.len());
299
300 match buf.len() < usize::from(u16::MAX) {
301 true => Ok(buf),
302 false => Err(LexErr::StrLitTooBig),
303 }
304}
305
#[cfg(test)]
mod tests {
    use logos::Logos;

    use crate::err::LexErr;
    use crate::parse::lex::{Ident, Token};

    /// Builds the token for a label identifier.
    fn label(s: &str) -> Token {
        Token::Ident(Ident::Label(s.to_owned()))
    }
    /// Builds a directive token (name given without the leading dot).
    fn directive(s: &str) -> Token {
        Token::Directive(s.to_owned())
    }
    /// Builds a string literal token with the given (unescaped) contents.
    fn str_literal(s: &str) -> Token {
        Token::String(s.to_owned())
    }

    /// Asserts that `src` lexes, without errors, to exactly `expected`.
    #[track_caller]
    fn assert_tokens(src: &str, expected: impl IntoIterator<Item = Token>) {
        let mut lexer = Token::lexer(src);
        for token in expected {
            assert_eq!(lexer.next(), Some(Ok(token)));
        }
        assert_eq!(lexer.next(), None);
    }

    /// Asserts that the first item lexed from `src` is the error `err`.
    #[track_caller]
    fn assert_first_err(src: &str, err: LexErr) {
        assert_eq!(Token::lexer(src).next(), Some(Err(err)));
    }

    #[test]
    fn test_numeric_dec_success() {
        assert_tokens("0 123 456 789", [0, 123, 456, 789].map(Token::Unsigned));
        assert_tokens("-123 -456 -789", [-123, -456, -789].map(Token::Signed));
        assert_tokens(
            "#100 #200 #-300",
            [Token::Unsigned(100), Token::Unsigned(200), Token::Signed(-300)],
        );
    }

    #[test]
    fn test_numeric_hex_success() {
        assert_tokens(
            "x2110 xABCD X2110 XABCD Xabcd XaBcD xA xAA xAAA xAAAA",
            [
                0x2110, 0xABCD, 0x2110, 0xABCD, 0xABCD, 0xABCD,
                0x000A, 0x00AA, 0x0AAA, 0xAAAA,
            ]
            .map(Token::Unsigned),
        );
        assert_tokens(
            "x-9 x-1234 X-1234",
            [-0x9, -0x1234, -0x1234].map(Token::Signed),
        );
    }

    #[test]
    fn test_numeric_dec_overflow() {
        assert_tokens(
            "32767 32768 -1 -32767 -32768 65535",
            [
                Token::Unsigned(32767),
                Token::Unsigned(32768),
                Token::Signed(-1),
                Token::Signed(-32767),
                Token::Signed(-32768),
                Token::Unsigned(65535),
            ],
        );

        assert_first_err("65536", LexErr::DoesNotFitU16);
        assert_first_err("999999999999999999999999999999", LexErr::DoesNotFitU16);
        assert_first_err("-32769", LexErr::DoesNotFitI16);
        assert_first_err("-65536", LexErr::DoesNotFitI16);
    }

    #[test]
    fn test_numeric_hex_overflow() {
        assert_tokens(
            "x0000 x7FFF x8000 xFFFF x-7FFF x-8000",
            [
                Token::Unsigned(0x0000),
                Token::Unsigned(0x7FFF),
                Token::Unsigned(0x8000),
                Token::Unsigned(0xFFFF),
                Token::Signed(-0x7FFF),
                Token::Signed(-0x8000),
            ],
        );

        assert_first_err("xABCDEF", LexErr::DoesNotFitU16);
        assert_first_err("x0123456789ABCDEF0123456789ABCDEF", LexErr::DoesNotFitU16);
        assert_first_err("x-8001", LexErr::DoesNotFitI16);
        assert_first_err("x-FFFF", LexErr::DoesNotFitI16);
    }

    #[test]
    fn test_numeric_dec_invalid() {
        let cases = [
            ("#Q", LexErr::InvalidNumeric),
            ("3Q", LexErr::InvalidNumeric),
            ("##", LexErr::InvalidNumeric),
            ("#", LexErr::InvalidDecEmpty),
            ("#-", LexErr::InvalidDecEmpty),
            ("-#1", LexErr::InvalidNumeric),
        ];
        for (src, err) in cases {
            assert_first_err(src, err);
        }
    }

    #[test]
    fn test_numeric_hex_invalid() {
        let cases = [
            ("x0Q", LexErr::InvalidHex),
            ("x-", LexErr::InvalidHexEmpty),
            ("-x7FFF", LexErr::InvalidNumeric),
        ];
        for (src, err) in cases {
            assert_first_err(src, err);
        }
    }

    #[test]
    fn test_regs() {
        assert_tokens("R0 R1 R2 R3 R4 R5 R6 R7", (0..8).map(Token::Reg));

        for src in ["R8", "R9", "R10", "R99999999"] {
            assert_first_err(src, LexErr::InvalidReg);
        }

        // A minus sign cannot be part of a register, so this splits in two.
        assert_tokens("R-1", [label("R"), Token::Signed(-1)]);
    }

    #[test]
    fn test_str() {
        assert_tokens(
            r#" " " "abc" "def" "!@#$%^&*()" "#,
            [" ", "abc", "def", "!@#$%^&*()"].map(str_literal),
        );
    }

    #[test]
    fn test_str_empty() {
        assert_tokens(r#" "" "#, [str_literal("")]);
    }

    #[test]
    fn test_str_escape() {
        assert_tokens(
            r#" "\n" "\r" "\t" "\\" "\"" "\0" "\e" "#,
            ["\n", "\r", "\t", "\\", "\"", "\0", "\\e"].map(str_literal),
        );

        assert_tokens(
            r#" "wqftnzsegpfykvzekyvketskestve\nsreatkrsetkrsetksretnrsk" "#,
            [str_literal("wqftnzsegpfykvzekyvketskestve\nsreatkrsetkrsetksretnrsk")],
        );
    }

    #[test]
    fn test_str_big() {
        // Lengths that must be accepted.
        for len in [32767, 32768, 32769, 65533, 65534] {
            let body = "0".repeat(len);
            let src = format!(r#""{body}""#);
            assert_eq!(Token::lexer(&src).next(), Some(Ok(str_literal(&body))));
        }

        // Lengths that must be rejected.
        for len in [65535, 65536] {
            let body = "0".repeat(len);
            let src = format!(r#""{body}""#);
            assert_eq!(Token::lexer(&src).next(), Some(Err(LexErr::StrLitTooBig)));
        }

        // The limit applies to the unescaped contents, not the source text.
        let input = format!(r#""{:065533}\n""#, 0);
        let parsed = format!("{:065533}\n", 0);
        assert_eq!(Token::lexer(&input).next(), Some(Ok(str_literal(&parsed))));
    }

    #[test]
    fn test_str_unclosed() {
        assert_first_err(r#"""#, LexErr::UnclosedStrLit);
        // A quote on the following line does not close the literal.
        assert_first_err(
            r#""
 ""#,
            LexErr::UnclosedStrLit,
        );
    }

    #[test]
    fn test_keywords_labels() {
        let kws = stringify!(
            ADD AND NOT BR BRP BRZ BRZP BRN BRNP BRNZ BRNZP
            JMP JSR JSRR LD LDI LDR LEA ST STI STR TRAP NOP
            RET RTI GETC OUT PUTC PUTS IN PUTSP HALT
        );
        for m_token in Token::lexer(kws) {
            let token = m_token.unwrap();
            if token == Token::NewLine {
                continue;
            }
            let is_keyword = match &token {
                Token::Ident(Ident::Label(_)) => false,
                Token::Ident(_) => true,
                _ => false,
            };
            assert!(is_keyword, "Expected {token:?} to be keyword");
        }

        // Mnemonics are recognized regardless of letter case.
        assert_tokens(
            "ADD ADd AdD Add aDD aDd adD add",
            std::iter::repeat_with(|| Token::Ident(Ident::ADD)).take(8),
        );

        assert_tokens("ARST gmneio _", ["ARST", "gmneio", "_"].map(label));
    }

    #[test]
    fn test_directive() {
        assert_tokens(
            ".fill .abc .2a ._",
            ["fill", "abc", "2a", "_"].map(directive),
        );
    }

    #[test]
    fn test_punct() {
        let mut tokens = Token::lexer("0\n1,2:3 ;; abcdef");
        let expected = [
            Token::Unsigned(0),
            Token::NewLine,
            Token::Unsigned(1),
            Token::Comma,
            Token::Unsigned(2),
            Token::Colon,
            Token::Unsigned(3),
            Token::Comment,
        ];
        for token in expected {
            assert_eq!(tokens.next(), Some(Ok(token)));
        }
    }

    #[test]
    fn test_invalid_symbol() {
        // Every ASCII byte that is expected to be rejected when lexed alone.
        let invalid_ranges = [
            0x00..=0x08u8,
            0x0B..=0x1F,
            0x21..=0x21,
            0x24..=0x2B,
            0x2F..=0x2F,
            0x3C..=0x40,
            0x5B..=0x5E,
            0x60..=0x60,
            0x7B..=0x7F,
        ];
        for c in invalid_ranges.into_iter().flatten() {
            let slice = [c];
            let string = std::str::from_utf8(&slice).unwrap();
            assert_eq!(
                Token::lexer(string).next(),
                Some(Err(LexErr::InvalidSymbol)),
                "Expected {string:?} (0x{c:02X}) to be an invalid symbol"
            );
        }
    }
}