1#[cfg(test)]
2use alloc::{vec, vec::Vec};
3use core::char;
4use core::fmt;
5use core::result::Result;
6use core::str;
7use unicode_xid::UnicodeXID;
8
9use self::Token::*;
10
/// Lexer over a single source text, producing [`Token`]s paired with the
/// [`Span`] each one covers.
#[derive(Clone)]
pub struct Tokenizer<'a> {
 /// The complete text being tokenized; spans are resolved by slicing into it.
 input: &'a str,
 /// Value added to byte indices of `input` when building the positions
 /// stored in spans and in errors raised while tokenizing.
 span_offset: u32,
 /// Cursor over the characters of `input` that have not been consumed yet.
 chars: CrlfFold<'a>,
}
17
/// Character iterator that reports each `"\r\n"` pair as a single `'\n'`
/// (see its `Iterator` impl).
#[derive(Clone)]
struct CrlfFold<'a> {
 /// Underlying `(byte index, char)` iterator over the source text.
 chars: str::CharIndices<'a>,
}
22
/// A `start`/`end` pair of positions, or the "unknown" span.
///
/// The unknown span is encoded by storing `u32::MAX` in a bound; it is the
/// value produced by [`Span::default`].
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct Span {
    start: u32,
    end: u32,
}

impl Default for Span {
    /// Returns the unknown span (both bounds hold the `u32::MAX` sentinel).
    fn default() -> Self {
        Self {
            start: Span::UNKNOWN,
            end: Span::UNKNOWN,
        }
    }
}

impl Span {
    /// Sentinel stored in a bound that has not been set.
    const UNKNOWN: u32 = u32::MAX;

    /// Builds a span from `start` to `end`.
    ///
    /// # Panics
    ///
    /// Panics if either bound equals `u32::MAX`, which is reserved for the
    /// unknown span.
    pub fn new(start: u32, end: u32) -> Span {
        let candidate = Self { start, end };
        assert!(candidate.is_known(), "cannot create a span with u32::MAX");
        candidate
    }

    /// Shifts both bounds forward by `offset`; an unknown span is left as is.
    pub fn adjust(&mut self, offset: u32) {
        if !self.is_known() {
            return;
        }
        self.start += offset;
        self.end += offset;
    }

    /// Returns the start position.
    ///
    /// # Panics
    ///
    /// Panics if the span is unknown.
    pub fn start(&self) -> u32 {
        assert!(self.is_known(), "cannot get start of unknown span");
        self.start
    }

    /// Returns the end position.
    ///
    /// # Panics
    ///
    /// Panics if the span is unknown.
    pub fn end(&self) -> u32 {
        assert!(self.is_known(), "cannot get end of unknown span");
        self.end
    }

    /// Sets the end position. If the span was unknown beforehand, the start
    /// is set to the same value so the result is a known, empty span.
    pub fn set_end(&mut self, new_end: u32) {
        let was_known = self.is_known();
        self.end = new_end;
        if !was_known {
            self.start = new_end;
        }
    }

    /// Sets the start position. If the span was unknown beforehand, the end
    /// is set to the same value so the result is a known, empty span.
    pub fn set_start(&mut self, new_start: u32) {
        let was_known = self.is_known();
        self.start = new_start;
        if !was_known {
            self.end = new_start;
        }
    }

    /// Returns `true` when neither bound holds the `u32::MAX` sentinel.
    pub fn is_known(&self) -> bool {
        !(self.start == Span::UNKNOWN || self.end == Span::UNKNOWN)
    }
}
90
/// The kinds of token the tokenizer produces.
///
/// A token carries no text of its own; the text is recovered from the
/// [`Span`] returned alongside it.
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
pub enum Token {
 /// A run of spaces, tabs and newlines.
 Whitespace,
 /// A `//` line comment or a (possibly nested) `/* ... */` block comment.
 Comment,

 // Punctuation and operators.
 Equals,
 Comma,
 Colon,
 Period,
 Semicolon,
 LeftParen,
 RightParen,
 LeftBrace,
 RightBrace,
 LessThan,
 GreaterThan,
 /// The two-character sequence `->`.
 RArrow,
 Star,
 At,
 Slash,
 Plus,
 Minus,

 // Keywords. Variants with a trailing underscore (`String_`, `Option_`,
 // `Result_`, `From_`) stand for the keyword spelled without it.
 Use,
 Type,
 Func,
 U8,
 U16,
 U32,
 U64,
 S8,
 S16,
 S32,
 S64,
 F32,
 F64,
 Char,
 Record,
 Resource,
 Own,
 Borrow,
 Flags,
 Variant,
 Enum,
 Bool,
 String_,
 Option_,
 Result_,
 Future,
 Stream,
 /// The hyphenated keyword `error-context`.
 ErrorContext,
 List,
 Map,
 /// A lone `_`.
 Underscore,
 As,
 From_,
 Static,
 Interface,
 Tuple,
 Import,
 Export,
 World,
 Package,
 Constructor,
 Async,

 /// An identifier-like word that is not one of the keywords.
 Id,
 /// A `%` optionally followed by an identifier-like word.
 ExplicitId,

 /// A run of ASCII digits.
 Integer,

 // More keywords.
 Include,
 With,
}
165
/// Errors produced while tokenizing input or validating identifiers.
///
/// The leading `u32` of each tuple variant (the `at` field for `Wanted`) is
/// the position returned by `Error::position`.
#[derive(Eq, PartialEq, Debug)]
#[allow(dead_code)]
pub enum Error {
 /// The input contains a control character other than `\n`, `\r` or `\t`.
 ControlCodepoint(u32, char),
 /// The input contains a codepoint that is rejected as discouraged by Unicode.
 DeprecatedCodepoint(u32, char),
 /// The input contains a bidirectional override codepoint.
 ForbiddenCodepoint(u32, char),
 /// An identifier contains a character, or a mix of letter cases, that
 /// `validate_id` rejects.
 InvalidCharInId(u32, char),
 /// An identifier is empty or has an empty `-`-separated part.
 IdPartEmpty(u32),
 /// An invalid escape in a string (per its `Display` message); nothing in
 /// the visible part of this file constructs it.
 InvalidEscape(u32, char),
 /// A character that cannot begin any token.
 Unexpected(u32, char),
 /// A block comment was still open at end of input; the position is where
 /// the comment began.
 UnterminatedComment(u32),
 /// A specific token was required but a different one, or end of input,
 /// was found.
 Wanted {
 /// Start of the token that was found, or the end of input.
 at: u32,
 /// Description of the token that was required.
 expected: &'static str,
 /// Description of what was found instead (`"eof"` at end of input).
 found: &'static str,
 },
}
183
184impl<'a> Tokenizer<'a> {
185 pub fn new(input: &'a str, span_offset: u32) -> Result<Tokenizer<'a>, Error> {
186 detect_invalid_input(input)?;
187
188 let mut t = Tokenizer {
189 input,
190 span_offset,
191 chars: CrlfFold {
192 chars: input.char_indices(),
193 },
194 };
195 t.eatc('\u{feff}');
197 Ok(t)
198 }
199
200 pub fn expect_semicolon(&mut self) -> Result<(), Error> {
201 self.expect(Token::Semicolon)?;
202 Ok(())
203 }
204
205 pub fn get_span(&self, span: Span) -> &'a str {
206 let start = usize::try_from(span.start() - self.span_offset).unwrap();
207 let end = usize::try_from(span.end() - self.span_offset).unwrap();
208 &self.input[start..end]
209 }
210
211 pub fn parse_id(&self, span: Span) -> Result<&'a str, Error> {
212 let ret = self.get_span(span);
213 validate_id(span.start(), &ret)?;
214 Ok(ret)
215 }
216
217 pub fn parse_explicit_id(&self, span: Span) -> Result<&'a str, Error> {
218 let token = self.get_span(span);
219 let id_part = token.strip_prefix('%').unwrap();
220 validate_id(span.start(), id_part)?;
221 Ok(id_part)
222 }
223
224 pub fn next(&mut self) -> Result<Option<(Span, Token)>, Error> {
225 loop {
226 match self.next_raw()? {
227 Some((_, Token::Whitespace)) | Some((_, Token::Comment)) => {}
228 other => break Ok(other),
229 }
230 }
231 }
232
233 pub fn next_raw(&mut self) -> Result<Option<(Span, Token)>, Error> {
237 let (str_start, ch) = match self.chars.next() {
238 Some(pair) => pair,
239 None => return Ok(None),
240 };
241 let start = self.span_offset + u32::try_from(str_start).unwrap();
242 let token = match ch {
243 '\n' | '\t' | ' ' => {
244 while self.eatc(' ') || self.eatc('\t') || self.eatc('\n') {}
246 Whitespace
247 }
248 '/' => {
249 if self.eatc('/') {
251 for (_, ch) in &mut self.chars {
252 if ch == '\n' {
253 break;
254 }
255 }
256 Comment
257 } else if self.eatc('*') {
259 let mut depth = 1;
260 while depth > 0 {
261 let (_, ch) = match self.chars.next() {
262 Some(pair) => pair,
263 None => return Err(Error::UnterminatedComment(start)),
264 };
265 match ch {
266 '/' if self.eatc('*') => depth += 1,
267 '*' if self.eatc('/') => depth -= 1,
268 _ => {}
269 }
270 }
271 Comment
272 } else {
273 Slash
274 }
275 }
276 '=' => Equals,
277 ',' => Comma,
278 ':' => Colon,
279 '.' => Period,
280 ';' => Semicolon,
281 '(' => LeftParen,
282 ')' => RightParen,
283 '{' => LeftBrace,
284 '}' => RightBrace,
285 '<' => LessThan,
286 '>' => GreaterThan,
287 '*' => Star,
288 '@' => At,
289 '-' => {
290 if self.eatc('>') {
291 RArrow
292 } else {
293 Minus
294 }
295 }
296 '+' => Plus,
297 '%' => {
298 let mut iter = self.chars.clone();
299 if let Some((_, ch)) = iter.next() {
300 if is_keylike_start(ch) {
301 self.chars = iter.clone();
302 while let Some((_, ch)) = iter.next() {
303 if !is_keylike_continue(ch) {
304 break;
305 }
306 self.chars = iter.clone();
307 }
308 }
309 }
310 ExplicitId
311 }
312 ch if is_keylike_start(ch) => {
313 let remaining = self.chars.chars.as_str().len();
314 let mut iter = self.chars.clone();
315 while let Some((_, ch)) = iter.next() {
316 if !is_keylike_continue(ch) {
317 break;
318 }
319 self.chars = iter.clone();
320 }
321 let str_end =
322 str_start + ch.len_utf8() + (remaining - self.chars.chars.as_str().len());
323 match &self.input[str_start..str_end] {
324 "use" => Use,
325 "type" => Type,
326 "func" => Func,
327 "u8" => U8,
328 "u16" => U16,
329 "u32" => U32,
330 "u64" => U64,
331 "s8" => S8,
332 "s16" => S16,
333 "s32" => S32,
334 "s64" => S64,
335 "f32" => F32,
336 "f64" => F64,
337 "char" => Char,
338 "resource" => Resource,
339 "own" => Own,
340 "borrow" => Borrow,
341 "record" => Record,
342 "flags" => Flags,
343 "variant" => Variant,
344 "enum" => Enum,
345 "bool" => Bool,
346 "string" => String_,
347 "option" => Option_,
348 "result" => Result_,
349 "future" => Future,
350 "stream" => Stream,
351 "error-context" => ErrorContext,
352 "list" => List,
353 "map" => Map,
354 "_" => Underscore,
355 "as" => As,
356 "from" => From_,
357 "static" => Static,
358 "interface" => Interface,
359 "tuple" => Tuple,
360 "world" => World,
361 "import" => Import,
362 "export" => Export,
363 "package" => Package,
364 "constructor" => Constructor,
365 "include" => Include,
366 "with" => With,
367 "async" => Async,
368 _ => Id,
369 }
370 }
371
372 ch if ch.is_ascii_digit() => {
373 let mut iter = self.chars.clone();
374 while let Some((_, ch)) = iter.next() {
375 if !ch.is_ascii_digit() {
376 break;
377 }
378 self.chars = iter.clone();
379 }
380
381 Integer
382 }
383
384 ch => return Err(Error::Unexpected(start, ch)),
385 };
386 let end = match self.chars.clone().next() {
387 Some((i, _)) => i,
388 None => self.input.len(),
389 };
390
391 let end = self.span_offset + u32::try_from(end).unwrap();
392 Ok(Some((Span::new(start, end), token)))
393 }
394
395 pub fn eat(&mut self, expected: Token) -> Result<bool, Error> {
396 let mut other = self.clone();
397 match other.next()? {
398 Some((_span, found)) if expected == found => {
399 *self = other;
400 Ok(true)
401 }
402 Some(_) => Ok(false),
403 None => Ok(false),
404 }
405 }
406
407 pub fn expect(&mut self, expected: Token) -> Result<Span, Error> {
408 match self.next()? {
409 Some((span, found)) => {
410 if expected == found {
411 Ok(span)
412 } else {
413 Err(Error::Wanted {
414 at: span.start(),
415 expected: expected.describe(),
416 found: found.describe(),
417 })
418 }
419 }
420 None => Err(Error::Wanted {
421 at: self.span_offset + u32::try_from(self.input.len()).unwrap(),
422 expected: expected.describe(),
423 found: "eof",
424 }),
425 }
426 }
427
428 fn eatc(&mut self, ch: char) -> bool {
429 let mut iter = self.chars.clone();
430 match iter.next() {
431 Some((_, ch2)) if ch == ch2 => {
432 self.chars = iter;
433 true
434 }
435 _ => false,
436 }
437 }
438
439 pub fn eof_span(&self) -> Span {
440 let end = self.span_offset + u32::try_from(self.input.len()).unwrap();
441 Span::new(end, end)
442 }
443}
444
445impl<'a> Iterator for CrlfFold<'a> {
446 type Item = (usize, char);
447
448 fn next(&mut self) -> Option<(usize, char)> {
449 self.chars.next().map(|(i, c)| {
450 if c == '\r' {
451 let mut attempt = self.chars.clone();
452 if let Some((_, '\n')) = attempt.next() {
453 self.chars = attempt;
454 return (i, '\n');
455 }
456 }
457 (i, c)
458 })
459 }
460}
461
462fn detect_invalid_input(input: &str) -> Result<(), Error> {
463 for (pos, ch) in input.char_indices() {
465 match ch {
466 '\n' | '\r' | '\t' => {}
467
468 '\u{202a}' | '\u{202b}' | '\u{202c}' | '\u{202d}' | '\u{202e}' | '\u{2066}'
474 | '\u{2067}' | '\u{2068}' | '\u{2069}' => {
475 return Err(Error::ForbiddenCodepoint(u32::try_from(pos).unwrap(), ch));
476 }
477
478 '\u{149}' | '\u{673}' | '\u{f77}' | '\u{f79}' | '\u{17a3}' | '\u{17a4}'
486 | '\u{17b4}' | '\u{17b5}' => {
487 return Err(Error::DeprecatedCodepoint(u32::try_from(pos).unwrap(), ch));
488 }
489
490 ch if ch.is_control() => {
494 return Err(Error::ControlCodepoint(u32::try_from(pos).unwrap(), ch));
495 }
496
497 _ => {}
498 }
499 }
500
501 Ok(())
502}
503
504fn is_keylike_start(ch: char) -> bool {
505 UnicodeXID::is_xid_start(ch) || ch == '_' || ch == '-'
508}
509
510fn is_keylike_continue(ch: char) -> bool {
511 UnicodeXID::is_xid_continue(ch) || ch == '-'
513}
514
515pub fn validate_id(start: u32, id: &str) -> Result<(), Error> {
516 if id.is_empty() {
518 return Err(Error::IdPartEmpty(start));
519 }
520
521 for (idx, part) in id.split('-').enumerate() {
523 let Some(first_char) = part.chars().next() else {
526 return Err(Error::IdPartEmpty(start));
527 };
528 if idx == 0 && !first_char.is_ascii_alphabetic() {
529 return Err(Error::InvalidCharInId(start, first_char));
530 }
531 let mut upper = None;
532 for ch in part.chars() {
533 if ch.is_ascii_digit() {
534 } else if ch.is_ascii_uppercase() {
536 if upper.is_none() {
537 upper = Some(true);
538 } else if let Some(false) = upper {
539 return Err(Error::InvalidCharInId(start, ch));
540 }
541 } else if ch.is_ascii_lowercase() {
542 if upper.is_none() {
543 upper = Some(false);
544 } else if let Some(true) = upper {
545 return Err(Error::InvalidCharInId(start, ch));
546 }
547 } else {
548 return Err(Error::InvalidCharInId(start, ch));
549 }
550 }
551 }
552
553 Ok(())
554}
555
556impl Token {
557 pub fn describe(&self) -> &'static str {
558 match self {
559 Whitespace => "whitespace",
560 Comment => "a comment",
561 Equals => "'='",
562 Comma => "','",
563 Colon => "':'",
564 Period => "'.'",
565 Semicolon => "';'",
566 LeftParen => "'('",
567 RightParen => "')'",
568 LeftBrace => "'{'",
569 RightBrace => "'}'",
570 LessThan => "'<'",
571 GreaterThan => "'>'",
572 Use => "keyword `use`",
573 Type => "keyword `type`",
574 Func => "keyword `func`",
575 U8 => "keyword `u8`",
576 U16 => "keyword `u16`",
577 U32 => "keyword `u32`",
578 U64 => "keyword `u64`",
579 S8 => "keyword `s8`",
580 S16 => "keyword `s16`",
581 S32 => "keyword `s32`",
582 S64 => "keyword `s64`",
583 F32 => "keyword `f32`",
584 F64 => "keyword `f64`",
585 Char => "keyword `char`",
586 Own => "keyword `own`",
587 Borrow => "keyword `borrow`",
588 Resource => "keyword `resource`",
589 Record => "keyword `record`",
590 Flags => "keyword `flags`",
591 Variant => "keyword `variant`",
592 Enum => "keyword `enum`",
593 Bool => "keyword `bool`",
594 String_ => "keyword `string`",
595 Option_ => "keyword `option`",
596 Result_ => "keyword `result`",
597 Future => "keyword `future`",
598 Stream => "keyword `stream`",
599 ErrorContext => "keyword `error-context`",
600 List => "keyword `list`",
601 Map => "keyword `map`",
602 Underscore => "keyword `_`",
603 Id => "an identifier",
604 ExplicitId => "an '%' identifier",
605 RArrow => "`->`",
606 Star => "`*`",
607 At => "`@`",
608 Slash => "`/`",
609 Plus => "`+`",
610 Minus => "`-`",
611 As => "keyword `as`",
612 From_ => "keyword `from`",
613 Static => "keyword `static`",
614 Interface => "keyword `interface`",
615 Tuple => "keyword `tuple`",
616 Import => "keyword `import`",
617 Export => "keyword `export`",
618 World => "keyword `world`",
619 Package => "keyword `package`",
620 Constructor => "keyword `constructor`",
621 Integer => "an integer",
622 Include => "keyword `include`",
623 With => "keyword `with`",
624 Async => "keyword `async`",
625 }
626 }
627}
628
// Empty impl: `Error` uses only the trait's default methods, alongside its
// derived `Debug` and hand-written `Display`.
impl core::error::Error for Error {}
630
631impl Error {
632 pub fn position(&self) -> u32 {
634 match self {
635 Error::ControlCodepoint(at, _)
636 | Error::DeprecatedCodepoint(at, _)
637 | Error::ForbiddenCodepoint(at, _)
638 | Error::InvalidCharInId(at, _)
639 | Error::IdPartEmpty(at)
640 | Error::InvalidEscape(at, _)
641 | Error::Unexpected(at, _)
642 | Error::UnterminatedComment(at) => *at,
643 Error::Wanted { at, .. } => *at,
644 }
645 }
646}
647
648impl fmt::Display for Error {
649 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
650 match self {
651 Error::ControlCodepoint(_, ch) => write!(f, "Control code '{}'", ch.escape_unicode()),
652 Error::DeprecatedCodepoint(_, ch) => {
653 write!(
654 f,
655 "Codepoint {:?} is discouraged by Unicode",
656 ch.escape_unicode()
657 )
658 }
659 Error::ForbiddenCodepoint(_, ch) => {
660 write!(
661 f,
662 "Input contains bidirectional override codepoint {:?}",
663 ch.escape_unicode()
664 )
665 }
666 Error::Unexpected(_, ch) => write!(f, "unexpected character {ch:?}"),
667 Error::UnterminatedComment(_) => write!(f, "unterminated block comment"),
668 Error::Wanted {
669 expected, found, ..
670 } => write!(f, "expected {expected}, found {found}"),
671 Error::InvalidCharInId(_, ch) => write!(f, "invalid character in identifier {ch:?}"),
672 Error::IdPartEmpty(_) => write!(f, "identifiers must have characters between '-'s"),
673 Error::InvalidEscape(_, ch) => write!(f, "invalid escape in string {ch:?}"),
674 }
675 }
676}
677
/// Exercises `validate_id` with identifiers it must accept and ones it must
/// reject.
#[test]
fn test_validate_id() {
    // Well-formed identifiers.
    let accepted = [
        "apple",
        "apple-pear",
        "apple-pear-grape",
        "a0",
        "a",
        "a-a",
        "bool",
        "APPLE",
        "APPLE-PEAR",
        "APPLE-PEAR-GRAPE",
        "apple-PEAR-grape",
        "APPLE-pear-GRAPE",
        "ENOENT",
        "is-XML",
        "apple-0",
        "a0-000-3d4a-54FF",
    ];
    for id in accepted {
        validate_id(0, id).unwrap();
    }

    // Malformed identifiers.
    let rejected = [
        "",
        "0",
        "%",
        "$",
        "0a",
        ".",
        "·",
        "a a",
        "_",
        "-",
        "a-",
        "-a",
        "Apple",
        "applE",
        "-apple-pear",
        "apple-pear-",
        "apple_pear",
        "apple.pear",
        "apple pear",
        "apple/pear",
        "apple|pear",
        "apple-Pear",
        "()()",
        "",
        "*",
        "apple\u{5f3}pear",
        "apple\u{200c}pear",
        "apple\u{200d}pear",
        "apple--pear",
        "_apple",
        "apple_",
        "_Znwj",
        "__i386",
        "__i386__",
        "Москва",
        "garçon-hühnervögel-Москва-東京",
        "a0-000-3d4A-54Ff",
    ];
    for id in rejected {
        assert!(validate_id(0, id).is_err());
    }

    assert!(validate_id(0, "😼").is_err(), "non-identifier");
    assert!(validate_id(0, "\u{212b}").is_err(), "non-ascii");
}
737
/// Exercises the tokenizer end to end: the token kinds produced for valid
/// inputs, and rejection of inputs containing disallowed codepoints.
#[test]
fn test_tokenizer() {
    /// Tokenizes `s` and returns the kinds of all significant tokens.
    fn collect(s: &str) -> Result<Vec<Token>, Error> {
        let mut tokenizer = Tokenizer::new(s, 0)?;
        let mut kinds = Vec::new();
        while let Some((_, kind)) = tokenizer.next()? {
            kinds.push(kind);
        }
        Ok(kinds)
    }

    // Inputs paired with the exact token kinds they must produce.
    let cases: Vec<(&str, Vec<Token>)> = vec![
        ("", vec![]),
        ("_", vec![Token::Underscore]),
        ("apple", vec![Token::Id]),
        ("apple-pear", vec![Token::Id]),
        ("apple--pear", vec![Token::Id]),
        ("apple-Pear", vec![Token::Id]),
        ("apple-pear-grape", vec![Token::Id]),
        ("apple pear", vec![Token::Id, Token::Id]),
        ("_a_p_p_l_e_", vec![Token::Id]),
        ("garçon", vec![Token::Id]),
        ("hühnervögel", vec![Token::Id]),
        ("москва", vec![Token::Id]),
        ("東京", vec![Token::Id]),
        ("garçon-hühnervögel-москва-東京", vec![Token::Id]),
        ("a0", vec![Token::Id]),
        ("a", vec![Token::Id]),
        ("%a", vec![Token::ExplicitId]),
        ("%a-a", vec![Token::ExplicitId]),
        ("%bool", vec![Token::ExplicitId]),
        ("%", vec![Token::ExplicitId]),
        ("APPLE", vec![Token::Id]),
        ("APPLE-PEAR", vec![Token::Id]),
        ("APPLE-PEAR-GRAPE", vec![Token::Id]),
        ("apple-PEAR-grape", vec![Token::Id]),
        ("APPLE-pear-GRAPE", vec![Token::Id]),
        ("ENOENT", vec![Token::Id]),
        ("is-XML", vec![Token::Id]),
        ("func", vec![Token::Func]),
        (
            "a: func()",
            vec![
                Token::Id,
                Token::Colon,
                Token::Func,
                Token::LeftParen,
                Token::RightParen,
            ],
        ),
        ("resource", vec![Token::Resource]),
        ("own", vec![Token::Own]),
        (
            "own<some-id>",
            vec![Token::Own, Token::LessThan, Token::Id, Token::GreaterThan],
        ),
        ("borrow", vec![Token::Borrow]),
        (
            "borrow<some-id>",
            vec![
                Token::Borrow,
                Token::LessThan,
                Token::Id,
                Token::GreaterThan,
            ],
        ),
    ];
    for (source, expected) in cases {
        assert_eq!(collect(source).unwrap(), expected);
    }

    // Inputs that must be rejected, with the reason used on failure.
    let rejected = [
        ("\u{149}", "strongly discouraged"),
        ("\u{673}", "strongly discouraged"),
        ("\u{17a3}", "strongly discouraged"),
        ("\u{17a4}", "strongly discouraged"),
        ("\u{202a}", "bidirectional override"),
        ("\u{2068}", "bidirectional override"),
        ("\u{0}", "control code"),
        ("\u{b}", "control code"),
        ("\u{c}", "control code"),
        ("\u{85}", "control code"),
    ];
    for (source, reason) in rejected {
        assert!(collect(source).is_err(), "{}", reason);
    }
}
821}