1#[cfg(test)]
2use alloc::{vec, vec::Vec};
3use core::char;
4use core::fmt;
5use core::result::Result;
6use core::str;
7
8use self::Token::*;
9
/// A lexer over a borrowed source string that yields [`Token`]s together with
/// the [`Span`] each one covers.
///
/// The type is `Clone` so that callers (and the tokenizer itself, see `eat`)
/// can look ahead by cloning the state and discarding the clone.
#[derive(Clone)]
pub struct Tokenizer<'a> {
    /// The complete source text being tokenized.
    input: &'a str,
    /// Added to byte indices into `input` when building spans and the error
    /// positions produced while lexing.
    span_offset: u32,
    /// Cursor over the characters of `input` that have not been consumed yet,
    /// with each `\r\n` pair reported as a single `\n`.
    chars: CrlfFold<'a>,
}
16
/// Character iterator wrapper whose `Iterator` impl folds every `\r\n` pair
/// into a single `\n` item (carrying the byte index of the `\r`).
#[derive(Clone)]
struct CrlfFold<'a> {
    /// The underlying `(byte index, char)` iterator over the source text.
    chars: str::CharIndices<'a>,
}
21
/// A pair of `u32` positions, `start` and `end`, identifying a region of
/// source text.
///
/// A span whose `start` or `end` equals `u32::MAX` is treated as "unknown";
/// [`Span::default`] produces such a span.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct Span {
    start: u32,
    end: u32,
}

impl Default for Span {
    /// Returns the "unknown" span, with both ends set to `u32::MAX`.
    fn default() -> Span {
        Span {
            start: u32::MAX,
            end: u32::MAX,
        }
    }
}

impl Span {
    /// Creates a known span from `start` to `end`.
    ///
    /// # Panics
    ///
    /// Panics if either position is `u32::MAX`, which is reserved as the
    /// "unknown" sentinel.
    pub fn new(start: u32, end: u32) -> Span {
        let span = Span { start, end };
        assert!(span.is_known(), "cannot create a span with u32::MAX");
        span
    }

    /// Shifts both ends of a known span forward by `offset`; unknown spans are
    /// left untouched.
    ///
    /// # Panics
    ///
    /// Panics if the shifted positions overflow `u32` or collide with the
    /// `u32::MAX` sentinel. Plain `+=` would wrap silently in release builds
    /// and could turn a real span into a bogus or "unknown" one.
    pub fn adjust(&mut self, offset: u32) {
        if !self.is_known() {
            return;
        }
        let start = self
            .start
            .checked_add(offset)
            .expect("span start overflowed u32 while adjusting");
        let end = self
            .end
            .checked_add(offset)
            .expect("span end overflowed u32 while adjusting");
        // Going through `new` re-checks that neither end landed on the sentinel.
        *self = Span::new(start, end);
    }

    /// Returns the start position.
    ///
    /// # Panics
    ///
    /// Panics if the span is unknown.
    pub fn start(&self) -> u32 {
        assert!(self.is_known(), "cannot get start of unknown span");
        self.start
    }

    /// Returns the end position.
    ///
    /// # Panics
    ///
    /// Panics if the span is unknown.
    pub fn end(&self) -> u32 {
        assert!(self.is_known(), "cannot get end of unknown span");
        self.end
    }

    /// Sets the end position. If the span was unknown, the start is set to
    /// the same position so the result is an empty span at `new_end`.
    pub fn set_end(&mut self, new_end: u32) {
        if !self.is_known() {
            self.start = new_end;
        }
        self.end = new_end;
    }

    /// Sets the start position. If the span was unknown, the end is set to
    /// the same position so the result is an empty span at `new_start`.
    pub fn set_start(&mut self, new_start: u32) {
        if !self.is_known() {
            self.end = new_start;
        }
        self.start = new_start;
    }

    /// Returns `true` when neither end holds the `u32::MAX` sentinel.
    pub fn is_known(&self) -> bool {
        self.start != u32::MAX && self.end != u32::MAX
    }
}
89
/// The kind of a lexed token. The token's text is not stored here; it is
/// recovered from the accompanying [`Span`].
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
pub enum Token {
    /// A run of spaces, tabs and newlines.
    Whitespace,
    /// A `//` line comment or a (possibly nested) `/* ... */` block comment.
    Comment,

    // Punctuation and operators.
    Equals,
    Comma,
    Colon,
    Period,
    Semicolon,
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    LessThan,
    GreaterThan,
    /// The two-character sequence `->`.
    RArrow,
    Star,
    At,
    Slash,
    Plus,
    Minus,

    // Keywords. Variants with a trailing underscore avoid clashing with
    // common Rust names.
    Use,
    Type,
    Func,
    U8,
    U16,
    U32,
    U64,
    S8,
    S16,
    S32,
    S64,
    F32,
    F64,
    Char,
    Record,
    Resource,
    Own,
    Borrow,
    Flags,
    Variant,
    Enum,
    Bool,
    String_,
    Option_,
    Result_,
    Future,
    Stream,
    /// The hyphenated keyword `error-context`.
    ErrorContext,
    List,
    Map,
    /// A lone `_`.
    Underscore,
    As,
    From_,
    Static,
    Interface,
    Tuple,
    Import,
    Export,
    World,
    Package,
    Constructor,
    Async,

    /// Any identifier-like word that is not one of the keywords.
    Id,
    /// A `%` optionally followed by an identifier-like word.
    ExplicitId,

    /// A run of ASCII digits.
    Integer,

    // Further keywords.
    Include,
    With,
}
164
/// Errors reported by the tokenizer and by identifier validation.
///
/// The `u32` carried by each variant is the position associated with the
/// error; see `Error::position`.
#[derive(Eq, PartialEq, Debug)]
// NOTE(review): `InvalidEscape` is not constructed anywhere in the visible
// code, which is presumably why dead-code warnings are silenced; confirm.
#[allow(dead_code)]
pub enum Error {
    /// The input contains a control character other than `\n`, `\r` or `\t`.
    ControlCodepoint(u32, char),
    /// The input contains one of the codepoints the lexer rejects as
    /// deprecated.
    DeprecatedCodepoint(u32, char),
    /// The input contains a bidirectional override/isolate codepoint.
    ForbiddenCodepoint(u32, char),
    /// An identifier contains a character that is not allowed there.
    InvalidCharInId(u32, char),
    /// An identifier is empty or has an empty `-`-separated part.
    IdPartEmpty(u32),
    /// An invalid escape in a string.
    InvalidEscape(u32, char),
    /// A character that cannot start any token.
    Unexpected(u32, char),
    /// A `/*` block comment that is still open at the end of input; the
    /// position is where the comment started.
    UnterminatedComment(u32),
    /// A specific token was required but something else (or end of input)
    /// was found.
    Wanted {
        /// Position of the token that was found, or of the end of input.
        at: u32,
        /// Description of the token that was required.
        expected: &'static str,
        /// Description of what was found instead (`"eof"` at end of input).
        found: &'static str,
    },
}
182
183impl<'a> Tokenizer<'a> {
184 pub fn new(input: &'a str, span_offset: u32) -> Result<Tokenizer<'a>, Error> {
185 detect_invalid_input(input)?;
186
187 let mut t = Tokenizer {
188 input,
189 span_offset,
190 chars: CrlfFold {
191 chars: input.char_indices(),
192 },
193 };
194 t.eatc('\u{feff}');
196 Ok(t)
197 }
198
199 pub fn expect_semicolon(&mut self) -> Result<(), Error> {
200 self.expect(Token::Semicolon)?;
201 Ok(())
202 }
203
204 pub fn get_span(&self, span: Span) -> &'a str {
205 let start = usize::try_from(span.start() - self.span_offset).unwrap();
206 let end = usize::try_from(span.end() - self.span_offset).unwrap();
207 &self.input[start..end]
208 }
209
210 pub fn parse_id(&self, span: Span) -> Result<&'a str, Error> {
211 let ret = self.get_span(span);
212 validate_id(span.start(), &ret)?;
213 Ok(ret)
214 }
215
216 pub fn parse_explicit_id(&self, span: Span) -> Result<&'a str, Error> {
217 let token = self.get_span(span);
218 let id_part = token.strip_prefix('%').unwrap();
219 validate_id(span.start(), id_part)?;
220 Ok(id_part)
221 }
222
223 pub fn next(&mut self) -> Result<Option<(Span, Token)>, Error> {
224 loop {
225 match self.next_raw()? {
226 Some((_, Token::Whitespace)) | Some((_, Token::Comment)) => {}
227 other => break Ok(other),
228 }
229 }
230 }
231
    /// Lexes the next token, including whitespace and comments, and returns
    /// it with the span it covers. Returns `Ok(None)` at the end of input.
    ///
    /// # Errors
    ///
    /// * `Error::UnterminatedComment` if a block comment is still open when
    ///   the input ends.
    /// * `Error::Unexpected` if the next character cannot start any token.
    pub fn next_raw(&mut self) -> Result<Option<(Span, Token)>, Error> {
        let (str_start, ch) = match self.chars.next() {
            Some(pair) => pair,
            None => return Ok(None),
        };
        // Position of the token's first character, including the span offset.
        let start = self.span_offset + u32::try_from(str_start).unwrap();
        let token = match ch {
            // `\r\n` arrives here as a single `\n` thanks to `CrlfFold`.
            '\n' | '\t' | ' ' => {
                // Greedily absorb the rest of the whitespace run.
                while self.eatc(' ') || self.eatc('\t') || self.eatc('\n') {}
                Whitespace
            }
            '/' => {
                if self.eatc('/') {
                    // Line comment: consume up to and including the newline,
                    // or to the end of input.
                    for (_, ch) in &mut self.chars {
                        if ch == '\n' {
                            break;
                        }
                    }
                    Comment
                } else if self.eatc('*') {
                    // Block comment: `depth` counts unclosed `/*` openers so
                    // that nested comments are matched correctly.
                    let mut depth = 1;
                    while depth > 0 {
                        let (_, ch) = match self.chars.next() {
                            Some(pair) => pair,
                            None => return Err(Error::UnterminatedComment(start)),
                        };
                        match ch {
                            '/' if self.eatc('*') => depth += 1,
                            '*' if self.eatc('/') => depth -= 1,
                            _ => {}
                        }
                    }
                    Comment
                } else {
                    Slash
                }
            }
            '=' => Equals,
            ',' => Comma,
            ':' => Colon,
            '.' => Period,
            ';' => Semicolon,
            '(' => LeftParen,
            ')' => RightParen,
            '{' => LeftBrace,
            '}' => RightBrace,
            '<' => LessThan,
            '>' => GreaterThan,
            '*' => Star,
            '@' => At,
            '-' => {
                if self.eatc('>') {
                    RArrow
                } else {
                    Minus
                }
            }
            '+' => Plus,
            '%' => {
                // Peek on a clone; `self.chars` is only advanced past
                // characters that belong to the identifier.
                let mut iter = self.chars.clone();
                if let Some((_, ch)) = iter.next() {
                    if is_keylike_start(ch) {
                        self.chars = iter.clone();
                        while let Some((_, ch)) = iter.next() {
                            if !is_keylike_continue(ch) {
                                break;
                            }
                            self.chars = iter.clone();
                        }
                    }
                }
                // A `%` with no identifier after it is still an `ExplicitId`
                // token at this level.
                ExplicitId
            }
            // `-` also satisfies `is_keylike_start`, but the `'-'` arm above
            // matches first, so a word never begins with `-` here.
            ch if is_keylike_start(ch) => {
                // Bytes left after the first character; used below to work
                // out how many bytes the continuation characters occupy.
                let remaining = self.chars.chars.as_str().len();
                let mut iter = self.chars.clone();
                while let Some((_, ch)) = iter.next() {
                    if !is_keylike_continue(ch) {
                        break;
                    }
                    self.chars = iter.clone();
                }
                // End index = start + first character + consumed continuation
                // bytes. `ch` here is the first character again; the loop's
                // `ch` was scoped to the loop.
                let str_end =
                    str_start + ch.len_utf8() + (remaining - self.chars.chars.as_str().len());
                match &self.input[str_start..str_end] {
                    "use" => Use,
                    "type" => Type,
                    "func" => Func,
                    "u8" => U8,
                    "u16" => U16,
                    "u32" => U32,
                    "u64" => U64,
                    "s8" => S8,
                    "s16" => S16,
                    "s32" => S32,
                    "s64" => S64,
                    "f32" => F32,
                    "f64" => F64,
                    "char" => Char,
                    "resource" => Resource,
                    "own" => Own,
                    "borrow" => Borrow,
                    "record" => Record,
                    "flags" => Flags,
                    "variant" => Variant,
                    "enum" => Enum,
                    "bool" => Bool,
                    "string" => String_,
                    "option" => Option_,
                    "result" => Result_,
                    "future" => Future,
                    "stream" => Stream,
                    "error-context" => ErrorContext,
                    "list" => List,
                    "map" => Map,
                    "_" => Underscore,
                    "as" => As,
                    "from" => From_,
                    "static" => Static,
                    "interface" => Interface,
                    "tuple" => Tuple,
                    "world" => World,
                    "import" => Import,
                    "export" => Export,
                    "package" => Package,
                    "constructor" => Constructor,
                    "include" => Include,
                    "with" => With,
                    "async" => Async,
                    // Anything else is an identifier; its validity is checked
                    // separately (see `parse_id`).
                    _ => Id,
                }
            }

            ch if ch.is_ascii_digit() => {
                // Absorb the rest of the digit run.
                let mut iter = self.chars.clone();
                while let Some((_, ch)) = iter.next() {
                    if !ch.is_ascii_digit() {
                        break;
                    }
                    self.chars = iter.clone();
                }

                Integer
            }

            ch => return Err(Error::Unexpected(start, ch)),
        };
        // The token ends where the next unconsumed character begins, or at
        // the end of the input if nothing is left.
        let end = match self.chars.clone().next() {
            Some((i, _)) => i,
            None => self.input.len(),
        };

        let end = self.span_offset + u32::try_from(end).unwrap();
        Ok(Some((Span::new(start, end), token)))
    }
393
394 pub fn eat(&mut self, expected: Token) -> Result<bool, Error> {
395 let mut other = self.clone();
396 match other.next()? {
397 Some((_span, found)) if expected == found => {
398 *self = other;
399 Ok(true)
400 }
401 Some(_) => Ok(false),
402 None => Ok(false),
403 }
404 }
405
406 pub fn expect(&mut self, expected: Token) -> Result<Span, Error> {
407 match self.next()? {
408 Some((span, found)) => {
409 if expected == found {
410 Ok(span)
411 } else {
412 Err(Error::Wanted {
413 at: span.start(),
414 expected: expected.describe(),
415 found: found.describe(),
416 })
417 }
418 }
419 None => Err(Error::Wanted {
420 at: self.span_offset + u32::try_from(self.input.len()).unwrap(),
421 expected: expected.describe(),
422 found: "eof",
423 }),
424 }
425 }
426
427 fn eatc(&mut self, ch: char) -> bool {
428 let mut iter = self.chars.clone();
429 match iter.next() {
430 Some((_, ch2)) if ch == ch2 => {
431 self.chars = iter;
432 true
433 }
434 _ => false,
435 }
436 }
437
438 pub fn eof_span(&self) -> Span {
439 let end = self.span_offset + u32::try_from(self.input.len()).unwrap();
440 Span::new(end, end)
441 }
442}
443
444impl<'a> Iterator for CrlfFold<'a> {
445 type Item = (usize, char);
446
447 fn next(&mut self) -> Option<(usize, char)> {
448 self.chars.next().map(|(i, c)| {
449 if c == '\r' {
450 let mut attempt = self.chars.clone();
451 if let Some((_, '\n')) = attempt.next() {
452 self.chars = attempt;
453 return (i, '\n');
454 }
455 }
456 (i, c)
457 })
458 }
459}
460
461fn detect_invalid_input(input: &str) -> Result<(), Error> {
462 for (pos, ch) in input.char_indices() {
464 match ch {
465 '\n' | '\r' | '\t' => {}
466
467 '\u{202a}' | '\u{202b}' | '\u{202c}' | '\u{202d}' | '\u{202e}' | '\u{2066}'
473 | '\u{2067}' | '\u{2068}' | '\u{2069}' => {
474 return Err(Error::ForbiddenCodepoint(u32::try_from(pos).unwrap(), ch));
475 }
476
477 '\u{149}' | '\u{673}' | '\u{f77}' | '\u{f79}' | '\u{17a3}' | '\u{17a4}'
485 | '\u{17b4}' | '\u{17b5}' => {
486 return Err(Error::DeprecatedCodepoint(u32::try_from(pos).unwrap(), ch));
487 }
488
489 ch if ch.is_control() => {
493 return Err(Error::ControlCodepoint(u32::try_from(pos).unwrap(), ch));
494 }
495
496 _ => {}
497 }
498 }
499
500 Ok(())
501}
502
503fn is_keylike_start(ch: char) -> bool {
504 unicode_ident::is_xid_start(ch) || ch == '_' || ch == '-'
507}
508
509fn is_keylike_continue(ch: char) -> bool {
510 unicode_ident::is_xid_continue(ch) || ch == '-'
512}
513
514pub fn validate_id(start: u32, id: &str) -> Result<(), Error> {
515 if id.is_empty() {
517 return Err(Error::IdPartEmpty(start));
518 }
519
520 for (idx, part) in id.split('-').enumerate() {
522 let Some(first_char) = part.chars().next() else {
525 return Err(Error::IdPartEmpty(start));
526 };
527 if idx == 0 && !first_char.is_ascii_alphabetic() {
528 return Err(Error::InvalidCharInId(start, first_char));
529 }
530 let mut upper = None;
531 for ch in part.chars() {
532 if ch.is_ascii_digit() {
533 } else if ch.is_ascii_uppercase() {
535 if upper.is_none() {
536 upper = Some(true);
537 } else if let Some(false) = upper {
538 return Err(Error::InvalidCharInId(start, ch));
539 }
540 } else if ch.is_ascii_lowercase() {
541 if upper.is_none() {
542 upper = Some(false);
543 } else if let Some(true) = upper {
544 return Err(Error::InvalidCharInId(start, ch));
545 }
546 } else {
547 return Err(Error::InvalidCharInId(start, ch));
548 }
549 }
550 }
551
552 Ok(())
553}
554
555impl Token {
556 pub fn describe(&self) -> &'static str {
557 match self {
558 Whitespace => "whitespace",
559 Comment => "a comment",
560 Equals => "'='",
561 Comma => "','",
562 Colon => "':'",
563 Period => "'.'",
564 Semicolon => "';'",
565 LeftParen => "'('",
566 RightParen => "')'",
567 LeftBrace => "'{'",
568 RightBrace => "'}'",
569 LessThan => "'<'",
570 GreaterThan => "'>'",
571 Use => "keyword `use`",
572 Type => "keyword `type`",
573 Func => "keyword `func`",
574 U8 => "keyword `u8`",
575 U16 => "keyword `u16`",
576 U32 => "keyword `u32`",
577 U64 => "keyword `u64`",
578 S8 => "keyword `s8`",
579 S16 => "keyword `s16`",
580 S32 => "keyword `s32`",
581 S64 => "keyword `s64`",
582 F32 => "keyword `f32`",
583 F64 => "keyword `f64`",
584 Char => "keyword `char`",
585 Own => "keyword `own`",
586 Borrow => "keyword `borrow`",
587 Resource => "keyword `resource`",
588 Record => "keyword `record`",
589 Flags => "keyword `flags`",
590 Variant => "keyword `variant`",
591 Enum => "keyword `enum`",
592 Bool => "keyword `bool`",
593 String_ => "keyword `string`",
594 Option_ => "keyword `option`",
595 Result_ => "keyword `result`",
596 Future => "keyword `future`",
597 Stream => "keyword `stream`",
598 ErrorContext => "keyword `error-context`",
599 List => "keyword `list`",
600 Map => "keyword `map`",
601 Underscore => "keyword `_`",
602 Id => "an identifier",
603 ExplicitId => "an '%' identifier",
604 RArrow => "`->`",
605 Star => "`*`",
606 At => "`@`",
607 Slash => "`/`",
608 Plus => "`+`",
609 Minus => "`-`",
610 As => "keyword `as`",
611 From_ => "keyword `from`",
612 Static => "keyword `static`",
613 Interface => "keyword `interface`",
614 Tuple => "keyword `tuple`",
615 Import => "keyword `import`",
616 Export => "keyword `export`",
617 World => "keyword `world`",
618 Package => "keyword `package`",
619 Constructor => "keyword `constructor`",
620 Integer => "an integer",
621 Include => "keyword `include`",
622 With => "keyword `with`",
623 Async => "keyword `async`",
624 }
625 }
626}
627
// Marks `Error` as a standard error type; the trait's default methods are
// used as-is.
impl core::error::Error for Error {}
629
630impl Error {
631 pub fn position(&self) -> u32 {
633 match self {
634 Error::ControlCodepoint(at, _)
635 | Error::DeprecatedCodepoint(at, _)
636 | Error::ForbiddenCodepoint(at, _)
637 | Error::InvalidCharInId(at, _)
638 | Error::IdPartEmpty(at)
639 | Error::InvalidEscape(at, _)
640 | Error::Unexpected(at, _)
641 | Error::UnterminatedComment(at) => *at,
642 Error::Wanted { at, .. } => *at,
643 }
644 }
645}
646
647impl fmt::Display for Error {
648 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
649 match self {
650 Error::ControlCodepoint(_, ch) => write!(f, "Control code '{}'", ch.escape_unicode()),
651 Error::DeprecatedCodepoint(_, ch) => {
652 write!(
653 f,
654 "Codepoint {:?} is discouraged by Unicode",
655 ch.escape_unicode()
656 )
657 }
658 Error::ForbiddenCodepoint(_, ch) => {
659 write!(
660 f,
661 "Input contains bidirectional override codepoint {:?}",
662 ch.escape_unicode()
663 )
664 }
665 Error::Unexpected(_, ch) => write!(f, "unexpected character {ch:?}"),
666 Error::UnterminatedComment(_) => write!(f, "unterminated block comment"),
667 Error::Wanted {
668 expected, found, ..
669 } => write!(f, "expected {expected}, found {found}"),
670 Error::InvalidCharInId(_, ch) => write!(f, "invalid character in identifier {ch:?}"),
671 Error::IdPartEmpty(_) => write!(f, "identifiers must have characters between '-'s"),
672 Error::InvalidEscape(_, ch) => write!(f, "invalid escape in string {ch:?}"),
673 }
674 }
675}
676
/// Exercises `validate_id` against identifiers that must be accepted and
/// identifiers that must be rejected.
///
/// The cases are table-driven so that a failure names the identifier that
/// misbehaved instead of only pointing at a shared assertion line.
#[test]
fn test_validate_id() {
    let accepted = [
        "apple",
        "apple-pear",
        "apple-pear-grape",
        "a0",
        "a",
        "a-a",
        "bool",
        "APPLE",
        "APPLE-PEAR",
        "APPLE-PEAR-GRAPE",
        "apple-PEAR-grape",
        "APPLE-pear-GRAPE",
        "ENOENT",
        "is-XML",
        "apple-0",
        "a0-000-3d4a-54FF",
    ];
    for id in accepted {
        if let Err(err) = validate_id(0, id) {
            panic!("{id:?} should be accepted, got {err:?}");
        }
    }

    let rejected = [
        "",
        "0",
        "%",
        "$",
        "0a",
        ".",
        "·",
        "a a",
        "_",
        "-",
        "a-",
        "-a",
        "Apple",
        "applE",
        "-apple-pear",
        "apple-pear-",
        "apple_pear",
        "apple.pear",
        "apple pear",
        "apple/pear",
        "apple|pear",
        "apple-Pear",
        "()()",
        "*",
        "apple\u{5f3}pear",
        "apple\u{200c}pear",
        "apple\u{200d}pear",
        "apple--pear",
        "_apple",
        "apple_",
        "_Znwj",
        "__i386",
        "__i386__",
        "Москва",
        "garçon-hühnervögel-Москва-東京",
        "a0-000-3d4A-54Ff",
    ];
    for id in rejected {
        assert!(validate_id(0, id).is_err(), "{id:?} should be rejected");
    }

    assert!(validate_id(0, "😼").is_err(), "non-identifier");
    assert!(validate_id(0, "\u{212b}").is_err(), "non-ascii");
}
736
/// Checks the token kinds the tokenizer produces for a range of inputs, and
/// that inputs containing rejected codepoints fail to tokenize.
#[test]
fn test_tokenizer() {
    // Runs the filtering tokenizer to exhaustion and keeps only the kinds.
    fn collect(s: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Tokenizer::new(s, 0)?;
        let mut kinds = Vec::new();
        while let Some((_, kind)) = lexer.next()? {
            kinds.push(kind);
        }
        Ok(kinds)
    }

    let expect_tokens = |src: &str, expected: Vec<Token>| {
        assert_eq!(collect(src).unwrap(), expected, "tokenizing {src:?}");
    };

    expect_tokens("", vec![]);
    expect_tokens("_", vec![Token::Underscore]);
    expect_tokens("apple pear", vec![Token::Id, Token::Id]);

    // Words that lex as a single identifier, whether or not `validate_id`
    // would later accept them.
    let single_ids = [
        "apple",
        "apple-pear",
        "apple--pear",
        "apple-Pear",
        "apple-pear-grape",
        "_a_p_p_l_e_",
        "garçon",
        "hühnervögel",
        "москва",
        "東京",
        "garçon-hühnervögel-москва-東京",
        "a0",
        "a",
        "APPLE",
        "APPLE-PEAR",
        "APPLE-PEAR-GRAPE",
        "apple-PEAR-grape",
        "APPLE-pear-GRAPE",
        "ENOENT",
        "is-XML",
    ];
    for src in single_ids {
        expect_tokens(src, vec![Token::Id]);
    }

    // `%`-prefixed words, including a keyword and a bare `%`.
    for src in ["%a", "%a-a", "%bool", "%"] {
        expect_tokens(src, vec![Token::ExplicitId]);
    }

    expect_tokens("func", vec![Token::Func]);
    expect_tokens(
        "a: func()",
        vec![
            Token::Id,
            Token::Colon,
            Token::Func,
            Token::LeftParen,
            Token::RightParen,
        ],
    );

    expect_tokens("resource", vec![Token::Resource]);

    expect_tokens("own", vec![Token::Own]);
    expect_tokens(
        "own<some-id>",
        vec![Token::Own, Token::LessThan, Token::Id, Token::GreaterThan],
    );

    expect_tokens("borrow", vec![Token::Borrow]);
    expect_tokens(
        "borrow<some-id>",
        vec![
            Token::Borrow,
            Token::LessThan,
            Token::Id,
            Token::GreaterThan,
        ],
    );

    // Inputs that must be refused, paired with the reason.
    let refused = [
        ("\u{149}", "strongly discouraged"),
        ("\u{673}", "strongly discouraged"),
        ("\u{17a3}", "strongly discouraged"),
        ("\u{17a4}", "strongly discouraged"),
        ("\u{202a}", "bidirectional override"),
        ("\u{2068}", "bidirectional override"),
        ("\u{0}", "control code"),
        ("\u{b}", "control code"),
        ("\u{c}", "control code"),
        ("\u{85}", "control code"),
    ];
    for (src, why) in refused {
        assert!(collect(src).is_err(), "{why}");
    }
}
820}