//! Module for the lexer implementation.

use logos::{Logos, SpannedIter};
use miette::SourceSpan;
use std::fmt;

/// Converts a `logos` byte span into a `miette` source span.
fn to_source_span(span: logos::Span) -> SourceSpan {
    SourceSpan::new(span.start.into(), span.end - span.start)
}

/// Represents a lexer error.
#[derive(thiserror::Error, Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Error {
    /// An unexpected token was encountered.
    #[default]
    #[error("an unexpected token was encountered")]
    UnexpectedToken,
    /// An unterminated string was encountered.
    #[error("an unterminated string was encountered")]
    UnterminatedString,
    /// An unterminated comment was encountered.
    #[error("an unterminated comment was encountered")]
    UnterminatedComment,
    /// A disallowed bidirectional override codepoint was encountered.
    #[error("disallowed bidirectional override codepoint `{c}`", c = .0.escape_unicode())]
    DisallowedBidirectionalOverride(char),
    /// A discouraged Unicode codepoint was encountered.
    #[error("codepoint `{c}` is discouraged by Unicode", c = .0.escape_unicode())]
    DiscouragedUnicodeCodepoint(char),
    /// A disallowed control code was encountered.
    #[error("disallowed control code '{c}'", c = .0.escape_unicode())]
    DisallowedControlCode(char),
}

impl From<()> for Error {
    fn from(_: ()) -> Self {
        Error::UnexpectedToken
    }
}

/// Checks the source for codepoints that are never allowed in WAC input,
/// returning the error and span of the first offending character.
fn detect_invalid_input(source: &str) -> Result<(), (Error, SourceSpan)> {
    for (offset, ch) in source.char_indices() {
        match ch {
            '\r' | '\t' | '\n' => {}

            // Bidirectional override codepoints can be used to craft source code that
            // appears to have a different meaning than its actual meaning. See
            // [CVE-2021-42574] for background and motivation.
            //
            // [CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
            '\u{202a}' | '\u{202b}' | '\u{202c}' | '\u{202d}' | '\u{202e}' | '\u{2066}'
            | '\u{2067}' | '\u{2068}' | '\u{2069}' => {
                return Err((
                    Error::DisallowedBidirectionalOverride(ch),
                    SourceSpan::new(offset.into(), ch.len_utf8()),
                ));
            }

            // Disallow several characters which are deprecated or discouraged in Unicode.
            //
            // U+149 deprecated; see Unicode 13.0.0, sec. 7.1 Latin, Compatibility Digraphs.
            // U+673 deprecated; see Unicode 13.0.0, sec. 9.2 Arabic, Additional Vowel Marks.
            // U+F77 and U+F79 deprecated; see Unicode 13.0.0, sec. 13.4 Tibetan, Vowels.
            // U+17A3 and U+17A4 deprecated, and U+17B4 and U+17B5 discouraged; see
            // Unicode 13.0.0, sec. 16.4 Khmer, Characters Whose Use Is Discouraged.
            '\u{149}' | '\u{673}' | '\u{f77}' | '\u{f79}' | '\u{17a3}' | '\u{17a4}'
            | '\u{17b4}' | '\u{17b5}' => {
                return Err((
                    Error::DiscouragedUnicodeCodepoint(ch),
                    SourceSpan::new(offset.into(), ch.len_utf8()),
                ));
            }

            // Disallow control codes other than the ones explicitly recognized above,
            // so that viewing a WAC file on a terminal doesn't have surprising side
            // effects or appear to have a different meaning than its actual meaning.
            ch if ch.is_control() => {
                return Err((
                    Error::DisallowedControlCode(ch),
                    SourceSpan::new(offset.into(), ch.len_utf8()),
                ));
            }

            _ => {}
        }
    }

    Ok(())
}

/// Represents a WAC token.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq)]
#[logos(error = Error)]
#[logos(skip r"[ \t\r\n\f]+")]
#[logos(subpattern word = r"[a-z][a-z0-9]*|[A-Z][A-Z0-9]*")]
#[logos(subpattern id = r"%?(?&word)(-(?&word))*")]
#[logos(subpattern package_name = r"(?&id)(:(?&id))+")]
#[logos(subpattern semver = r"([0-9]+)(\.[0-9a-zA-Z-\+]+)*")]
pub enum Token {
    /// A comment.
    #[regex(r"//[^\n]*", logos::skip)]
    Comment,

    /// A block comment.
    #[token(r"/*", helpers::skip_block_comment)]
    BlockComment,

    /// An identifier.
    #[regex(r"(?&id)")]
    Ident,

    /// A string literal.
    #[token("\"", helpers::string)]
    String,

    /// A package name with an optional semantic version.
    #[regex(r"(?&package_name)(@(?&semver))?")]
    PackageName,

    /// A package path with an optional semantic version.
    #[regex(r"(?&package_name)(/(?&id))+(@(?&semver))?")]
    PackagePath,

    /// The `import` keyword.
    #[token("import")]
    ImportKeyword,
    /// The `with` keyword.
    #[token("with")]
    WithKeyword,
    /// The `type` keyword.
    #[token("type")]
    TypeKeyword,
    /// The `tuple` keyword.
    #[token("tuple")]
    TupleKeyword,
    /// The `list` keyword.
    #[token("list")]
    ListKeyword,
    /// The `option` keyword.
    #[token("option")]
    OptionKeyword,
    /// The `result` keyword.
    #[token("result")]
    ResultKeyword,
    /// The `borrow` keyword.
    #[token("borrow")]
    BorrowKeyword,
    /// The `resource` keyword.
    #[token("resource")]
    ResourceKeyword,
    /// The `variant` keyword.
    #[token("variant")]
    VariantKeyword,
    /// The `record` keyword.
    #[token("record")]
    RecordKeyword,
    /// The `flags` keyword.
    #[token("flags")]
    FlagsKeyword,
    /// The `enum` keyword.
    #[token("enum")]
    EnumKeyword,
    /// The `func` keyword.
    #[token("func")]
    FuncKeyword,
    /// The `static` keyword.
    #[token("static")]
    StaticKeyword,
    /// The `constructor` keyword.
    #[token("constructor")]
    ConstructorKeyword,
    /// The `u8` keyword.
    #[token("u8")]
    U8Keyword,
    /// The `s8` keyword.
    #[token("s8")]
    S8Keyword,
    /// The `u16` keyword.
    #[token("u16")]
    U16Keyword,
    /// The `s16` keyword.
    #[token("s16")]
    S16Keyword,
    /// The `u32` keyword.
    #[token("u32")]
    U32Keyword,
    /// The `s32` keyword.
    #[token("s32")]
    S32Keyword,
    /// The `u64` keyword.
    #[token("u64")]
    U64Keyword,
    /// The `s64` keyword.
    #[token("s64")]
    S64Keyword,
    /// The `f32` keyword.
    #[token("f32")]
    F32Keyword,
    /// The `f64` keyword.
    #[token("f64")]
    F64Keyword,
    /// The `char` keyword.
    #[token("char")]
    CharKeyword,
    /// The `bool` keyword.
    #[token("bool")]
    BoolKeyword,
    /// The `string` keyword.
    #[token("string")]
    StringKeyword,
    /// The `interface` keyword.
    #[token("interface")]
    InterfaceKeyword,
    /// The `world` keyword.
    #[token("world")]
    WorldKeyword,
    /// The `export` keyword.
    #[token("export")]
    ExportKeyword,
    /// The `new` keyword.
    #[token("new")]
    NewKeyword,
    /// The `let` keyword.
    #[token("let")]
    LetKeyword,
    /// The `use` keyword.
    #[token("use")]
    UseKeyword,
    /// The `include` keyword.
    #[token("include")]
    IncludeKeyword,
    /// The `as` keyword.
    #[token("as")]
    AsKeyword,
    /// The `package` keyword.
    #[token("package")]
    PackageKeyword,
    /// The `targets` keyword.
    #[token("targets")]
    TargetsKeyword,

    /// The `;` symbol.
    #[token(";")]
    Semicolon,
    /// The `{` symbol.
    #[token("{")]
    OpenBrace,
    /// The `}` symbol.
    #[token("}")]
    CloseBrace,
    /// The `:` symbol.
    #[token(":")]
    Colon,
    /// The `=` symbol.
    #[token("=")]
    Equals,
    /// The `(` symbol.
    #[token("(")]
    OpenParen,
    /// The `)` symbol.
    #[token(")")]
    CloseParen,
    /// The `->` symbol.
    #[token("->")]
    Arrow,
    /// The `<` symbol.
    #[token("<")]
    OpenAngle,
    /// The `>` symbol.
    #[token(">")]
    CloseAngle,
    /// The `_` symbol.
    #[token("_")]
    Underscore,
    /// The `[` symbol.
    #[token("[")]
    OpenBracket,
    /// The `]` symbol.
    #[token("]")]
    CloseBracket,
    /// The `.` symbol.
    #[token(".")]
    Dot,
    /// The `...` symbol.
    #[token("...")]
    Ellipsis,
    /// The `,` symbol.
    #[token(",")]
    Comma,
    /// The `/` symbol.
    #[token("/")]
    Slash,
    /// The `@` symbol.
    #[token("@")]
    At,
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Comment | Token::BlockComment => write!(f, "comment"),
            Token::Ident => write!(f, "identifier"),
            Token::String => write!(f, "string literal"),
            Token::PackageName => write!(f, "package name"),
            Token::PackagePath => write!(f, "package path"),
            Token::ImportKeyword => write!(f, "`import` keyword"),
            Token::WithKeyword => write!(f, "`with` keyword"),
            Token::TypeKeyword => write!(f, "`type` keyword"),
            Token::TupleKeyword => write!(f, "`tuple` keyword"),
            Token::ListKeyword => write!(f, "`list` keyword"),
            Token::OptionKeyword => write!(f, "`option` keyword"),
            Token::ResultKeyword => write!(f, "`result` keyword"),
            Token::BorrowKeyword => write!(f, "`borrow` keyword"),
            Token::ResourceKeyword => write!(f, "`resource` keyword"),
            Token::VariantKeyword => write!(f, "`variant` keyword"),
            Token::RecordKeyword => write!(f, "`record` keyword"),
            Token::FlagsKeyword => write!(f, "`flags` keyword"),
            Token::EnumKeyword => write!(f, "`enum` keyword"),
            Token::FuncKeyword => write!(f, "`func` keyword"),
            Token::StaticKeyword => write!(f, "`static` keyword"),
            Token::ConstructorKeyword => write!(f, "`constructor` keyword"),
            Token::U8Keyword => write!(f, "`u8` keyword"),
            Token::S8Keyword => write!(f, "`s8` keyword"),
            Token::U16Keyword => write!(f, "`u16` keyword"),
            Token::S16Keyword => write!(f, "`s16` keyword"),
            Token::U32Keyword => write!(f, "`u32` keyword"),
            Token::S32Keyword => write!(f, "`s32` keyword"),
            Token::U64Keyword => write!(f, "`u64` keyword"),
            Token::S64Keyword => write!(f, "`s64` keyword"),
            Token::F32Keyword => write!(f, "`f32` keyword"),
            Token::F64Keyword => write!(f, "`f64` keyword"),
            Token::CharKeyword => write!(f, "`char` keyword"),
            Token::BoolKeyword => write!(f, "`bool` keyword"),
            Token::StringKeyword => write!(f, "`string` keyword"),
            Token::InterfaceKeyword => write!(f, "`interface` keyword"),
            Token::WorldKeyword => write!(f, "`world` keyword"),
            Token::ExportKeyword => write!(f, "`export` keyword"),
            Token::NewKeyword => write!(f, "`new` keyword"),
            Token::LetKeyword => write!(f, "`let` keyword"),
            Token::UseKeyword => write!(f, "`use` keyword"),
            Token::IncludeKeyword => write!(f, "`include` keyword"),
            Token::AsKeyword => write!(f, "`as` keyword"),
            Token::PackageKeyword => write!(f, "`package` keyword"),
            Token::TargetsKeyword => write!(f, "`targets` keyword"),
            Token::Semicolon => write!(f, "`;`"),
            Token::OpenBrace => write!(f, "`{{`"),
            Token::CloseBrace => write!(f, "`}}`"),
            Token::Colon => write!(f, "`:`"),
            Token::Equals => write!(f, "`=`"),
            Token::OpenParen => write!(f, "`(`"),
            Token::CloseParen => write!(f, "`)`"),
            Token::Arrow => write!(f, "`->`"),
            Token::OpenAngle => write!(f, "`<`"),
            Token::CloseAngle => write!(f, "`>`"),
            Token::Underscore => write!(f, "`_`"),
            Token::OpenBracket => write!(f, "`[`"),
            Token::CloseBracket => write!(f, "`]`"),
            Token::Dot => write!(f, "`.`"),
            Token::Ellipsis => write!(f, "`...`"),
            Token::Comma => write!(f, "`,`"),
            Token::Slash => write!(f, "`/`"),
            Token::At => write!(f, "`@`"),
        }
    }
}

mod helpers {
    use super::{Error, Token};
    use logos::{FilterResult, Logos};

    /// Represents a WAC comment token.
    #[derive(Logos, Debug, Clone, Copy, PartialEq, Eq)]
    #[logos(error = Error)]
    #[logos(skip r"[ \t\n\f]+")]
    pub enum CommentToken<'a> {
        /// A comment.
        #[regex(r"//[^\n]*")]
        Comment(&'a str),

        /// A block comment.
        #[token(r"/*", block_comment)]
        BlockComment(&'a str),
    }

    /// Callback that lexes the remainder of a string literal; the opening
    /// quote has already been consumed.
    pub fn string(lex: &mut logos::Lexer<Token>) -> Result<(), Error> {
        let remainder = lex.remainder();
        let len = remainder.find('"').ok_or(Error::UnterminatedString)?;
        lex.bump(len + 1 /* closing quote */);
        Ok(())
    }

    /// Calculates the full length of a block comment (including both delimiters)
    /// given the bytes following the opening `/*`.
    ///
    /// Returns `None` if the comment is unterminated.
    pub fn block_comment_length(bytes: &[u8]) -> Option<usize> {
        let mut iter = bytes.iter().copied().peekable();
        let mut depth = 1;
        let mut len = 0;
        while depth > 0 {
            len += 1;
            match iter.next()? {
                b'/' if iter.peek() == Some(&b'*') => {
                    depth += 1;
                    len += 1;
                    iter.next();
                }
                b'*' if iter.peek() == Some(&b'/') => {
                    depth -= 1;
                    len += 1;
                    iter.next();
                }
                _ => {}
            }
        }

        Some(len + 2 /* opening tokens */)
    }

    /// Callback that lexes a block comment token and returns its full text.
    pub fn block_comment<'a>(
        lex: &mut logos::Lexer<'a, CommentToken<'a>>,
    ) -> Result<&'a str, Error> {
        let span = lex.span();
        match block_comment_length(lex.remainder().as_bytes()) {
            Some(len) => {
                let s = &lex.source()[span.start..span.start + len];
                lex.bump(len - 2 /* opening tokens */);
                Ok(s)
            }
            None => {
                lex.bump(lex.remainder().len());
                Err(Error::UnterminatedComment)
            }
        }
    }

    /// Callback that skips over a block comment token.
    pub fn skip_block_comment(lex: &mut logos::Lexer<Token>) -> FilterResult<(), Error> {
        match block_comment_length(lex.remainder().as_bytes()) {
            Some(len) => {
                lex.bump(len - 2 /* opening tokens */);
                FilterResult::Skip
            }
            None => {
                lex.bump(lex.remainder().len());
                FilterResult::Error(Error::UnterminatedComment)
            }
        }
    }
}

/// The result type for the lexer.
pub type LexerResult<T> = Result<T, Error>;

/// Implements a WAC lexer.
pub struct Lexer<'a>(SpannedIter<'a, Token>);

impl<'a> Lexer<'a> {
    /// Creates a new lexer for the given source string.
    pub fn new(source: &'a str) -> Result<Self, (Error, SourceSpan)> {
        detect_invalid_input(source)?;
        Ok(Self(Token::lexer(source).spanned()))
    }

    /// Gets the source string of the given span.
    pub fn source(&self, span: SourceSpan) -> &'a str {
        &self.0.source()[span.offset()..span.offset() + span.len()]
    }

    /// Gets the current span of the lexer.
    pub fn span(&self) -> SourceSpan {
        let mut span = self.0.span();
        if span.end == self.0.source().len() {
            // Currently miette silently fails to display a label
            // if the span is at the end of the source; this means
            // we can't properly show the "end of input" span.
            // For now, have the span point at the last byte in the source.
            // See: https://github.com/zkat/miette/issues/219
            span.start = span.start.saturating_sub(1);
            span.end = span.start + 1;
        }

        to_source_span(span)
    }

    /// Peeks at the next token.
    pub fn peek(&self) -> Option<(LexerResult<Token>, SourceSpan)> {
        let mut lexer = self.0.clone();
        lexer.next().map(|(r, s)| (r, to_source_span(s)))
    }

    /// Peeks at the token after the next token.
    pub fn peek2(&self) -> Option<(LexerResult<Token>, SourceSpan)> {
        let mut lexer = self.0.clone();
        lexer.next();
        lexer.next().map(|(r, s)| (r, to_source_span(s)))
    }

    /// Consumes available documentation comment tokens.
    pub fn comments(&self) -> Result<Vec<(&'a str, SourceSpan)>, (Error, SourceSpan)> {
        let mut comments = Vec::new();
        let mut lexer = (*self.0).clone().morph::<helpers::CommentToken>().spanned();
        while let Some((Ok(token), span)) = lexer.next() {
            match token {
                helpers::CommentToken::Comment(c) | helpers::CommentToken::BlockComment(c) => {
                    let c = if let Some(c) = c.strip_prefix("///") {
                        c.trim()
                    } else if let Some(c) = c.strip_prefix("/**") {
                        if c == "/" {
                            continue;
                        }
                        c.strip_suffix("*/").unwrap().trim()
                    } else {
                        continue;
                    };
                    comments.push((c, to_source_span(span)));
                }
            }
        }
        Ok(comments)
    }
}

impl Iterator for Lexer<'_> {
    type Item = (LexerResult<Token>, SourceSpan);

    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(|(r, s)| (r, to_source_span(s)))
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use logos::{Logos, Source};
    use std::{fmt, ops::Range};

    #[allow(clippy::type_complexity)]
    pub fn assert_lex<'a, Token>(
        source: &'a Token::Source,
        tokens: &[(
            Result<Token, Token::Error>,
            <Token::Source as Source>::Slice<'a>,
            Range<usize>,
        )],
    ) where
        Token: Logos<'a> + fmt::Debug + PartialEq,
        Token::Extras: Default,
    {
        let mut lex = Token::lexer(source);

        for tuple in tokens {
            assert_eq!(
                &(lex.next().expect("unexpected end"), lex.slice(), lex.span()),
                tuple
            );
        }

        assert_eq!(lex.next(), None, "tokens remain");
    }

    #[test]
    fn comments() {
        assert_lex::<Token>(
            r#"
            //
            // comment
            /**/
            /* a block comment */
            /* a multi
               line comment
             */
            /* a /* /* deeply */ nested */ block comment */
            "#,
            &[],
        );
    }
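
    // A hedged sketch, not part of the original test suite: it exercises the
    // public `Lexer::comments` helper on the assumption (suggested by the
    // implementation above) that it extracts the trimmed text of `///` line
    // comments and `/** ... */` block comments.
    #[test]
    fn doc_comments() {
        let lexer = Lexer::new("/// a line doc comment\n/** a block doc comment */")
            .expect("input should be valid");
        let comments: Vec<_> = lexer
            .comments()
            .expect("expected doc comments to lex")
            .into_iter()
            .map(|(text, _)| text)
            .collect();
        assert_eq!(comments, ["a line doc comment", "a block doc comment"]);
    }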

    #[test]
    fn unterminated_comment() {
        let source = r#"/* /* unterminated */"#;

        assert_lex::<Token>(
            source,
            &[(
                Err(Error::UnterminatedComment),
                "/* /* unterminated */",
                0..21,
            )],
        );
    }
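
    // A hedged sketch, not part of the original test suite: the string callback
    // above reports `Error::UnterminatedString` when no closing quote follows
    // the opening quote.
    #[test]
    fn unterminated_string() {
        let mut lexer = Lexer::new(r#""unterminated"#).expect("input should be valid");
        let (result, _) = lexer.next().expect("expected at least one token");
        assert_eq!(result, Err(Error::UnterminatedString));
    }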

    #[test]
    fn ident() {
        assert_lex(
            r#"
            foo
            foo123
            f-b
            foo-bar123
            foo0123-bar0123-baz0123
            %interface
            foo123-BAR
            "#,
            &[
                (Ok(Token::Ident), "foo", 13..16),
                (Ok(Token::Ident), "foo123", 29..35),
                (Ok(Token::Ident), "f-b", 48..51),
                (Ok(Token::Ident), "foo-bar123", 64..74),
                (Ok(Token::Ident), "foo0123-bar0123-baz0123", 87..110),
                (Ok(Token::Ident), "%interface", 123..133),
                (Ok(Token::Ident), "foo123-BAR", 146..156),
            ],
        );
    }

    #[test]
    fn string() {
        assert_lex(
            r#"
            ""
            "foo"
            "foo  bar"
            "foo
            bar"
            "#,
            &[
                (Ok(Token::String), "\"\"", 13..15),
                (Ok(Token::String), "\"foo\"", 28..33),
                (Ok(Token::String), "\"foo  bar\"", 46..56),
                (Ok(Token::String), "\"foo\n            bar\"", 69..90),
            ],
        );
    }
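
    // A hedged sketch, not part of the original test suite: `peek` and `peek2`
    // look one and two tokens ahead without advancing the lexer.
    #[test]
    fn peeking() {
        let mut lexer = Lexer::new("package foo:bar;").expect("input should be valid");
        assert_eq!(
            lexer.peek().map(|(r, _)| r),
            Some(Ok(Token::PackageKeyword))
        );
        assert_eq!(lexer.peek2().map(|(r, _)| r), Some(Ok(Token::PackageName)));
        // Peeking must not consume the first token.
        assert_eq!(lexer.next().map(|(r, _)| r), Some(Ok(Token::PackageKeyword)));
    }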

    #[test]
    fn package_path() {
        assert_lex(
            r#"
foo:bar/baz/qux/jam
foo:bar:baz:qux/jam
foo:bar/baz@0.0.4
foo:bar/baz@1.2.3
foo:bar/baz@10.20.30
foo:bar/baz@1.1.2-prerelease+meta
foo:bar/baz@1.1.2+meta
foo:bar/baz@1.1.2+meta-valid
foo:bar/baz@1.0.0-alpha
foo:bar/baz@1.0.0-beta
foo:bar/baz@1.0.0-alpha.beta
foo:bar/baz@1.0.0-alpha.beta.1
foo:bar/baz@1.0.0-alpha.1
foo:bar/baz@1.0.0-alpha0.valid
foo:bar/baz@1.0.0-alpha.0valid
foo:bar/baz@1.0.0-alpha-a.b-c-somethinglong+build.1-aef.1-its-okay
foo:bar/baz@1.0.0-rc.1+build.1
foo:bar/baz@2.0.0-rc.1+build.123
foo:bar/baz@1.2.3-beta
foo:bar/baz@10.2.3-DEV-SNAPSHOT
foo:bar/baz@1.2.3-SNAPSHOT-123
foo:bar/baz@1.0.0
foo:bar/baz@2.0.0
foo:bar/baz@1.1.7
foo:bar/baz@2.0.0+build.1848
foo:bar/baz@2.0.1-alpha.1227
foo:bar/baz@1.0.0-alpha+beta
foo:bar/baz@1.2.3----RC-SNAPSHOT.12.9.1--.12+788
foo:bar/baz@1.2.3----R-S.12.9.1--.12+meta
foo:bar/baz@1.2.3----RC-SNAPSHOT.12.9.1--.12
foo:bar/baz@1.0.0+0.build.1-rc.10000aaa-kk-0.1
foo:bar/baz@99999999999999999999999.999999999999999999.99999999999999999
foo:bar/baz@1.0.0-0A.is.legal
"#,
            &[
                (Ok(Token::PackagePath), "foo:bar/baz/qux/jam", 1..20),
                (Ok(Token::PackagePath), "foo:bar:baz:qux/jam", 21..40),
                (Ok(Token::PackagePath), "foo:bar/baz@0.0.4", 41..58),
                (Ok(Token::PackagePath), "foo:bar/baz@1.2.3", 59..76),
                (Ok(Token::PackagePath), "foo:bar/baz@10.20.30", 77..97),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.1.2-prerelease+meta",
                    98..131,
                ),
                (Ok(Token::PackagePath), "foo:bar/baz@1.1.2+meta", 132..154),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.1.2+meta-valid",
                    155..183,
                ),
                (Ok(Token::PackagePath), "foo:bar/baz@1.0.0-alpha", 184..207),
                (Ok(Token::PackagePath), "foo:bar/baz@1.0.0-beta", 208..230),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-alpha.beta",
                    231..259,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-alpha.beta.1",
                    260..290,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-alpha.1",
                    291..316,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-alpha0.valid",
                    317..347,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-alpha.0valid",
                    348..378,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-alpha-a.b-c-somethinglong+build.1-aef.1-its-okay",
                    379..445,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-rc.1+build.1",
                    446..476,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@2.0.0-rc.1+build.123",
                    477..509,
                ),
                (Ok(Token::PackagePath), "foo:bar/baz@1.2.3-beta", 510..532),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@10.2.3-DEV-SNAPSHOT",
                    533..564,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.2.3-SNAPSHOT-123",
                    565..595,
                ),
                (Ok(Token::PackagePath), "foo:bar/baz@1.0.0", 596..613),
                (Ok(Token::PackagePath), "foo:bar/baz@2.0.0", 614..631),
                (Ok(Token::PackagePath), "foo:bar/baz@1.1.7", 632..649),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@2.0.0+build.1848",
                    650..678,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@2.0.1-alpha.1227",
                    679..707,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-alpha+beta",
                    708..736,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.2.3----RC-SNAPSHOT.12.9.1--.12+788",
                    737..785,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.2.3----R-S.12.9.1--.12+meta",
                    786..827,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.2.3----RC-SNAPSHOT.12.9.1--.12",
                    828..872,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0+0.build.1-rc.10000aaa-kk-0.1",
                    873..919,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@99999999999999999999999.999999999999999999.99999999999999999",
                    920..992,
                ),
                (
                    Ok(Token::PackagePath),
                    "foo:bar/baz@1.0.0-0A.is.legal",
                    993..1022,
                ),
            ],
        );
    }

    #[test]
    fn keywords() {
        assert_lex(
            r#"
import
with
type
tuple
list
option
result
borrow
resource
variant
record
flags
enum
func
static
constructor
u8
s8
u16
s16
u32
s32
u64
s64
f32
f64
char
bool
string
interface
world
export
new
let
use
include
as
package
targets
            "#,
            &[
                (Ok(Token::ImportKeyword), "import", 1..7),
                (Ok(Token::WithKeyword), "with", 8..12),
                (Ok(Token::TypeKeyword), "type", 13..17),
                (Ok(Token::TupleKeyword), "tuple", 18..23),
                (Ok(Token::ListKeyword), "list", 24..28),
                (Ok(Token::OptionKeyword), "option", 29..35),
                (Ok(Token::ResultKeyword), "result", 36..42),
                (Ok(Token::BorrowKeyword), "borrow", 43..49),
                (Ok(Token::ResourceKeyword), "resource", 50..58),
                (Ok(Token::VariantKeyword), "variant", 59..66),
                (Ok(Token::RecordKeyword), "record", 67..73),
                (Ok(Token::FlagsKeyword), "flags", 74..79),
                (Ok(Token::EnumKeyword), "enum", 80..84),
                (Ok(Token::FuncKeyword), "func", 85..89),
                (Ok(Token::StaticKeyword), "static", 90..96),
                (Ok(Token::ConstructorKeyword), "constructor", 97..108),
                (Ok(Token::U8Keyword), "u8", 109..111),
                (Ok(Token::S8Keyword), "s8", 112..114),
                (Ok(Token::U16Keyword), "u16", 115..118),
                (Ok(Token::S16Keyword), "s16", 119..122),
                (Ok(Token::U32Keyword), "u32", 123..126),
                (Ok(Token::S32Keyword), "s32", 127..130),
                (Ok(Token::U64Keyword), "u64", 131..134),
                (Ok(Token::S64Keyword), "s64", 135..138),
                (Ok(Token::F32Keyword), "f32", 139..142),
                (Ok(Token::F64Keyword), "f64", 143..146),
                (Ok(Token::CharKeyword), "char", 147..151),
                (Ok(Token::BoolKeyword), "bool", 152..156),
                (Ok(Token::StringKeyword), "string", 157..163),
                (Ok(Token::InterfaceKeyword), "interface", 164..173),
                (Ok(Token::WorldKeyword), "world", 174..179),
                (Ok(Token::ExportKeyword), "export", 180..186),
                (Ok(Token::NewKeyword), "new", 187..190),
                (Ok(Token::LetKeyword), "let", 191..194),
                (Ok(Token::UseKeyword), "use", 195..198),
                (Ok(Token::IncludeKeyword), "include", 199..206),
                (Ok(Token::AsKeyword), "as", 207..209),
                (Ok(Token::PackageKeyword), "package", 210..217),
                (Ok(Token::TargetsKeyword), "targets", 218..225),
            ],
        );
    }
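
    // A hedged sketch, not part of the original test suite: `Lexer::new` rejects
    // a bidirectional override codepoint up front (see `detect_invalid_input`
    // above) and reports the span of the offending character.
    #[test]
    fn rejects_bidirectional_override() {
        let (error, span) = Lexer::new("import \u{202e}")
            .map(|_| ())
            .expect_err("expected the input to be rejected");
        assert_eq!(error, Error::DisallowedBidirectionalOverride('\u{202e}'));
        assert_eq!(span.offset(), 7);
        assert_eq!(span.len(), 3);
    }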

    #[test]
    fn symbols() {
        assert_lex(
            r#";{}:=()-><>_[]. ...,/@"#,
            &[
                (Ok(Token::Semicolon), ";", 0..1),
                (Ok(Token::OpenBrace), "{", 1..2),
                (Ok(Token::CloseBrace), "}", 2..3),
                (Ok(Token::Colon), ":", 3..4),
                (Ok(Token::Equals), "=", 4..5),
                (Ok(Token::OpenParen), "(", 5..6),
                (Ok(Token::CloseParen), ")", 6..7),
                (Ok(Token::Arrow), "->", 7..9),
                (Ok(Token::OpenAngle), "<", 9..10),
                (Ok(Token::CloseAngle), ">", 10..11),
                (Ok(Token::Underscore), "_", 11..12),
                (Ok(Token::OpenBracket), "[", 12..13),
                (Ok(Token::CloseBracket), "]", 13..14),
                (Ok(Token::Dot), ".", 14..15),
                (Ok(Token::Ellipsis), "...", 16..19),
                (Ok(Token::Comma), ",", 19..20),
                (Ok(Token::Slash), "/", 20..21),
                (Ok(Token::At), "@", 21..22),
            ],
        );
    }
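
    // A hedged end-to-end sketch, not part of the original test suite: it drives
    // the public `Lexer` iterator directly instead of using `assert_lex`, over a
    // small WAC-like snippet chosen here only for illustration.
    #[test]
    fn lexer_iteration() {
        let lexer = Lexer::new("let x = new foo:bar { ... };").expect("input should be valid");
        let tokens: Vec<_> = lexer
            .map(|(result, _)| result.expect("expected a valid token"))
            .collect();
        assert_eq!(
            tokens,
            [
                Token::LetKeyword,
                Token::Ident,
                Token::Equals,
                Token::NewKeyword,
                Token::PackageName,
                Token::OpenBrace,
                Token::Ellipsis,
                Token::CloseBrace,
                Token::Semicolon,
            ]
        );
    }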
}